@yasserkhanorg/e2e-agents 1.2.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/feedback.d.ts +20 -0
- package/dist/agent/feedback.d.ts.map +1 -1
- package/dist/agent/feedback.js +4 -0
- package/dist/esm/agent/feedback.js +3 -0
- package/dist/esm/index.js +1 -1
- package/dist/esm/qa-agent/cli.js +205 -0
- package/dist/esm/qa-agent/orchestrator.js +120 -0
- package/dist/esm/qa-agent/phase1/runner.js +139 -0
- package/dist/esm/qa-agent/phase1/scope.js +126 -0
- package/dist/esm/qa-agent/phase2/agent_browser.js +95 -0
- package/dist/esm/qa-agent/phase2/agent_loop.js +315 -0
- package/dist/esm/qa-agent/phase2/exploration_state.js +76 -0
- package/dist/esm/qa-agent/phase2/tools.js +288 -0
- package/dist/esm/qa-agent/phase2/vision.js +75 -0
- package/dist/esm/qa-agent/phase3/feedback.js +34 -0
- package/dist/esm/qa-agent/phase3/reporter.js +118 -0
- package/dist/esm/qa-agent/phase3/spec_generator.js +62 -0
- package/dist/esm/qa-agent/phase3/verdict.js +66 -0
- package/dist/esm/qa-agent/safe_env.js +23 -0
- package/dist/esm/qa-agent/types.js +3 -0
- package/dist/index.d.ts +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/qa-agent/cli.d.ts +3 -0
- package/dist/qa-agent/cli.d.ts.map +1 -0
- package/dist/qa-agent/cli.js +207 -0
- package/dist/qa-agent/orchestrator.d.ts +3 -0
- package/dist/qa-agent/orchestrator.d.ts.map +1 -0
- package/dist/qa-agent/orchestrator.js +123 -0
- package/dist/qa-agent/phase1/runner.d.ts +3 -0
- package/dist/qa-agent/phase1/runner.d.ts.map +1 -0
- package/dist/qa-agent/phase1/runner.js +142 -0
- package/dist/qa-agent/phase1/scope.d.ts +6 -0
- package/dist/qa-agent/phase1/scope.d.ts.map +1 -0
- package/dist/qa-agent/phase1/scope.js +129 -0
- package/dist/qa-agent/phase2/agent_browser.d.ts +35 -0
- package/dist/qa-agent/phase2/agent_browser.d.ts.map +1 -0
- package/dist/qa-agent/phase2/agent_browser.js +99 -0
- package/dist/qa-agent/phase2/agent_loop.d.ts +3 -0
- package/dist/qa-agent/phase2/agent_loop.d.ts.map +1 -0
- package/dist/qa-agent/phase2/agent_loop.js +321 -0
- package/dist/qa-agent/phase2/exploration_state.d.ts +12 -0
- package/dist/qa-agent/phase2/exploration_state.d.ts.map +1 -0
- package/dist/qa-agent/phase2/exploration_state.js +88 -0
- package/dist/qa-agent/phase2/tools.d.ts +28 -0
- package/dist/qa-agent/phase2/tools.d.ts.map +1 -0
- package/dist/qa-agent/phase2/tools.js +292 -0
- package/dist/qa-agent/phase2/vision.d.ts +3 -0
- package/dist/qa-agent/phase2/vision.d.ts.map +1 -0
- package/dist/qa-agent/phase2/vision.js +78 -0
- package/dist/qa-agent/phase3/feedback.d.ts +3 -0
- package/dist/qa-agent/phase3/feedback.d.ts.map +1 -0
- package/dist/qa-agent/phase3/feedback.js +37 -0
- package/dist/qa-agent/phase3/reporter.d.ts +3 -0
- package/dist/qa-agent/phase3/reporter.d.ts.map +1 -0
- package/dist/qa-agent/phase3/reporter.js +121 -0
- package/dist/qa-agent/phase3/spec_generator.d.ts +3 -0
- package/dist/qa-agent/phase3/spec_generator.d.ts.map +1 -0
- package/dist/qa-agent/phase3/spec_generator.js +65 -0
- package/dist/qa-agent/phase3/verdict.d.ts +3 -0
- package/dist/qa-agent/phase3/verdict.d.ts.map +1 -0
- package/dist/qa-agent/phase3/verdict.js +69 -0
- package/dist/qa-agent/safe_env.d.ts +3 -0
- package/dist/qa-agent/safe_env.d.ts.map +1 -0
- package/dist/qa-agent/safe_env.js +26 -0
- package/dist/qa-agent/types.d.ts +122 -0
- package/dist/qa-agent/types.d.ts.map +1 -0
- package/dist/qa-agent/types.js +4 -0
- package/package.json +12 -3
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
3
|
+
// See LICENSE.txt for license information.
|
|
4
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
|
+
exports.createExplorationState = createExplorationState;
|
|
6
|
+
exports.recordAction = recordAction;
|
|
7
|
+
exports.recordFinding = recordFinding;
|
|
8
|
+
exports.markFlowExplored = markFlowExplored;
|
|
9
|
+
exports.nextFlow = nextFlow;
|
|
10
|
+
exports.isStuck = isStuck;
|
|
11
|
+
exports.isBudgetExhausted = isBudgetExhausted;
|
|
12
|
+
exports.allFlowsExplored = allFlowsExplored;
|
|
13
|
+
exports.updateCost = updateCost;
|
|
14
|
+
exports.compressActionsLog = compressActionsLog;
|
|
15
|
+
const RECENT_WINDOW = 10;
|
|
16
|
+
const STUCK_THRESHOLD = 3;
|
|
17
|
+
/**
 * Build the bookkeeping record for a fresh exploration run.
 *
 * @param {Array<{id: string}>} flows - Flows queued for exploration; copied so the caller's array is never mutated.
 * @param {number} timeLimitMs - Wall-clock allowance, measured from the moment this function is called.
 * @param {number} budgetUSD - Spend ceiling compared against the accumulated `costUSD`.
 * @returns {object} A new state object with empty logs, zeroed counters and `startTime` set to now.
 */
function createExplorationState(flows, timeLimitMs, budgetUSD) {
    const pendingFlows = [...flows];
    const startedAt = Date.now();
    // Assembled from themed groups; spreading keeps the original key order.
    const progress = { flowsToExplore: pendingFlows, flowsExplored: [], currentFlow: null };
    const history = { findings: [], actionsLog: [], recentActions: [] };
    const spend = { tokensUsed: 0, costUSD: 0 };
    const limits = { startTime: startedAt, timeLimitMs, budgetUSD };
    return { ...progress, ...history, ...spend, ...limits };
}
|
|
32
|
+
/**
 * Append an action to the full log and to the sliding window of recent actions.
 *
 * The window is capped at RECENT_WINDOW entries; the oldest entry is dropped
 * once the cap is exceeded.
 *
 * @param {object} state - Exploration state; `actionsLog` and `recentActions` are mutated.
 * @param {object} action - The action to record.
 * @returns {void}
 */
function recordAction(state, action) {
    const { actionsLog, recentActions } = state;
    actionsLog.push(action);
    recentActions.push(action);
    const windowOverflowed = recentActions.length > RECENT_WINDOW;
    if (windowOverflowed) {
        recentActions.shift();
    }
}
|
|
39
|
+
/**
 * Add a finding to the state's list of findings.
 *
 * @param {object} state - Exploration state; `findings` is mutated.
 * @param {object} finding - The finding to store.
 * @returns {void}
 */
function recordFinding(state, finding) {
    const { findings } = state;
    findings.push(finding);
}
|
|
42
|
+
/**
 * Move a flow from the pending queue to the explored list.
 *
 * The flow id is recorded once in `flowsExplored`, every queued flow with that
 * id is removed from `flowsToExplore` (a new array is assigned), and
 * `currentFlow` is reset to null.
 *
 * @param {object} state - Exploration state to mutate.
 * @param {string} flowId - Id of the flow that has been explored.
 * @returns {void}
 */
function markFlowExplored(state, flowId) {
    const alreadyRecorded = state.flowsExplored.includes(flowId);
    if (!alreadyRecorded) {
        state.flowsExplored.push(flowId);
    }
    const stillPending = [];
    for (const flow of state.flowsToExplore) {
        if (flow.id !== flowId) {
            stillPending.push(flow);
        }
    }
    state.flowsToExplore = stillPending;
    state.currentFlow = null;
}
|
|
49
|
+
/**
 * Select the flow at the head of the pending queue as the current flow.
 *
 * The flow is not removed from the queue here; only `currentFlow` is updated.
 *
 * @param {object} state - Exploration state; `currentFlow` is set to the chosen flow's id.
 * @returns {object|null} The first pending flow, or null when the queue is empty.
 */
function nextFlow(state) {
    const queue = state.flowsToExplore;
    if (queue.length !== 0) {
        const upcoming = queue[0];
        state.currentFlow = upcoming.id;
        return upcoming;
    }
    return null;
}
|
|
56
|
+
/**
 * Report whether the agent keeps repeating the same action.
 *
 * The last STUCK_THRESHOLD recent actions are reduced to a
 * `type:target:value` signature (missing target/value become empty strings);
 * the agent counts as stuck when all of those signatures are equal.
 *
 * @param {object} state - Exploration state; only `recentActions` is read.
 * @returns {boolean} True when the trailing actions are all identical, false
 *   otherwise or when fewer than STUCK_THRESHOLD actions have been recorded.
 */
function isStuck(state) {
    const history = state.recentActions;
    if (history.length < STUCK_THRESHOLD) {
        return false;
    }
    const toSignature = (entry) => `${entry.type}:${entry.target || ''}:${entry.value || ''}`;
    const [reference, ...others] = history.slice(-STUCK_THRESHOLD).map(toSignature);
    return others.every((candidate) => candidate === reference);
}
|
|
63
|
+
/**
 * Report whether the run has hit either of its limits.
 *
 * @param {object} state - Exploration state; reads `costUSD`, `budgetUSD`,
 *   `startTime` and `timeLimitMs`.
 * @returns {boolean} True when accumulated cost has reached the budget or the
 *   elapsed time since `startTime` has reached the time limit.
 */
function isBudgetExhausted(state) {
    const overBudget = state.costUSD >= state.budgetUSD;
    if (overBudget) {
        return true;
    }
    // The clock is only consulted when the money budget still has headroom.
    const elapsedMs = Date.now() - state.startTime;
    return elapsedMs >= state.timeLimitMs;
}
|
|
70
|
+
/**
 * Report whether the pending-flow queue has been drained.
 *
 * @param {object} state - Exploration state; only `flowsToExplore` is read.
 * @returns {boolean} True when no flows remain to explore.
 */
function allFlowsExplored(state) {
    const { length: remaining } = state.flowsToExplore;
    return remaining === 0;
}
|
|
73
|
+
/**
 * Add one LLM call's usage to the running totals.
 *
 * @param {object} state - Exploration state; `tokensUsed` and `costUSD` are incremented.
 * @param {number} inputTokens - Tokens sent in the call.
 * @param {number} outputTokens - Tokens received from the call.
 * @param {number} cost - Cost of the call, added to `costUSD`.
 * @returns {void}
 */
function updateCost(state, inputTokens, outputTokens, cost) {
    const callTokens = inputTokens + outputTokens;
    state.tokensUsed = state.tokensUsed + callTokens;
    state.costUSD = state.costUSD + cost;
}
|
|
77
|
+
/**
 * Shrink a long actions log to one summary marker plus the 10 newest actions.
 *
 * Nothing happens until the log holds more than 20 entries. When it does,
 * everything except the last 10 entries is replaced by a single
 * `{type: 'compressed'}` marker whose `value` carries the number of actions it
 * stands for followed by `summaryText`.
 *
 * If the discarded part already contains a marker from an earlier compression,
 * the count recorded in that marker is carried forward, so the reported total
 * reflects every action compressed so far rather than counting the old marker
 * as a single action.
 *
 * @param {object} state - Exploration state; `actionsLog` is replaced with a new array.
 * @param {string} summaryText - Summary of the actions being discarded.
 * @returns {void}
 */
function compressActionsLog(state, summaryText) {
    const KEEP_RECENT = 10;
    const COMPRESS_ABOVE = 20;
    if (state.actionsLog.length <= COMPRESS_ABOVE) {
        return;
    }
    const cutIndex = state.actionsLog.length - KEEP_RECENT;
    const discarded = state.actionsLog.slice(0, cutIndex);
    const recent = state.actionsLog.slice(cutIndex);
    // An earlier marker stands for many actions; read its count back out of
    // the text this function wrote so the running total stays accurate.
    const actionsRepresented = (entry) => {
        if (!entry || entry.type !== 'compressed') {
            return 1;
        }
        const previous = /^\[Compressed (\d+) earlier actions\]/.exec(String(entry.value ?? ''));
        return previous ? Number(previous[1]) : 1;
    };
    const compressedCount = discarded.reduce((total, entry) => total + actionsRepresented(entry), 0);
    const compressed = {
        type: 'compressed',
        value: `[Compressed ${compressedCount} earlier actions] ${summaryText}`,
        timestamp: Date.now(),
    };
    state.actionsLog = [compressed, ...recent];
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import type Anthropic from '@anthropic-ai/sdk';
|
|
2
|
+
import type { AgentBrowser } from './agent_browser.js';
|
|
3
|
+
import type { Finding } from '../types.js';
|
|
4
|
+
export declare const TOOL_DEFINITIONS: Anthropic.Tool[];
|
|
5
|
+
/**
 * Mutable per-run context handed to `executeTool`.
 *
 * The tool implementations both read from and write to this object (for
 * example `currentUrl` and `screenshotCounter` are updated in place).
 */
export interface ToolContext {
    /** Browser driver the tools delegate to (open, click, fill, snapshot, ...). */
    browser: AgentBrowser;
    /** Base URL of the app under test; relative paths are prefixed with it and navigation is limited to its origin. */
    baseUrl: string;
    /** Directory prefix used when building screenshot file paths. */
    screenshotDir: string;
    /** Running screenshot number; incremented before each screenshot and used in the filename. */
    screenshotCounter: number;
    /** Last URL reported by the browser; attached to findings as evidence. */
    currentUrl: string;
    /** Flow currently being explored; attached to findings. */
    currentFlow: string;
    /** Accounts available to the `switch_user` tool, looked up by `role`. */
    users?: {
        role: string;
        username: string;
        password: string;
    }[];
}
|
|
18
|
+
/** Outcome of a single `executeTool` call. */
export interface ToolResult {
    /** Text describing what happened (or why the call was rejected). */
    output: string;
    /** Present when the `report_finding` tool recorded a finding. */
    finding?: Finding;
    /** Present when the `mark_flow_done` tool accepted a flow sign-off. */
    flowDone?: { flowId: string; status: 'verified-ok' | 'has-issues' };
    /** Set to true by the `navigate` tool after it opens a URL. */
    navigated?: boolean;
}
|
|
27
|
+
/**
 * Run one tool call requested by the model against the browser context.
 *
 * @param ctx - Per-run context; some tools update it in place (e.g. `currentUrl`, `screenshotCounter`).
 * @param name - Tool name as listed in `TOOL_DEFINITIONS`; an unrecognised name yields an "Unknown tool" output.
 * @param input - Raw tool arguments supplied by the model.
 * @returns The textual outcome plus optional `finding`, `flowDone` or `navigated` details.
 */
export declare function executeTool(ctx: ToolContext, name: string, input: Record<string, unknown>): ToolResult;
|
|
28
|
+
//# sourceMappingURL=tools.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tools.d.ts","sourceRoot":"","sources":["../../../src/qa-agent/phase2/tools.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,SAAS,MAAM,mBAAmB,CAAC;AAE/C,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,oBAAoB,CAAC;AACrD,OAAO,KAAK,EAAC,OAAO,EAA+B,MAAM,aAAa,CAAC;AAMvE,eAAO,MAAM,gBAAgB,EAAE,SAAS,CAAC,IAAI,EAkI5C,CAAC;AAMF,MAAM,WAAW,WAAW;IACxB,OAAO,EAAE,YAAY,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,EAAE,MAAM,CAAC;IACtB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,KAAK,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAC,CAAC,CAAC;CACrE;AAED,MAAM,WAAW,UAAU;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,QAAQ,CAAC,EAAE;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,aAAa,GAAG,YAAY,CAAA;KAAC,CAAC;IAClE,SAAS,CAAC,EAAE,OAAO,CAAC;CACvB;AAED,wBAAgB,WAAW,CACvB,GAAG,EAAE,WAAW,EAChB,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,UAAU,CAqIZ"}
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
3
|
+
// See LICENSE.txt for license information.
|
|
4
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
|
+
exports.TOOL_DEFINITIONS = void 0;
|
|
6
|
+
exports.executeTool = executeTool;
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
// Tool definitions (Anthropic tool_use schema)
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
exports.TOOL_DEFINITIONS = [
|
|
11
|
+
{
|
|
12
|
+
name: 'navigate',
|
|
13
|
+
description: 'Navigate to a URL. Use absolute paths starting with / or full URLs.',
|
|
14
|
+
input_schema: {
|
|
15
|
+
type: 'object',
|
|
16
|
+
properties: {
|
|
17
|
+
url: { type: 'string', description: 'URL or path to navigate to' },
|
|
18
|
+
},
|
|
19
|
+
required: ['url'],
|
|
20
|
+
},
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
name: 'click',
|
|
24
|
+
description: 'Click an element by its accessibility ref (e.g. @e4).',
|
|
25
|
+
input_schema: {
|
|
26
|
+
type: 'object',
|
|
27
|
+
properties: {
|
|
28
|
+
ref: { type: 'string', description: 'Accessibility ref like @e4' },
|
|
29
|
+
},
|
|
30
|
+
required: ['ref'],
|
|
31
|
+
},
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
name: 'fill',
|
|
35
|
+
description: 'Clear a field and type new text into it.',
|
|
36
|
+
input_schema: {
|
|
37
|
+
type: 'object',
|
|
38
|
+
properties: {
|
|
39
|
+
ref: { type: 'string', description: 'Accessibility ref of the input field' },
|
|
40
|
+
value: { type: 'string', description: 'Text to type' },
|
|
41
|
+
},
|
|
42
|
+
required: ['ref', 'value'],
|
|
43
|
+
},
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
name: 'press_key',
|
|
47
|
+
description: 'Press a keyboard key (e.g. Enter, Escape, Tab).',
|
|
48
|
+
input_schema: {
|
|
49
|
+
type: 'object',
|
|
50
|
+
properties: {
|
|
51
|
+
key: { type: 'string', description: 'Key name (Enter, Escape, Tab, etc.)' },
|
|
52
|
+
},
|
|
53
|
+
required: ['key'],
|
|
54
|
+
},
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
name: 'scroll',
|
|
58
|
+
description: 'Scroll the page or a specific element up or down.',
|
|
59
|
+
input_schema: {
|
|
60
|
+
type: 'object',
|
|
61
|
+
properties: {
|
|
62
|
+
direction: { type: 'string', enum: ['up', 'down'] },
|
|
63
|
+
ref: { type: 'string', description: 'Optional element ref to scroll within' },
|
|
64
|
+
},
|
|
65
|
+
required: ['direction'],
|
|
66
|
+
},
|
|
67
|
+
},
|
|
68
|
+
{
|
|
69
|
+
name: 'go_back',
|
|
70
|
+
description: 'Go back to the previous page.',
|
|
71
|
+
input_schema: {
|
|
72
|
+
type: 'object',
|
|
73
|
+
properties: {},
|
|
74
|
+
required: [],
|
|
75
|
+
},
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
name: 'take_screenshot',
|
|
79
|
+
description: 'Take an annotated screenshot for evidence or vision analysis. Use sparingly (costs tokens).',
|
|
80
|
+
input_schema: {
|
|
81
|
+
type: 'object',
|
|
82
|
+
properties: {
|
|
83
|
+
label: { type: 'string', description: 'Short label for the screenshot (used in filename)' },
|
|
84
|
+
},
|
|
85
|
+
required: ['label'],
|
|
86
|
+
},
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
name: 'get_text',
|
|
90
|
+
description: 'Read the text content of a specific element.',
|
|
91
|
+
input_schema: {
|
|
92
|
+
type: 'object',
|
|
93
|
+
properties: {
|
|
94
|
+
ref: { type: 'string', description: 'Accessibility ref' },
|
|
95
|
+
},
|
|
96
|
+
required: ['ref'],
|
|
97
|
+
},
|
|
98
|
+
},
|
|
99
|
+
{
|
|
100
|
+
name: 'report_finding',
|
|
101
|
+
description: 'Report a bug, visual issue, UX problem, or gap you discovered. Always include current URL and repro steps.',
|
|
102
|
+
input_schema: {
|
|
103
|
+
type: 'object',
|
|
104
|
+
properties: {
|
|
105
|
+
type: { type: 'string', enum: ['bug', 'visual-regression', 'ux-issue', 'gap'] },
|
|
106
|
+
severity: { type: 'string', enum: ['critical', 'high', 'medium', 'low', 'info'] },
|
|
107
|
+
summary: { type: 'string', description: 'What you found' },
|
|
108
|
+
repro_steps: {
|
|
109
|
+
type: 'array',
|
|
110
|
+
items: { type: 'string' },
|
|
111
|
+
description: 'Steps to reproduce',
|
|
112
|
+
},
|
|
113
|
+
},
|
|
114
|
+
required: ['type', 'severity', 'summary', 'repro_steps'],
|
|
115
|
+
},
|
|
116
|
+
},
|
|
117
|
+
{
|
|
118
|
+
name: 'mark_flow_done',
|
|
119
|
+
description: 'Mark the current flow as verified/explored. Call when you are done testing a flow.',
|
|
120
|
+
input_schema: {
|
|
121
|
+
type: 'object',
|
|
122
|
+
properties: {
|
|
123
|
+
flow_id: { type: 'string', description: 'ID of the flow being marked done' },
|
|
124
|
+
status: { type: 'string', enum: ['verified-ok', 'has-issues'] },
|
|
125
|
+
},
|
|
126
|
+
required: ['flow_id', 'status'],
|
|
127
|
+
},
|
|
128
|
+
},
|
|
129
|
+
{
|
|
130
|
+
name: 'switch_user',
|
|
131
|
+
description: 'Log out and log in as a different user role.',
|
|
132
|
+
input_schema: {
|
|
133
|
+
type: 'object',
|
|
134
|
+
properties: {
|
|
135
|
+
role: { type: 'string', description: 'Role of the user to switch to (e.g. admin, regular, guest)' },
|
|
136
|
+
},
|
|
137
|
+
required: ['role'],
|
|
138
|
+
},
|
|
139
|
+
},
|
|
140
|
+
];
|
|
141
|
+
/**
 * Run one tool call requested by the model.
 *
 * Invalid arguments and unknown tool names are reported through the returned
 * `output` text instead of being thrown, so the model can correct itself.
 * Errors raised by the browser driver are not caught here, except for the
 * best-effort logout performed by `switch_user`.
 *
 * @param {object} ctx - Per-run tool context. `currentUrl` is refreshed after
 *   navigate / go_back / switch_user and `screenshotCounter` is incremented by
 *   take_screenshot.
 * @param {string} name - Tool name, one of the names in TOOL_DEFINITIONS.
 * @param {Record<string, unknown>} input - Raw tool arguments from the model.
 * @returns {{output: string, finding?: object, flowDone?: object, navigated?: boolean}}
 *   The textual outcome plus any structured side result.
 */
function executeTool(ctx, name, input) {
    switch (name) {
        case 'navigate': {
            const url = String(input.url || '');
            const fullUrl = url.startsWith('http') ? url : `${ctx.baseUrl}${url}`;
            // Security: restrict navigation to the configured baseUrl domain
            if (!isAllowedUrl(fullUrl, ctx.baseUrl)) {
                return { output: `Blocked: navigation to "${fullUrl}" is outside the allowed domain (${ctx.baseUrl}).` };
            }
            const output = ctx.browser.open(fullUrl);
            ctx.currentUrl = ctx.browser.getUrl();
            return { output: output || `Navigated to ${ctx.currentUrl}`, navigated: true };
        }
        case 'click': {
            const output = ctx.browser.click(String(input.ref));
            return { output: output || `Clicked ${input.ref}` };
        }
        case 'fill': {
            const ref = String(input.ref);
            const value = String(input.value);
            const output = ctx.browser.fill(ref, value);
            // Redact credentials so they are not echoed back into LLM context.
            // Refs normally look like "@e4" and say nothing about the field, so
            // besides the ref-name heuristic, any value equal to a configured
            // user password is treated as sensitive.
            const isKnownPassword = value !== '' && (ctx.users || []).some((u) => u.password === value);
            const isSensitive = isKnownPassword || /password|passwd|pwd|secret|token/i.test(ref);
            if (!isSensitive) {
                return { output: output || `Filled ${ref} with "${value}"` };
            }
            // Browser output may repeat the typed text; scrub it as well.
            const scrubbed = typeof output === 'string' && value !== ''
                ? output.split(value).join('[REDACTED]')
                : output;
            return { output: scrubbed || `Filled ${ref} with [REDACTED]` };
        }
        case 'press_key': {
            const output = ctx.browser.press(String(input.key));
            return { output: output || `Pressed ${input.key}` };
        }
        case 'scroll': {
            const rawDir = String(input.direction);
            if (rawDir !== 'up' && rawDir !== 'down') {
                return { output: `Invalid scroll direction "${rawDir}". Must be "up" or "down".` };
            }
            const ref = input.ref ? String(input.ref) : undefined;
            const output = ctx.browser.scroll(rawDir, ref);
            return { output: output || `Scrolled ${rawDir}` };
        }
        case 'go_back': {
            const output = ctx.browser.back();
            ctx.currentUrl = ctx.browser.getUrl();
            return { output: output || `Went back to ${ctx.currentUrl}` };
        }
        case 'take_screenshot': {
            ctx.screenshotCounter++;
            // Keep only filename-safe characters from the model-supplied label.
            const label = String(input.label || 'evidence').replace(/[^a-zA-Z0-9_-]/g, '_');
            const filename = `${String(ctx.screenshotCounter).padStart(3, '0')}-${label}.png`;
            const path = `${ctx.screenshotDir}/${filename}`;
            ctx.browser.screenshot(path);
            return { output: `Screenshot saved: ${path}` };
        }
        case 'get_text': {
            const text = ctx.browser.getText(String(input.ref));
            return { output: text || '(empty)' };
        }
        case 'report_finding': {
            const VALID_TYPES = new Set(['bug', 'visual-regression', 'ux-issue', 'gap']);
            const VALID_SEVERITIES = new Set(['critical', 'high', 'medium', 'low', 'info']);
            const rawType = String(input.type);
            const rawSeverity = String(input.severity);
            if (!VALID_TYPES.has(rawType)) {
                return { output: `Invalid finding type "${rawType}". Must be one of: ${[...VALID_TYPES].join(', ')}.` };
            }
            if (!VALID_SEVERITIES.has(rawSeverity)) {
                return { output: `Invalid severity "${rawSeverity}". Must be one of: ${[...VALID_SEVERITIES].join(', ')}.` };
            }
            if (!Array.isArray(input.repro_steps)) {
                return { output: `Invalid repro_steps: expected an array of strings.` };
            }
            const finding = {
                id: `f-${crypto.randomUUID()}`,
                type: rawType,
                severity: rawSeverity,
                summary: String(input.summary),
                flow: ctx.currentFlow,
                evidence: {
                    url: ctx.currentUrl,
                    reproSteps: input.repro_steps.map(String),
                },
                timestamp: Date.now(),
            };
            return { output: `Finding recorded: [${finding.severity}] ${finding.summary}`, finding };
        }
        case 'mark_flow_done': {
            const flowId = String(input.flow_id);
            const rawStatus = String(input.status);
            if (rawStatus !== 'verified-ok' && rawStatus !== 'has-issues') {
                return { output: `Invalid status "${rawStatus}". Must be "verified-ok" or "has-issues".` };
            }
            return {
                output: `Flow "${flowId}" marked as ${rawStatus}`,
                flowDone: { flowId, status: rawStatus },
            };
        }
        case 'switch_user': {
            const role = String(input.role);
            const user = ctx.users?.find((u) => u.role === role);
            if (!user) {
                return { output: `No user configured for role "${role}". Available: ${(ctx.users || []).map((u) => u.role).join(', ')}` };
            }
            // Log out first, then log in as new user
            try {
                ctx.browser.open(`${ctx.baseUrl}/logout`);
            }
            catch {
                // May not be logged in
            }
            ctx.browser.open(`${ctx.baseUrl}/login`);
            // Use snapshot to find login fields, then fill; fall back to the
            // first two refs when the snapshot gives no hint.
            const snap = ctx.browser.snapshot();
            const emailRef = extractRef(snap, 'email') || extractRef(snap, 'username') || '@e1';
            const passRef = extractRef(snap, 'password') || '@e2';
            ctx.browser.fill(emailRef, user.username);
            ctx.browser.fill(passRef, user.password);
            ctx.browser.press('Enter');
            ctx.currentUrl = ctx.browser.getUrl();
            // Redact credentials from LLM context — only expose role
            return { output: `Switched to user role: ${user.role}` };
        }
        default:
            return { output: `Unknown tool: ${name}` };
    }
}
|
|
265
|
+
/**
 * Decide whether navigation to `url` stays within the app under test.
 *
 * A URL is accepted only when the text before its first ':' is empty, "http"
 * or "https" (case-insensitive) and its origin equals the origin of
 * `baseUrl`. If either URL cannot be parsed, the decision falls back to a
 * plain prefix check against `baseUrl`.
 *
 * @param {string} url - Fully resolved target URL.
 * @param {string} baseUrl - Configured base URL of the app under test.
 * @returns {boolean} True when navigation is permitted.
 */
function isAllowedUrl(url, baseUrl) {
    // Block dangerous schemes (javascript:, file:, data:, ...).
    const colonAt = url.indexOf(':');
    const scheme = (colonAt === -1 ? url : url.slice(0, colonAt)).toLowerCase();
    const schemeAccepted = scheme === '' || scheme === 'http' || scheme === 'https';
    if (!schemeAccepted) {
        return false;
    }
    let permitted;
    try {
        // Same origin means same scheme, hostname and port.
        permitted = new URL(url).origin === new URL(baseUrl).origin;
    }
    catch {
        // Unparseable input: only accept text that is literally prefixed with baseUrl.
        permitted = url.startsWith(baseUrl);
    }
    return permitted;
}
|
|
281
|
+
/**
 * Find the accessibility ref (`@eN`) of the first snapshot line that mentions a hint.
 *
 * Matching is case-insensitive on both sides: the snapshot line and the hint
 * are lower-cased before comparison, so a hint such as "Password" still
 * matches. Lines that contain the hint but no ref are skipped.
 *
 * @param {string} snapshot - Text snapshot of the page, one element per line.
 *   A null/undefined snapshot is treated as empty.
 * @param {string} fieldHint - Text to look for on a line (e.g. "email").
 * @returns {string|undefined} The first matching ref such as "@e3", or
 *   undefined when no line both mentions the hint and carries a ref.
 */
function extractRef(snapshot, fieldHint) {
    const needle = String(fieldHint).toLowerCase();
    const lines = String(snapshot ?? '').split('\n');
    for (const line of lines) {
        if (!line.toLowerCase().includes(needle)) {
            continue;
        }
        const match = line.match(/@e\d+/);
        if (match) {
            return match[0];
        }
    }
    return undefined;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vision.d.ts","sourceRoot":"","sources":["../../../src/qa-agent/phase2/vision.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAC,OAAO,EAA+B,MAAM,aAAa,CAAC;AA2BvE,wBAAsB,iBAAiB,CACnC,cAAc,EAAE,MAAM,EACtB,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,MAAM,GACb,OAAO,CAAC,OAAO,EAAE,CAAC,CAoBpB"}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
3
|
+
// See LICENSE.txt for license information.
|
|
4
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
|
+
exports.analyzeScreenshot = analyzeScreenshot;
|
|
6
|
+
const fs_1 = require("fs");
|
|
7
|
+
const provider_factory_js_1 = require("../../provider_factory.js");
|
|
8
|
+
const VALID_TYPES = new Set(['bug', 'visual-regression', 'ux-issue', 'gap']);
|
|
9
|
+
const VALID_SEVERITIES = new Set(['critical', 'high', 'medium', 'low', 'info']);
|
|
10
|
+
const VISION_PROMPT = `You are a QA engineer analyzing a screenshot of a web application.
|
|
11
|
+
Look for these categories of issues:
|
|
12
|
+
|
|
13
|
+
1. **Layout issues**: overlapping elements, misaligned content, broken grid, elements outside viewport
|
|
14
|
+
2. **Visual issues**: truncated text, missing icons/images, broken styling, inconsistent spacing
|
|
15
|
+
3. **UX issues**: unclear button labels, confusing navigation, missing feedback states, poor contrast
|
|
16
|
+
4. **State issues**: loading spinners stuck, empty states without messaging, stale data indicators
|
|
17
|
+
5. **Error states**: visible error messages, 404/500 pages, broken components
|
|
18
|
+
|
|
19
|
+
For each issue found, respond with a JSON array of objects:
|
|
20
|
+
[
|
|
21
|
+
{
|
|
22
|
+
"type": "bug" | "visual-regression" | "ux-issue",
|
|
23
|
+
"severity": "critical" | "high" | "medium" | "low" | "info",
|
|
24
|
+
"summary": "description of the issue"
|
|
25
|
+
}
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
If no issues are found, respond with an empty array: []
|
|
29
|
+
|
|
30
|
+
Only report clear, actionable issues. Do not speculate about functionality you cannot see.`;
|
|
31
|
+
/**
 * Ask a vision-capable LLM provider to look for issues in a screenshot.
 *
 * Returns an empty list without calling the model when the provider created
 * from the environment lacks vision support or when the screenshot file
 * cannot be read. Errors from the provider call itself are not caught.
 *
 * @param {string} screenshotPath - Path of the PNG screenshot to analyse.
 * @param {string} url - URL the screenshot was taken at; passed through to the parsed findings.
 * @param {string} flow - Flow being explored; passed through to the parsed findings.
 * @returns {Promise<object[]>} Findings parsed from the model's response.
 */
async function analyzeScreenshot(screenshotPath, url, flow) {
    const provider = await provider_factory_js_1.LLMProviderFactory.createFromEnv();
    const visionAvailable = Boolean(provider.capabilities.vision) && Boolean(provider.analyzeImage);
    if (!visionAvailable) {
        return [];
    }
    let encodedImage;
    try {
        encodedImage = fs_1.readFileSync(screenshotPath).toString('base64');
    }
    catch {
        // Unreadable screenshot: nothing to analyse.
        return [];
    }
    const images = [{ base64: encodedImage, mediaType: 'image/png' }];
    const requestOptions = { maxTokens: 2000, temperature: 0.1 };
    const { text } = await provider.analyzeImage(images, VISION_PROMPT, requestOptions);
    return parseVisionResponse(text, url, flow, screenshotPath);
}
|
|
46
|
+
function parseVisionResponse(text, url, flow, screenshotPath) {
|
|
47
|
+
// Extract JSON array from response
|
|
48
|
+
const jsonMatch = text.match(/\[[\s\S]*\]/);
|
|
49
|
+
if (!jsonMatch)
|
|
50
|
+
return [];
|
|
51
|
+
try {
|
|
52
|
+
const items = JSON.parse(jsonMatch[0]);
|
|
53
|
+
if (!Array.isArray(items))
|
|
54
|
+
return [];
|
|
55
|
+
return items
|
|
56
|
+
.filter((item) => {
|
|
57
|
+
const t = String(item.type || '');
|
|
58
|
+
const s = String(item.severity || '');
|
|
59
|
+
return VALID_TYPES.has(t) && VALID_SEVERITIES.has(s);
|
|
60
|
+
})
|
|
61
|
+
.map((item) => ({
|
|
62
|
+
id: `v-${Date.now()}-${crypto.randomUUID().slice(0, 8)}`,
|
|
63
|
+
type: String(item.type),
|
|
64
|
+
severity: String(item.severity),
|
|
65
|
+
summary: String(item.summary || 'Visual issue detected'),
|
|
66
|
+
flow,
|
|
67
|
+
evidence: {
|
|
68
|
+
url,
|
|
69
|
+
screenshotPath,
|
|
70
|
+
reproSteps: ['Captured via automated vision analysis'],
|
|
71
|
+
},
|
|
72
|
+
timestamp: Date.now(),
|
|
73
|
+
}));
|
|
74
|
+
}
|
|
75
|
+
catch {
|
|
76
|
+
return [];
|
|
77
|
+
}
|
|
78
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../../src/qa-agent/phase3/feedback.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAC,QAAQ,EAAC,MAAM,aAAa,CAAC;AAE1C,wBAAgB,cAAc,CAAC,MAAM,EAAE,QAAQ,GAAG,IAAI,CA8BrD"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
3
|
+
// See LICENSE.txt for license information.
|
|
4
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
|
+
exports.submitFeedback = submitFeedback;
|
|
6
|
+
const child_process_1 = require("child_process");
|
|
7
|
+
const logger_js_1 = require("../../logger.js");
|
|
8
|
+
const safe_env_js_1 = require("../safe_env.js");
|
|
9
|
+
/**
 * Run `npx e2e-ai-agents feedback` to hand results to the calibration system.
 *
 * The command runs synchronously with a 30 second timeout, in
 * `config.testsRoot` when set (also passed as `--tests-root`) or otherwise in
 * the current working directory, with the environment returned by `safeEnv()`.
 * Failures are logged as warnings and never thrown.
 *
 * @param {object} config - QA configuration; only `testsRoot` is read.
 * @returns {void}
 */
function submitFeedback(config) {
    const rootArgs = config.testsRoot ? ['--tests-root', config.testsRoot] : [];
    const args = ['e2e-ai-agents', 'feedback', ...rootArgs];
    logger_js_1.logger.info('Submitting feedback to calibration system');
    const spawnOptions = {
        cwd: config.testsRoot || process.cwd(),
        encoding: 'utf-8',
        timeout: 30000,
        env: safe_env_js_1.safeEnv(),
    };
    const { error, signal, status, stderr } = child_process_1.spawnSync('npx', args, spawnOptions);
    if (error) {
        // The process could not be started or was aborted (e.g. timeout).
        logger_js_1.logger.warn('Feedback submission spawn failed', { error: error.message });
        return;
    }
    if (signal) {
        logger_js_1.logger.warn('Feedback submission killed by signal', { signal });
        return;
    }
    if (status !== 0) {
        // Keep the log entry short: only the first 200 characters of stderr.
        logger_js_1.logger.warn('Feedback submission failed', {
            status,
            stderr: (stderr || '').slice(0, 200),
        });
    }
}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { Phase1Result, Phase2Result, Phase3Result, QAConfig, ReleaseVerdict } from '../types.js';
|
|
2
|
+
/**
 * Write the QA run's JSON report and Markdown summary to disk.
 *
 * @param config - Run configuration; `outputDir` selects the target directory.
 * @param phase1 - Phase 1 results embedded in the report.
 * @param phase2 - Phase 2 results embedded in the report.
 * @param verdict - Release verdict embedded in the report and returned.
 * @param generatedSpecs - Paths of generated specs listed in the report.
 * @returns The report and summary paths together with the verdict and generated specs.
 */
export declare function generateReport(config: QAConfig, phase1: Phase1Result, phase2: Phase2Result, verdict: ReleaseVerdict, generatedSpecs: string[]): Phase3Result;
|
|
3
|
+
//# sourceMappingURL=reporter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reporter.d.ts","sourceRoot":"","sources":["../../../src/qa-agent/phase3/reporter.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAC,YAAY,EAAE,YAAY,EAAE,YAAY,EAAE,QAAQ,EAAY,cAAc,EAAC,MAAM,aAAa,CAAC;AAE9G,wBAAgB,cAAc,CAC1B,MAAM,EAAE,QAAQ,EAChB,MAAM,EAAE,YAAY,EACpB,MAAM,EAAE,YAAY,EACpB,OAAO,EAAE,cAAc,EACvB,cAAc,EAAE,MAAM,EAAE,GACzB,YAAY,CA8Cd"}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
3
|
+
// See LICENSE.txt for license information.
|
|
4
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
|
+
exports.generateReport = generateReport;
|
|
6
|
+
const fs_1 = require("fs");
|
|
7
|
+
const path_1 = require("path");
|
|
8
|
+
/**
 * Write the QA run's JSON report and Markdown summary to disk.
 *
 * Files are written to `config.outputDir` (default `.e2e-ai-agents`, created
 * if missing) as `qa-report.json` and `qa-summary.md`.
 *
 * @param {object} config - Run configuration; reads `outputDir`, `mode`,
 *   `baseUrl`, `timeLimitMinutes` and `budgetUSD`.
 * @param {object} phase1 - Phase 1 results, embedded as-is.
 * @param {object} phase2 - Phase 2 results, embedded as-is.
 * @param {object} verdict - Release verdict, embedded and returned.
 * @param {string[]} generatedSpecs - Paths of generated spec files.
 * @returns {{reportPath: string, summaryPath: string, verdict: object, generatedSpecs: string[]}}
 * @throws {Error} When serialising or writing either file fails; the
 *   underlying error is attached as `cause` so its code and stack survive.
 */
function generateReport(config, phase1, phase2, verdict, generatedSpecs) {
    const outputDir = config.outputDir || '.e2e-ai-agents';
    (0, fs_1.mkdirSync)(outputDir, { recursive: true });
    const reportPath = (0, path_1.join)(outputDir, 'qa-report.json');
    const summaryPath = (0, path_1.join)(outputDir, 'qa-summary.md');
    const report = {
        schemaVersion: '1.0.0',
        generatedAt: new Date().toISOString(),
        mode: config.mode,
        config: {
            baseUrl: config.baseUrl,
            timeLimitMinutes: config.timeLimitMinutes,
            budgetUSD: config.budgetUSD,
        },
        phase1,
        phase2,
        phase3: {
            reportPath,
            summaryPath,
            verdict,
            generatedSpecs,
        },
        verdict,
    };
    try {
        (0, fs_1.writeFileSync)(reportPath, JSON.stringify(report, null, 2), 'utf-8');
    }
    catch (err) {
        const detail = err instanceof Error ? err.message : String(err);
        throw new Error(`Failed to write report to ${reportPath}: ${detail}`, { cause: err });
    }
    const markdown = renderMarkdown(report);
    try {
        (0, fs_1.writeFileSync)(summaryPath, markdown, 'utf-8');
    }
    catch (err) {
        const detail = err instanceof Error ? err.message : String(err);
        throw new Error(`Failed to write summary to ${summaryPath}: ${detail}`, { cause: err });
    }
    return {
        reportPath,
        summaryPath,
        verdict,
        generatedSpecs,
    };
}
|
|
52
|
+
/**
 * Render the human-readable Markdown summary for a QA report.
 *
 * Sections, in order: title with verdict badge, verdict details with a
 * severity table, Phase 1 and Phase 2 statistics, one subsection per finding
 * (only when findings exist), flow sign-off table, and generated specs (only
 * when there are any).
 *
 * Free text placed in structurally sensitive positions is normalised so it
 * cannot break the layout: table cells have `|` escaped and line breaks
 * flattened, and finding summaries used as headings are collapsed to one line.
 *
 * @param {object} report - Report object with `mode`, `generatedAt`, `config`,
 *   `verdict`, `phase1`, `phase2` and `phase3` as built by the report writer.
 * @returns {string} The Markdown document, lines joined with "\n".
 */
function renderMarkdown(report) {
    // A raw "|" would start a new column and a newline would end the row.
    const tableCell = (value) => String(value).replace(/\|/g, '\\|').replace(/\s*\r?\n\s*/g, ' ');
    // Headings end at the first newline, so keep them on one line.
    const singleLine = (value) => String(value).replace(/\s*\r?\n\s*/g, ' ');
    const v = report.verdict;
    const badge = v.decision === 'go' ? '✅ GO' : v.decision === 'no-go' ? '❌ NO-GO' : '⚠️ CONDITIONAL';
    const lines = [
        `# QA Agent Report — ${badge}`,
        '',
        `**Mode:** ${report.mode}`,
        `**Base URL:** ${report.config.baseUrl}`,
        `**Generated:** ${report.generatedAt}`,
        '',
        `## Verdict`,
        '',
        `**Decision:** ${badge}`,
        `**Reason:** ${v.reason}`,
        '',
        `| Severity | Count |`,
        `|----------|-------|`,
        `| Critical | ${v.criticalFindings} |`,
        `| High | ${v.highFindings} |`,
        `| Medium | ${v.mediumFindings} |`,
        `| Low | ${v.lowFindings} |`,
        '',
    ];
    // Phase 1 summary
    const specTotal = report.phase1.specResults.length;
    const specPassed = report.phase1.specResults.reduce((s, r) => s + r.passed, 0);
    const specFailed = report.phase1.specResults.reduce((s, r) => s + r.failed, 0);
    lines.push(`## Phase 1: Scripted Tests`, '', `- Flows identified: ${report.phase1.flows.length}`, `- Specs run: ${specTotal} (${specPassed} passed, ${specFailed} failed)`, '');
    // Phase 2 summary
    lines.push(`## Phase 2: Autonomous Exploration`, '', `- Flows explored: ${report.phase2.flowsExplored.length}`, `- Actions taken: ${report.phase2.actionsCount}`, `- Duration: ${Math.round(report.phase2.durationMs / 1000)}s`, `- Cost: $${report.phase2.costUSD.toFixed(4)}`, `- Tokens: ${report.phase2.tokensUsed}`, '');
    // Findings
    if (report.phase2.findings.length > 0) {
        lines.push(`## Findings`, '');
        for (const f of report.phase2.findings) {
            lines.push(`### [${f.severity.toUpperCase()}] ${singleLine(f.summary)}`);
            lines.push('');
            lines.push(`- **Type:** ${f.type}`);
            lines.push(`- **Flow:** ${f.flow}`);
            lines.push(`- **URL:** ${f.evidence.url}`);
            if (f.evidence.screenshotPath) {
                lines.push(`- **Screenshot:** ${f.evidence.screenshotPath}`);
            }
            if (f.evidence.reproSteps.length > 0) {
                lines.push('- **Repro steps:**');
                for (const step of f.evidence.reproSteps) {
                    // Two-space indent nests the list under the bullet above;
                    // every item is "1." and Markdown numbers them on render.
                    lines.push(`  1. ${step}`);
                }
            }
            lines.push('');
        }
    }
    // Flow sign-offs
    lines.push(`## Flow Sign-offs`, '');
    lines.push(`| Flow | Status | Findings |`);
    lines.push(`|------|--------|----------|`);
    for (const s of v.flowSignoffs) {
        const statusIcon = s.status === 'passed' ? '✅' : s.status === 'failed' ? '❌' : '⏭️';
        lines.push(`| ${tableCell(s.flowName)} | ${statusIcon} ${tableCell(s.status)} | ${s.findings.length} |`);
    }
    lines.push('');
    // Generated specs
    if (report.phase3.generatedSpecs.length > 0) {
        lines.push(`## Generated Specs`, '');
        for (const spec of report.phase3.generatedSpecs) {
            lines.push(`- ${spec}`);
        }
        lines.push('');
    }
    return lines.join('\n');
}
|