@yasserkhanorg/e2e-agents 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agentic/fix_loop.d.ts +26 -0
- package/dist/agentic/fix_loop.d.ts.map +1 -0
- package/dist/agentic/fix_loop.js +95 -0
- package/dist/agentic/playwright_runner.d.ts +43 -0
- package/dist/agentic/playwright_runner.d.ts.map +1 -0
- package/dist/agentic/playwright_runner.js +165 -0
- package/dist/agentic/runner.d.ts +25 -0
- package/dist/agentic/runner.d.ts.map +1 -0
- package/dist/agentic/runner.js +207 -0
- package/dist/agentic/types.d.ts +62 -0
- package/dist/agentic/types.d.ts.map +1 -0
- package/dist/agentic/types.js +4 -0
- package/dist/cli.js +130 -0
- package/dist/engine/plan_builder.d.ts.map +1 -1
- package/dist/engine/plan_builder.js +8 -7
- package/dist/esm/agentic/fix_loop.js +90 -0
- package/dist/esm/agentic/playwright_runner.js +161 -0
- package/dist/esm/agentic/runner.js +204 -0
- package/dist/esm/agentic/types.js +3 -0
- package/dist/esm/cli.js +131 -1
- package/dist/esm/engine/plan_builder.js +8 -7
- package/dist/esm/index.js +2 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -19,6 +19,9 @@ const traceability_capture_js_1 = require("./agent/traceability_capture.js");
|
|
|
19
19
|
const pipeline_js_1 = require("./agent/pipeline.js");
|
|
20
20
|
const playwright_report_js_1 = require("./agent/playwright_report.js");
|
|
21
21
|
const orchestrator_js_1 = require("./pipeline/orchestrator.js");
|
|
22
|
+
const provider_factory_js_1 = require("./provider_factory.js");
|
|
23
|
+
const runner_js_1 = require("./agentic/runner.js");
|
|
24
|
+
const api_surface_js_1 = require("./knowledge/api_surface.js");
|
|
22
25
|
const CONFIG_CANDIDATES = ['e2e-ai-agents.config.json', '.e2e-ai-agents.config.json'];
|
|
23
26
|
function findConfigUpwards(startDir) {
|
|
24
27
|
if (!startDir) {
|
|
@@ -69,6 +72,7 @@ function printUsage() {
|
|
|
69
72
|
' e2e-ai-agents feedback --path <app-root> --feedback-input <json>',
|
|
70
73
|
' e2e-ai-agents traceability-capture --path <app-root> --traceability-report <json>',
|
|
71
74
|
' e2e-ai-agents traceability-ingest --path <app-root> --traceability-input <json>',
|
|
75
|
+
' e2e-ai-agents generate [--scenarios <path|json>] [--max-attempts <n>] [--dry-run]',
|
|
72
76
|
' e2e-ai-agents analyze --path <app-root> [--tests-root <path>] [--since <ref>] [--generate] [--generate-output <dir>] [--heal] [--heal-report <json>]',
|
|
73
77
|
' e2e-ai-agents llm-health',
|
|
74
78
|
'',
|
|
@@ -131,6 +135,8 @@ function printUsage() {
|
|
|
131
135
|
' --pr-base <branch> PR base branch for finalize-generated-tests',
|
|
132
136
|
' (auto-heal-pr defaults to base=master)',
|
|
133
137
|
' --dry-run Preview actions without mutating git state',
|
|
138
|
+
' --max-attempts <n> Max fix attempts per scenario (default: 3)',
|
|
139
|
+
' --scenarios <path|json> Scenarios file/JSON for generate command',
|
|
134
140
|
' --apply Apply data-testid patches and generate tests',
|
|
135
141
|
' (legacy shortcut; prefer approve-and-generate)',
|
|
136
142
|
' --help Show help',
|
|
@@ -146,6 +152,7 @@ function parseArgs(argv) {
|
|
|
146
152
|
|| command === 'plan'
|
|
147
153
|
|| command === 'heal'
|
|
148
154
|
|| command === 'suggest'
|
|
155
|
+
|| command === 'generate'
|
|
149
156
|
|| command === 'finalize-generated-tests'
|
|
150
157
|
|| command === 'feedback'
|
|
151
158
|
|| command === 'traceability-capture'
|
|
@@ -161,6 +168,16 @@ function parseArgs(argv) {
|
|
|
161
168
|
parsed.help = true;
|
|
162
169
|
continue;
|
|
163
170
|
}
|
|
171
|
+
if (arg === '--max-attempts' && next) {
|
|
172
|
+
parsed.maxAttempts = parseInt(next, 10);
|
|
173
|
+
i += 1;
|
|
174
|
+
continue;
|
|
175
|
+
}
|
|
176
|
+
if (arg === '--scenarios' && next) {
|
|
177
|
+
parsed.generateScenarios = next;
|
|
178
|
+
i += 1;
|
|
179
|
+
continue;
|
|
180
|
+
}
|
|
164
181
|
if (arg === '--apply') {
|
|
165
182
|
parsed.apply = true;
|
|
166
183
|
continue;
|
|
@@ -981,6 +998,119 @@ async function main() {
|
|
|
981
998
|
}
|
|
982
999
|
return;
|
|
983
1000
|
}
|
|
1001
|
+
if (args.command === 'generate') {
|
|
1002
|
+
const reportRoot = config.testsRoot || config.path;
|
|
1003
|
+
// Load scenarios from --scenarios flag or plan-report.json
|
|
1004
|
+
let scenarios = [];
|
|
1005
|
+
if (args.generateScenarios) {
|
|
1006
|
+
let raw;
|
|
1007
|
+
if ((0, fs_1.existsSync)(args.generateScenarios)) {
|
|
1008
|
+
raw = JSON.parse((0, fs_1.readFileSync)(args.generateScenarios, 'utf-8'));
|
|
1009
|
+
}
|
|
1010
|
+
else {
|
|
1011
|
+
raw = JSON.parse(args.generateScenarios);
|
|
1012
|
+
}
|
|
1013
|
+
if (!Array.isArray(raw)) {
|
|
1014
|
+
// eslint-disable-next-line no-console
|
|
1015
|
+
console.error('--scenarios must be a JSON array of ScenarioInput objects.');
|
|
1016
|
+
process.exit(1);
|
|
1017
|
+
}
|
|
1018
|
+
for (const item of raw) {
|
|
1019
|
+
if (!item.id || !item.name || !Array.isArray(item.scenarios) || !item.routeFamily || !item.priority) {
|
|
1020
|
+
// eslint-disable-next-line no-console
|
|
1021
|
+
console.error(`Invalid scenario: each must have id, name, scenarios[], routeFamily, priority.`);
|
|
1022
|
+
process.exit(1);
|
|
1023
|
+
}
|
|
1024
|
+
}
|
|
1025
|
+
scenarios = raw;
|
|
1026
|
+
}
|
|
1027
|
+
else {
|
|
1028
|
+
const planReportPath = (0, path_1.join)(reportRoot, '.e2e-ai-agents', 'plan-report.json');
|
|
1029
|
+
if (!(0, fs_1.existsSync)(planReportPath)) {
|
|
1030
|
+
// eslint-disable-next-line no-console
|
|
1031
|
+
console.error('No plan report found. Run `plan` first or pass --scenarios.');
|
|
1032
|
+
process.exit(1);
|
|
1033
|
+
}
|
|
1034
|
+
const planReport = JSON.parse((0, fs_1.readFileSync)(planReportPath, 'utf-8'));
|
|
1035
|
+
scenarios = (planReport.gapDetails || []).map((gap) => ({
|
|
1036
|
+
id: gap.id,
|
|
1037
|
+
name: gap.id,
|
|
1038
|
+
scenarios: gap.missingScenarios || gap.reasons || ['Verify core user flow'],
|
|
1039
|
+
routeFamily: gap.id.split('.')[0] || gap.id,
|
|
1040
|
+
priority: 'P1',
|
|
1041
|
+
}));
|
|
1042
|
+
}
|
|
1043
|
+
if (scenarios.length === 0) {
|
|
1044
|
+
// eslint-disable-next-line no-console
|
|
1045
|
+
console.log('No scenarios to generate tests for.');
|
|
1046
|
+
return;
|
|
1047
|
+
}
|
|
1048
|
+
let apiSurface;
|
|
1049
|
+
try {
|
|
1050
|
+
apiSurface = (0, api_surface_js_1.loadOrBuildApiSurface)(reportRoot, config.apiSurface);
|
|
1051
|
+
}
|
|
1052
|
+
catch {
|
|
1053
|
+
// eslint-disable-next-line no-console
|
|
1054
|
+
console.warn('Could not load API surface catalog. Generation will use generic selectors.');
|
|
1055
|
+
}
|
|
1056
|
+
const provider = await provider_factory_js_1.LLMProviderFactory.createFromEnv();
|
|
1057
|
+
// eslint-disable-next-line no-console
|
|
1058
|
+
console.log(`Generating tests for ${scenarios.length} scenario(s)...`);
|
|
1059
|
+
const summary = await (0, runner_js_1.runAgenticGeneration)({
|
|
1060
|
+
scenarios,
|
|
1061
|
+
config: {
|
|
1062
|
+
maxAttempts: args.maxAttempts || 3,
|
|
1063
|
+
project: args.pipelineProject || 'chrome',
|
|
1064
|
+
baseUrl: args.pipelineBaseUrl,
|
|
1065
|
+
testTimeoutMs: 120000,
|
|
1066
|
+
testsRoot: reportRoot,
|
|
1067
|
+
dryRun: args.dryRun,
|
|
1068
|
+
},
|
|
1069
|
+
provider,
|
|
1070
|
+
apiSurface,
|
|
1071
|
+
});
|
|
1072
|
+
// eslint-disable-next-line no-console
|
|
1073
|
+
console.log(`\nAgentic Generation Summary:`);
|
|
1074
|
+
// eslint-disable-next-line no-console
|
|
1075
|
+
console.log(` Generated: ${summary.totalGenerated}`);
|
|
1076
|
+
// eslint-disable-next-line no-console
|
|
1077
|
+
console.log(` Passed: ${summary.totalPassed}`);
|
|
1078
|
+
// eslint-disable-next-line no-console
|
|
1079
|
+
console.log(` Failed: ${summary.totalFailed}`);
|
|
1080
|
+
// eslint-disable-next-line no-console
|
|
1081
|
+
console.log(` Attempts: ${summary.totalAttempts}`);
|
|
1082
|
+
// eslint-disable-next-line no-console
|
|
1083
|
+
console.log(` Duration: ${(summary.durationMs / 1000).toFixed(1)}s`);
|
|
1084
|
+
for (const result of summary.results) {
|
|
1085
|
+
const icon = result.status === 'passed' ? 'PASS' : result.status === 'skipped' ? 'SKIP' : 'FAIL';
|
|
1086
|
+
// eslint-disable-next-line no-console
|
|
1087
|
+
console.log(` [${icon}] ${result.scenarioSource} (${result.attempts} attempts)`);
|
|
1088
|
+
if (result.status === 'passed' || result.status === 'skipped') {
|
|
1089
|
+
// eslint-disable-next-line no-console
|
|
1090
|
+
console.log(` ${result.specPath}`);
|
|
1091
|
+
}
|
|
1092
|
+
}
|
|
1093
|
+
if (summary.warnings.length > 0) {
|
|
1094
|
+
// eslint-disable-next-line no-console
|
|
1095
|
+
console.log(`\nWarnings:`);
|
|
1096
|
+
for (const w of summary.warnings) {
|
|
1097
|
+
// eslint-disable-next-line no-console
|
|
1098
|
+
console.warn(` - ${w}`);
|
|
1099
|
+
}
|
|
1100
|
+
}
|
|
1101
|
+
const summaryDir = (0, path_1.join)(reportRoot, '.e2e-ai-agents');
|
|
1102
|
+
if (!(0, fs_1.existsSync)(summaryDir)) {
|
|
1103
|
+
(0, fs_1.mkdirSync)(summaryDir, { recursive: true });
|
|
1104
|
+
}
|
|
1105
|
+
const summaryPath = (0, path_1.join)(summaryDir, 'agentic-summary.json');
|
|
1106
|
+
(0, fs_1.writeFileSync)(summaryPath, JSON.stringify(summary, null, 2), 'utf-8');
|
|
1107
|
+
// eslint-disable-next-line no-console
|
|
1108
|
+
console.log(`\nReport: ${summaryPath}`);
|
|
1109
|
+
if (summary.totalFailed > 0) {
|
|
1110
|
+
process.exit(1);
|
|
1111
|
+
}
|
|
1112
|
+
return;
|
|
1113
|
+
}
|
|
984
1114
|
// eslint-disable-next-line no-console
|
|
985
1115
|
console.error(`Unknown command: ${args.command}`);
|
|
986
1116
|
printUsage();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"plan_builder.d.ts","sourceRoot":"","sources":["../../src/engine/plan_builder.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,oBAAoB,CAAC;AACrD,OAAO,KAAK,EAAC,YAAY,EAAkB,MAAM,oBAAoB,CAAC;AAEtE,OAAO,KAAK,EAAC,kBAAkB,EAAC,MAAM,oBAAoB,CAAC;AAG3D,OAAO,KAAK,EACR,UAAU,EACV,SAAS,EACT,kBAAkB,EAIrB,MAAM,kBAAkB,CAAC;AAE1B,YAAY,EAAC,UAAU,EAAE,SAAS,EAAE,kBAAkB,EAAC,CAAC;AAqOxD,wBAAgB,mBAAmB,CAC/B,MAAM,EAAE,YAAY,EACpB,cAAc,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,EACtC,YAAY,CAAC,EAAE,kBAAkB,GAClC,UAAU,CA0IZ;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,GAAG,MAAM,CAMzE;AAED,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,UAAU,GAAG,MAAM,
|
|
1
|
+
{"version":3,"file":"plan_builder.d.ts","sourceRoot":"","sources":["../../src/engine/plan_builder.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,oBAAoB,CAAC;AACrD,OAAO,KAAK,EAAC,YAAY,EAAkB,MAAM,oBAAoB,CAAC;AAEtE,OAAO,KAAK,EAAC,kBAAkB,EAAC,MAAM,oBAAoB,CAAC;AAG3D,OAAO,KAAK,EACR,UAAU,EACV,SAAS,EACT,kBAAkB,EAIrB,MAAM,kBAAkB,CAAC;AAE1B,YAAY,EAAC,UAAU,EAAE,SAAS,EAAE,kBAAkB,EAAC,CAAC;AAqOxD,wBAAgB,mBAAmB,CAC/B,MAAM,EAAE,YAAY,EACpB,cAAc,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,EACtC,YAAY,CAAC,EAAE,kBAAkB,GAClC,UAAU,CA0IZ;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,GAAG,MAAM,CAMzE;AAED,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,UAAU,GAAG,MAAM,CAwGhE;AAED,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,SAAiC,GAAG,MAAM,CAMvH"}
|
|
@@ -392,13 +392,14 @@ function renderCiSummaryMarkdown(plan) {
|
|
|
392
392
|
lines.push(`### 💡 New behavior detected in ${flowsWithAdvisory.length} covered feature${flowsWithAdvisory.length !== 1 ? 's' : ''} — consider adding tests`);
|
|
393
393
|
lines.push('');
|
|
394
394
|
for (const flow of flowsWithAdvisory) {
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
const
|
|
401
|
-
|
|
395
|
+
const specParts = [];
|
|
396
|
+
for (const s of flow.coveredBy) {
|
|
397
|
+
// Strip "N Playwright spec(s)" → "N PW" and "N Cypress spec(s)" → "N Cy"
|
|
398
|
+
specParts.push(s.replace(/ Playwright spec\(s\)/, ' PW').replace(/ Cypress spec\(s\)/, ' Cy'));
|
|
399
|
+
}
|
|
400
|
+
const specSummary = specParts.length > 0 ? ` — ${specParts.join(' · ')}` : '';
|
|
401
|
+
const scenarioCount = flow.advisoryScenarios.length;
|
|
402
|
+
lines.push(`<details><summary>💡 <strong>${flow.name}</strong> · ${flow.priority}${specSummary} · ${scenarioCount} scenario${scenarioCount !== 1 ? 's' : ''}</summary>`);
|
|
402
403
|
lines.push('');
|
|
403
404
|
for (const s of flow.advisoryScenarios) {
|
|
404
405
|
lines.push(`- [ ] ${s}`);
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
2
|
+
// See LICENSE.txt for license information.
|
|
3
|
+
/**
 * Assemble the prompt that asks the LLM to repair a failing Playwright spec.
 *
 * The prompt contains, in order: an attempt header, the failure type
 * (compile error vs. test failure), one entry per failure, the current spec
 * source, the optional page-object API listing, the fixed rule list and a
 * closing instruction.
 *
 * @param {object} ctx - Fix context.
 * @param {Array<object>} ctx.failures - Failure records. `testTitle` and `error`
 *     are always printed; `stack`, `line`, `expected` and `actual` only when truthy.
 *     A record titled `(compile)` switches the prompt into compile-error mode.
 * @param {*} ctx.attempt - Current attempt, interpolated into the header.
 * @param {*} ctx.maxAttempts - Attempt budget, interpolated into the header.
 * @param {string} ctx.specCode - Current spec source embedded in a code fence.
 * @param {string} [ctx.apiSurfaceHint] - Page-object API text; omitted when falsy.
 * @returns {string} The prompt, with lines joined by `\n`.
 */
export function buildFixPrompt(ctx) {
    const { failures } = ctx;
    const compileFailed = failures.some((failure) => failure.testTitle === '(compile)');

    // Render one failure: two mandatory rows, then whichever optional rows are set.
    const renderFailure = (failure) => {
        const rows = [` Test: ${failure.testTitle}`, ` Error: ${failure.error}`];
        const optionalRows = [
            ['Stack', failure.stack],
            ['Line', failure.line],
            ['Expected', failure.expected],
            ['Actual', failure.actual],
        ];
        for (const [label, value] of optionalRows) {
            if (value) {
                rows.push(` ${label}: ${value}`);
            }
        }
        return rows.join('\n');
    };

    let apiSection = '';
    if (ctx.apiSurfaceHint) {
        apiSection = `\nAVAILABLE PAGE OBJECT API:\n${ctx.apiSurfaceHint}\n`;
    }

    const header = [
        `Fix this Playwright E2E test. This is attempt ${ctx.attempt} of ${ctx.maxAttempts}.`,
        '',
        `## ${compileFailed ? 'COMPILE ERROR' : 'TEST FAILURE'}`,
        '',
        failures.map(renderFailure).join('\n\n'),
        '',
    ];
    const currentSpec = [
        '## CURRENT SPEC CODE',
        '',
        '```typescript',
        ctx.specCode,
        '```',
        apiSection,
    ];
    const rules = [
        '## RULES',
        '',
        '1. Import ONLY from "@mattermost/playwright-lib" — no "@playwright/test" imports.',
        '2. Every test must call `await pw.initSetup()` first.',
        '3. Use `await pw.testBrowser.login(user)` to log in.',
        '4. Use ONLY page object methods listed in the API above. Do NOT invent methods.',
        '5. If a method is not available, use `page.getByRole()` or `page.getByTestId()`.',
        '6. For flaky/timing issues: add `await expect(locator).toBeVisible()` waits before interactions.',
        '7. Keep the same test scenarios — fix the implementation, not the intent.',
        '8. Return the COMPLETE fixed spec file — not a diff or partial code.',
        '',
    ];
    const guidance = compileFailed
        ? 'The file does not compile. Fix syntax errors, missing imports, or invalid method calls.'
        : 'The test compiles but fails at runtime. Fix selectors, waits, or assertion logic.';
    const closing = [
        guidance,
        '',
        'Return ONLY the complete TypeScript code. No explanations, no markdown fences (except wrapping the code).',
    ];

    return [...header, ...currentSpec, ...rules, ...closing].join('\n');
}
|
|
52
|
+
/**
 * Extract fixed spec code from an LLM response.
 *
 * Steps: trim the response, unwrap the first markdown code fence if one is
 * present (optionally tagged `typescript`/`ts`), require a `test(` call, and
 * prepend the `@mattermost/playwright-lib` import when the code never
 * mentions that package.
 *
 * @param {string|null|undefined} llmResponse - Raw text returned by the LLM.
 * @returns {string|null} The spec source, or null when the response is missing,
 *     not a string, empty, or contains no `test(` call.
 */
export function applyFix(llmResponse) {
    // A missing or non-string response cannot contain code; report it as
    // "invalid" (null) instead of throwing a TypeError from `.trim()`.
    if (typeof llmResponse !== 'string') {
        return null;
    }
    let code = llmResponse.trim();
    if (!code) {
        return null;
    }
    // Strip markdown fences: keep only the body of the first fenced block.
    const fenced = code.match(/```(?:typescript|ts)?\s*([\s\S]*?)```/i);
    if (fenced) {
        code = fenced[1].trim();
    }
    // Must contain test( to be considered a spec at all.
    if (!code.includes('test(')) {
        return null;
    }
    // Ensure the spec imports from the expected package.
    if (!code.includes('@mattermost/playwright-lib')) {
        code = `import {expect, test} from '@mattermost/playwright-lib';\n\n${code}`;
    }
    return code;
}
|
|
74
|
+
/**
 * Perform one fix attempt: build the fix prompt from the failure context,
 * send it to the LLM provider and extract the corrected spec from the reply.
 *
 * @param {object} provider - LLM provider exposing `generateText(prompt, options)`.
 * @param {object} ctx - Failure context forwarded to `buildFixPrompt`.
 * @returns {Promise<{code: (string|null), tokensUsed: {input: number, output: number}}>}
 *     `code` is whatever `applyFix` extracted (null when the reply was unusable);
 *     token counts fall back to 0 when the provider reports no usage.
 */
export async function generateFix(provider, ctx) {
    const requestOptions = {
        maxTokens: 8000,
        temperature: 0.1,
        timeout: 60000,
        systemPrompt: 'You are an expert Playwright test fixer for Mattermost. Return only TypeScript code.',
    };
    const reply = await provider.generateText(buildFixPrompt(ctx), requestOptions);
    const fixedCode = applyFix(reply.text);
    const usage = reply.usage;
    return {
        code: fixedCode,
        tokensUsed: {
            input: usage?.inputTokens || 0,
            output: usage?.outputTokens || 0,
        },
    };
}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
2
|
+
// See LICENSE.txt for license information.
|
|
3
|
+
import { spawnSync } from 'child_process';
|
|
4
|
+
import { existsSync, readFileSync, mkdirSync, rmSync } from 'fs';
|
|
5
|
+
import { join, resolve } from 'path';
|
|
6
|
+
const MAX_STDOUT_CHARS = 8000;
|
|
7
|
+
const MAX_ERROR_CHARS = 2000;
|
|
8
|
+
const MAX_STACK_CHARS = 1000;
|
|
9
|
+
/**
 * Flatten a tree of report suites into a single list of specs.
 *
 * Traversal is depth-first: a suite's own specs come first, followed by the
 * specs of its nested suites, in iteration order.
 *
 * @param {Iterable<object>} suites - Suites, each with a `specs` list and an
 *     optional nested `suites` list.
 * @returns {Array} All specs found in the tree.
 */
function extractSpecs(suites) {
    const collected = [];
    const visit = (group) => {
        for (const suite of group) {
            collected.push(...suite.specs);
            if (suite.suites) {
                visit(suite.suites);
            }
        }
    };
    visit(suites);
    return collected;
}
|
|
19
|
+
/**
 * Convert a Playwright JSON report object into the runner's result shape.
 *
 * Every spec whose `ok` flag is truthy counts as passed; every other spec
 * counts as failed and yields one failure record built from the last result
 * of the spec's first test entry. Error text and stack are truncated to
 * `MAX_ERROR_CHARS` / `MAX_STACK_CHARS`.
 *
 * @param {object} report - Parsed report with `suites` and `stats`.
 * @param {string} specPath - Spec path echoed into the result and each failure.
 * @returns {object} Result with `passed`, `failed`, `flaky`, `skipped`,
 *     `failures`, an empty `stdout`, `durationMs` from the report stats, and
 *     `compiled: true`.
 */
export function parsePlaywrightJsonReport(report, specPath) {
    const specs = extractSpecs(report.suites);
    const brokenSpecs = specs.filter((spec) => !spec.ok);

    const failures = brokenSpecs.map((spec) => {
        // Only the final result of the first test entry is inspected.
        const failure = spec.tests[0]?.results?.at(-1)?.error;
        return {
            testTitle: spec.title,
            specPath,
            error: (failure?.message || 'Unknown error').slice(0, MAX_ERROR_CHARS),
            stack: (failure?.stack || '').slice(0, MAX_STACK_CHARS),
            line: extractLineNumber(failure?.stack),
        };
    });

    const { stats } = report;
    return {
        specPath,
        passed: specs.length - brokenSpecs.length,
        failed: brokenSpecs.length,
        flaky: stats.flaky || 0,
        skipped: stats.skipped || 0,
        failures,
        stdout: '',
        durationMs: stats.duration || 0,
        compiled: true,
    };
}
|
|
52
|
+
/**
 * Pull a line number out of a stack trace.
 *
 * Looks for the first line of the stack that ends in `:<line>:<column>`
 * (optionally followed by a closing parenthesis) and returns `<line>`.
 *
 * @param {string} [stack] - Stack trace text.
 * @returns {number|undefined} The line number, or undefined when the stack is
 *     empty/missing or no line ends with a location.
 */
function extractLineNumber(stack) {
    const location = stack ? stack.match(/:(\d+):\d+\)?$/m) : null;
    if (location === null) {
        return undefined;
    }
    return parseInt(location[1], 10);
}
|
|
58
|
+
/**
 * Run a single Playwright spec file and return structured results.
 *
 * The spec is executed with `npx playwright test <specPath> --reporter json`
 * from `testsRoot`. The JSON report is read from stdout first and then from a
 * temporary report file (its path is handed to the child through
 * `PLAYWRIGHT_JSON_OUTPUT_NAME`); if neither can be parsed the exit status
 * decides pass/fail.
 *
 * @param {string} specPath - Spec file to run; must resolve inside `testsRoot`
 *     and end in `.spec.ts` or `.test.ts`.
 * @param {string} testsRoot - Directory used as the child's working directory.
 * @param {object} [options] - Run options.
 * @param {string} [options.project] - Playwright project (default `chrome`).
 * @param {string} [options.baseUrl] - When set, `--config playwright.config.ts` is added.
 * @param {number} [options.timeoutMs] - Child process timeout (default 120000 ms).
 * @returns {object} Result with `specPath`, `passed`, `failed`, `flaky`,
 *     `skipped`, `failures`, `stdout`, `durationMs` and `compiled`.
 * @throws {Error} When `specPath` is outside `testsRoot` or has a wrong extension.
 */
export function runPlaywrightSpec(specPath, testsRoot, options = {}) {
    // SECURITY: Validate spec path is within testsRoot and has valid extension
    const resolvedSpec = resolve(specPath);
    const resolvedRoot = resolve(testsRoot);
    // Derive "<root><separator>" via join() so the check uses the platform's
    // separator (a literal '/' rejects every path on Windows) and does not
    // double the separator when the root is the filesystem root.
    const rootPrefix = join(resolvedRoot, 'x').slice(0, -1);
    if (!resolvedSpec.startsWith(rootPrefix)) {
        throw new Error(`Security: spec path ${specPath} is outside testsRoot`);
    }
    if (!resolvedSpec.endsWith('.spec.ts') && !resolvedSpec.endsWith('.test.ts')) {
        throw new Error(`Security: spec path must end in .spec.ts or .test.ts`);
    }

    // Use the resolved root: the child runs with cwd = testsRoot, so a relative
    // report path would point at different files for parent and child.
    const reportDir = join(resolvedRoot, '.e2e-ai-agents', 'agentic-reports');
    if (!existsSync(reportDir)) {
        mkdirSync(reportDir, { recursive: true });
    }
    const reportPath = join(reportDir, `report-${Date.now()}.json`);

    const args = [
        'playwright', 'test',
        specPath,
        '--reporter', 'json',
        '--project', options.project || 'chrome',
    ];
    if (options.baseUrl) {
        // NOTE(review): the baseUrl value itself is not forwarded to Playwright
        // here; only the config flag is added — confirm this is intended.
        args.push('--config', 'playwright.config.ts');
    }

    const startTime = Date.now();
    const result = spawnSync('npx', args, {
        cwd: testsRoot,
        encoding: 'utf-8',
        timeout: options.timeoutMs || 120000,
        maxBuffer: 2 * 1024 * 1024,
        env: {
            ...process.env,
            PLAYWRIGHT_JSON_OUTPUT_NAME: reportPath,
        },
    });
    const durationMs = Date.now() - startTime;

    // Keep the untruncated stdout for JSON parsing; truncated JSON never parses.
    const rawStdout = result.stdout || '';
    const stdout = rawStdout.slice(0, MAX_STDOUT_CHARS);
    const stderr = (result.stderr || '').slice(0, MAX_STDOUT_CHARS);

    try {
        // Check for compile errors
        if (stderr.includes('SyntaxError') || stderr.includes('Cannot find module') || stderr.includes('TypeError')) {
            return {
                specPath,
                passed: 0,
                failed: 1,
                flaky: 0,
                skipped: 0,
                failures: [{
                    testTitle: '(compile)',
                    specPath,
                    error: stderr.slice(0, MAX_ERROR_CHARS),
                    stack: '',
                }],
                stdout: `${stdout}\n${stderr}`,
                durationMs,
                compiled: false,
            };
        }

        // Try stdout first, then the file-based report. A source that is
        // missing or unparsable simply falls through to the next one.
        const reportSources = [
            () => rawStdout,
            () => readFileSync(reportPath, 'utf-8'),
        ];
        for (const readReport of reportSources) {
            try {
                const parsed = parsePlaywrightJsonReport(JSON.parse(readReport()), specPath);
                parsed.durationMs = durationMs;
                parsed.stdout = stdout;
                return parsed;
            }
            catch {
                // Not a usable report; try the next source.
            }
        }

        // Last resort: use exit code
        const succeeded = result.status === 0;
        // When the process could not be spawned or timed out, stderr may be
        // empty; surface the spawn error so the failure is not blank.
        const fallbackError = stderr
            || (result.error ? result.error.message : '')
            || `Playwright exited with status ${result.status}`;
        return {
            specPath,
            passed: succeeded ? 1 : 0,
            failed: succeeded ? 0 : 1,
            flaky: 0,
            skipped: 0,
            failures: succeeded
                ? []
                : [{ testTitle: '(unknown)', specPath, error: fallbackError.slice(0, MAX_ERROR_CHARS), stack: '' }],
            stdout,
            durationMs,
            compiled: !stderr.includes('Error'),
        };
    }
    finally {
        // The report file is temporary; remove it on every exit path (best-effort).
        try {
            rmSync(reportPath, { force: true });
        }
        catch { /* ignore */ }
    }
}
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
2
|
+
// See LICENSE.txt for license information.
|
|
3
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
|
|
4
|
+
import { dirname, join, resolve } from 'path';
|
|
5
|
+
import { runPlaywrightSpec } from './playwright_runner.js';
|
|
6
|
+
import { generateFix } from './fix_loop.js';
|
|
7
|
+
import { parseGenerationResponse } from '../prompts/generation.js';
|
|
8
|
+
import { formatApiSurfaceForPrompt } from '../knowledge/api_surface.js';
|
|
9
|
+
/**
 * Build the LLM prompt for generating a new Playwright spec for one scenario.
 *
 * @param {object} scenario - Scenario description.
 * @param {string} scenario.name - Flow name printed after `FLOW:`.
 * @param {string} scenario.routeFamily - Route family; also used as the test tag.
 * @param {string} scenario.priority - Priority label.
 * @param {string} [scenario.evidence] - Optional evidence line; omitted when falsy.
 * @param {string[]} scenario.scenarios - Scenario descriptions, rendered as a numbered list.
 * @param {string} [apiSurfaceHint] - Page-object API text; a generic selector
 *     instruction is used when this is falsy.
 * @returns {string} The prompt, with lines joined by `\n`.
 */
function buildGeneratePrompt(scenario, apiSurfaceHint) {
    const scenariosBlock = scenario.scenarios
        .map((s, i) => ` ${i + 1}. ${s}`)
        .join('\n');
    // Only the evidence line is optional. Including it conditionally (instead
    // of filtering every falsy entry afterwards) keeps the deliberate blank
    // separator lines between the prompt sections.
    const evidenceLines = scenario.evidence ? [`Evidence: ${scenario.evidence}`] : [];
    return [
        'Generate a Mattermost Playwright E2E test file.',
        '',
        `FLOW: ${scenario.name}`,
        `Route Family: ${scenario.routeFamily}`,
        `Priority: ${scenario.priority}`,
        ...evidenceLines,
        '',
        'SCENARIOS TO IMPLEMENT:',
        scenariosBlock,
        '',
        'AVAILABLE PAGE OBJECTS AND METHODS:',
        apiSurfaceHint || 'Use page.getByRole() or page.getByTestId() for selectors.',
        '',
        'MANDATORY RULES:',
        '1. Import ONLY from "@mattermost/playwright-lib" — no other test framework imports.',
        '2. Every test must call `await pw.initSetup()` first.',
        '3. Use `await pw.testBrowser.login(user)` to log in — never hardcode credentials.',
        '4. Use ONLY page object methods listed above. Do NOT invent methods.',
        '5. If a method is not available, use `page.getByRole()` or `page.getByTestId()`.',
        `6. Tag every test: {tag: '@${scenario.routeFamily}'}`,
        '7. Write one test per scenario with a descriptive name.',
        '8. Use `expect` from "@mattermost/playwright-lib".',
        '9. Include the copyright header.',
        '10. NEVER fabricate test IDs (MM-TXXXX). Use descriptive names only.',
        '',
        'EXAMPLE STRUCTURE:',
        '```typescript',
        '// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.',
        '// See LICENSE.txt for license information.',
        '',
        "import {expect, test} from '@mattermost/playwright-lib';",
        '',
        'test(',
        " 'user can post a message in channel',",
        ` {tag: '@${scenario.routeFamily}'},`,
        ' async ({pw}) => {',
        ' const {user} = await pw.initSetup();',
        ' const {channelsPage} = await pw.testBrowser.login(user);',
        ' await channelsPage.goto();',
        ' await channelsPage.toBeVisible();',
        ' // test steps...',
        ' },',
        ');',
        '```',
        '',
        'Return ONLY the TypeScript code. No explanations.',
    ].join('\n');
}
|
|
62
|
+
/**
 * Decide where the generated spec for a scenario is written.
 *
 * Uses `scenario.targetSpec` (relative to `testsRoot`) when provided;
 * otherwise derives `specs/functional/ai-assisted/<id>.spec.ts` from the
 * scenario id, with every character outside `[a-zA-Z0-9_-]` replaced by `_`
 * and the result lower-cased.
 *
 * @param {object} scenario - Scenario with `id` and optional `targetSpec`.
 * @param {string} testsRoot - Root directory the spec must stay inside.
 * @returns {string} The spec path (joined with `testsRoot`, not resolved).
 * @throws {Error} When the path resolves outside `testsRoot` or does not end
 *     in `.spec.ts` / `.test.ts`.
 */
function resolveSpecPath(scenario, testsRoot) {
    let specPath;
    if (scenario.targetSpec) {
        specPath = join(testsRoot, scenario.targetSpec);
    }
    else {
        const safeName = scenario.id.replace(/[^a-zA-Z0-9_-]/g, '_').toLowerCase();
        const outputDir = join(testsRoot, 'specs', 'functional', 'ai-assisted');
        specPath = join(outputDir, `${safeName}.spec.ts`);
    }
    // SECURITY: Prevent path traversal
    const resolved = resolve(specPath);
    const resolvedRoot = resolve(testsRoot);
    // Derive "<root><separator>" via join() so the check uses the platform's
    // separator (a literal '/' rejects every path on Windows) and does not
    // double the separator when the root is the filesystem root.
    const rootPrefix = join(resolvedRoot, 'x').slice(0, -1);
    if (!resolved.startsWith(rootPrefix) && resolved !== resolvedRoot) {
        throw new Error(`Path traversal blocked: ${specPath} resolves outside testsRoot`);
    }
    if (!resolved.endsWith('.spec.ts') && !resolved.endsWith('.test.ts')) {
        throw new Error(`Invalid spec path: must end in .spec.ts or .test.ts`);
    }
    return specPath;
}
|
|
83
|
+
/**
 * Ask the LLM for a first version of the spec for a scenario.
 *
 * The reply is handed to `parseGenerationResponse` (shared with
 * prompts/generation) together with the target path, the `create_spec`
 * action and the scenario id.
 *
 * @param {object} provider - LLM provider exposing `generateText(prompt, options)`.
 * @param {object} scenario - Scenario to generate tests for.
 * @param {string} specPath - Path the spec will be written to.
 * @param {string} apiSurfaceHint - Page-object API text for the prompt.
 * @returns {Promise<(string|null)>} The parsed spec code, or null when the
 *     parser returned nothing usable.
 */
async function generateInitialSpec(provider, scenario, specPath, apiSurfaceHint) {
    const requestOptions = {
        maxTokens: 8000,
        temperature: 0.1,
        timeout: 60000,
        systemPrompt: 'You are an expert Playwright test writer for Mattermost. Return only TypeScript code.',
    };
    const reply = await provider.generateText(
        buildGeneratePrompt(scenario, apiSurfaceHint),
        requestOptions,
    );
    const generated = parseGenerationResponse(reply.text, specPath, 'create_spec', scenario.id);
    if (generated === null || generated === undefined) {
        return null;
    }
    return generated.code ?? null;
}
|
|
95
|
+
/**
 * Generate, run and iteratively repair the spec for one scenario.
 *
 * Flow: resolve the spec path, generate the initial spec with the LLM and
 * write it to disk, then (unless `config.dryRun`) run it with Playwright up
 * to `config.maxAttempts` times, asking the LLM for a fix between runs.
 *
 * @param {object} scenario - Scenario to process; `scenario.id` labels the result.
 * @param {object} options - Run options: `config`, `provider`, and optionally
 *     `apiSurfaceHint` or `apiSurface`.
 * @returns {Promise<object>} Result with `specPath`, `scenarioSource`,
 *     `status` (`failed`, `skipped`, `passed`, `compile-error` or
 *     `max-attempts`), `attempts`, `warnings`, and `finalRun` once the spec
 *     has been executed.
 */
async function runSingleScenario(scenario, options) {
    const { config, provider } = options;
    const warnings = [];
    const specPath = resolveSpecPath(scenario, config.testsRoot);

    const messageOf = (error) => (error instanceof Error ? error.message : String(error));
    // Results produced before any Playwright run carry no `finalRun` key.
    const earlyResult = (status) => ({
        specPath,
        scenarioSource: scenario.id,
        status,
        attempts: 0,
        warnings,
    });

    // Build API surface hint: an explicit hint wins, otherwise format the catalog.
    const surface = options.apiSurface;
    const apiHint = options.apiSurfaceHint
        || (surface
            ? formatApiSurfaceForPrompt(surface, surface.pageObjects.map((po) => po.className))
            : '');

    // Step 1: Generate initial spec
    let specCode;
    try {
        specCode = await generateInitialSpec(provider, scenario, specPath, apiHint);
    }
    catch (error) {
        warnings.push(`Generation failed for ${scenario.id}: ${messageOf(error)}`);
        return earlyResult('failed');
    }
    if (!specCode) {
        warnings.push(`LLM returned invalid code for ${scenario.id}`);
        return earlyResult('failed');
    }

    // Write the spec file, creating its directory when needed
    const specDir = dirname(specPath);
    if (!existsSync(specDir)) {
        mkdirSync(specDir, { recursive: true });
    }
    writeFileSync(specPath, specCode, 'utf-8');

    // Dry run: the spec stays on disk but is never executed
    if (config.dryRun) {
        return earlyResult('skipped');
    }

    // Step 2: Run -> Fix loop
    let lastRun;
    for (let attempt = 1; attempt <= config.maxAttempts; attempt++) {
        lastRun = runPlaywrightSpec(specPath, config.testsRoot, {
            project: config.project,
            baseUrl: config.baseUrl,
            timeoutMs: config.testTimeoutMs,
        });

        const allPassed = lastRun.failed === 0 && lastRun.compiled;
        if (allPassed) {
            return {
                specPath,
                scenarioSource: scenario.id,
                status: 'passed',
                attempts: attempt,
                finalRun: lastRun,
                warnings,
            };
        }

        // No fix is requested after the final run
        if (attempt >= config.maxAttempts) {
            break;
        }

        // Step 3: Fix — a failed or unusable fix is recorded as a warning and
        // the loop continues with the spec as it currently is on disk.
        const currentCode = readFileSync(specPath, 'utf-8');
        try {
            const { code: fixedCode } = await generateFix(provider, {
                specCode: currentCode,
                failures: lastRun.failures,
                attempt,
                maxAttempts: config.maxAttempts,
                apiSurfaceHint: apiHint,
            });
            if (fixedCode) {
                writeFileSync(specPath, fixedCode, 'utf-8');
            }
            else {
                warnings.push(`Fix attempt ${attempt} returned invalid code for ${scenario.id}`);
            }
        }
        catch (error) {
            warnings.push(`Fix attempt ${attempt} failed for ${scenario.id}: ${messageOf(error)}`);
        }
    }

    const finalStatus = lastRun?.compiled === false ? 'compile-error' : 'max-attempts';
    return {
        specPath,
        scenarioSource: scenario.id,
        status: finalStatus,
        attempts: config.maxAttempts,
        finalRun: lastRun,
        warnings,
    };
}
|
|
183
|
+
/**
 * Process every scenario in sequence and summarize the outcomes.
 *
 * Scenarios are awaited one at a time. A result counts as passed when its
 * status is `passed`, as failed when it is neither `passed` nor `skipped`.
 *
 * @param {object} options - Run options; `options.scenarios` is iterated and
 *     the whole object is forwarded to each scenario run.
 * @returns {Promise<object>} Summary with `results`, `totalGenerated`,
 *     `totalPassed`, `totalFailed`, `totalAttempts`, `durationMs` and the
 *     concatenated `warnings` of all results.
 */
export async function runAgenticGeneration(options) {
    const startedAt = Date.now();
    const results = [];
    const warnings = [];
    let totalPassed = 0;
    let totalFailed = 0;
    let totalAttempts = 0;

    for (const scenario of options.scenarios) {
        const outcome = await runSingleScenario(scenario, options);
        results.push(outcome);
        for (const warning of outcome.warnings) {
            warnings.push(warning);
        }
    }

    for (const { status, attempts } of results) {
        if (status === 'passed') {
            totalPassed += 1;
        }
        else if (status !== 'skipped') {
            totalFailed += 1;
        }
        totalAttempts += attempts;
    }

    return {
        results,
        totalGenerated: results.length,
        totalPassed,
        totalFailed,
        totalAttempts,
        durationMs: Date.now() - startedAt,
        warnings,
    };
}
|