@yasserkhanorg/e2e-agents 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agentic/fix_loop.d.ts +26 -0
- package/dist/agentic/fix_loop.d.ts.map +1 -0
- package/dist/agentic/fix_loop.js +95 -0
- package/dist/agentic/playwright_runner.d.ts +43 -0
- package/dist/agentic/playwright_runner.d.ts.map +1 -0
- package/dist/agentic/playwright_runner.js +165 -0
- package/dist/agentic/runner.d.ts +25 -0
- package/dist/agentic/runner.d.ts.map +1 -0
- package/dist/agentic/runner.js +207 -0
- package/dist/agentic/types.d.ts +62 -0
- package/dist/agentic/types.d.ts.map +1 -0
- package/dist/agentic/types.js +4 -0
- package/dist/cli.js +130 -0
- package/dist/engine/plan_builder.d.ts.map +1 -1
- package/dist/engine/plan_builder.js +8 -7
- package/dist/esm/agentic/fix_loop.js +90 -0
- package/dist/esm/agentic/playwright_runner.js +161 -0
- package/dist/esm/agentic/runner.js +204 -0
- package/dist/esm/agentic/types.js +3 -0
- package/dist/esm/cli.js +131 -1
- package/dist/esm/engine/plan_builder.js +8 -7
- package/dist/esm/index.js +2 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -1
- package/package.json +1 -1
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { LLMProvider } from '../provider_interface.js';
|
|
2
|
+
import type { TestFailure } from './types.js';
|
|
3
|
+
/**
 * Input describing one repair attempt; consumed by `buildFixPrompt` and
 * `generateFix`.
 */
export interface FixPromptContext {
    /** Current text of the spec file; embedded verbatim in the prompt. */
    specCode: string;
    /** Failures to describe to the LLM. */
    failures: Array<TestFailure>;
    /** Number of the current attempt, rendered as "attempt X of Y". */
    attempt: number;
    /** Total number of attempts, rendered as the "Y" in "attempt X of Y". */
    maxAttempts: number;
    /** Optional page-object API listing; when non-empty it is added to the prompt. */
    apiSurfaceHint?: string;
}
|
|
10
|
+
/**
 * Build the LLM prompt that asks for a corrected version of a failing spec.
 *
 * The prompt lists each failure, embeds the current spec code, optionally
 * appends the page-object API hint, and ends with the fixed set of rules.
 *
 * @param ctx - Spec code, failures and attempt counters for this round.
 * @returns The complete prompt text.
 */
export declare function buildFixPrompt(ctx: FixPromptContext): string;
|
|
11
|
+
/**
 * Pull the corrected spec source out of a raw LLM reply.
 *
 * A surrounding Markdown code fence is removed when present, and the
 * `@mattermost/playwright-lib` import is prepended if the code does not
 * mention that package.
 *
 * @param llmResponse - Raw text returned by the LLM.
 * @returns The spec source, or `null` when the reply is empty or contains no
 *          `test(` call.
 */
export declare function applyFix(llmResponse: string): string | null;
|
|
16
|
+
/**
 * Perform a single repair round: send the fix prompt for `ctx` to the
 * provider and extract the corrected spec from its reply.
 *
 * @param provider - LLM provider used to generate the fix.
 * @param ctx - Spec code, failures and attempt counters for this round.
 * @returns `code` is the corrected spec (or `null` if the reply was not
 *          usable); `tokensUsed` carries the reported token counts, `0` when
 *          the provider reports none.
 */
export declare function generateFix(
    provider: LLMProvider,
    ctx: FixPromptContext,
): Promise<{ code: string | null; tokensUsed: { input: number; output: number } }>;
|
|
26
|
+
//# sourceMappingURL=fix_loop.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fix_loop.d.ts","sourceRoot":"","sources":["../../src/agentic/fix_loop.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAC,WAAW,EAAC,MAAM,0BAA0B,CAAC;AAC1D,OAAO,KAAK,EAAC,WAAW,EAAC,MAAM,YAAY,CAAC;AAE5C,MAAM,WAAW,gBAAgB;IAC7B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,wBAAgB,cAAc,CAAC,GAAG,EAAE,gBAAgB,GAAG,MAAM,CA+C5D;AAED;;;GAGG;AACH,wBAAgB,QAAQ,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAmB3D;AAED;;GAEG;AACH,wBAAsB,WAAW,CAC7B,QAAQ,EAAE,WAAW,EACrB,GAAG,EAAE,gBAAgB,GACtB,OAAO,CAAC;IAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,UAAU,EAAE;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAC,CAAA;CAAC,CAAC,CAe7E"}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
3
|
+
// See LICENSE.txt for license information.
|
|
4
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
|
+
exports.buildFixPrompt = buildFixPrompt;
|
|
6
|
+
exports.applyFix = applyFix;
|
|
7
|
+
exports.generateFix = generateFix;
|
|
8
|
+
/**
 * Compose the prompt asking the LLM to repair a failing Playwright spec.
 *
 * The run is labelled a compile error when any failure carries the
 * `(compile)` test title; otherwise it is labelled a test failure. Optional
 * failure fields (stack, line, expected, actual) are listed only when truthy.
 *
 * @param ctx - Spec code, failures, attempt counters and optional API hint.
 * @returns The prompt text, sections joined by newlines.
 */
function buildFixPrompt(ctx) {
    const { failures, attempt, maxAttempts, specCode, apiSurfaceHint } = ctx;
    const compileFailed = failures.some(({ testTitle }) => testTitle === '(compile)');

    // Render one failure as a block of " Label: value" lines.
    const describeFailure = (failure) => {
        const optionalFields = [
            ['Stack', failure.stack],
            ['Line', failure.line],
            ['Expected', failure.expected],
            ['Actual', failure.actual],
        ];
        let text = ` Test: ${failure.testTitle}\n Error: ${failure.error}`;
        for (const [label, value] of optionalFields) {
            if (value) {
                text += `\n ${label}: ${value}`;
            }
        }
        return text;
    };

    const heading = compileFailed ? 'COMPILE ERROR' : 'TEST FAILURE';
    const closingHint = compileFailed
        ? 'The file does not compile. Fix syntax errors, missing imports, or invalid method calls.'
        : 'The test compiles but fails at runtime. Fix selectors, waits, or assertion logic.';

    let apiSection = '';
    if (apiSurfaceHint) {
        apiSection = `\nAVAILABLE PAGE OBJECT API:\n${apiSurfaceHint}\n`;
    }

    const rules = [
        '1. Import ONLY from "@mattermost/playwright-lib" — no "@playwright/test" imports.',
        '2. Every test must call `await pw.initSetup()` first.',
        '3. Use `await pw.testBrowser.login(user)` to log in.',
        '4. Use ONLY page object methods listed in the API above. Do NOT invent methods.',
        '5. If a method is not available, use `page.getByRole()` or `page.getByTestId()`.',
        '6. For flaky/timing issues: add `await expect(locator).toBeVisible()` waits before interactions.',
        '7. Keep the same test scenarios — fix the implementation, not the intent.',
        '8. Return the COMPLETE fixed spec file — not a diff or partial code.',
    ];

    const sections = [
        `Fix this Playwright E2E test. This is attempt ${attempt} of ${maxAttempts}.`,
        '',
        `## ${heading}`,
        '',
        failures.map(describeFailure).join('\n\n'),
        '',
        '## CURRENT SPEC CODE',
        '',
        '```typescript',
        specCode,
        '```',
        apiSection,
        '## RULES',
        '',
        ...rules,
        '',
        closingHint,
        '',
        'Return ONLY the complete TypeScript code. No explanations, no markdown fences (except wrapping the code).',
    ];
    return sections.join('\n');
}
|
|
57
|
+
/**
 * Extract fixed spec code from an LLM response.
 *
 * Handling, in order:
 *  1. Blank responses yield `null`.
 *  2. If the response contains a complete Markdown fence, the content of the
 *     first fence is used. Any info string on the fence line (`ts`, `tsx`,
 *     `typescript`, `javascript`, ...) is discarded rather than leaking into
 *     the code.
 *  3. If the response starts with an opening fence that is never closed
 *     (for example a reply cut off by the token limit), the fence line is
 *     dropped so it does not end up in the spec file.
 *  4. Code without a `test(` call is rejected with `null`.
 *  5. The `@mattermost/playwright-lib` import is prepended when the code does
 *     not mention that package.
 *
 * @param llmResponse - Raw text returned by the LLM.
 * @returns The spec source, or `null` if no usable test code was found.
 */
function applyFix(llmResponse) {
    let code = llmResponse.trim();
    if (!code)
        return null;
    // First alternative: a fence line with an arbitrary info string followed by
    // a newline. Second alternative: the legacy same-line form ("```ts code```").
    const fenced = code.match(/```(?:[\w+-]*[ \t]*\r?\n|(?:typescript|ts)\b\s*)?([\s\S]*?)```/i);
    if (fenced) {
        code = fenced[1].trim();
    }
    else if (code.startsWith('```')) {
        // Opening fence without a closing one: strip just the fence line.
        code = code.replace(/^```[\w+-]*[ \t]*\r?\n?/, '').trim();
    }
    // Must contain test( to be valid
    if (!code.includes('test('))
        return null;
    // Ensure it has the right import
    if (!code.includes('@mattermost/playwright-lib')) {
        code = `import {expect, test} from '@mattermost/playwright-lib';\n\n${code}`;
    }
    return code;
}
|
|
79
|
+
/**
 * Perform one repair round: build the fix prompt from `ctx`, send it to the
 * provider, and extract the corrected spec from the reply.
 *
 * @param provider - Object exposing `generateText(prompt, options)`.
 * @param ctx - Spec code, failures and attempt counters for this round.
 * @returns `{code, tokensUsed}`; `code` is `null` when the reply held no
 *          usable test code, and token counts fall back to `0` when the
 *          provider reports no usage.
 */
async function generateFix(provider, ctx) {
    const requestOptions = {
        maxTokens: 8000,
        temperature: 0.1,
        timeout: 60000,
        systemPrompt: 'You are an expert Playwright test fixer for Mattermost. Return only TypeScript code.',
    };
    const reply = await provider.generateText(buildFixPrompt(ctx), requestOptions);
    const fixedCode = applyFix(reply.text);
    const usage = reply.usage;
    return {
        code: fixedCode,
        tokensUsed: {
            input: usage?.inputTokens || 0,
            output: usage?.outputTokens || 0,
        },
    };
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import type { PlaywrightRunResult } from './types.js';
|
|
2
|
+
/**
 * One spec entry of a Playwright JSON report, limited to the fields declared
 * for this module.
 */
interface PlaywrightReportSpec {
    /** Spec title; used as the failure's test title. */
    title: string;
    /** `true` counts the spec as passed, anything else as failed. */
    ok: boolean;
    /** Test entries for the spec, each with its list of run results. */
    tests: {
        status: string;
        results: {
            status: string;
            duration: number;
            /** Error details, present when the result carries an error. */
            error?: { message: string; stack?: string };
        }[];
    }[];
}
|
|
17
|
+
/**
 * A suite node of a Playwright JSON report. Suites nest, so specs can live
 * at any depth of the tree.
 */
interface PlaywrightReportSuite {
    title: string;
    /** Specs declared directly in this suite. */
    specs: Array<PlaywrightReportSpec>;
    /** Child suites, when there are any. */
    suites?: Array<PlaywrightReportSuite>;
}
|
|
22
|
+
/**
 * Root of a Playwright JSON report, limited to the fields declared for this
 * module.
 */
interface PlaywrightReport {
    /** Top-level suites; nested suites hang off each entry. */
    suites: Array<PlaywrightReportSuite>;
    /** Aggregate counters and total duration as reported by Playwright. */
    stats: {
        expected: number;
        unexpected: number;
        flaky: number;
        skipped: number;
        duration: number;
    };
}
|
|
32
|
+
/**
 * Summarise a Playwright JSON report as a `PlaywrightRunResult`.
 *
 * Specs at every suite depth are counted as passed or failed according to
 * their `ok` flag, and each failed spec contributes one failure entry.
 *
 * @param report - Parsed JSON report.
 * @param specPath - Spec path recorded on the result and on every failure.
 */
export declare function parsePlaywrightJsonReport(report: PlaywrightReport, specPath: string): PlaywrightRunResult;
|
|
33
|
+
/**
 * Execute one Playwright spec file synchronously via `npx playwright test`
 * with the JSON reporter and return the structured outcome.
 *
 * @param specPath - Spec to run; must resolve inside `testsRoot` and end in
 *                   `.spec.ts` or `.test.ts`.
 * @param testsRoot - Directory used as the working directory for the run.
 * @param options - `project` falls back to `'chrome'` and `timeoutMs` to
 *                  `120000` when not provided.
 * @throws Error when `specPath` fails the location or extension check.
 */
export declare function runPlaywrightSpec(
    specPath: string,
    testsRoot: string,
    options: { project?: string; baseUrl?: string; timeoutMs?: number },
): PlaywrightRunResult;
|
|
42
|
+
export {};
|
|
43
|
+
//# sourceMappingURL=playwright_runner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright_runner.d.ts","sourceRoot":"","sources":["../../src/agentic/playwright_runner.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAC,mBAAmB,EAAc,MAAM,YAAY,CAAC;AAMjE,UAAU,oBAAoB;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,EAAE,EAAE,OAAO,CAAC;IACZ,KAAK,EAAE,KAAK,CAAC;QACT,MAAM,EAAE,MAAM,CAAC;QACf,OAAO,EAAE,KAAK,CAAC;YACX,MAAM,EAAE,MAAM,CAAC;YACf,QAAQ,EAAE,MAAM,CAAC;YACjB,KAAK,CAAC,EAAE;gBAAC,OAAO,EAAE,MAAM,CAAC;gBAAC,KAAK,CAAC,EAAE,MAAM,CAAA;aAAC,CAAC;SAC7C,CAAC,CAAC;KACN,CAAC,CAAC;CACN;AAED,UAAU,qBAAqB;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,oBAAoB,EAAE,CAAC;IAC9B,MAAM,CAAC,EAAE,qBAAqB,EAAE,CAAC;CACpC;AAED,UAAU,gBAAgB;IACtB,MAAM,EAAE,qBAAqB,EAAE,CAAC;IAChC,KAAK,EAAE;QACH,QAAQ,EAAE,MAAM,CAAC;QACjB,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE,MAAM,CAAC;KACpB,CAAC;CACL;AAaD,wBAAgB,yBAAyB,CAAC,MAAM,EAAE,gBAAgB,EAAE,QAAQ,EAAE,MAAM,GAAG,mBAAmB,CAkCzG;AAQD;;;GAGG;AACH,wBAAgB,iBAAiB,CAC7B,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE;IAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAA;CAAC,GAClE,mBAAmB,CAsGrB"}
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
3
|
+
// See LICENSE.txt for license information.
|
|
4
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
|
+
exports.parsePlaywrightJsonReport = parsePlaywrightJsonReport;
|
|
6
|
+
exports.runPlaywrightSpec = runPlaywrightSpec;
|
|
7
|
+
const child_process_1 = require("child_process");
|
|
8
|
+
const fs_1 = require("fs");
|
|
9
|
+
const path_1 = require("path");
|
|
10
|
+
const MAX_STDOUT_CHARS = 8000;
|
|
11
|
+
const MAX_ERROR_CHARS = 2000;
|
|
12
|
+
const MAX_STACK_CHARS = 1000;
|
|
13
|
+
/**
 * Flatten a suite tree into a single list of specs.
 *
 * For each suite its own specs come first, followed by the specs of its
 * nested suites (depth-first), preserving input order.
 *
 * @param suites - Suite nodes, each with `specs` and optional `suites`.
 * @returns Every spec found anywhere in the tree.
 */
function extractSpecs(suites) {
    return suites.flatMap((suite) => {
        const nested = suite.suites ? extractSpecs(suite.suites) : [];
        return [...suite.specs, ...nested];
    });
}
|
|
23
|
+
/**
 * Summarise a Playwright JSON report as a run result.
 *
 * Every spec in the suite tree counts as passed when `ok` is truthy and as
 * failed otherwise. For a failed spec the error is taken from the last
 * result of its first test entry (presumably the final retry - confirm
 * against the reporter output), truncated to the module's size limits.
 *
 * Entries in the report's top-level `errors` array are also recorded as
 * failures. Without this, a report that holds only such errors and no specs
 * would come back as `failed: 0, compiled: true` and be indistinguishable
 * from a clean run.
 *
 * A missing `stats` object or a spec without `tests` is tolerated; a report
 * without a `suites` array still throws, since it is not a usable report.
 *
 * @param report - Parsed JSON report.
 * @param specPath - Spec path recorded on the result and on every failure.
 * @returns Result with `stdout` empty and `compiled` set to `true`; the
 *          duration comes from the report's stats (`0` when absent).
 */
function parsePlaywrightJsonReport(report, specPath) {
    const failures = [];
    let passed = 0;
    for (const spec of extractSpecs(report.suites)) {
        if (spec.ok) {
            passed++;
            continue;
        }
        const lastError = spec.tests?.[0]?.results?.at(-1)?.error;
        failures.push({
            testTitle: spec.title,
            specPath,
            error: (lastError?.message || 'Unknown error').slice(0, MAX_ERROR_CHARS),
            stack: (lastError?.stack || '').slice(0, MAX_STACK_CHARS),
            line: extractLineNumber(lastError?.stack),
        });
    }
    // Errors raised outside any spec are reported at the top level of the report.
    const topLevelErrors = Array.isArray(report.errors) ? report.errors : [];
    for (const reportError of topLevelErrors) {
        failures.push({
            testTitle: '(report error)',
            specPath,
            error: (reportError?.message || 'Unknown error').slice(0, MAX_ERROR_CHARS),
            stack: (reportError?.stack || '').slice(0, MAX_STACK_CHARS),
            line: extractLineNumber(reportError?.stack),
        });
    }
    const stats = report.stats ?? {};
    return {
        specPath,
        passed,
        failed: failures.length,
        flaky: stats.flaky || 0,
        skipped: stats.skipped || 0,
        failures,
        stdout: '',
        durationMs: stats.duration || 0,
        compiled: true,
    };
}
|
|
56
|
+
/**
 * Read a line number out of a stack trace.
 *
 * Uses the first line of the trace that ends in `:<line>:<column>`,
 * optionally followed by `)`.
 *
 * @param stack - Stack trace text; may be empty or undefined.
 * @returns The line number, or `undefined` when no such line exists.
 */
function extractLineNumber(stack) {
    if (stack) {
        const [, lineText] = stack.match(/:(\d+):\d+\)?$/m) ?? [];
        if (lineText !== undefined) {
            return Number.parseInt(lineText, 10);
        }
    }
    return undefined;
}
|
|
62
|
+
/**
 * Run a single Playwright spec file and return structured results.
 *
 * The spec is executed synchronously through `npx playwright test` with the
 * JSON reporter, using `testsRoot` as the working directory. The result is
 * derived, in order of preference, from:
 *  1. stderr markers (`SyntaxError`, `Cannot find module`, `TypeError`),
 *     reported as a `(compile)` failure with `compiled: false`;
 *  2. a JSON report on stdout;
 *  3. the JSON report file named through `PLAYWRIGHT_JSON_OUTPUT_NAME`;
 *  4. the process exit status.
 *
 * The temporary report file is removed on every path.
 *
 * @param specPath - Spec to run; must resolve inside `testsRoot` and end in
 *                   `.spec.ts` or `.test.ts`.
 * @param testsRoot - Tests root directory (working directory of the run).
 * @param options - `project` (default `'chrome'`), `baseUrl`, `timeoutMs`
 *                  (default `120000`).
 * @returns The structured run result.
 * @throws Error when `specPath` fails the location or extension check.
 */
function runPlaywrightSpec(specPath, testsRoot, options) {
    // SECURITY: Validate spec path is within testsRoot and has valid extension.
    // path.relative keeps the check independent of the platform separator and
    // also works when testsRoot is the filesystem root.
    const resolvedSpec = (0, path_1.resolve)(specPath);
    const resolvedRoot = (0, path_1.resolve)(testsRoot);
    const relativeSpec = (0, path_1.relative)(resolvedRoot, resolvedSpec);
    const outsideRoot = relativeSpec === '' ||
        relativeSpec === '..' ||
        relativeSpec.startsWith('..' + path_1.sep) ||
        (0, path_1.isAbsolute)(relativeSpec);
    if (outsideRoot) {
        throw new Error(`Security: spec path ${specPath} is outside testsRoot`);
    }
    if (!resolvedSpec.endsWith('.spec.ts') && !resolvedSpec.endsWith('.test.ts')) {
        throw new Error(`Security: spec path must end in .spec.ts or .test.ts`);
    }
    // Use the absolute root so this process and the child (which runs with
    // cwd = testsRoot) agree on the report location even if testsRoot is relative.
    const reportDir = (0, path_1.join)(resolvedRoot, '.e2e-ai-agents', 'agentic-reports');
    (0, fs_1.mkdirSync)(reportDir, { recursive: true });
    const reportPath = (0, path_1.join)(reportDir, `report-${Date.now()}.json`);
    const args = [
        'playwright', 'test',
        specPath,
        '--reporter', 'json',
        '--project', options.project || 'chrome',
    ];
    if (options.baseUrl) {
        // NOTE(review): baseUrl only selects the config file here; its value is
        // never handed to Playwright. Confirm how the config is meant to receive it.
        args.push('--config', 'playwright.config.ts');
    }
    const startTime = Date.now();
    const result = (0, child_process_1.spawnSync)('npx', args, {
        cwd: testsRoot,
        encoding: 'utf-8',
        timeout: options.timeoutMs || 120000,
        maxBuffer: 2 * 1024 * 1024,
        env: {
            ...process.env,
            PLAYWRIGHT_JSON_OUTPUT_NAME: reportPath,
        },
    });
    const durationMs = Date.now() - startTime;
    const rawStdout = result.stdout || '';
    const stdout = rawStdout.slice(0, MAX_STDOUT_CHARS);
    const stderr = (result.stderr || '').slice(0, MAX_STDOUT_CHARS);
    // Turn report JSON text into a run result; null when it is not a usable report.
    const resultFromReport = (jsonText) => {
        try {
            const jsonReport = JSON.parse(jsonText);
            if (!jsonReport || !Array.isArray(jsonReport.suites)) {
                return null;
            }
            const parsed = parsePlaywrightJsonReport(jsonReport, specPath);
            parsed.durationMs = durationMs;
            parsed.stdout = stdout;
            return parsed;
        }
        catch {
            return null;
        }
    };
    try {
        // Check for compile errors
        if (stderr.includes('SyntaxError') || stderr.includes('Cannot find module') || stderr.includes('TypeError')) {
            return {
                specPath,
                passed: 0,
                failed: 1,
                flaky: 0,
                skipped: 0,
                failures: [{
                        testTitle: '(compile)',
                        specPath,
                        error: stderr.slice(0, MAX_ERROR_CHARS),
                        stack: '',
                    }],
                stdout: `${stdout}\n${stderr}`,
                durationMs,
                compiled: false,
            };
        }
        // Parse the untruncated stdout: cutting it to MAX_STDOUT_CHARS first
        // would corrupt any report longer than that limit.
        const fromStdout = resultFromReport(rawStdout);
        if (fromStdout) {
            return fromStdout;
        }
        // Try file-based report
        if ((0, fs_1.existsSync)(reportPath)) {
            let fileText = null;
            try {
                fileText = (0, fs_1.readFileSync)(reportPath, 'utf-8');
            }
            catch {
                // Unreadable report: fall back to the exit code below.
            }
            const fromFile = fileText === null ? null : resultFromReport(fileText);
            if (fromFile) {
                return fromFile;
            }
        }
        // Last resort: use exit code. Spawn failures and timeouts leave stderr
        // empty, so surface the spawn error or the exit status instead.
        const succeeded = result.status === 0;
        const fallbackError = stderr ||
            (result.error ? result.error.message : '') ||
            `Playwright exited with status ${result.status}`;
        return {
            specPath,
            passed: succeeded ? 1 : 0,
            failed: succeeded ? 0 : 1,
            flaky: 0,
            skipped: 0,
            failures: succeeded
                ? []
                : [{ testTitle: '(unknown)', specPath, error: fallbackError.slice(0, MAX_ERROR_CHARS), stack: '' }],
            stdout,
            durationMs,
            compiled: !stderr.includes('Error'),
        };
    }
    finally {
        // Best-effort cleanup of the temporary report file.
        try {
            (0, fs_1.rmSync)(reportPath, { force: true });
        }
        catch { /* ignore */ }
    }
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { LLMProvider } from '../provider_interface.js';
|
|
2
|
+
import type { AgenticConfig, AgenticSummary } from './types.js';
|
|
3
|
+
import type { ApiSurfaceCatalog } from '../knowledge/api_surface.js';
|
|
4
|
+
/**
 * One flow to generate a spec for, together with the scenarios it must cover.
 */
export interface ScenarioInput {
    /** Identifier; also used to derive the spec file name when no target is given. */
    id: string;
    /** Flow name shown in the generation prompt. */
    name: string;
    /** Scenario descriptions to implement. */
    scenarios: Array<string>;
    /** Route family; used in the prompt and as the test tag. */
    routeFamily: string;
    /** Priority label shown in the generation prompt. */
    priority: string;
    /** Existing spec to add scenarios to. */
    targetSpec?: string;
    /** Changed files for context. */
    changedFiles?: Array<string>;
    /** Evidence from impact analysis. */
    evidence?: string;
}
|
|
17
|
+
/**
 * Arguments accepted by `runAgenticGeneration`.
 */
export interface AgenticRunOptions {
    /** Flows to process. */
    scenarios: Array<ScenarioInput>;
    /** Settings for the run. */
    config: AgenticConfig;
    /** LLM provider used for the run. */
    provider: LLMProvider;
    /** Pre-formatted API listing for prompts. */
    apiSurfaceHint?: string;
    /** API surface catalog. */
    apiSurface?: ApiSurfaceCatalog;
}
|
|
24
|
+
/**
 * Run the generation workflow for every scenario in `options.scenarios` and
 * return an aggregate summary of the per-scenario results.
 */
export declare function runAgenticGeneration(options: AgenticRunOptions): Promise<AgenticSummary>;
|
|
25
|
+
//# sourceMappingURL=runner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/agentic/runner.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAC,WAAW,EAAC,MAAM,0BAA0B,CAAC;AAC1D,OAAO,KAAK,EAAC,aAAa,EAAiB,cAAc,EAAsB,MAAM,YAAY,CAAC;AAKlG,OAAO,KAAK,EAAC,iBAAiB,EAAC,MAAM,6BAA6B,CAAC;AAEnE,MAAM,WAAW,aAAa;IAC1B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,wCAAwC;IACxC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gCAAgC;IAChC,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,oCAAoC;IACpC,QAAQ,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAC9B,SAAS,EAAE,aAAa,EAAE,CAAC;IAC3B,MAAM,EAAE,aAAa,CAAC;IACtB,QAAQ,EAAE,WAAW,CAAC;IACtB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,UAAU,CAAC,EAAE,iBAAiB,CAAC;CAClC;AAqMD,wBAAsB,oBAAoB,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,cAAc,CAAC,CAwB9F"}
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
3
|
+
// See LICENSE.txt for license information.
|
|
4
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
|
+
exports.runAgenticGeneration = runAgenticGeneration;
|
|
6
|
+
const fs_1 = require("fs");
|
|
7
|
+
const path_1 = require("path");
|
|
8
|
+
const playwright_runner_js_1 = require("./playwright_runner.js");
|
|
9
|
+
const fix_loop_js_1 = require("./fix_loop.js");
|
|
10
|
+
const generation_js_1 = require("../prompts/generation.js");
|
|
11
|
+
const api_surface_js_1 = require("../knowledge/api_surface.js");
|
|
12
|
+
/**
 * Build the LLM prompt for generating a new Playwright spec.
 *
 * The prompt contains the flow metadata, the numbered scenarios, the
 * available page-object API (or a generic selector fallback when no hint is
 * given), the mandatory rules and an example spec.
 *
 * The `Evidence:` line is included only when the scenario carries evidence.
 * Blank separator lines are kept as written: filtering the whole list for
 * truthy entries would also drop every intentional empty line, collapsing
 * the sections and the example code into one dense block.
 *
 * @param scenario - Flow description (`name`, `routeFamily`, `priority`,
 *                   `scenarios`, optional `evidence`).
 * @param apiSurfaceHint - Page-object API listing; may be empty.
 * @returns The prompt text.
 */
function buildGeneratePrompt(scenario, apiSurfaceHint) {
    const scenariosBlock = scenario.scenarios
        .map((s, i) => ` ${i + 1}. ${s}`)
        .join('\n');
    const evidenceLines = scenario.evidence ? [`Evidence: ${scenario.evidence}`] : [];
    return [
        'Generate a Mattermost Playwright E2E test file.',
        '',
        `FLOW: ${scenario.name}`,
        `Route Family: ${scenario.routeFamily}`,
        `Priority: ${scenario.priority}`,
        ...evidenceLines,
        '',
        'SCENARIOS TO IMPLEMENT:',
        scenariosBlock,
        '',
        'AVAILABLE PAGE OBJECTS AND METHODS:',
        apiSurfaceHint || 'Use page.getByRole() or page.getByTestId() for selectors.',
        '',
        'MANDATORY RULES:',
        '1. Import ONLY from "@mattermost/playwright-lib" — no other test framework imports.',
        '2. Every test must call `await pw.initSetup()` first.',
        '3. Use `await pw.testBrowser.login(user)` to log in — never hardcode credentials.',
        '4. Use ONLY page object methods listed above. Do NOT invent methods.',
        '5. If a method is not available, use `page.getByRole()` or `page.getByTestId()`.',
        `6. Tag every test: {tag: '@${scenario.routeFamily}'}`,
        '7. Write one test per scenario with a descriptive name.',
        '8. Use `expect` from "@mattermost/playwright-lib".',
        '9. Include the copyright header.',
        '10. NEVER fabricate test IDs (MM-TXXXX). Use descriptive names only.',
        '',
        'EXAMPLE STRUCTURE:',
        '```typescript',
        '// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.',
        '// See LICENSE.txt for license information.',
        '',
        "import {expect, test} from '@mattermost/playwright-lib';",
        '',
        'test(',
        " 'user can post a message in channel',",
        ` {tag: '@${scenario.routeFamily}'},`,
        ' async ({pw}) => {',
        ' const {user} = await pw.initSetup();',
        ' const {channelsPage} = await pw.testBrowser.login(user);',
        ' await channelsPage.goto();',
        ' await channelsPage.toBeVisible();',
        ' // test steps...',
        ' },',
        ');',
        '```',
        '',
        'Return ONLY the TypeScript code. No explanations.',
    ].join('\n');
}
|
|
65
|
+
/**
 * Work out where the spec for a scenario is written.
 *
 * With `scenario.targetSpec` the path is that value joined onto `testsRoot`.
 * Otherwise the file goes to `specs/functional/ai-assisted/<id>.spec.ts`
 * under `testsRoot`, where `<id>` is the scenario id lower-cased with every
 * character outside `[a-zA-Z0-9_-]` replaced by `_`.
 *
 * The traversal check compares resolved paths through `path.relative`, so it
 * does not depend on the platform's separator and also works when
 * `testsRoot` is the filesystem root.
 *
 * @param scenario - Scenario with `id` and optional `targetSpec`.
 * @param testsRoot - Tests root directory.
 * @returns The spec path (joined, not resolved to an absolute path).
 * @throws Error when the path resolves outside `testsRoot` or does not end
 *         in `.spec.ts` / `.test.ts`.
 */
function resolveSpecPath(scenario, testsRoot) {
    let specPath;
    if (scenario.targetSpec) {
        specPath = (0, path_1.join)(testsRoot, scenario.targetSpec);
    }
    else {
        const safeName = scenario.id.replace(/[^a-zA-Z0-9_-]/g, '_').toLowerCase();
        const outputDir = (0, path_1.join)(testsRoot, 'specs', 'functional', 'ai-assisted');
        specPath = (0, path_1.join)(outputDir, `${safeName}.spec.ts`);
    }
    // SECURITY: Prevent path traversal
    const resolved = (0, path_1.resolve)(specPath);
    const resolvedRoot = (0, path_1.resolve)(testsRoot);
    const relativeToRoot = (0, path_1.relative)(resolvedRoot, resolved);
    const escapesRoot = relativeToRoot === '..' ||
        relativeToRoot.startsWith('..' + path_1.sep) ||
        (0, path_1.isAbsolute)(relativeToRoot);
    if (escapesRoot) {
        throw new Error(`Path traversal blocked: ${specPath} resolves outside testsRoot`);
    }
    if (!resolved.endsWith('.spec.ts') && !resolved.endsWith('.test.ts')) {
        throw new Error(`Invalid spec path: must end in .spec.ts or .test.ts`);
    }
    return specPath;
}
|
|
86
|
+
/**
 * Ask the LLM for a first version of the spec and extract its code.
 *
 * The reply is handed to `parseGenerationResponse` from the generation
 * prompts module together with the spec path, the `'create_spec'` action and
 * the scenario id.
 *
 * @param provider - Object exposing `generateText(prompt, options)`.
 * @param scenario - Scenario to generate a spec for.
 * @param specPath - Path the spec will be written to.
 * @param apiSurfaceHint - Page-object API listing for the prompt; may be empty.
 * @returns The generated code, or `null` when parsing produced none.
 */
async function generateInitialSpec(provider, scenario, specPath, apiSurfaceHint) {
    const requestOptions = {
        maxTokens: 8000,
        temperature: 0.1,
        timeout: 60000,
        systemPrompt: 'You are an expert Playwright test writer for Mattermost. Return only TypeScript code.',
    };
    const reply = await provider.generateText(buildGeneratePrompt(scenario, apiSurfaceHint), requestOptions);
    // Reuse existing parsing logic from prompts/generation.ts
    const generated = (0, generation_js_1.parseGenerationResponse)(reply.text, specPath, 'create_spec', scenario.id);
    if (generated === null || generated === undefined) {
        return null;
    }
    return generated.code ?? null;
}
|
|
98
|
+
/**
 * Generate a spec for one scenario, then run it and repair it until it
 * passes or the attempt budget is used up.
 *
 * Steps:
 *  1. Resolve the spec path and the API hint (the explicit hint wins;
 *     otherwise it is formatted from the API surface catalog, if given).
 *  2. Generate the initial spec and write it to disk. A generation error or
 *     unusable output ends the scenario with status `failed`.
 *  3. In dry-run mode stop here with status `skipped`.
 *  4. Up to `config.maxAttempts` times: run the spec; on success return
 *     `passed`; otherwise (except after the last run) ask the LLM for a fix
 *     and overwrite the spec when the fix is usable.
 *
 * Problems along the way are collected as warnings rather than thrown.
 *
 * @param scenario - Scenario to process.
 * @param options - Run options (`config`, `provider`, optional API hint/catalog).
 * @returns The result record for this scenario.
 */
async function runSingleScenario(scenario, options) {
    const { config, provider } = options;
    const warnings = [];
    const specPath = resolveSpecPath(scenario, config.testsRoot);
    const messageOf = (error) => (error instanceof Error ? error.message : String(error));
    // Result for outcomes reached before any test run.
    const withoutRun = (status) => ({
        specPath,
        scenarioSource: scenario.id,
        status,
        attempts: 0,
        warnings,
    });
    // Result for outcomes reached after running the spec.
    const withRun = (status, attempts, finalRun) => ({
        specPath,
        scenarioSource: scenario.id,
        status,
        attempts,
        finalRun,
        warnings,
    });
    // Build API surface hint
    let apiHint = options.apiSurfaceHint || '';
    if (!apiHint && options.apiSurface) {
        const classNames = options.apiSurface.pageObjects.map(({ className }) => className);
        apiHint = (0, api_surface_js_1.formatApiSurfaceForPrompt)(options.apiSurface, classNames);
    }
    // Step 1: Generate initial spec
    let specCode;
    try {
        specCode = await generateInitialSpec(provider, scenario, specPath, apiHint);
    }
    catch (error) {
        warnings.push(`Generation failed for ${scenario.id}: ${messageOf(error)}`);
        return withoutRun('failed');
    }
    if (!specCode) {
        warnings.push(`LLM returned invalid code for ${scenario.id}`);
        return withoutRun('failed');
    }
    // Write the spec file
    const specDir = (0, path_1.dirname)(specPath);
    if (!(0, fs_1.existsSync)(specDir)) {
        (0, fs_1.mkdirSync)(specDir, { recursive: true });
    }
    (0, fs_1.writeFileSync)(specPath, specCode, 'utf-8');
    // Dry run: skip execution
    if (config.dryRun) {
        return withoutRun('skipped');
    }
    // Step 2: Run -> Fix loop
    let lastRun;
    let attempt = 0;
    while (attempt < config.maxAttempts) {
        attempt += 1;
        lastRun = (0, playwright_runner_js_1.runPlaywrightSpec)(specPath, config.testsRoot, {
            project: config.project,
            baseUrl: config.baseUrl,
            timeoutMs: config.testTimeoutMs,
        });
        // All passed!
        if (lastRun.compiled && lastRun.failed === 0) {
            return withRun('passed', attempt, lastRun);
        }
        // If this is the last attempt, don't try to fix
        if (attempt >= config.maxAttempts) {
            break;
        }
        // Step 3: Fix
        const currentCode = (0, fs_1.readFileSync)(specPath, 'utf-8');
        try {
            const { code: fixedCode } = await (0, fix_loop_js_1.generateFix)(provider, {
                specCode: currentCode,
                failures: lastRun.failures,
                attempt,
                maxAttempts: config.maxAttempts,
                apiSurfaceHint: apiHint,
            });
            if (fixedCode) {
                (0, fs_1.writeFileSync)(specPath, fixedCode, 'utf-8');
            }
            else {
                warnings.push(`Fix attempt ${attempt} returned invalid code for ${scenario.id}`);
            }
        }
        catch (error) {
            warnings.push(`Fix attempt ${attempt} failed for ${scenario.id}: ${messageOf(error)}`);
        }
    }
    const finalStatus = lastRun?.compiled === false ? 'compile-error' : 'max-attempts';
    return withRun(finalStatus, config.maxAttempts, lastRun);
}
|
|
186
|
+
/**
 * Process every scenario in `options.scenarios` one after another and
 * summarise the outcomes.
 *
 * A result counts as passed when its status is `passed`, and as failed when
 * its status is anything other than `passed` or `skipped`. Warnings from all
 * scenarios are concatenated in processing order.
 *
 * @param options - Scenarios plus the configuration and provider to use.
 * @returns Summary with the per-scenario results, totals and elapsed time.
 */
async function runAgenticGeneration(options) {
    const startedAt = Date.now();
    const results = [];
    const warnings = [];
    for (const scenario of options.scenarios) {
        const outcome = await runSingleScenario(scenario, options);
        results.push(outcome);
        warnings.push(...outcome.warnings);
    }
    let totalPassed = 0;
    let totalFailed = 0;
    let totalAttempts = 0;
    for (const { status, attempts } of results) {
        totalAttempts += attempts;
        if (status === 'passed') {
            totalPassed += 1;
        }
        else if (status !== 'skipped') {
            totalFailed += 1;
        }
    }
    return {
        results,
        totalGenerated: results.length,
        totalPassed,
        totalFailed,
        totalAttempts,
        durationMs: Date.now() - startedAt,
        warnings,
    };
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
 * A single failure extracted from a test run.
 */
export interface TestFailure {
    /** Title of the failing test. */
    testTitle: string;
    /** Spec file the failure belongs to. */
    specPath: string;
    /** Error message. */
    error: string;
    /** Stack trace, truncated. */
    stack: string;
    /** Expected value, if available. */
    expected?: string;
    /** Actual value, if available. */
    actual?: string;
    /** Line in the spec where the failure occurred. */
    line?: number;
}
|
|
13
|
+
/**
 * Structured outcome of running one spec file.
 */
export interface PlaywrightRunResult {
    /** Spec file that was run. */
    specPath: string;
    /** Count of passed tests. */
    passed: number;
    /** Count of failed tests. */
    failed: number;
    /** Count of flaky tests. */
    flaky: number;
    /** Count of skipped tests. */
    skipped: number;
    /** Details of the recorded failures. */
    failures: Array<TestFailure>;
    /** Raw stdout, truncated. */
    stdout: string;
    /** Duration in milliseconds. */
    durationMs: number;
    /** Whether the spec compiled at all. */
    compiled: boolean;
}
|
|
27
|
+
/**
 * Settings for the agentic generation workflow.
 */
export interface AgenticConfig {
    /** Maximum number of fix attempts before giving up (documented default: 3). */
    maxAttempts: number;
    /** Playwright project to run against (documented default: 'chrome'). */
    project: string;
    /** Base URL for Playwright, e.g. http://localhost:8065. */
    baseUrl?: string;
    /** Timeout for a single test run, in milliseconds (documented default: 120000). */
    testTimeoutMs: number;
    /** LLM provider override. */
    provider?: string;
    /** Whether to use Playwright MCP for browser exploration. */
    useMcp?: boolean;
    /** Dry run: generate the spec but do not run the tests. */
    dryRun?: boolean;
    /** Root directory of the tests. */
    testsRoot: string;
}
|
|
45
|
+
/**
 * Outcome of processing one scenario.
 */
export interface AgenticResult {
    /** Path of the spec for the scenario. */
    specPath: string;
    /** Identifier of the scenario the spec came from. */
    scenarioSource: string;
    /** Final state of the scenario. */
    status:
        | 'passed'
        | 'failed'
        | 'max-attempts'
        | 'compile-error'
        | 'skipped';
    /** Number of attempts recorded for the scenario. */
    attempts: number;
    /** Result of the last test run, when one took place. */
    finalRun?: PlaywrightRunResult;
    /** Non-fatal problems encountered while processing the scenario. */
    warnings: Array<string>;
}
|
|
53
|
+
/**
 * Aggregate outcome of a whole generation run.
 */
export interface AgenticSummary {
    /** Per-scenario results. */
    results: Array<AgenticResult>;
    /** Total number of results. */
    totalGenerated: number;
    /** Number of results counted as passed. */
    totalPassed: number;
    /** Number of results counted as failed. */
    totalFailed: number;
    /** Sum of the attempts recorded across all results. */
    totalAttempts: number;
    /** Duration of the run in milliseconds. */
    durationMs: number;
    /** Warnings for the run. */
    warnings: Array<string>;
}
|
|
62
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/agentic/types.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,WAAW;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,4BAA4B;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,sCAAsC;IACtC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,iDAAiD;IACjD,IAAI,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,mBAAmB;IAChC,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,6BAA6B;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,qBAAqB;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,qCAAqC;IACrC,QAAQ,EAAE,OAAO,CAAC;CACrB;AAED,MAAM,WAAW,aAAa;IAC1B,qDAAqD;IACrD,WAAW,EAAE,MAAM,CAAC;IACpB,oDAAoD;IACpD,OAAO,EAAE,MAAM,CAAC;IAChB,2DAA2D;IAC3D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,mDAAmD;IACnD,aAAa,EAAE,MAAM,CAAC;IACtB,4BAA4B;IAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,4DAA4D;IAC5D,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,6CAA6C;IAC7C,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,2BAA2B;IAC3B,SAAS,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,aAAa;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,cAAc,EAAE,MAAM,CAAC;IACvB,MAAM,EAAE,QAAQ,GAAG,QAAQ,GAAG,cAAc,GAAG,eAAe,GAAG,SAAS,CAAC;IAC3E,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,mBAAmB,CAAC;IAC/B,QAAQ,EAAE,MAAM,EAAE,CAAC;CACtB;AAED,MAAM,WAAW,cAAc;IAC3B,OAAO,EAAE,aAAa,EAAE,CAAC;IACzB,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,EAAE,CAAC;CACtB"}
|