@yasserkhanorg/e2e-agents 1.2.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/feedback.d.ts +20 -0
- package/dist/agent/feedback.d.ts.map +1 -1
- package/dist/agent/feedback.js +4 -0
- package/dist/esm/agent/feedback.js +3 -0
- package/dist/esm/index.js +1 -1
- package/dist/esm/qa-agent/cli.js +205 -0
- package/dist/esm/qa-agent/orchestrator.js +120 -0
- package/dist/esm/qa-agent/phase1/runner.js +139 -0
- package/dist/esm/qa-agent/phase1/scope.js +126 -0
- package/dist/esm/qa-agent/phase2/agent_browser.js +95 -0
- package/dist/esm/qa-agent/phase2/agent_loop.js +315 -0
- package/dist/esm/qa-agent/phase2/exploration_state.js +76 -0
- package/dist/esm/qa-agent/phase2/tools.js +288 -0
- package/dist/esm/qa-agent/phase2/vision.js +75 -0
- package/dist/esm/qa-agent/phase3/feedback.js +34 -0
- package/dist/esm/qa-agent/phase3/reporter.js +118 -0
- package/dist/esm/qa-agent/phase3/spec_generator.js +62 -0
- package/dist/esm/qa-agent/phase3/verdict.js +66 -0
- package/dist/esm/qa-agent/safe_env.js +23 -0
- package/dist/esm/qa-agent/types.js +3 -0
- package/dist/index.d.ts +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/qa-agent/cli.d.ts +3 -0
- package/dist/qa-agent/cli.d.ts.map +1 -0
- package/dist/qa-agent/cli.js +207 -0
- package/dist/qa-agent/orchestrator.d.ts +3 -0
- package/dist/qa-agent/orchestrator.d.ts.map +1 -0
- package/dist/qa-agent/orchestrator.js +123 -0
- package/dist/qa-agent/phase1/runner.d.ts +3 -0
- package/dist/qa-agent/phase1/runner.d.ts.map +1 -0
- package/dist/qa-agent/phase1/runner.js +142 -0
- package/dist/qa-agent/phase1/scope.d.ts +6 -0
- package/dist/qa-agent/phase1/scope.d.ts.map +1 -0
- package/dist/qa-agent/phase1/scope.js +129 -0
- package/dist/qa-agent/phase2/agent_browser.d.ts +35 -0
- package/dist/qa-agent/phase2/agent_browser.d.ts.map +1 -0
- package/dist/qa-agent/phase2/agent_browser.js +99 -0
- package/dist/qa-agent/phase2/agent_loop.d.ts +3 -0
- package/dist/qa-agent/phase2/agent_loop.d.ts.map +1 -0
- package/dist/qa-agent/phase2/agent_loop.js +321 -0
- package/dist/qa-agent/phase2/exploration_state.d.ts +12 -0
- package/dist/qa-agent/phase2/exploration_state.d.ts.map +1 -0
- package/dist/qa-agent/phase2/exploration_state.js +88 -0
- package/dist/qa-agent/phase2/tools.d.ts +28 -0
- package/dist/qa-agent/phase2/tools.d.ts.map +1 -0
- package/dist/qa-agent/phase2/tools.js +292 -0
- package/dist/qa-agent/phase2/vision.d.ts +3 -0
- package/dist/qa-agent/phase2/vision.d.ts.map +1 -0
- package/dist/qa-agent/phase2/vision.js +78 -0
- package/dist/qa-agent/phase3/feedback.d.ts +3 -0
- package/dist/qa-agent/phase3/feedback.d.ts.map +1 -0
- package/dist/qa-agent/phase3/feedback.js +37 -0
- package/dist/qa-agent/phase3/reporter.d.ts +3 -0
- package/dist/qa-agent/phase3/reporter.d.ts.map +1 -0
- package/dist/qa-agent/phase3/reporter.js +121 -0
- package/dist/qa-agent/phase3/spec_generator.d.ts +3 -0
- package/dist/qa-agent/phase3/spec_generator.d.ts.map +1 -0
- package/dist/qa-agent/phase3/spec_generator.js +65 -0
- package/dist/qa-agent/phase3/verdict.d.ts +3 -0
- package/dist/qa-agent/phase3/verdict.d.ts.map +1 -0
- package/dist/qa-agent/phase3/verdict.js +69 -0
- package/dist/qa-agent/safe_env.d.ts +3 -0
- package/dist/qa-agent/safe_env.d.ts.map +1 -0
- package/dist/qa-agent/safe_env.js +26 -0
- package/dist/qa-agent/types.d.ts +122 -0
- package/dist/qa-agent/types.d.ts.map +1 -0
- package/dist/qa-agent/types.js +4 -0
- package/package.json +12 -3
package/dist/agent/feedback.d.ts
CHANGED
|
@@ -46,10 +46,30 @@ export interface CalibrationSummary {
|
|
|
46
46
|
};
|
|
47
47
|
}>;
|
|
48
48
|
}
|
|
49
|
+
export interface FlakySummary {
|
|
50
|
+
schemaVersion: '1.1.0';
|
|
51
|
+
generatedAt: string;
|
|
52
|
+
tests: Array<{
|
|
53
|
+
test: string;
|
|
54
|
+
subsystem: string;
|
|
55
|
+
owners: string[];
|
|
56
|
+
flakeRate: number;
|
|
57
|
+
flakeRate7d: number;
|
|
58
|
+
flakeRate30d: number;
|
|
59
|
+
trend: 'up' | 'down' | 'stable';
|
|
60
|
+
quarantine: boolean;
|
|
61
|
+
quarantineState: 'none' | 'active' | 'retire-candidate';
|
|
62
|
+
lastFailureAt?: string;
|
|
63
|
+
samples: number;
|
|
64
|
+
samples7d: number;
|
|
65
|
+
samples30d: number;
|
|
66
|
+
}>;
|
|
67
|
+
}
|
|
49
68
|
export declare function appendFeedbackAndRecompute(appRoot: string, input: RecommendationFeedbackEntry): {
|
|
50
69
|
feedbackPath: string;
|
|
51
70
|
calibrationPath: string;
|
|
52
71
|
calibration: CalibrationSummary;
|
|
53
72
|
};
|
|
54
73
|
export declare function readCalibration(appRoot: string): CalibrationSummary | null;
|
|
74
|
+
export declare function readFlakyTests(appRoot: string): FlakySummary | null;
|
|
55
75
|
//# sourceMappingURL=feedback.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../src/agent/feedback.ts"],"names":[],"mappings":"AAOA,MAAM,WAAW,2BAA2B;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,OAAO,GAAG,UAAU,GAAG,MAAM,CAAC;IACtC,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IAC/B,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE;QACL,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;KAC7B,CAAC;IACF,QAAQ,EAAE;QACN,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,SAAS,EAAE;QACP,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,WAAW,EAAE,MAAM,CACnB,MAAM,EACN;QACI,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE;YACN,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;QACF,SAAS,EAAE;YACP,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;KACL,CACA,CAAC;CACL;
|
|
1
|
+
{"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../src/agent/feedback.ts"],"names":[],"mappings":"AAOA,MAAM,WAAW,2BAA2B;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,OAAO,GAAG,UAAU,GAAG,MAAM,CAAC;IACtC,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IAC/B,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE;QACL,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;KAC7B,CAAC;IACF,QAAQ,EAAE;QACN,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,SAAS,EAAE;QACP,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,WAAW,EAAE,MAAM,CACnB,MAAM,EACN;QACI,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE;YACN,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;QACF,SAAS,EAAE;YACP,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;KACL,CACA,CAAC;CACL;AAOD,MAAM,WAAW,YAAY;IACzB,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,KAAK,CAAC;QACT,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,EAAE,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,IAAI,GAAG,MAAM,GAAG,QAAQ,CAAC;QAChC,UAAU,EAAE,OAAO,CAAC;QACpB,eAAe,EAAE,MAAM,GAAG,QAAQ,GAAG,kBAAkB,CAAC;QACxD,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;KACtB,CAAC,CAAC;CACN;AAyQD,wBAAgB,0BAA0B,CACtC,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,2BAA2B,GACnC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,eAAe,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,kBAAkB,CAAA;CAAC,CAwBlF;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB,GAAG,IAAI,CAE1E;AAED,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,YAAY,GAAG,IAAI,CAEnE"}
|
package/dist/agent/feedback.js
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
5
|
exports.appendFeedbackAndRecompute = appendFeedbackAndRecompute;
|
|
6
6
|
exports.readCalibration = readCalibration;
|
|
7
|
+
exports.readFlakyTests = readFlakyTests;
|
|
7
8
|
const fs_1 = require("fs");
|
|
8
9
|
const path_1 = require("path");
|
|
9
10
|
const test_path_js_1 = require("./test_path.js");
|
|
@@ -255,3 +256,6 @@ function appendFeedbackAndRecompute(appRoot, input) {
|
|
|
255
256
|
function readCalibration(appRoot) {
|
|
256
257
|
return readJson((0, path_1.join)(appRoot, '.e2e-ai-agents', 'calibration.json'));
|
|
257
258
|
}
|
|
259
|
+
function readFlakyTests(appRoot) {
|
|
260
|
+
return readJson((0, path_1.join)(appRoot, '.e2e-ai-agents', 'flaky-tests.json'));
|
|
261
|
+
}
|
|
@@ -251,3 +251,6 @@ export function appendFeedbackAndRecompute(appRoot, input) {
|
|
|
251
251
|
export function readCalibration(appRoot) {
|
|
252
252
|
return readJson(join(appRoot, '.e2e-ai-agents', 'calibration.json'));
|
|
253
253
|
}
|
|
254
|
+
export function readFlakyTests(appRoot) {
|
|
255
|
+
return readJson(join(appRoot, '.e2e-ai-agents', 'flaky-tests.json'));
|
|
256
|
+
}
|
package/dist/esm/index.js
CHANGED
|
@@ -14,7 +14,7 @@ export { analyzeImpactDeterministic, recommendTestsDeterministic, handoffGenerat
|
|
|
14
14
|
export { analyzeImpact as analyzeImpactV2, getGaps, getPartialGaps } from './engine/impact_engine.js';
|
|
15
15
|
export { extractScenarios } from './engine/impact_engine.js';
|
|
16
16
|
export { buildPlanFromImpact } from './engine/plan_builder.js';
|
|
17
|
-
export { appendFeedbackAndRecompute, readCalibration } from './agent/feedback.js';
|
|
17
|
+
export { appendFeedbackAndRecompute, readCalibration, readFlakyTests } from './agent/feedback.js';
|
|
18
18
|
export { finalizeGeneratedTests } from './agent/handoff.js';
|
|
19
19
|
export { ingestTraceabilityInput } from './agent/traceability_ingest.js';
|
|
20
20
|
export { captureTraceabilityInput } from './agent/traceability_capture.js';
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
3
|
+
// See LICENSE.txt for license information.
|
|
4
|
+
import { resolve, sep } from 'path';
|
|
5
|
+
import { runQAAgent } from './orchestrator.js';
|
|
6
|
+
const MODES = new Set(['pr', 'hunt', 'fix', 'release']);
|
|
7
|
+
const KNOWN_FLAGS = new Set([
|
|
8
|
+
'--base-url', '--since', '--phase', '--time', '--budget',
|
|
9
|
+
'--headed', '--tests-root', '--project', '--output', '--help', '-h',
|
|
10
|
+
]);
|
|
11
|
+
function printUsage() {
|
|
12
|
+
console.log(`
|
|
13
|
+
Usage: e2e-qa-agent <mode> [options]
|
|
14
|
+
|
|
15
|
+
Modes:
|
|
16
|
+
pr Test changed features from a PR
|
|
17
|
+
hunt Deep-dive into a specific area
|
|
18
|
+
fix Verify healed tests and side effects
|
|
19
|
+
release Full regression + release readiness verdict
|
|
20
|
+
|
|
21
|
+
Options:
|
|
22
|
+
--base-url <url> Application URL (required)
|
|
23
|
+
--since <ref> Git ref for diff (default: origin/main)
|
|
24
|
+
--phase <1|2|3> Run only up to this phase
|
|
25
|
+
--time <minutes> Time limit (default: 15)
|
|
26
|
+
--budget <usd> LLM budget in USD (default: 2.00)
|
|
27
|
+
--headed Run browser in headed mode
|
|
28
|
+
--tests-root <path> Path to tests directory
|
|
29
|
+
--project <name> Playwright project name
|
|
30
|
+
--output <dir> Output directory (default: .e2e-ai-agents)
|
|
31
|
+
--help Show this help
|
|
32
|
+
|
|
33
|
+
Examples:
|
|
34
|
+
e2e-qa-agent pr --since origin/main --base-url http://localhost:8065
|
|
35
|
+
e2e-qa-agent hunt "channel settings" --base-url http://localhost:8065
|
|
36
|
+
e2e-qa-agent release --base-url http://localhost:8065 --time 30
|
|
37
|
+
e2e-qa-agent fix --base-url http://localhost:8065
|
|
38
|
+
`);
|
|
39
|
+
}
|
|
40
|
+
function parseCliArgs(argv) {
|
|
41
|
+
if (argv.length === 0 || argv.includes('--help') || argv.includes('-h')) {
|
|
42
|
+
printUsage();
|
|
43
|
+
return null;
|
|
44
|
+
}
|
|
45
|
+
const modeArg = argv[0];
|
|
46
|
+
if (!MODES.has(modeArg)) {
|
|
47
|
+
console.error(`Unknown mode: ${modeArg}`);
|
|
48
|
+
printUsage();
|
|
49
|
+
return null;
|
|
50
|
+
}
|
|
51
|
+
const mode = modeArg;
|
|
52
|
+
let baseUrl = '';
|
|
53
|
+
let since;
|
|
54
|
+
let huntTarget;
|
|
55
|
+
let phase;
|
|
56
|
+
let timeLimitMinutes = mode === 'release' ? 30 : 15;
|
|
57
|
+
let budgetUSD = 2.0;
|
|
58
|
+
let headed = false;
|
|
59
|
+
let testsRoot;
|
|
60
|
+
let project;
|
|
61
|
+
let outputDir;
|
|
62
|
+
// For hunt mode, the second positional arg is the target
|
|
63
|
+
let startFlags = 1;
|
|
64
|
+
if (mode === 'hunt' && argv[1] && !argv[1].startsWith('--')) {
|
|
65
|
+
huntTarget = argv[1];
|
|
66
|
+
startFlags = 2;
|
|
67
|
+
}
|
|
68
|
+
for (let i = startFlags; i < argv.length; i++) {
|
|
69
|
+
const arg = argv[i];
|
|
70
|
+
const next = argv[i + 1];
|
|
71
|
+
switch (arg) {
|
|
72
|
+
case '--base-url':
|
|
73
|
+
baseUrl = next || '';
|
|
74
|
+
i++;
|
|
75
|
+
break;
|
|
76
|
+
case '--since':
|
|
77
|
+
since = next;
|
|
78
|
+
i++;
|
|
79
|
+
break;
|
|
80
|
+
case '--phase': {
|
|
81
|
+
const parsed = parseInt(next || '0', 10);
|
|
82
|
+
if (parsed !== 1 && parsed !== 2 && parsed !== 3) {
|
|
83
|
+
console.error(`Error: --phase must be 1, 2, or 3 (got "${next}")`);
|
|
84
|
+
process.exit(1);
|
|
85
|
+
}
|
|
86
|
+
phase = parsed;
|
|
87
|
+
i++;
|
|
88
|
+
break;
|
|
89
|
+
}
|
|
90
|
+
case '--time': {
|
|
91
|
+
const parsed = parseInt(next || '15', 10);
|
|
92
|
+
if (!Number.isFinite(parsed) || parsed <= 0) {
|
|
93
|
+
console.error(`Error: --time must be a positive number (got "${next}")`);
|
|
94
|
+
process.exit(1);
|
|
95
|
+
}
|
|
96
|
+
timeLimitMinutes = parsed;
|
|
97
|
+
i++;
|
|
98
|
+
break;
|
|
99
|
+
}
|
|
100
|
+
case '--budget': {
|
|
101
|
+
const parsed = parseFloat(next || '2.0');
|
|
102
|
+
if (!Number.isFinite(parsed) || parsed <= 0) {
|
|
103
|
+
console.error(`Error: --budget must be a positive number (got "${next}")`);
|
|
104
|
+
process.exit(1);
|
|
105
|
+
}
|
|
106
|
+
budgetUSD = parsed;
|
|
107
|
+
i++;
|
|
108
|
+
break;
|
|
109
|
+
}
|
|
110
|
+
case '--headed':
|
|
111
|
+
headed = true;
|
|
112
|
+
break;
|
|
113
|
+
case '--tests-root':
|
|
114
|
+
testsRoot = next;
|
|
115
|
+
i++;
|
|
116
|
+
break;
|
|
117
|
+
case '--project':
|
|
118
|
+
project = next;
|
|
119
|
+
i++;
|
|
120
|
+
break;
|
|
121
|
+
case '--output':
|
|
122
|
+
outputDir = next;
|
|
123
|
+
i++;
|
|
124
|
+
break;
|
|
125
|
+
default:
|
|
126
|
+
if (arg.startsWith('--') && !KNOWN_FLAGS.has(arg)) {
|
|
127
|
+
console.error(`Warning: unknown flag "${arg}" (ignored)`);
|
|
128
|
+
}
|
|
129
|
+
break;
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
// Validate --since and hunt target against flag injection (must not start with -)
|
|
133
|
+
if (since && since.startsWith('-')) {
|
|
134
|
+
console.error(`Error: --since value "${since}" looks like a flag, not a git ref`);
|
|
135
|
+
process.exit(1);
|
|
136
|
+
}
|
|
137
|
+
if (huntTarget && huntTarget.startsWith('-')) {
|
|
138
|
+
console.error(`Error: hunt target "${huntTarget}" looks like a flag`);
|
|
139
|
+
process.exit(1);
|
|
140
|
+
}
|
|
141
|
+
if (!baseUrl) {
|
|
142
|
+
console.error('Error: --base-url is required');
|
|
143
|
+
process.exit(1);
|
|
144
|
+
}
|
|
145
|
+
// Validate baseUrl is a proper HTTP(S) URL
|
|
146
|
+
try {
|
|
147
|
+
const parsed = new URL(baseUrl);
|
|
148
|
+
if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
|
|
149
|
+
console.error(`Error: --base-url must use http or https (got "${parsed.protocol}")`);
|
|
150
|
+
process.exit(1);
|
|
151
|
+
}
|
|
152
|
+
// Normalize: remove trailing slash
|
|
153
|
+
baseUrl = parsed.origin + parsed.pathname.replace(/\/+$/, '');
|
|
154
|
+
}
|
|
155
|
+
catch {
|
|
156
|
+
console.error(`Error: --base-url is not a valid URL ("${baseUrl}")`);
|
|
157
|
+
process.exit(1);
|
|
158
|
+
}
|
|
159
|
+
// Validate --output stays within project directory
|
|
160
|
+
if (outputDir) {
|
|
161
|
+
const resolved = resolve(outputDir);
|
|
162
|
+
const cwd = process.cwd();
|
|
163
|
+
const normalizedCwd = cwd.endsWith(sep) ? cwd : cwd + sep;
|
|
164
|
+
if (resolved !== cwd && !resolved.startsWith(normalizedCwd)) {
|
|
165
|
+
console.error(`Error: --output "${outputDir}" resolves outside the project directory`);
|
|
166
|
+
process.exit(1);
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
return {
|
|
170
|
+
mode,
|
|
171
|
+
baseUrl,
|
|
172
|
+
since: since || 'origin/main',
|
|
173
|
+
huntTarget,
|
|
174
|
+
phase,
|
|
175
|
+
timeLimitMinutes,
|
|
176
|
+
budgetUSD,
|
|
177
|
+
headed,
|
|
178
|
+
testsRoot,
|
|
179
|
+
project,
|
|
180
|
+
outputDir,
|
|
181
|
+
};
|
|
182
|
+
}
|
|
183
|
+
async function main() {
|
|
184
|
+
const config = parseCliArgs(process.argv.slice(2));
|
|
185
|
+
if (!config) {
|
|
186
|
+
process.exit(0);
|
|
187
|
+
}
|
|
188
|
+
const report = await runQAAgent(config);
|
|
189
|
+
// Exit code based on verdict
|
|
190
|
+
switch (report.verdict.decision) {
|
|
191
|
+
case 'go':
|
|
192
|
+
process.exit(0);
|
|
193
|
+
break;
|
|
194
|
+
case 'conditional':
|
|
195
|
+
process.exit(1);
|
|
196
|
+
break;
|
|
197
|
+
case 'no-go':
|
|
198
|
+
process.exit(2);
|
|
199
|
+
break;
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
main().catch((error) => {
|
|
203
|
+
console.error(error instanceof Error ? error.message : String(error));
|
|
204
|
+
process.exit(1);
|
|
205
|
+
});
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
2
|
+
// See LICENSE.txt for license information.
|
|
3
|
+
import { execFileSync } from 'child_process';
|
|
4
|
+
import { mkdirSync } from 'fs';
|
|
5
|
+
import { logger } from '../logger.js';
|
|
6
|
+
import { runPhase1 } from './phase1/runner.js';
|
|
7
|
+
import { runAgentLoop } from './phase2/agent_loop.js';
|
|
8
|
+
import { computeVerdict } from './phase3/verdict.js';
|
|
9
|
+
import { generateReport } from './phase3/reporter.js';
|
|
10
|
+
import { generateSpecsForFindings } from './phase3/spec_generator.js';
|
|
11
|
+
import { submitFeedback } from './phase3/feedback.js';
|
|
12
|
+
function emptyPhase2Result() {
|
|
13
|
+
return { findings: [], flowsExplored: [], actionsCount: 0, tokensUsed: 0, costUSD: 0, durationMs: 0 };
|
|
14
|
+
}
|
|
15
|
+
export async function runQAAgent(inputConfig) {
|
|
16
|
+
const outputDir = inputConfig.outputDir || '.e2e-ai-agents';
|
|
17
|
+
const screenshotDir = inputConfig.screenshotDir || `${outputDir}/qa-screenshots`;
|
|
18
|
+
mkdirSync(screenshotDir, { recursive: true });
|
|
19
|
+
const config = { ...inputConfig, outputDir, screenshotDir };
|
|
20
|
+
// -----------------------------------------------------------------------
|
|
21
|
+
// Phase 1: Scripted (scope resolution + run matched specs)
|
|
22
|
+
// -----------------------------------------------------------------------
|
|
23
|
+
logger.info('=== Phase 1: Scope & Scripted Tests ===');
|
|
24
|
+
let phase1;
|
|
25
|
+
if (config.phase && config.phase > 1) {
|
|
26
|
+
// Skip Phase 1 — provide empty results
|
|
27
|
+
phase1 = { flows: [], specResults: [] };
|
|
28
|
+
}
|
|
29
|
+
else {
|
|
30
|
+
phase1 = runPhase1(config);
|
|
31
|
+
}
|
|
32
|
+
if (phase1.flows.length === 0 && phase1.specResults.length === 0 && !(config.phase && config.phase > 1)) {
|
|
33
|
+
logger.warn('Phase 1 produced no flows and no spec results — scoping may have failed. Check that route-families.json and plan.json are available.');
|
|
34
|
+
}
|
|
35
|
+
logger.info('Phase 1 complete', {
|
|
36
|
+
flows: phase1.flows.length,
|
|
37
|
+
specResults: phase1.specResults.length,
|
|
38
|
+
});
|
|
39
|
+
if (config.phase === 1) {
|
|
40
|
+
return earlyReturn(config, phase1);
|
|
41
|
+
}
|
|
42
|
+
// -----------------------------------------------------------------------
|
|
43
|
+
// Phase 2: Autonomous exploration (LLM + agent-browser)
|
|
44
|
+
// -----------------------------------------------------------------------
|
|
45
|
+
logger.info('=== Phase 2: Autonomous Exploration ===');
|
|
46
|
+
// Verify agent-browser is available before starting the exploration loop
|
|
47
|
+
if (!(config.phase && config.phase > 2)) {
|
|
48
|
+
try {
|
|
49
|
+
execFileSync('agent-browser', ['--version'], { encoding: 'utf-8', timeout: 5000 });
|
|
50
|
+
}
|
|
51
|
+
catch {
|
|
52
|
+
logger.error('agent-browser CLI not found. Install it (>= 0.18.0) or skip Phase 2 with --phase 1.');
|
|
53
|
+
return earlyReturn(config, phase1);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
let phase2;
|
|
57
|
+
if (config.phase && config.phase > 2) {
|
|
58
|
+
phase2 = emptyPhase2Result();
|
|
59
|
+
}
|
|
60
|
+
else {
|
|
61
|
+
const flows = phase1.flows.length > 0
|
|
62
|
+
? phase1.flows
|
|
63
|
+
: [{ id: 'main', name: 'Main application', priority: 'P1' }];
|
|
64
|
+
// In fix mode, limit Phase 2 to verification only
|
|
65
|
+
const phase2Config = config.mode === 'fix'
|
|
66
|
+
? { ...config, timeLimitMinutes: Math.min(config.timeLimitMinutes ?? 15, 5) }
|
|
67
|
+
: config;
|
|
68
|
+
phase2 = await runAgentLoop(phase2Config, flows);
|
|
69
|
+
}
|
|
70
|
+
logger.info('Phase 2 complete', {
|
|
71
|
+
findings: phase2.findings.length,
|
|
72
|
+
flowsExplored: phase2.flowsExplored.length,
|
|
73
|
+
cost: `$${phase2.costUSD.toFixed(4)}`,
|
|
74
|
+
});
|
|
75
|
+
if (config.phase === 2) {
|
|
76
|
+
return earlyReturn(config, phase1, phase2);
|
|
77
|
+
}
|
|
78
|
+
// -----------------------------------------------------------------------
|
|
79
|
+
// Phase 3: Report + Spec Generation + Verdict
|
|
80
|
+
// -----------------------------------------------------------------------
|
|
81
|
+
logger.info('=== Phase 3: Report & Verdict ===');
|
|
82
|
+
// Generate specs for discovered bugs
|
|
83
|
+
const generatedSpecs = generateSpecsForFindings(phase2.findings, config);
|
|
84
|
+
// Compute verdict
|
|
85
|
+
const verdict = computeVerdict(phase1, phase2);
|
|
86
|
+
// Generate report
|
|
87
|
+
const phase3 = generateReport(config, phase1, phase2, verdict, generatedSpecs);
|
|
88
|
+
// Submit feedback
|
|
89
|
+
try {
|
|
90
|
+
submitFeedback(config);
|
|
91
|
+
}
|
|
92
|
+
catch (err) {
|
|
93
|
+
logger.warn('Feedback submission failed', { error: String(err) });
|
|
94
|
+
}
|
|
95
|
+
logger.info(`=== QA Agent Complete: ${verdict.decision.toUpperCase()} ===`);
|
|
96
|
+
logger.info(verdict.reason);
|
|
97
|
+
return buildQAReport(config, phase1, phase2, phase3, verdict);
|
|
98
|
+
}
|
|
99
|
+
function earlyReturn(config, phase1, phase2) {
|
|
100
|
+
const p2 = phase2 || emptyPhase2Result();
|
|
101
|
+
const verdict = computeVerdict(phase1, p2);
|
|
102
|
+
const phase3 = generateReport(config, phase1, p2, verdict, []);
|
|
103
|
+
return buildQAReport(config, phase1, p2, phase3, verdict);
|
|
104
|
+
}
|
|
105
|
+
function buildQAReport(config, phase1, phase2, phase3, verdict) {
|
|
106
|
+
return {
|
|
107
|
+
schemaVersion: '1.0.0',
|
|
108
|
+
generatedAt: new Date().toISOString(),
|
|
109
|
+
mode: config.mode,
|
|
110
|
+
config: {
|
|
111
|
+
baseUrl: config.baseUrl,
|
|
112
|
+
timeLimitMinutes: config.timeLimitMinutes,
|
|
113
|
+
budgetUSD: config.budgetUSD,
|
|
114
|
+
},
|
|
115
|
+
phase1,
|
|
116
|
+
phase2,
|
|
117
|
+
phase3,
|
|
118
|
+
verdict,
|
|
119
|
+
};
|
|
120
|
+
}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
2
|
+
// See LICENSE.txt for license information.
|
|
3
|
+
import { spawnSync } from 'child_process';
|
|
4
|
+
import { existsSync, readdirSync } from 'fs';
|
|
5
|
+
import { join } from 'path';
|
|
6
|
+
import { logger } from '../../logger.js';
|
|
7
|
+
import { safeEnv } from '../safe_env.js';
|
|
8
|
+
import { resolveScope } from './scope.js';
|
|
9
|
+
export function runPhase1(config) {
|
|
10
|
+
const { flows, specPaths } = resolveScope(config);
|
|
11
|
+
logger.info('Phase 1: Scope resolved', {
|
|
12
|
+
flows: flows.length,
|
|
13
|
+
specDirs: specPaths.length,
|
|
14
|
+
mode: config.mode,
|
|
15
|
+
});
|
|
16
|
+
// Run e2e-agents CLI for impact/plan if we have a since ref
|
|
17
|
+
if (config.since && config.mode !== 'release') {
|
|
18
|
+
runE2eAgentsCli(config);
|
|
19
|
+
}
|
|
20
|
+
// Run matched Playwright specs
|
|
21
|
+
const specResults = runMatchedSpecs(specPaths, config);
|
|
22
|
+
return {
|
|
23
|
+
flows,
|
|
24
|
+
specResults,
|
|
25
|
+
planPath: config.testsRoot
|
|
26
|
+
? join(config.testsRoot, '.e2e-ai-agents', 'plan.json')
|
|
27
|
+
: undefined,
|
|
28
|
+
};
|
|
29
|
+
}
|
|
30
|
+
function runE2eAgentsCli(config) {
|
|
31
|
+
const args = ['e2e-ai-agents'];
|
|
32
|
+
switch (config.mode) {
|
|
33
|
+
case 'pr':
|
|
34
|
+
args.push('plan');
|
|
35
|
+
if (config.since)
|
|
36
|
+
args.push('--since', config.since);
|
|
37
|
+
break;
|
|
38
|
+
case 'hunt':
|
|
39
|
+
args.push('impact');
|
|
40
|
+
if (config.huntTarget)
|
|
41
|
+
args.push('--flow-patterns', config.huntTarget);
|
|
42
|
+
if (config.since)
|
|
43
|
+
args.push('--since', config.since);
|
|
44
|
+
break;
|
|
45
|
+
case 'fix':
|
|
46
|
+
args.push('heal');
|
|
47
|
+
break;
|
|
48
|
+
default:
|
|
49
|
+
return;
|
|
50
|
+
}
|
|
51
|
+
if (config.testsRoot) {
|
|
52
|
+
args.push('--tests-root', config.testsRoot);
|
|
53
|
+
}
|
|
54
|
+
logger.info('Running e2e-ai-agents', { args: args.slice(1) });
|
|
55
|
+
const result = spawnSync('npx', args, {
|
|
56
|
+
cwd: config.testsRoot || process.cwd(),
|
|
57
|
+
encoding: 'utf-8',
|
|
58
|
+
timeout: 120000,
|
|
59
|
+
maxBuffer: 2 * 1024 * 1024,
|
|
60
|
+
env: safeEnv(),
|
|
61
|
+
});
|
|
62
|
+
// Exit code 2 = "no changes detected" from e2e-agents CLI, not an error
|
|
63
|
+
if (result.status !== 0 && result.status !== 2) {
|
|
64
|
+
logger.warn('e2e-agents exited with non-zero status', {
|
|
65
|
+
status: result.status,
|
|
66
|
+
stderr: (result.stderr || '').slice(0, 500),
|
|
67
|
+
});
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
function runMatchedSpecs(specPaths, config) {
|
|
71
|
+
const results = [];
|
|
72
|
+
const specFiles = collectSpecFiles(specPaths);
|
|
73
|
+
if (specFiles.length === 0) {
|
|
74
|
+
logger.info('No spec files found to run');
|
|
75
|
+
return results;
|
|
76
|
+
}
|
|
77
|
+
logger.info('Running matched specs', { count: specFiles.length });
|
|
78
|
+
for (const specFile of specFiles) {
|
|
79
|
+
const result = runSingleSpec(specFile, config);
|
|
80
|
+
results.push(result);
|
|
81
|
+
}
|
|
82
|
+
return results;
|
|
83
|
+
}
|
|
84
|
+
function collectSpecFiles(specPaths) {
|
|
85
|
+
const files = [];
|
|
86
|
+
for (const p of specPaths) {
|
|
87
|
+
if (!existsSync(p))
|
|
88
|
+
continue;
|
|
89
|
+
try {
|
|
90
|
+
const entries = readdirSync(p, { recursive: true, encoding: 'utf-8' });
|
|
91
|
+
for (const entry of entries) {
|
|
92
|
+
if (typeof entry === 'string' && (entry.endsWith('.spec.ts') || entry.endsWith('.test.ts'))) {
|
|
93
|
+
files.push(join(p, entry));
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
catch {
|
|
98
|
+
// Skip unreadable directories
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
return files;
|
|
102
|
+
}
|
|
103
|
+
function runSingleSpec(specPath, config) {
|
|
104
|
+
const args = [
|
|
105
|
+
'playwright', 'test',
|
|
106
|
+
specPath,
|
|
107
|
+
'--reporter', 'json',
|
|
108
|
+
];
|
|
109
|
+
if (config.project) {
|
|
110
|
+
args.push('--project', config.project);
|
|
111
|
+
}
|
|
112
|
+
const result = spawnSync('npx', args, {
|
|
113
|
+
cwd: config.testsRoot || process.cwd(),
|
|
114
|
+
encoding: 'utf-8',
|
|
115
|
+
timeout: 120000,
|
|
116
|
+
maxBuffer: 2 * 1024 * 1024,
|
|
117
|
+
env: safeEnv(config.baseUrl ? { BASE_URL: config.baseUrl } : {}),
|
|
118
|
+
});
|
|
119
|
+
// Try to parse JSON output
|
|
120
|
+
try {
|
|
121
|
+
const report = JSON.parse(result.stdout || '{}');
|
|
122
|
+
return {
|
|
123
|
+
specPath,
|
|
124
|
+
passed: report.stats?.expected || 0,
|
|
125
|
+
failed: report.stats?.unexpected || 0,
|
|
126
|
+
flaky: report.stats?.flaky || 0,
|
|
127
|
+
skipped: report.stats?.skipped || 0,
|
|
128
|
+
};
|
|
129
|
+
}
|
|
130
|
+
catch {
|
|
131
|
+
return {
|
|
132
|
+
specPath,
|
|
133
|
+
passed: result.status === 0 ? 1 : 0,
|
|
134
|
+
failed: result.status === 0 ? 0 : 1,
|
|
135
|
+
flaky: 0,
|
|
136
|
+
skipped: 0,
|
|
137
|
+
};
|
|
138
|
+
}
|
|
139
|
+
}
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
2
|
+
// See LICENSE.txt for license information.
|
|
3
|
+
import { existsSync, readFileSync } from 'fs';
|
|
4
|
+
import { join } from 'path';
|
|
5
|
+
import { loadRouteFamilyManifest } from '../../knowledge/route_families.js';
|
|
6
|
+
export function resolveScope(config) {
|
|
7
|
+
const testsRoot = config.testsRoot || process.cwd();
|
|
8
|
+
const planPath = join(testsRoot, '.e2e-ai-agents', 'plan.json');
|
|
9
|
+
// Try to read plan.json (written by e2e-agents plan command)
|
|
10
|
+
const plan = readPlan(planPath);
|
|
11
|
+
const manifest = loadRouteFamilyManifest(testsRoot, {});
|
|
12
|
+
const flows = [];
|
|
13
|
+
const specPaths = [];
|
|
14
|
+
if (config.mode === 'hunt' && config.huntTarget) {
|
|
15
|
+
return resolveHuntScope(config.huntTarget, manifest, testsRoot);
|
|
16
|
+
}
|
|
17
|
+
if (config.mode === 'release') {
|
|
18
|
+
return resolveReleaseScope(manifest, testsRoot);
|
|
19
|
+
}
|
|
20
|
+
// PR / fix mode: use plan.json flows
|
|
21
|
+
if (plan) {
|
|
22
|
+
const allFlows = [
|
|
23
|
+
...(plan.flows || []),
|
|
24
|
+
...(plan.gaps || []).map((g) => ({ id: g.flowId, name: g.flowName, priority: g.priority })),
|
|
25
|
+
];
|
|
26
|
+
for (const f of allFlows) {
|
|
27
|
+
const family = manifest?.families.find((fam) => fam.id === f.id);
|
|
28
|
+
const url = resolveUrlForFamily(family);
|
|
29
|
+
flows.push({
|
|
30
|
+
id: f.id,
|
|
31
|
+
name: f.name,
|
|
32
|
+
priority: f.priority || 'P1',
|
|
33
|
+
url,
|
|
34
|
+
});
|
|
35
|
+
}
|
|
36
|
+
// Collect spec paths from covered flows
|
|
37
|
+
for (const c of plan.coveredFlows || []) {
|
|
38
|
+
if (c.specDirs) {
|
|
39
|
+
for (const dir of c.specDirs) {
|
|
40
|
+
const fullDir = join(testsRoot, dir);
|
|
41
|
+
if (existsSync(fullDir)) {
|
|
42
|
+
specPaths.push(fullDir);
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
// Sort by priority: P0 first
|
|
49
|
+
flows.sort((a, b) => a.priority.localeCompare(b.priority));
|
|
50
|
+
return { flows, specPaths };
|
|
51
|
+
}
|
|
52
|
+
function resolveHuntScope(target, manifest, testsRoot) {
|
|
53
|
+
const flows = [];
|
|
54
|
+
const specPaths = [];
|
|
55
|
+
const targetLower = target.toLowerCase();
|
|
56
|
+
if (manifest) {
|
|
57
|
+
for (const family of manifest.families) {
|
|
58
|
+
const matches = family.id.toLowerCase().includes(targetLower) ||
|
|
59
|
+
(family.userFlows || []).some((uf) => uf.toLowerCase().includes(targetLower));
|
|
60
|
+
if (matches) {
|
|
61
|
+
flows.push({
|
|
62
|
+
id: family.id,
|
|
63
|
+
name: family.id,
|
|
64
|
+
priority: family.priority || 'P1',
|
|
65
|
+
url: resolveUrlForFamily(family),
|
|
66
|
+
});
|
|
67
|
+
for (const dir of family.specDirs || []) {
|
|
68
|
+
const fullDir = join(testsRoot, dir);
|
|
69
|
+
if (existsSync(fullDir)) {
|
|
70
|
+
specPaths.push(fullDir);
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
// If no manifest matches, create a generic flow
|
|
77
|
+
if (flows.length === 0) {
|
|
78
|
+
flows.push({ id: target, name: target, priority: 'P1' });
|
|
79
|
+
}
|
|
80
|
+
return { flows, specPaths };
|
|
81
|
+
}
|
|
82
|
+
function resolveReleaseScope(manifest, testsRoot) {
|
|
83
|
+
const flows = [];
|
|
84
|
+
const specPaths = [];
|
|
85
|
+
if (manifest) {
|
|
86
|
+
for (const family of manifest.families) {
|
|
87
|
+
if (family.priority === 'P0' || family.priority === 'P1') {
|
|
88
|
+
flows.push({
|
|
89
|
+
id: family.id,
|
|
90
|
+
name: family.id,
|
|
91
|
+
priority: family.priority,
|
|
92
|
+
url: resolveUrlForFamily(family),
|
|
93
|
+
});
|
|
94
|
+
for (const dir of family.specDirs || []) {
|
|
95
|
+
const fullDir = join(testsRoot, dir);
|
|
96
|
+
if (existsSync(fullDir)) {
|
|
97
|
+
specPaths.push(fullDir);
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
flows.sort((a, b) => a.priority.localeCompare(b.priority));
|
|
104
|
+
return { flows, specPaths };
|
|
105
|
+
}
|
|
106
|
+
function resolveUrlForFamily(family) {
|
|
107
|
+
if (!family || !family.routes || family.routes.length === 0)
|
|
108
|
+
return undefined;
|
|
109
|
+
// Take the first route pattern and substitute common placeholders
|
|
110
|
+
const route = family.routes[0];
|
|
111
|
+
return route
|
|
112
|
+
.replace(/\{team\}/g, 'default')
|
|
113
|
+
.replace(/\{channel\}/g, 'town-square')
|
|
114
|
+
.replace(/\{user_id\}/g, 'me')
|
|
115
|
+
.replace(/\{[^}]+\}/g, 'test');
|
|
116
|
+
}
|
|
117
|
+
function readPlan(path) {
|
|
118
|
+
if (!existsSync(path))
|
|
119
|
+
return null;
|
|
120
|
+
try {
|
|
121
|
+
return JSON.parse(readFileSync(path, 'utf-8'));
|
|
122
|
+
}
|
|
123
|
+
catch {
|
|
124
|
+
return null;
|
|
125
|
+
}
|
|
126
|
+
}
|