@yasserkhanorg/e2e-agents 1.2.2 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -4
- package/dist/agent/feedback.d.ts +20 -0
- package/dist/agent/feedback.d.ts.map +1 -1
- package/dist/agent/feedback.js +4 -0
- package/dist/esm/agent/feedback.js +3 -0
- package/dist/esm/index.js +1 -1
- package/dist/esm/qa-agent/cli.js +205 -0
- package/dist/esm/qa-agent/orchestrator.js +120 -0
- package/dist/esm/qa-agent/phase1/runner.js +139 -0
- package/dist/esm/qa-agent/phase1/scope.js +126 -0
- package/dist/esm/qa-agent/phase2/agent_browser.js +95 -0
- package/dist/esm/qa-agent/phase2/agent_loop.js +315 -0
- package/dist/esm/qa-agent/phase2/exploration_state.js +76 -0
- package/dist/esm/qa-agent/phase2/tools.js +288 -0
- package/dist/esm/qa-agent/phase2/vision.js +75 -0
- package/dist/esm/qa-agent/phase3/feedback.js +34 -0
- package/dist/esm/qa-agent/phase3/reporter.js +118 -0
- package/dist/esm/qa-agent/phase3/spec_generator.js +62 -0
- package/dist/esm/qa-agent/phase3/verdict.js +66 -0
- package/dist/esm/qa-agent/safe_env.js +23 -0
- package/dist/esm/qa-agent/types.js +3 -0
- package/dist/index.d.ts +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/qa-agent/cli.d.ts +3 -0
- package/dist/qa-agent/cli.d.ts.map +1 -0
- package/dist/qa-agent/cli.js +207 -0
- package/dist/qa-agent/orchestrator.d.ts +3 -0
- package/dist/qa-agent/orchestrator.d.ts.map +1 -0
- package/dist/qa-agent/orchestrator.js +123 -0
- package/dist/qa-agent/phase1/runner.d.ts +3 -0
- package/dist/qa-agent/phase1/runner.d.ts.map +1 -0
- package/dist/qa-agent/phase1/runner.js +142 -0
- package/dist/qa-agent/phase1/scope.d.ts +6 -0
- package/dist/qa-agent/phase1/scope.d.ts.map +1 -0
- package/dist/qa-agent/phase1/scope.js +129 -0
- package/dist/qa-agent/phase2/agent_browser.d.ts +35 -0
- package/dist/qa-agent/phase2/agent_browser.d.ts.map +1 -0
- package/dist/qa-agent/phase2/agent_browser.js +99 -0
- package/dist/qa-agent/phase2/agent_loop.d.ts +3 -0
- package/dist/qa-agent/phase2/agent_loop.d.ts.map +1 -0
- package/dist/qa-agent/phase2/agent_loop.js +321 -0
- package/dist/qa-agent/phase2/exploration_state.d.ts +12 -0
- package/dist/qa-agent/phase2/exploration_state.d.ts.map +1 -0
- package/dist/qa-agent/phase2/exploration_state.js +88 -0
- package/dist/qa-agent/phase2/tools.d.ts +28 -0
- package/dist/qa-agent/phase2/tools.d.ts.map +1 -0
- package/dist/qa-agent/phase2/tools.js +292 -0
- package/dist/qa-agent/phase2/vision.d.ts +3 -0
- package/dist/qa-agent/phase2/vision.d.ts.map +1 -0
- package/dist/qa-agent/phase2/vision.js +78 -0
- package/dist/qa-agent/phase3/feedback.d.ts +3 -0
- package/dist/qa-agent/phase3/feedback.d.ts.map +1 -0
- package/dist/qa-agent/phase3/feedback.js +37 -0
- package/dist/qa-agent/phase3/reporter.d.ts +3 -0
- package/dist/qa-agent/phase3/reporter.d.ts.map +1 -0
- package/dist/qa-agent/phase3/reporter.js +121 -0
- package/dist/qa-agent/phase3/spec_generator.d.ts +3 -0
- package/dist/qa-agent/phase3/spec_generator.d.ts.map +1 -0
- package/dist/qa-agent/phase3/spec_generator.js +65 -0
- package/dist/qa-agent/phase3/verdict.d.ts +3 -0
- package/dist/qa-agent/phase3/verdict.d.ts.map +1 -0
- package/dist/qa-agent/phase3/verdict.js +69 -0
- package/dist/qa-agent/safe_env.d.ts +3 -0
- package/dist/qa-agent/safe_env.d.ts.map +1 -0
- package/dist/qa-agent/safe_env.js +26 -0
- package/dist/qa-agent/types.d.ts +122 -0
- package/dist/qa-agent/types.d.ts.map +1 -0
- package/dist/qa-agent/types.js +4 -0
- package/package.json +12 -3
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# @yasserkhanorg/e2e-agents
|
|
2
2
|
|
|
3
|
-
AI-powered E2E test impact analysis, generation, and healing for frontend repositories.
|
|
3
|
+
AI-powered E2E test impact analysis, generation, healing, and autonomous QA for frontend repositories.
|
|
4
4
|
|
|
5
5
|
[npm package](https://www.npmjs.com/package/@yasserkhanorg/e2e-agents)
|
|
6
6
|
[License](LICENSE)
|
|
@@ -8,7 +8,7 @@ AI-powered E2E test impact analysis, generation, and healing for frontend reposi
|
|
|
8
8
|
|
|
9
9
|
## What It Does
|
|
10
10
|
|
|
11
|
-
Given a git diff, `e2e-ai-agents` determines which E2E test flows are impacted, identifies coverage gaps, and can generate or heal Playwright tests — all from the CLI.
|
|
11
|
+
Given a git diff, `e2e-ai-agents` determines which E2E test flows are impacted, identifies coverage gaps, and can generate or heal Playwright tests — all from the CLI. The companion `e2e-qa-agent` goes further: it opens a real browser, explores your app autonomously, and produces a QA report with findings and a release-readiness verdict.
|
|
12
12
|
|
|
13
13
|
**Pipeline:** `impact` → `plan` → `generate` → `heal` → `finalize`
|
|
14
14
|
|
|
@@ -23,6 +23,9 @@ Requires Node.js >= 20. Ships both CommonJS and ESM builds.
|
|
|
23
23
|
## CLI Commands
|
|
24
24
|
|
|
25
25
|
```bash
|
|
26
|
+
# All-in-one: impact + plan + optional generate/heal
|
|
27
|
+
npx e2e-ai-agents analyze --path /path/to/webapp [--generate] [--heal]
|
|
28
|
+
|
|
26
29
|
# Analyze which flows are impacted by code changes
|
|
27
30
|
npx e2e-ai-agents impact --path /path/to/webapp
|
|
28
31
|
|
|
@@ -49,7 +52,7 @@ npx e2e-ai-agents feedback --path /path/to/webapp --feedback-input ./feedback.js
|
|
|
49
52
|
npx e2e-ai-agents llm-health
|
|
50
53
|
```
|
|
51
54
|
|
|
52
|
-
`plan` and `suggest` are aliases. Use `--help` for all available flags.
|
|
55
|
+
`plan` and `suggest` are aliases. `analyze` is a convenience wrapper that runs impact + plan and optionally generation/healing in one invocation. Use `--help` for all available flags.
|
|
53
56
|
|
|
54
57
|
## Configuration
|
|
55
58
|
|
|
@@ -114,7 +117,7 @@ The `plan` command writes:
|
|
|
114
117
|
|
|
115
118
|
Use `--fail-on-must-add-tests` to exit non-zero when uncovered P0/P1 gaps exist. Use `--github-output` to expose outputs to subsequent workflow steps.
|
|
116
119
|
|
|
117
|
-
See [examples/github-actions/](examples/github-actions/) for a complete workflow template.
|
|
120
|
+
See [examples/github-actions/pr-impact.yml](examples/github-actions/pr-impact.yml) for a complete workflow template.
|
|
118
121
|
|
|
119
122
|
## Pipeline Modes
|
|
120
123
|
|
|
@@ -206,6 +209,46 @@ Schemas: [schemas/traceability-input.schema.json](schemas/traceability-input.sch
|
|
|
206
209
|
|
|
207
210
|
All written under `<testsRoot>/.e2e-ai-agents/`.
|
|
208
211
|
|
|
212
|
+
## Autonomous QA Agent (`e2e-qa-agent`)
|
|
213
|
+
|
|
214
|
+
An autonomous QA engineer that opens a real browser, navigates to changed features, tries edge cases, and produces a findings report — all unsupervised. Built on top of `agent-browser` and the Anthropic tool-use API.
|
|
215
|
+
|
|
216
|
+
### Quick Start
|
|
217
|
+
|
|
218
|
+
```bash
|
|
219
|
+
# PR mode — test features changed since origin/main
|
|
220
|
+
npx e2e-qa-agent pr --since origin/main --base-url http://localhost:8065
|
|
221
|
+
|
|
222
|
+
# Hunt mode — deep-test a specific area
|
|
223
|
+
npx e2e-qa-agent hunt "channel settings" --base-url http://localhost:8065
|
|
224
|
+
|
|
225
|
+
# Release mode — systematic exploration of all critical flows
|
|
226
|
+
npx e2e-qa-agent release --base-url http://localhost:8065 --time 30
|
|
227
|
+
|
|
228
|
+
# Fix mode — verify healed specs
|
|
229
|
+
npx e2e-qa-agent fix --base-url http://localhost:8065
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
### Architecture
|
|
233
|
+
|
|
234
|
+
1. **Phase 1 (Script)** — Runs `e2e-ai-agents impact/plan` to determine scope, then executes matched Playwright specs.
|
|
235
|
+
2. **Phase 2 (Explore)** — LLM-driven browser loop: observe (accessibility snapshot) → think → act (click/fill/navigate) → record findings. Includes stuck detection, multi-user testing, console error capture, and vision-based analysis.
|
|
236
|
+
3. **Phase 3 (Report)** — Generates a structured report with findings, per-flow sign-off, and a release-readiness verdict (go/no-go/conditional).
|
|
237
|
+
|
|
238
|
+
### Options
|
|
239
|
+
|
|
240
|
+
| Flag | Default | Description |
|
|
241
|
+
|------|---------|-------------|
|
|
242
|
+
| `--base-url` | — (required) | Application URL, e.g. `http://localhost:8065` |
|
|
243
|
+
| `--time` | `15` (`30` in `release` mode) | Time limit in minutes |
|
|
244
|
+
| `--budget` | `2.00` | Max LLM spend in USD |
|
|
245
|
+
| `--phase` | `all` | Run only `1`, `2`, or `3` |
|
|
246
|
+
| `--headed` | off | Keep browser visible |
|
|
247
|
+
| `--since` | `origin/main` | Git ref for diff-based scoping |
|
|
248
|
+
| `--tests-root` | — | Path to Playwright tests directory |
|
|
249
|
+
|
|
250
|
+
Requires `agent-browser` CLI (`npm install -g agent-browser`) and `ANTHROPIC_API_KEY`.
|
|
251
|
+
|
|
209
252
|
## Production Usage
|
|
210
253
|
|
|
211
254
|
Used by [Mattermost](https://github.com/mattermost/mattermost) for CI-integrated E2E coverage gating, test generation, and spec healing. See the [Mattermost Playwright integration](https://github.com/mattermost/mattermost/tree/master/e2e-tests/playwright) for a real-world example.
|
package/dist/agent/feedback.d.ts
CHANGED
|
@@ -46,10 +46,30 @@ export interface CalibrationSummary {
|
|
|
46
46
|
};
|
|
47
47
|
}>;
|
|
48
48
|
}
|
|
49
|
+
/**
 * Shape of the flaky-test summary returned by `readFlakyTests`.
 *
 * NOTE(review): the field notes below are inferred from the field names only;
 * confirm against the code that writes `flaky-tests.json`.
 */
export interface FlakySummary {
    /** Literal schema version tag of the summary document. */
    schemaVersion: '1.1.0';
    /** Generation time as a string (presumably an ISO-8601 timestamp; confirm). */
    generatedAt: string;
    /** One entry per tracked test. */
    tests: Array<{
        test: string;
        subsystem: string;
        owners: string[];
        /** Presumably the overall flake rate; unit/range not visible here. */
        flakeRate: number;
        /** Presumably the flake rate over the last 7 days; confirm. */
        flakeRate7d: number;
        /** Presumably the flake rate over the last 30 days; confirm. */
        flakeRate30d: number;
        trend: 'up' | 'down' | 'stable';
        quarantine: boolean;
        quarantineState: 'none' | 'active' | 'retire-candidate';
        /** Optional; absent when no failure has been recorded (assumption; confirm). */
        lastFailureAt?: string;
        samples: number;
        samples7d: number;
        samples30d: number;
    }>;
}
|
|
49
68
|
export declare function appendFeedbackAndRecompute(appRoot: string, input: RecommendationFeedbackEntry): {
|
|
50
69
|
feedbackPath: string;
|
|
51
70
|
calibrationPath: string;
|
|
52
71
|
calibration: CalibrationSummary;
|
|
53
72
|
};
|
|
54
73
|
export declare function readCalibration(appRoot: string): CalibrationSummary | null;
|
|
74
|
+
export declare function readFlakyTests(appRoot: string): FlakySummary | null;
|
|
55
75
|
//# sourceMappingURL=feedback.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../src/agent/feedback.ts"],"names":[],"mappings":"AAOA,MAAM,WAAW,2BAA2B;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,OAAO,GAAG,UAAU,GAAG,MAAM,CAAC;IACtC,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IAC/B,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE;QACL,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;KAC7B,CAAC;IACF,QAAQ,EAAE;QACN,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,SAAS,EAAE;QACP,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,WAAW,EAAE,MAAM,CACnB,MAAM,EACN;QACI,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE;YACN,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;QACF,SAAS,EAAE;YACP,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;KACL,CACA,CAAC;CACL;
|
|
1
|
+
{"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../src/agent/feedback.ts"],"names":[],"mappings":"AAOA,MAAM,WAAW,2BAA2B;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,OAAO,GAAG,UAAU,GAAG,MAAM,CAAC;IACtC,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IAC/B,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE;QACL,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;KAC7B,CAAC;IACF,QAAQ,EAAE;QACN,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,SAAS,EAAE;QACP,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,WAAW,EAAE,MAAM,CACnB,MAAM,EACN;QACI,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE;YACN,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;QACF,SAAS,EAAE;YACP,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;KACL,CACA,CAAC;CACL;AAOD,MAAM,WAAW,YAAY;IACzB,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,KAAK,CAAC;QACT,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,EAAE,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,IAAI,GAAG,MAAM,GAAG,QAAQ,CAAC;QAChC,UAAU,EAAE,OAAO,CAAC;QACpB,eAAe,EAAE,MAAM,GAAG,QAAQ,GAAG,kBAAkB,CAAC;QACxD,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;KACtB,CAAC,CAAC;CACN;AAyQD,wBAAgB,0BAA0B,CACtC,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,2BAA2B,GACnC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,eAAe,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,kBAAkB,CAAA;CAAC,CAwBlF;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB,GAAG,IAAI,CAE1E;AAED,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM
,GAAG,YAAY,GAAG,IAAI,CAEnE"}
|
package/dist/agent/feedback.js
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
5
|
exports.appendFeedbackAndRecompute = appendFeedbackAndRecompute;
|
|
6
6
|
exports.readCalibration = readCalibration;
|
|
7
|
+
exports.readFlakyTests = readFlakyTests;
|
|
7
8
|
const fs_1 = require("fs");
|
|
8
9
|
const path_1 = require("path");
|
|
9
10
|
const test_path_js_1 = require("./test_path.js");
|
|
@@ -255,3 +256,6 @@ function appendFeedbackAndRecompute(appRoot, input) {
|
|
|
255
256
|
/**
 * Load the calibration summary stored under `<appRoot>/.e2e-ai-agents/calibration.json`.
 *
 * @param {string} appRoot - Directory that contains the `.e2e-ai-agents` folder.
 * @returns Whatever `readJson` yields for that path (declared as `CalibrationSummary | null`).
 */
function readCalibration(appRoot) {
    const calibrationPath = (0, path_1.join)(appRoot, '.e2e-ai-agents', 'calibration.json');
    return readJson(calibrationPath);
}
|
|
259
|
+
/**
 * Load the flaky-test summary stored under `<appRoot>/.e2e-ai-agents/flaky-tests.json`.
 *
 * @param {string} appRoot - Directory that contains the `.e2e-ai-agents` folder.
 * @returns Whatever `readJson` yields for that path (declared as `FlakySummary | null`).
 */
function readFlakyTests(appRoot) {
    const flakyTestsPath = (0, path_1.join)(appRoot, '.e2e-ai-agents', 'flaky-tests.json');
    return readJson(flakyTestsPath);
}
|
|
@@ -251,3 +251,6 @@ export function appendFeedbackAndRecompute(appRoot, input) {
|
|
|
251
251
|
/**
 * Load the calibration summary stored under `<appRoot>/.e2e-ai-agents/calibration.json`.
 *
 * @param {string} appRoot - Directory that contains the `.e2e-ai-agents` folder.
 * @returns Whatever `readJson` yields for that path (declared as `CalibrationSummary | null`).
 */
export function readCalibration(appRoot) {
    const calibrationPath = join(appRoot, '.e2e-ai-agents', 'calibration.json');
    return readJson(calibrationPath);
}
|
|
254
|
+
/**
 * Load the flaky-test summary stored under `<appRoot>/.e2e-ai-agents/flaky-tests.json`.
 *
 * @param {string} appRoot - Directory that contains the `.e2e-ai-agents` folder.
 * @returns Whatever `readJson` yields for that path (declared as `FlakySummary | null`).
 */
export function readFlakyTests(appRoot) {
    const flakyTestsPath = join(appRoot, '.e2e-ai-agents', 'flaky-tests.json');
    return readJson(flakyTestsPath);
}
|
package/dist/esm/index.js
CHANGED
|
@@ -14,7 +14,7 @@ export { analyzeImpactDeterministic, recommendTestsDeterministic, handoffGenerat
|
|
|
14
14
|
export { analyzeImpact as analyzeImpactV2, getGaps, getPartialGaps } from './engine/impact_engine.js';
|
|
15
15
|
export { extractScenarios } from './engine/impact_engine.js';
|
|
16
16
|
export { buildPlanFromImpact } from './engine/plan_builder.js';
|
|
17
|
-
export { appendFeedbackAndRecompute, readCalibration } from './agent/feedback.js';
|
|
17
|
+
export { appendFeedbackAndRecompute, readCalibration, readFlakyTests } from './agent/feedback.js';
|
|
18
18
|
export { finalizeGeneratedTests } from './agent/handoff.js';
|
|
19
19
|
export { ingestTraceabilityInput } from './agent/traceability_ingest.js';
|
|
20
20
|
export { captureTraceabilityInput } from './agent/traceability_capture.js';
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
3
|
+
// See LICENSE.txt for license information.
|
|
4
|
+
import { resolve, sep } from 'path';
|
|
5
|
+
import { runQAAgent } from './orchestrator.js';
|
|
6
|
+
// Sub-commands accepted as the first positional argument.
const MODES = new Set([
    'pr',
    'hunt',
    'fix',
    'release',
]);
// Every option the argument parser recognises; anything else starting with
// `--` triggers an "unknown flag" warning.
const KNOWN_FLAGS = new Set([
    '--base-url',
    '--since',
    '--phase',
    '--time',
    '--budget',
    '--headed',
    '--tests-root',
    '--project',
    '--output',
    '--help',
    '-h',
]);
|
|
11
|
+
/**
 * Print the `e2e-qa-agent` help text to stdout.
 *
 * The option descriptions mirror what the parser and orchestrator actually do:
 * `--phase` selects a single phase (earlier phases are skipped rather than
 * run), and the `--time` default is 30 minutes in `release` mode, 15 otherwise.
 */
function printUsage() {
    console.log(`
Usage: e2e-qa-agent <mode> [options]

Modes:
  pr        Test changed features from a PR
  hunt      Deep-dive into a specific area
  fix       Verify healed tests and side effects
  release   Full regression + release readiness verdict

Options:
  --base-url <url>      Application URL (required)
  --since <ref>         Git ref for diff (default: origin/main)
  --phase <1|2|3>       Run only this phase (default: all phases)
  --time <minutes>      Time limit (default: 15; 30 in release mode)
  --budget <usd>        LLM budget in USD (default: 2.00)
  --headed              Run browser in headed mode
  --tests-root <path>   Path to tests directory
  --project <name>      Playwright project name
  --output <dir>        Output directory (default: .e2e-ai-agents)
  --help, -h            Show this help

Examples:
  e2e-qa-agent pr --since origin/main --base-url http://localhost:8065
  e2e-qa-agent hunt "channel settings" --base-url http://localhost:8065
  e2e-qa-agent release --base-url http://localhost:8065 --time 30
  e2e-qa-agent fix --base-url http://localhost:8065
`);
}
|
|
40
|
+
/**
 * Parse `e2e-qa-agent` command-line arguments into a run configuration.
 *
 * Returns `null` (after printing usage) only when no arguments were given or
 * help was requested. Every usage error — unknown mode, invalid `--phase`,
 * `--time` or `--budget`, a flag-like value for `--since`, the hunt target,
 * `--tests-root`, `--project` or `--output`, a missing/invalid `--base-url`,
 * or an `--output` outside the working directory — is reported on stderr and
 * terminates the process with exit code 1.
 *
 * @param {string[]} argv - Arguments after the node binary and script path.
 * @returns {object|null} The parsed configuration, or `null` when only help was shown.
 */
function parseCliArgs(argv) {
    if (argv.length === 0 || argv.includes('--help') || argv.includes('-h')) {
        printUsage();
        return null;
    }
    const modeArg = argv[0];
    if (!MODES.has(modeArg)) {
        console.error(`Unknown mode: ${modeArg}`);
        printUsage();
        // An unrecognised mode is a usage error. Returning null here would make
        // the caller exit with status 0, which CI would read as a passing run.
        process.exit(1);
        return null;
    }
    const mode = modeArg;
    let baseUrl = '';
    let since;
    let huntTarget;
    let phase;
    let timeLimitMinutes = mode === 'release' ? 30 : 15;
    let budgetUSD = 2.0;
    let headed = false;
    let testsRoot;
    let project;
    let outputDir;
    // For hunt mode, the second positional arg is the target
    let startFlags = 1;
    if (mode === 'hunt' && argv[1] && !argv[1].startsWith('--')) {
        huntTarget = argv[1];
        startFlags = 2;
    }
    for (let i = startFlags; i < argv.length; i++) {
        const arg = argv[i];
        const next = argv[i + 1];
        switch (arg) {
            case '--base-url':
                baseUrl = next || '';
                i++;
                break;
            case '--since':
                since = next;
                i++;
                break;
            case '--phase': {
                const parsed = parseInt(next || '0', 10);
                if (parsed !== 1 && parsed !== 2 && parsed !== 3) {
                    console.error(`Error: --phase must be 1, 2, or 3 (got "${next}")`);
                    process.exit(1);
                }
                phase = parsed;
                i++;
                break;
            }
            case '--time': {
                const parsed = parseInt(next || '15', 10);
                if (!Number.isFinite(parsed) || parsed <= 0) {
                    console.error(`Error: --time must be a positive number (got "${next}")`);
                    process.exit(1);
                }
                timeLimitMinutes = parsed;
                i++;
                break;
            }
            case '--budget': {
                const parsed = parseFloat(next || '2.0');
                if (!Number.isFinite(parsed) || parsed <= 0) {
                    console.error(`Error: --budget must be a positive number (got "${next}")`);
                    process.exit(1);
                }
                budgetUSD = parsed;
                i++;
                break;
            }
            case '--headed':
                headed = true;
                break;
            case '--tests-root':
                testsRoot = next;
                i++;
                break;
            case '--project':
                project = next;
                i++;
                break;
            case '--output':
                outputDir = next;
                i++;
                break;
            default:
                if (arg.startsWith('--') && !KNOWN_FLAGS.has(arg)) {
                    console.error(`Warning: unknown flag "${arg}" (ignored)`);
                }
                break;
        }
    }
    // Validate --since and hunt target against flag injection (must not start with -)
    if (since && since.startsWith('-')) {
        console.error(`Error: --since value "${since}" looks like a flag, not a git ref`);
        process.exit(1);
    }
    if (huntTarget && huntTarget.startsWith('-')) {
        console.error(`Error: hunt target "${huntTarget}" looks like a flag`);
        process.exit(1);
    }
    // Same guard for the remaining string options: without it, an input such as
    // `--tests-root --headed` silently consumes `--headed` as the path, and the
    // flag-like value is later forwarded to child-process argument lists.
    const stringOptions = [
        ['--tests-root', testsRoot],
        ['--project', project],
        ['--output', outputDir],
    ];
    for (const [flag, value] of stringOptions) {
        if (value && value.startsWith('-')) {
            console.error(`Error: ${flag} value "${value}" looks like a flag`);
            process.exit(1);
        }
    }
    if (!baseUrl) {
        console.error('Error: --base-url is required');
        process.exit(1);
    }
    // Validate baseUrl is a proper HTTP(S) URL
    try {
        const parsed = new URL(baseUrl);
        if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
            console.error(`Error: --base-url must use http or https (got "${parsed.protocol}")`);
            process.exit(1);
        }
        // Normalize: keep origin + path only and remove trailing slashes
        baseUrl = parsed.origin + parsed.pathname.replace(/\/+$/, '');
    }
    catch {
        console.error(`Error: --base-url is not a valid URL ("${baseUrl}")`);
        process.exit(1);
    }
    // Validate --output stays within project directory
    if (outputDir) {
        const resolved = resolve(outputDir);
        const cwd = process.cwd();
        // Compare against "cwd + separator" so a sibling such as "/work-other"
        // is not accepted as being inside "/work".
        const normalizedCwd = cwd.endsWith(sep) ? cwd : cwd + sep;
        if (resolved !== cwd && !resolved.startsWith(normalizedCwd)) {
            console.error(`Error: --output "${outputDir}" resolves outside the project directory`);
            process.exit(1);
        }
    }
    return {
        mode,
        baseUrl,
        since: since || 'origin/main',
        huntTarget,
        phase,
        timeLimitMinutes,
        budgetUSD,
        headed,
        testsRoot,
        project,
        outputDir,
    };
}
|
|
183
|
+
/**
 * CLI entry point: parse arguments, run the QA agent, and translate the
 * verdict into a process exit code (`go` → 0, `conditional` → 1, `no-go` → 2).
 * Exits with 0 when argument parsing yields no configuration. Any other
 * verdict value leaves the exit code untouched.
 */
async function main() {
    const cliArgs = process.argv.slice(2);
    const config = parseCliArgs(cliArgs);
    if (!config) {
        process.exit(0);
    }
    const report = await runQAAgent(config);
    // Exit code based on verdict
    const exitCodeByDecision = new Map([
        ['go', 0],
        ['conditional', 1],
        ['no-go', 2],
    ]);
    const { decision } = report.verdict;
    if (exitCodeByDecision.has(decision)) {
        process.exit(exitCodeByDecision.get(decision));
    }
}
|
|
202
|
+
// Run the CLI; any rejection is printed (message only for Error instances)
// and reported as exit code 1.
main().catch((failure) => {
    const message = failure instanceof Error ? failure.message : String(failure);
    console.error(message);
    process.exit(1);
});
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
2
|
+
// See LICENSE.txt for license information.
|
|
3
|
+
import { execFileSync } from 'child_process';
|
|
4
|
+
import { mkdirSync } from 'fs';
|
|
5
|
+
import { logger } from '../logger.js';
|
|
6
|
+
import { runPhase1 } from './phase1/runner.js';
|
|
7
|
+
import { runAgentLoop } from './phase2/agent_loop.js';
|
|
8
|
+
import { computeVerdict } from './phase3/verdict.js';
|
|
9
|
+
import { generateReport } from './phase3/reporter.js';
|
|
10
|
+
import { generateSpecsForFindings } from './phase3/spec_generator.js';
|
|
11
|
+
import { submitFeedback } from './phase3/feedback.js';
|
|
12
|
+
/**
 * Build a fresh, zeroed Phase 2 result, used when exploration is skipped or
 * cannot run.
 *
 * @returns {object} A new object with empty lists and zero counters.
 */
function emptyPhase2Result() {
    const blank = {
        findings: [],
        flowsExplored: [],
        actionsCount: 0,
        tokensUsed: 0,
        costUSD: 0,
        durationMs: 0,
    };
    return blank;
}
|
|
15
|
+
/**
 * Run the QA agent pipeline: Phase 1 (scope + scripted specs), Phase 2
 * (autonomous exploration) and Phase 3 (specs, verdict, report, feedback).
 *
 * `config.phase` restricts the run: 1 stops after Phase 1, 2 skips Phase 1 and
 * stops after Phase 2, 3 skips Phases 1 and 2. Skipped phases contribute empty
 * results. If the `agent-browser` CLI cannot be executed, the run stops before
 * Phase 2 and reports on Phase 1 alone.
 *
 * @param {object} inputConfig - Run configuration; `outputDir` and
 *   `screenshotDir` receive defaults when not set.
 * @returns {Promise<object>} The QA report assembled by `buildQAReport`.
 */
export async function runQAAgent(inputConfig) {
    const outputDir = inputConfig.outputDir || '.e2e-ai-agents';
    const screenshotDir = inputConfig.screenshotDir || `${outputDir}/qa-screenshots`;
    mkdirSync(screenshotDir, { recursive: true });
    const config = { ...inputConfig, outputDir, screenshotDir };
    const skipPhase1 = Boolean(config.phase && config.phase > 1);
    const skipPhase2 = Boolean(config.phase && config.phase > 2);

    // ---- Phase 1: scope resolution + matched scripted specs ----
    logger.info('=== Phase 1: Scope & Scripted Tests ===');
    const phase1 = skipPhase1
        ? { flows: [], specResults: [] } // skipped: stand-in empty results
        : runPhase1(config);
    const phase1IsEmpty = phase1.flows.length === 0 && phase1.specResults.length === 0;
    if (phase1IsEmpty && !skipPhase1) {
        logger.warn('Phase 1 produced no flows and no spec results — scoping may have failed. Check that route-families.json and plan.json are available.');
    }
    logger.info('Phase 1 complete', {
        flows: phase1.flows.length,
        specResults: phase1.specResults.length,
    });
    if (config.phase === 1) {
        return earlyReturn(config, phase1);
    }

    // ---- Phase 2: LLM-driven exploration through agent-browser ----
    logger.info('=== Phase 2: Autonomous Exploration ===');
    let phase2;
    if (skipPhase2) {
        phase2 = emptyPhase2Result();
    }
    else {
        // Make sure the agent-browser binary responds before entering the loop.
        try {
            execFileSync('agent-browser', ['--version'], { encoding: 'utf-8', timeout: 5000 });
        }
        catch {
            logger.error('agent-browser CLI not found. Install it (>= 0.18.0) or skip Phase 2 with --phase 1.');
            return earlyReturn(config, phase1);
        }
        // Without scoped flows, explore a single generic "main" flow.
        const flows = phase1.flows.length > 0
            ? phase1.flows
            : [{ id: 'main', name: 'Main application', priority: 'P1' }];
        // Fix mode is verification only: cap the exploration time at 5 minutes.
        let phase2Config = config;
        if (config.mode === 'fix') {
            phase2Config = { ...config, timeLimitMinutes: Math.min(config.timeLimitMinutes ?? 15, 5) };
        }
        phase2 = await runAgentLoop(phase2Config, flows);
    }
    logger.info('Phase 2 complete', {
        findings: phase2.findings.length,
        flowsExplored: phase2.flowsExplored.length,
        cost: `$${phase2.costUSD.toFixed(4)}`,
    });
    if (config.phase === 2) {
        return earlyReturn(config, phase1, phase2);
    }

    // ---- Phase 3: generated specs, verdict, report, feedback ----
    logger.info('=== Phase 3: Report & Verdict ===');
    const generatedSpecs = generateSpecsForFindings(phase2.findings, config);
    const verdict = computeVerdict(phase1, phase2);
    const phase3 = generateReport(config, phase1, phase2, verdict, generatedSpecs);
    // Feedback is best-effort: a failure is logged and does not fail the run.
    try {
        submitFeedback(config);
    }
    catch (err) {
        logger.warn('Feedback submission failed', { error: String(err) });
    }
    logger.info(`=== QA Agent Complete: ${verdict.decision.toUpperCase()} ===`);
    logger.info(verdict.reason);
    return buildQAReport(config, phase1, phase2, phase3, verdict);
}
|
|
99
|
+
/**
 * Produce a report for a run that stops before Phase 3's full pipeline.
 * A missing Phase 2 result is replaced by an empty one, and no generated
 * specs are passed to the report.
 *
 * @param {object} config - Effective run configuration.
 * @param {object} phase1 - Phase 1 result.
 * @param {object} [phase2] - Phase 2 result, if exploration ran.
 * @returns {object} The QA report assembled by `buildQAReport`.
 */
function earlyReturn(config, phase1, phase2) {
    const exploration = phase2 ? phase2 : emptyPhase2Result();
    const verdict = computeVerdict(phase1, exploration);
    const reportResult = generateReport(config, phase1, exploration, verdict, []);
    return buildQAReport(config, phase1, exploration, reportResult, verdict);
}
|
|
105
|
+
/**
 * Assemble the final QA report object.
 *
 * @param {object} config - Run configuration; only `mode`, `baseUrl`,
 *   `timeLimitMinutes` and `budgetUSD` are copied into the report.
 * @param {object} phase1 - Phase 1 result, embedded as-is.
 * @param {object} phase2 - Phase 2 result, embedded as-is.
 * @param {object} phase3 - Phase 3 result, embedded as-is.
 * @param {object} verdict - Computed verdict, embedded as-is.
 * @returns {object} Report with schema version `1.0.0` and a fresh ISO timestamp.
 */
function buildQAReport(config, phase1, phase2, phase3, verdict) {
    const generatedAt = new Date().toISOString();
    const { mode, baseUrl, timeLimitMinutes, budgetUSD } = config;
    const runSettings = { baseUrl, timeLimitMinutes, budgetUSD };
    return {
        schemaVersion: '1.0.0',
        generatedAt,
        mode,
        config: runSettings,
        phase1,
        phase2,
        phase3,
        verdict,
    };
}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
2
|
+
// See LICENSE.txt for license information.
|
|
3
|
+
import { spawnSync } from 'child_process';
|
|
4
|
+
import { existsSync, readdirSync } from 'fs';
|
|
5
|
+
import { join } from 'path';
|
|
6
|
+
import { logger } from '../../logger.js';
|
|
7
|
+
import { safeEnv } from '../safe_env.js';
|
|
8
|
+
import { resolveScope } from './scope.js';
|
|
9
|
+
/**
 * Phase 1: resolve the test scope, optionally run the `e2e-ai-agents` CLI,
 * then execute the matched Playwright specs.
 *
 * @param {object} config - Run configuration (`mode`, `since`, `testsRoot`, ...).
 * @returns {object} `{ flows, specResults, planPath }`; `planPath` is
 *   `<testsRoot>/.e2e-ai-agents/plan.json` when `testsRoot` is set, otherwise `undefined`.
 */
export function runPhase1(config) {
    const scope = resolveScope(config);
    logger.info('Phase 1: Scope resolved', {
        flows: scope.flows.length,
        specDirs: scope.specPaths.length,
        mode: config.mode,
    });
    // The impact/plan CLI only runs with a git ref and never in release mode.
    const wantsCli = Boolean(config.since) && config.mode !== 'release';
    if (wantsCli) {
        runE2eAgentsCli(config);
    }
    // Execute the Playwright specs that matched the scope.
    const specResults = runMatchedSpecs(scope.specPaths, config);
    let planPath;
    if (config.testsRoot) {
        planPath = join(config.testsRoot, '.e2e-ai-agents', 'plan.json');
    }
    return {
        flows: scope.flows,
        specResults,
        planPath,
    };
}
|
|
30
|
+
/**
 * Run the `e2e-ai-agents` CLI through `npx` for the current mode:
 * `pr` → `plan`, `hunt` → `impact` (with `--flow-patterns` when a hunt target
 * is set), `fix` → `heal`. Other modes do nothing.
 *
 * The call is best-effort: failures are logged as warnings and never thrown.
 * A process that could not be started or was killed (for example by the
 * 120-second timeout) is reported separately from a non-zero exit status,
 * because in that case `status` is `null` and stderr is typically empty.
 *
 * @param {object} config - Run configuration (`mode`, `since`, `huntTarget`, `testsRoot`).
 * @returns {void}
 */
function runE2eAgentsCli(config) {
    const args = ['e2e-ai-agents'];
    switch (config.mode) {
        case 'pr':
            args.push('plan');
            if (config.since) {
                args.push('--since', config.since);
            }
            break;
        case 'hunt':
            args.push('impact');
            if (config.huntTarget) {
                args.push('--flow-patterns', config.huntTarget);
            }
            if (config.since) {
                args.push('--since', config.since);
            }
            break;
        case 'fix':
            args.push('heal');
            break;
        default:
            return;
    }
    if (config.testsRoot) {
        args.push('--tests-root', config.testsRoot);
    }
    logger.info('Running e2e-ai-agents', { args: args.slice(1) });
    const result = spawnSync('npx', args, {
        cwd: config.testsRoot || process.cwd(),
        encoding: 'utf-8',
        timeout: 120000,
        maxBuffer: 2 * 1024 * 1024,
        env: safeEnv(),
    });
    // spawnSync reports launch failures, timeouts and buffer overflows through
    // `error`; surface the actual cause instead of a bare "status: null".
    if (result.error) {
        logger.warn('e2e-agents could not be run to completion', {
            error: result.error.message,
            signal: result.signal,
            stderr: (result.stderr || '').slice(0, 500),
        });
        return;
    }
    // Exit code 2 = "no changes detected" from e2e-agents CLI, not an error
    if (result.status !== 0 && result.status !== 2) {
        logger.warn('e2e-agents exited with non-zero status', {
            status: result.status,
            stderr: (result.stderr || '').slice(0, 500),
        });
    }
}
|
|
70
|
+
/**
 * Run every spec file found under the given scope paths, one at a time.
 *
 * @param {string[]} specPaths - Paths to search for spec files.
 * @param {object} config - Run configuration passed through to each spec run.
 * @returns {object[]} One result per spec file; empty when none were found.
 */
function runMatchedSpecs(specPaths, config) {
    const specFiles = collectSpecFiles(specPaths);
    const nothingToRun = specFiles.length === 0;
    if (nothingToRun) {
        logger.info('No spec files found to run');
        return [];
    }
    logger.info('Running matched specs', { count: specFiles.length });
    return specFiles.map((specFile) => runSingleSpec(specFile, config));
}
|
|
84
|
+
/**
 * Collect the spec files (`*.spec.ts` / `*.test.ts`) reachable from the given paths.
 *
 * Each existing path is scanned recursively as a directory. A path that is
 * itself a spec file is included directly instead of being dropped. Results
 * are de-duplicated, so overlapping scope paths do not make a spec run twice;
 * first-seen order is kept. Missing paths and unreadable directories are skipped.
 *
 * @param {string[]} specPaths - Directories (or spec files) to search.
 * @returns {string[]} Unique spec file paths.
 */
function collectSpecFiles(specPaths) {
    const isSpecName = (name) => name.endsWith('.spec.ts') || name.endsWith('.test.ts');
    const files = new Set();
    for (const p of specPaths) {
        if (!existsSync(p)) {
            continue;
        }
        try {
            const entries = readdirSync(p, { recursive: true, encoding: 'utf-8' });
            for (const entry of entries) {
                if (typeof entry === 'string' && isSpecName(entry)) {
                    files.add(join(p, entry));
                }
            }
        }
        catch (err) {
            // ENOTDIR means the path is a file, not a directory: keep it when it
            // is a spec file. Any other error is an unreadable directory — skip it.
            if (err?.code === 'ENOTDIR' && isSpecName(p)) {
                files.add(p);
            }
        }
    }
    return [...files];
}
|
|
103
|
+
/**
 * Run one Playwright spec through `npx playwright test --reporter json` and
 * summarise the outcome.
 *
 * Counts come from the `stats` object of the JSON report when it is present.
 * When the report is missing, unparsable, or has no `stats` (for example the
 * process crashed, timed out, or printed nothing), the exit status decides:
 * status 0 counts as one pass, anything else as one failure. This keeps a
 * run that never produced a report from being recorded as "0 failed".
 *
 * @param {string} specPath - Spec file to run.
 * @param {object} config - Run configuration (`project`, `testsRoot`, `baseUrl`).
 * @returns {{specPath: string, passed: number, failed: number, flaky: number, skipped: number}}
 */
function runSingleSpec(specPath, config) {
    const args = [
        'playwright', 'test',
        specPath,
        '--reporter', 'json',
    ];
    if (config.project) {
        args.push('--project', config.project);
    }
    const result = spawnSync('npx', args, {
        cwd: config.testsRoot || process.cwd(),
        encoding: 'utf-8',
        timeout: 120000,
        maxBuffer: 2 * 1024 * 1024,
        env: safeEnv(config.baseUrl ? { BASE_URL: config.baseUrl } : {}),
    });
    // Try to read the stats section of the JSON report.
    let stats;
    try {
        stats = JSON.parse(result.stdout || '{}')?.stats;
    }
    catch {
        // stdout was not valid JSON; the exit-status fallback below applies.
        stats = undefined;
    }
    if (stats && typeof stats === 'object') {
        return {
            specPath,
            passed: stats.expected || 0,
            failed: stats.unexpected || 0,
            flaky: stats.flaky || 0,
            skipped: stats.skipped || 0,
        };
    }
    // No usable report: derive a single pass/fail from the exit status
    // (status is null when the process could not start or was killed).
    const succeeded = result.status === 0;
    return {
        specPath,
        passed: succeeded ? 1 : 0,
        failed: succeeded ? 0 : 1,
        flaky: 0,
        skipped: 0,
    };
}
|