@yasserkhanorg/e2e-agents 1.2.2 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/README.md +47 -4
  2. package/dist/agent/feedback.d.ts +20 -0
  3. package/dist/agent/feedback.d.ts.map +1 -1
  4. package/dist/agent/feedback.js +4 -0
  5. package/dist/esm/agent/feedback.js +3 -0
  6. package/dist/esm/index.js +1 -1
  7. package/dist/esm/qa-agent/cli.js +205 -0
  8. package/dist/esm/qa-agent/orchestrator.js +120 -0
  9. package/dist/esm/qa-agent/phase1/runner.js +139 -0
  10. package/dist/esm/qa-agent/phase1/scope.js +126 -0
  11. package/dist/esm/qa-agent/phase2/agent_browser.js +95 -0
  12. package/dist/esm/qa-agent/phase2/agent_loop.js +315 -0
  13. package/dist/esm/qa-agent/phase2/exploration_state.js +76 -0
  14. package/dist/esm/qa-agent/phase2/tools.js +288 -0
  15. package/dist/esm/qa-agent/phase2/vision.js +75 -0
  16. package/dist/esm/qa-agent/phase3/feedback.js +34 -0
  17. package/dist/esm/qa-agent/phase3/reporter.js +118 -0
  18. package/dist/esm/qa-agent/phase3/spec_generator.js +62 -0
  19. package/dist/esm/qa-agent/phase3/verdict.js +66 -0
  20. package/dist/esm/qa-agent/safe_env.js +23 -0
  21. package/dist/esm/qa-agent/types.js +3 -0
  22. package/dist/index.d.ts +2 -2
  23. package/dist/index.d.ts.map +1 -1
  24. package/dist/index.js +2 -1
  25. package/dist/qa-agent/cli.d.ts +3 -0
  26. package/dist/qa-agent/cli.d.ts.map +1 -0
  27. package/dist/qa-agent/cli.js +207 -0
  28. package/dist/qa-agent/orchestrator.d.ts +3 -0
  29. package/dist/qa-agent/orchestrator.d.ts.map +1 -0
  30. package/dist/qa-agent/orchestrator.js +123 -0
  31. package/dist/qa-agent/phase1/runner.d.ts +3 -0
  32. package/dist/qa-agent/phase1/runner.d.ts.map +1 -0
  33. package/dist/qa-agent/phase1/runner.js +142 -0
  34. package/dist/qa-agent/phase1/scope.d.ts +6 -0
  35. package/dist/qa-agent/phase1/scope.d.ts.map +1 -0
  36. package/dist/qa-agent/phase1/scope.js +129 -0
  37. package/dist/qa-agent/phase2/agent_browser.d.ts +35 -0
  38. package/dist/qa-agent/phase2/agent_browser.d.ts.map +1 -0
  39. package/dist/qa-agent/phase2/agent_browser.js +99 -0
  40. package/dist/qa-agent/phase2/agent_loop.d.ts +3 -0
  41. package/dist/qa-agent/phase2/agent_loop.d.ts.map +1 -0
  42. package/dist/qa-agent/phase2/agent_loop.js +321 -0
  43. package/dist/qa-agent/phase2/exploration_state.d.ts +12 -0
  44. package/dist/qa-agent/phase2/exploration_state.d.ts.map +1 -0
  45. package/dist/qa-agent/phase2/exploration_state.js +88 -0
  46. package/dist/qa-agent/phase2/tools.d.ts +28 -0
  47. package/dist/qa-agent/phase2/tools.d.ts.map +1 -0
  48. package/dist/qa-agent/phase2/tools.js +292 -0
  49. package/dist/qa-agent/phase2/vision.d.ts +3 -0
  50. package/dist/qa-agent/phase2/vision.d.ts.map +1 -0
  51. package/dist/qa-agent/phase2/vision.js +78 -0
  52. package/dist/qa-agent/phase3/feedback.d.ts +3 -0
  53. package/dist/qa-agent/phase3/feedback.d.ts.map +1 -0
  54. package/dist/qa-agent/phase3/feedback.js +37 -0
  55. package/dist/qa-agent/phase3/reporter.d.ts +3 -0
  56. package/dist/qa-agent/phase3/reporter.d.ts.map +1 -0
  57. package/dist/qa-agent/phase3/reporter.js +121 -0
  58. package/dist/qa-agent/phase3/spec_generator.d.ts +3 -0
  59. package/dist/qa-agent/phase3/spec_generator.d.ts.map +1 -0
  60. package/dist/qa-agent/phase3/spec_generator.js +65 -0
  61. package/dist/qa-agent/phase3/verdict.d.ts +3 -0
  62. package/dist/qa-agent/phase3/verdict.d.ts.map +1 -0
  63. package/dist/qa-agent/phase3/verdict.js +69 -0
  64. package/dist/qa-agent/safe_env.d.ts +3 -0
  65. package/dist/qa-agent/safe_env.d.ts.map +1 -0
  66. package/dist/qa-agent/safe_env.js +26 -0
  67. package/dist/qa-agent/types.d.ts +122 -0
  68. package/dist/qa-agent/types.d.ts.map +1 -0
  69. package/dist/qa-agent/types.js +4 -0
  70. package/package.json +12 -3
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # @yasserkhanorg/e2e-agents
2
2
 
3
- AI-powered E2E test impact analysis, generation, and healing for frontend repositories.
3
+ AI-powered E2E test impact analysis, generation, healing, and autonomous QA for frontend repositories.
4
4
 
5
5
  [![npm](https://img.shields.io/npm/v/%40yasserkhanorg%2Fe2e-agents)](https://www.npmjs.com/package/@yasserkhanorg/e2e-agents)
6
6
  [![License](https://img.shields.io/badge/license-Apache%202.0-blue)](LICENSE)
@@ -8,7 +8,7 @@ AI-powered E2E test impact analysis, generation, and healing for frontend reposi
8
8
 
9
9
  ## What It Does
10
10
 
11
- Given a git diff, `e2e-ai-agents` determines which E2E test flows are impacted, identifies coverage gaps, and can generate or heal Playwright tests — all from the CLI.
11
+ Given a git diff, `e2e-ai-agents` determines which E2E test flows are impacted, identifies coverage gaps, and can generate or heal Playwright tests — all from the CLI. The companion `e2e-qa-agent` goes further: it opens a real browser, explores your app autonomously, and produces a QA report with findings and a release-readiness verdict.
12
12
 
13
13
  **Pipeline:** `impact` → `plan` → `generate` → `heal` → `finalize`
14
14
 
@@ -23,6 +23,9 @@ Requires Node.js >= 20. Ships both CommonJS and ESM builds.
23
23
  ## CLI Commands
24
24
 
25
25
  ```bash
26
+ # All-in-one: impact + plan + optional generate/heal
27
+ npx e2e-ai-agents analyze --path /path/to/webapp [--generate] [--heal]
28
+
26
29
  # Analyze which flows are impacted by code changes
27
30
  npx e2e-ai-agents impact --path /path/to/webapp
28
31
 
@@ -49,7 +52,7 @@ npx e2e-ai-agents feedback --path /path/to/webapp --feedback-input ./feedback.js
49
52
  npx e2e-ai-agents llm-health
50
53
  ```
51
54
 
52
- `plan` and `suggest` are aliases. Use `--help` for all available flags.
55
+ `plan` and `suggest` are aliases. `analyze` is a convenience wrapper that runs impact + plan and optionally generation/healing in one invocation. Use `--help` for all available flags.
53
56
 
54
57
  ## Configuration
55
58
 
@@ -114,7 +117,7 @@ The `plan` command writes:
114
117
 
115
118
  Use `--fail-on-must-add-tests` to exit non-zero when uncovered P0/P1 gaps exist. Use `--github-output` to expose outputs to subsequent workflow steps.
116
119
 
117
- See [examples/github-actions/](examples/github-actions/) for a complete workflow template.
120
+ See [examples/github-actions/pr-impact.yml](examples/github-actions/pr-impact.yml) for a complete workflow template.
118
121
 
119
122
  ## Pipeline Modes
120
123
 
@@ -206,6 +209,46 @@ Schemas: [schemas/traceability-input.schema.json](schemas/traceability-input.sch
206
209
 
207
210
  All written under `<testsRoot>/.e2e-ai-agents/`.
208
211
 
212
+ ## Autonomous QA Agent (`e2e-qa-agent`)
213
+
214
+ An autonomous QA engineer that opens a real browser, navigates to changed features, tries edge cases, and produces a findings report — all unsupervised. Built on top of `agent-browser` and the Anthropic tool-use API.
215
+
216
+ ### Quick Start
217
+
218
+ ```bash
219
+ # PR mode — test features changed since origin/main
220
+ npx e2e-qa-agent pr --since origin/main --base-url http://localhost:8065
221
+
222
+ # Hunt mode — deep-test a specific area
223
+ npx e2e-qa-agent hunt "channel settings" --base-url http://localhost:8065
224
+
225
+ # Release mode — systematic exploration of all critical flows
226
+ npx e2e-qa-agent release --base-url http://localhost:8065 --time 30
227
+
228
+ # Fix mode — verify healed specs
229
+ npx e2e-qa-agent fix --base-url http://localhost:8065
230
+ ```
231
+
232
+ ### Architecture
233
+
234
+ 1. **Phase 1 (Script)** — Runs `e2e-ai-agents impact/plan` to determine scope, then executes matched Playwright specs.
235
+ 2. **Phase 2 (Explore)** — LLM-driven browser loop: observe (accessibility snapshot) → think → act (click/fill/navigate) → record findings. Includes stuck detection, multi-user testing, console error capture, and vision-based analysis.
236
+ 3. **Phase 3 (Report)** — Generates a structured report with findings, per-flow sign-off, and a release-readiness verdict (go/no-go/conditional).
237
+
238
+ ### Options
239
+
240
+ | Flag | Default | Description |
241
+ |------|---------|-------------|
242
+ | `--base-url` | `http://localhost:8065` | Application URL |
243
+ | `--time` | `15` | Time limit in minutes |
244
+ | `--budget` | `2.00` | Max LLM spend in USD |
245
+ | `--phase` | `all` | Run only `1`, `2`, or `3` |
246
+ | `--headed` | off | Keep browser visible |
247
+ | `--since` | — | Git ref for diff-based scoping |
248
+ | `--tests-root` | — | Path to Playwright tests directory |
249
+
250
+ Requires `agent-browser` CLI (`npm install -g agent-browser`) and `ANTHROPIC_API_KEY`.
251
+
209
252
  ## Production Usage
210
253
 
211
254
  Used by [Mattermost](https://github.com/mattermost/mattermost) for CI-integrated E2E coverage gating, test generation, and spec healing. See the [Mattermost Playwright integration](https://github.com/mattermost/mattermost/tree/master/e2e-tests/playwright) for a real-world example.
@@ -46,10 +46,30 @@ export interface CalibrationSummary {
46
46
  };
47
47
  }>;
48
48
  }
49
+ export interface FlakySummary {
50
+ schemaVersion: '1.1.0';
51
+ generatedAt: string;
52
+ tests: Array<{
53
+ test: string;
54
+ subsystem: string;
55
+ owners: string[];
56
+ flakeRate: number;
57
+ flakeRate7d: number;
58
+ flakeRate30d: number;
59
+ trend: 'up' | 'down' | 'stable';
60
+ quarantine: boolean;
61
+ quarantineState: 'none' | 'active' | 'retire-candidate';
62
+ lastFailureAt?: string;
63
+ samples: number;
64
+ samples7d: number;
65
+ samples30d: number;
66
+ }>;
67
+ }
49
68
  export declare function appendFeedbackAndRecompute(appRoot: string, input: RecommendationFeedbackEntry): {
50
69
  feedbackPath: string;
51
70
  calibrationPath: string;
52
71
  calibration: CalibrationSummary;
53
72
  };
54
73
  export declare function readCalibration(appRoot: string): CalibrationSummary | null;
74
+ export declare function readFlakyTests(appRoot: string): FlakySummary | null;
55
75
  //# sourceMappingURL=feedback.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../src/agent/feedback.ts"],"names":[],"mappings":"AAOA,MAAM,WAAW,2BAA2B;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,OAAO,GAAG,UAAU,GAAG,MAAM,CAAC;IACtC,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IAC/B,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE;QACL,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;KAC7B,CAAC;IACF,QAAQ,EAAE;QACN,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,SAAS,EAAE;QACP,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,WAAW,EAAE,MAAM,CACnB,MAAM,EACN;QACI,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE;YACN,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;QACF,SAAS,EAAE;YACP,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;KACL,CACA,CAAC;CACL;AAkSD,wBAAgB,0BAA0B,CACtC,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,2BAA2B,GACnC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,eAAe,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,kBAAkB,CAAA;CAAC,CAwBlF;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB,GAAG,IAAI,CAE1E"}
1
+ {"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../src/agent/feedback.ts"],"names":[],"mappings":"AAOA,MAAM,WAAW,2BAA2B;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,OAAO,GAAG,UAAU,GAAG,MAAM,CAAC;IACtC,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IAC/B,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE;QACL,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;KAC7B,CAAC;IACF,QAAQ,EAAE;QACN,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,SAAS,EAAE;QACP,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,WAAW,EAAE,MAAM,CACnB,MAAM,EACN;QACI,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE;YACN,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;QACF,SAAS,EAAE;YACP,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;KACL,CACA,CAAC;CACL;AAOD,MAAM,WAAW,YAAY;IACzB,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,KAAK,CAAC;QACT,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,EAAE,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,IAAI,GAAG,MAAM,GAAG,QAAQ,CAAC;QAChC,UAAU,EAAE,OAAO,CAAC;QACpB,eAAe,EAAE,MAAM,GAAG,QAAQ,GAAG,kBAAkB,CAAC;QACxD,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;KACtB,CAAC,CAAC;CACN;AAyQD,wBAAgB,0BAA0B,CACtC,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,2BAA2B,GACnC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,eAAe,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,kBAAkB,CAAA;CAAC,CAwBlF;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB,GAAG,IAAI,CAE1E;AAED,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,YAAY,GAAG,IAAI,CAEnE"}
@@ -4,6 +4,7 @@
4
4
  Object.defineProperty(exports, "__esModule", { value: true });
5
5
  exports.appendFeedbackAndRecompute = appendFeedbackAndRecompute;
6
6
  exports.readCalibration = readCalibration;
7
+ exports.readFlakyTests = readFlakyTests;
7
8
  const fs_1 = require("fs");
8
9
  const path_1 = require("path");
9
10
  const test_path_js_1 = require("./test_path.js");
@@ -255,3 +256,6 @@ function appendFeedbackAndRecompute(appRoot, input) {
255
256
  function readCalibration(appRoot) {
256
257
  return readJson((0, path_1.join)(appRoot, '.e2e-ai-agents', 'calibration.json'));
257
258
  }
259
+ function readFlakyTests(appRoot) {
260
+ return readJson((0, path_1.join)(appRoot, '.e2e-ai-agents', 'flaky-tests.json'));
261
+ }
@@ -251,3 +251,6 @@ export function appendFeedbackAndRecompute(appRoot, input) {
251
251
  export function readCalibration(appRoot) {
252
252
  return readJson(join(appRoot, '.e2e-ai-agents', 'calibration.json'));
253
253
  }
254
+ export function readFlakyTests(appRoot) {
255
+ return readJson(join(appRoot, '.e2e-ai-agents', 'flaky-tests.json'));
256
+ }
package/dist/esm/index.js CHANGED
@@ -14,7 +14,7 @@ export { analyzeImpactDeterministic, recommendTestsDeterministic, handoffGenerat
14
14
  export { analyzeImpact as analyzeImpactV2, getGaps, getPartialGaps } from './engine/impact_engine.js';
15
15
  export { extractScenarios } from './engine/impact_engine.js';
16
16
  export { buildPlanFromImpact } from './engine/plan_builder.js';
17
- export { appendFeedbackAndRecompute, readCalibration } from './agent/feedback.js';
17
+ export { appendFeedbackAndRecompute, readCalibration, readFlakyTests } from './agent/feedback.js';
18
18
  export { finalizeGeneratedTests } from './agent/handoff.js';
19
19
  export { ingestTraceabilityInput } from './agent/traceability_ingest.js';
20
20
  export { captureTraceabilityInput } from './agent/traceability_capture.js';
@@ -0,0 +1,205 @@
1
+ #!/usr/bin/env node
2
+ // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
3
+ // See LICENSE.txt for license information.
4
+ import { resolve, sep } from 'path';
5
+ import { runQAAgent } from './orchestrator.js';
6
+ const MODES = new Set(['pr', 'hunt', 'fix', 'release']);
7
+ const KNOWN_FLAGS = new Set([
8
+ '--base-url', '--since', '--phase', '--time', '--budget',
9
+ '--headed', '--tests-root', '--project', '--output', '--help', '-h',
10
+ ]);
11
+ function printUsage() {
12
+ console.log(`
13
+ Usage: e2e-qa-agent <mode> [options]
14
+
15
+ Modes:
16
+ pr Test changed features from a PR
17
+ hunt Deep-dive into a specific area
18
+ fix Verify healed tests and side effects
19
+ release Full regression + release readiness verdict
20
+
21
+ Options:
22
+ --base-url <url> Application URL (required)
23
+ --since <ref> Git ref for diff (default: origin/main)
24
+ --phase <1|2|3> Run only up to this phase
25
+ --time <minutes> Time limit (default: 15)
26
+ --budget <usd> LLM budget in USD (default: 2.00)
27
+ --headed Run browser in headed mode
28
+ --tests-root <path> Path to tests directory
29
+ --project <name> Playwright project name
30
+ --output <dir> Output directory (default: .e2e-ai-agents)
31
+ --help Show this help
32
+
33
+ Examples:
34
+ e2e-qa-agent pr --since origin/main --base-url http://localhost:8065
35
+ e2e-qa-agent hunt "channel settings" --base-url http://localhost:8065
36
+ e2e-qa-agent release --base-url http://localhost:8065 --time 30
37
+ e2e-qa-agent fix --base-url http://localhost:8065
38
+ `);
39
+ }
40
+ function parseCliArgs(argv) {
41
+ if (argv.length === 0 || argv.includes('--help') || argv.includes('-h')) {
42
+ printUsage();
43
+ return null;
44
+ }
45
+ const modeArg = argv[0];
46
+ if (!MODES.has(modeArg)) {
47
+ console.error(`Unknown mode: ${modeArg}`);
48
+ printUsage();
49
+ return null;
50
+ }
51
+ const mode = modeArg;
52
+ let baseUrl = '';
53
+ let since;
54
+ let huntTarget;
55
+ let phase;
56
+ let timeLimitMinutes = mode === 'release' ? 30 : 15;
57
+ let budgetUSD = 2.0;
58
+ let headed = false;
59
+ let testsRoot;
60
+ let project;
61
+ let outputDir;
62
+ // For hunt mode, the second positional arg is the target
63
+ let startFlags = 1;
64
+ if (mode === 'hunt' && argv[1] && !argv[1].startsWith('--')) {
65
+ huntTarget = argv[1];
66
+ startFlags = 2;
67
+ }
68
+ for (let i = startFlags; i < argv.length; i++) {
69
+ const arg = argv[i];
70
+ const next = argv[i + 1];
71
+ switch (arg) {
72
+ case '--base-url':
73
+ baseUrl = next || '';
74
+ i++;
75
+ break;
76
+ case '--since':
77
+ since = next;
78
+ i++;
79
+ break;
80
+ case '--phase': {
81
+ const parsed = parseInt(next || '0', 10);
82
+ if (parsed !== 1 && parsed !== 2 && parsed !== 3) {
83
+ console.error(`Error: --phase must be 1, 2, or 3 (got "${next}")`);
84
+ process.exit(1);
85
+ }
86
+ phase = parsed;
87
+ i++;
88
+ break;
89
+ }
90
+ case '--time': {
91
+ const parsed = parseInt(next || '15', 10);
92
+ if (!Number.isFinite(parsed) || parsed <= 0) {
93
+ console.error(`Error: --time must be a positive number (got "${next}")`);
94
+ process.exit(1);
95
+ }
96
+ timeLimitMinutes = parsed;
97
+ i++;
98
+ break;
99
+ }
100
+ case '--budget': {
101
+ const parsed = parseFloat(next || '2.0');
102
+ if (!Number.isFinite(parsed) || parsed <= 0) {
103
+ console.error(`Error: --budget must be a positive number (got "${next}")`);
104
+ process.exit(1);
105
+ }
106
+ budgetUSD = parsed;
107
+ i++;
108
+ break;
109
+ }
110
+ case '--headed':
111
+ headed = true;
112
+ break;
113
+ case '--tests-root':
114
+ testsRoot = next;
115
+ i++;
116
+ break;
117
+ case '--project':
118
+ project = next;
119
+ i++;
120
+ break;
121
+ case '--output':
122
+ outputDir = next;
123
+ i++;
124
+ break;
125
+ default:
126
+ if (arg.startsWith('--') && !KNOWN_FLAGS.has(arg)) {
127
+ console.error(`Warning: unknown flag "${arg}" (ignored)`);
128
+ }
129
+ break;
130
+ }
131
+ }
132
+ // Validate --since and hunt target against flag injection (must not start with -)
133
+ if (since && since.startsWith('-')) {
134
+ console.error(`Error: --since value "${since}" looks like a flag, not a git ref`);
135
+ process.exit(1);
136
+ }
137
+ if (huntTarget && huntTarget.startsWith('-')) {
138
+ console.error(`Error: hunt target "${huntTarget}" looks like a flag`);
139
+ process.exit(1);
140
+ }
141
+ if (!baseUrl) {
142
+ console.error('Error: --base-url is required');
143
+ process.exit(1);
144
+ }
145
+ // Validate baseUrl is a proper HTTP(S) URL
146
+ try {
147
+ const parsed = new URL(baseUrl);
148
+ if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
149
+ console.error(`Error: --base-url must use http or https (got "${parsed.protocol}")`);
150
+ process.exit(1);
151
+ }
152
+ // Normalize: remove trailing slash
153
+ baseUrl = parsed.origin + parsed.pathname.replace(/\/+$/, '');
154
+ }
155
+ catch {
156
+ console.error(`Error: --base-url is not a valid URL ("${baseUrl}")`);
157
+ process.exit(1);
158
+ }
159
+ // Validate --output stays within project directory
160
+ if (outputDir) {
161
+ const resolved = resolve(outputDir);
162
+ const cwd = process.cwd();
163
+ const normalizedCwd = cwd.endsWith(sep) ? cwd : cwd + sep;
164
+ if (resolved !== cwd && !resolved.startsWith(normalizedCwd)) {
165
+ console.error(`Error: --output "${outputDir}" resolves outside the project directory`);
166
+ process.exit(1);
167
+ }
168
+ }
169
+ return {
170
+ mode,
171
+ baseUrl,
172
+ since: since || 'origin/main',
173
+ huntTarget,
174
+ phase,
175
+ timeLimitMinutes,
176
+ budgetUSD,
177
+ headed,
178
+ testsRoot,
179
+ project,
180
+ outputDir,
181
+ };
182
+ }
183
+ async function main() {
184
+ const config = parseCliArgs(process.argv.slice(2));
185
+ if (!config) {
186
+ process.exit(0);
187
+ }
188
+ const report = await runQAAgent(config);
189
+ // Exit code based on verdict
190
+ switch (report.verdict.decision) {
191
+ case 'go':
192
+ process.exit(0);
193
+ break;
194
+ case 'conditional':
195
+ process.exit(1);
196
+ break;
197
+ case 'no-go':
198
+ process.exit(2);
199
+ break;
200
+ }
201
+ }
202
+ main().catch((error) => {
203
+ console.error(error instanceof Error ? error.message : String(error));
204
+ process.exit(1);
205
+ });
@@ -0,0 +1,120 @@
1
+ // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
2
+ // See LICENSE.txt for license information.
3
+ import { execFileSync } from 'child_process';
4
+ import { mkdirSync } from 'fs';
5
+ import { logger } from '../logger.js';
6
+ import { runPhase1 } from './phase1/runner.js';
7
+ import { runAgentLoop } from './phase2/agent_loop.js';
8
+ import { computeVerdict } from './phase3/verdict.js';
9
+ import { generateReport } from './phase3/reporter.js';
10
+ import { generateSpecsForFindings } from './phase3/spec_generator.js';
11
+ import { submitFeedback } from './phase3/feedback.js';
12
+ function emptyPhase2Result() {
13
+ return { findings: [], flowsExplored: [], actionsCount: 0, tokensUsed: 0, costUSD: 0, durationMs: 0 };
14
+ }
15
+ export async function runQAAgent(inputConfig) {
16
+ const outputDir = inputConfig.outputDir || '.e2e-ai-agents';
17
+ const screenshotDir = inputConfig.screenshotDir || `${outputDir}/qa-screenshots`;
18
+ mkdirSync(screenshotDir, { recursive: true });
19
+ const config = { ...inputConfig, outputDir, screenshotDir };
20
+ // -----------------------------------------------------------------------
21
+ // Phase 1: Scripted (scope resolution + run matched specs)
22
+ // -----------------------------------------------------------------------
23
+ logger.info('=== Phase 1: Scope & Scripted Tests ===');
24
+ let phase1;
25
+ if (config.phase && config.phase > 1) {
26
+ // Skip Phase 1 — provide empty results
27
+ phase1 = { flows: [], specResults: [] };
28
+ }
29
+ else {
30
+ phase1 = runPhase1(config);
31
+ }
32
+ if (phase1.flows.length === 0 && phase1.specResults.length === 0 && !(config.phase && config.phase > 1)) {
33
+ logger.warn('Phase 1 produced no flows and no spec results — scoping may have failed. Check that route-families.json and plan.json are available.');
34
+ }
35
+ logger.info('Phase 1 complete', {
36
+ flows: phase1.flows.length,
37
+ specResults: phase1.specResults.length,
38
+ });
39
+ if (config.phase === 1) {
40
+ return earlyReturn(config, phase1);
41
+ }
42
+ // -----------------------------------------------------------------------
43
+ // Phase 2: Autonomous exploration (LLM + agent-browser)
44
+ // -----------------------------------------------------------------------
45
+ logger.info('=== Phase 2: Autonomous Exploration ===');
46
+ // Verify agent-browser is available before starting the exploration loop
47
+ if (!(config.phase && config.phase > 2)) {
48
+ try {
49
+ execFileSync('agent-browser', ['--version'], { encoding: 'utf-8', timeout: 5000 });
50
+ }
51
+ catch {
52
+ logger.error('agent-browser CLI not found. Install it (>= 0.18.0) or skip Phase 2 with --phase 1.');
53
+ return earlyReturn(config, phase1);
54
+ }
55
+ }
56
+ let phase2;
57
+ if (config.phase && config.phase > 2) {
58
+ phase2 = emptyPhase2Result();
59
+ }
60
+ else {
61
+ const flows = phase1.flows.length > 0
62
+ ? phase1.flows
63
+ : [{ id: 'main', name: 'Main application', priority: 'P1' }];
64
+ // In fix mode, limit Phase 2 to verification only
65
+ const phase2Config = config.mode === 'fix'
66
+ ? { ...config, timeLimitMinutes: Math.min(config.timeLimitMinutes ?? 15, 5) }
67
+ : config;
68
+ phase2 = await runAgentLoop(phase2Config, flows);
69
+ }
70
+ logger.info('Phase 2 complete', {
71
+ findings: phase2.findings.length,
72
+ flowsExplored: phase2.flowsExplored.length,
73
+ cost: `$${phase2.costUSD.toFixed(4)}`,
74
+ });
75
+ if (config.phase === 2) {
76
+ return earlyReturn(config, phase1, phase2);
77
+ }
78
+ // -----------------------------------------------------------------------
79
+ // Phase 3: Report + Spec Generation + Verdict
80
+ // -----------------------------------------------------------------------
81
+ logger.info('=== Phase 3: Report & Verdict ===');
82
+ // Generate specs for discovered bugs
83
+ const generatedSpecs = generateSpecsForFindings(phase2.findings, config);
84
+ // Compute verdict
85
+ const verdict = computeVerdict(phase1, phase2);
86
+ // Generate report
87
+ const phase3 = generateReport(config, phase1, phase2, verdict, generatedSpecs);
88
+ // Submit feedback
89
+ try {
90
+ submitFeedback(config);
91
+ }
92
+ catch (err) {
93
+ logger.warn('Feedback submission failed', { error: String(err) });
94
+ }
95
+ logger.info(`=== QA Agent Complete: ${verdict.decision.toUpperCase()} ===`);
96
+ logger.info(verdict.reason);
97
+ return buildQAReport(config, phase1, phase2, phase3, verdict);
98
+ }
99
+ function earlyReturn(config, phase1, phase2) {
100
+ const p2 = phase2 || emptyPhase2Result();
101
+ const verdict = computeVerdict(phase1, p2);
102
+ const phase3 = generateReport(config, phase1, p2, verdict, []);
103
+ return buildQAReport(config, phase1, p2, phase3, verdict);
104
+ }
105
+ function buildQAReport(config, phase1, phase2, phase3, verdict) {
106
+ return {
107
+ schemaVersion: '1.0.0',
108
+ generatedAt: new Date().toISOString(),
109
+ mode: config.mode,
110
+ config: {
111
+ baseUrl: config.baseUrl,
112
+ timeLimitMinutes: config.timeLimitMinutes,
113
+ budgetUSD: config.budgetUSD,
114
+ },
115
+ phase1,
116
+ phase2,
117
+ phase3,
118
+ verdict,
119
+ };
120
+ }
@@ -0,0 +1,139 @@
1
+ // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
2
+ // See LICENSE.txt for license information.
3
+ import { spawnSync } from 'child_process';
4
+ import { existsSync, readdirSync } from 'fs';
5
+ import { join } from 'path';
6
+ import { logger } from '../../logger.js';
7
+ import { safeEnv } from '../safe_env.js';
8
+ import { resolveScope } from './scope.js';
9
+ export function runPhase1(config) {
10
+ const { flows, specPaths } = resolveScope(config);
11
+ logger.info('Phase 1: Scope resolved', {
12
+ flows: flows.length,
13
+ specDirs: specPaths.length,
14
+ mode: config.mode,
15
+ });
16
+ // Run e2e-agents CLI for impact/plan if we have a since ref
17
+ if (config.since && config.mode !== 'release') {
18
+ runE2eAgentsCli(config);
19
+ }
20
+ // Run matched Playwright specs
21
+ const specResults = runMatchedSpecs(specPaths, config);
22
+ return {
23
+ flows,
24
+ specResults,
25
+ planPath: config.testsRoot
26
+ ? join(config.testsRoot, '.e2e-ai-agents', 'plan.json')
27
+ : undefined,
28
+ };
29
+ }
30
+ function runE2eAgentsCli(config) {
31
+ const args = ['e2e-ai-agents'];
32
+ switch (config.mode) {
33
+ case 'pr':
34
+ args.push('plan');
35
+ if (config.since)
36
+ args.push('--since', config.since);
37
+ break;
38
+ case 'hunt':
39
+ args.push('impact');
40
+ if (config.huntTarget)
41
+ args.push('--flow-patterns', config.huntTarget);
42
+ if (config.since)
43
+ args.push('--since', config.since);
44
+ break;
45
+ case 'fix':
46
+ args.push('heal');
47
+ break;
48
+ default:
49
+ return;
50
+ }
51
+ if (config.testsRoot) {
52
+ args.push('--tests-root', config.testsRoot);
53
+ }
54
+ logger.info('Running e2e-ai-agents', { args: args.slice(1) });
55
+ const result = spawnSync('npx', args, {
56
+ cwd: config.testsRoot || process.cwd(),
57
+ encoding: 'utf-8',
58
+ timeout: 120000,
59
+ maxBuffer: 2 * 1024 * 1024,
60
+ env: safeEnv(),
61
+ });
62
+ // Exit code 2 = "no changes detected" from e2e-agents CLI, not an error
63
+ if (result.status !== 0 && result.status !== 2) {
64
+ logger.warn('e2e-agents exited with non-zero status', {
65
+ status: result.status,
66
+ stderr: (result.stderr || '').slice(0, 500),
67
+ });
68
+ }
69
+ }
70
+ function runMatchedSpecs(specPaths, config) {
71
+ const results = [];
72
+ const specFiles = collectSpecFiles(specPaths);
73
+ if (specFiles.length === 0) {
74
+ logger.info('No spec files found to run');
75
+ return results;
76
+ }
77
+ logger.info('Running matched specs', { count: specFiles.length });
78
+ for (const specFile of specFiles) {
79
+ const result = runSingleSpec(specFile, config);
80
+ results.push(result);
81
+ }
82
+ return results;
83
+ }
84
+ function collectSpecFiles(specPaths) {
85
+ const files = [];
86
+ for (const p of specPaths) {
87
+ if (!existsSync(p))
88
+ continue;
89
+ try {
90
+ const entries = readdirSync(p, { recursive: true, encoding: 'utf-8' });
91
+ for (const entry of entries) {
92
+ if (typeof entry === 'string' && (entry.endsWith('.spec.ts') || entry.endsWith('.test.ts'))) {
93
+ files.push(join(p, entry));
94
+ }
95
+ }
96
+ }
97
+ catch {
98
+ // Skip unreadable directories
99
+ }
100
+ }
101
+ return files;
102
+ }
103
+ function runSingleSpec(specPath, config) {
104
+ const args = [
105
+ 'playwright', 'test',
106
+ specPath,
107
+ '--reporter', 'json',
108
+ ];
109
+ if (config.project) {
110
+ args.push('--project', config.project);
111
+ }
112
+ const result = spawnSync('npx', args, {
113
+ cwd: config.testsRoot || process.cwd(),
114
+ encoding: 'utf-8',
115
+ timeout: 120000,
116
+ maxBuffer: 2 * 1024 * 1024,
117
+ env: safeEnv(config.baseUrl ? { BASE_URL: config.baseUrl } : {}),
118
+ });
119
+ // Try to parse JSON output
120
+ try {
121
+ const report = JSON.parse(result.stdout || '{}');
122
+ return {
123
+ specPath,
124
+ passed: report.stats?.expected || 0,
125
+ failed: report.stats?.unexpected || 0,
126
+ flaky: report.stats?.flaky || 0,
127
+ skipped: report.stats?.skipped || 0,
128
+ };
129
+ }
130
+ catch {
131
+ return {
132
+ specPath,
133
+ passed: result.status === 0 ? 1 : 0,
134
+ failed: result.status === 0 ? 0 : 1,
135
+ flaky: 0,
136
+ skipped: 0,
137
+ };
138
+ }
139
+ }