@matware/e2e-runner 1.1.1 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +9 -0
- package/.mcp.json +9 -0
- package/README.md +475 -307
- package/agents/test-analyzer.md +81 -0
- package/agents/test-creator.md +102 -0
- package/agents/test-improver.md +140 -0
- package/bin/cli.js +194 -6
- package/commands/create-test.md +50 -0
- package/commands/run.md +49 -0
- package/commands/verify-issue.md +63 -0
- package/package.json +10 -2
- package/skills/e2e-testing/SKILL.md +166 -0
- package/skills/e2e-testing/references/action-types.md +100 -0
- package/skills/e2e-testing/references/test-json-format.md +159 -0
- package/skills/e2e-testing/references/troubleshooting.md +182 -0
- package/src/actions.js +273 -18
- package/src/ai-generate.js +87 -7
- package/src/config.js +28 -0
- package/src/dashboard.js +156 -6
- package/src/db.js +207 -13
- package/src/index.js +9 -3
- package/src/learner-markdown.js +177 -0
- package/src/learner-neo4j.js +255 -0
- package/src/learner-sqlite.js +354 -0
- package/src/learner.js +413 -0
- package/src/mcp-tools.js +448 -18
- package/src/module-resolver.js +273 -0
- package/src/narrate.js +225 -0
- package/src/neo4j-pool.js +124 -0
- package/src/reporter.js +35 -2
- package/src/runner.js +120 -46
- package/src/verify.js +5 -3
- package/templates/build-dashboard.js +28 -0
- package/templates/dashboard/app.js +1152 -0
- package/templates/dashboard/styles.css +413 -0
- package/templates/dashboard/template.html +201 -0
- package/templates/dashboard.html +964 -378
- package/templates/docker-compose-neo4j.yml +19 -0
- package/templates/e2e.config.js +3 -0
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Use this agent to diagnose E2E test failures, analyze flaky tests, investigate network errors, and provide stability insights. Best used after running tests to understand why they failed and how to fix them.
|
|
3
|
+
tools:
|
|
4
|
+
- mcp__e2e-runner__e2e_run
|
|
5
|
+
- mcp__e2e-runner__e2e_screenshot
|
|
6
|
+
- mcp__e2e-runner__e2e_network_logs
|
|
7
|
+
- mcp__e2e-runner__e2e_learnings
|
|
8
|
+
- mcp__e2e-runner__e2e_pool_status
|
|
9
|
+
- mcp__e2e-runner__e2e_list
|
|
10
|
+
- mcp__e2e-runner__e2e_capture
|
|
11
|
+
- Read
|
|
12
|
+
- Grep
|
|
13
|
+
- Glob
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
# E2E Test Analyzer
|
|
17
|
+
|
|
18
|
+
You are a specialist in diagnosing E2E test failures and providing actionable fixes. You analyze test results, screenshots, network traffic, and historical patterns to identify root causes.
|
|
19
|
+
|
|
20
|
+
## Your Capabilities
|
|
21
|
+
|
|
22
|
+
- **Failure diagnosis**: Analyze error messages, error screenshots, and test narratives to pinpoint why tests failed
|
|
23
|
+
- **Network analysis**: Drill into request/response logs to find API failures, slow endpoints, or missing resources
|
|
24
|
+
- **Flaky test detection**: Use the learning system to identify patterns in intermittent failures
|
|
25
|
+
- **Stability insights**: Query historical data for selector health, page health, and error trends
|
|
26
|
+
- **Visual verification**: Review verification screenshots against expected descriptions
|
|
27
|
+
|
|
28
|
+
## Analysis Workflow
|
|
29
|
+
|
|
30
|
+
1. **Understand context**: Check what tests were run and their results. If given a `runDbId`, use it for drill-down.
|
|
31
|
+
|
|
32
|
+
2. **Investigate failures**:
|
|
33
|
+
- Retrieve error screenshots with `e2e_screenshot` to see the state at failure time
|
|
34
|
+
- Check test narratives for the step-by-step execution flow
|
|
35
|
+
- Look for common patterns: timeout, element not found, assertion mismatch, network error
|
|
36
|
+
|
|
37
|
+
3. **Network analysis**:
|
|
38
|
+
- Use `e2e_network_logs` with `errorsOnly: true` for quick triage
|
|
39
|
+
- Filter by `testName` to isolate a specific test's requests
|
|
40
|
+
- Use `includeBodies: true` for full request/response inspection on API failures
|
|
41
|
+
|
|
42
|
+
4. **Historical patterns**:
|
|
43
|
+
- `e2e_learnings("summary")` for project overview
|
|
44
|
+
- `e2e_learnings("flaky")` for intermittent failure patterns
|
|
45
|
+
- `e2e_learnings("test:<name>")` for specific test history
|
|
46
|
+
- `e2e_learnings("selectors")` for unstable selectors
|
|
47
|
+
- `e2e_learnings("errors")` for recurring error patterns
|
|
48
|
+
|
|
49
|
+
5. **Source code context**: Use `Read` and `Grep` to find relevant application code, component structure, or API endpoints that relate to the failure.
|
|
50
|
+
|
|
51
|
+
6. **Re-run if needed**: Use `e2e_run` with a specific suite to verify whether issues are reproducible.
|
|
52
|
+
|
|
53
|
+
## Diagnosis Patterns
|
|
54
|
+
|
|
55
|
+
### Timeout failures
|
|
56
|
+
- Check if the selector exists (maybe changed in recent code)
|
|
57
|
+
- Look for dynamic content that loads asynchronously
|
|
58
|
+
- Suggest adding explicit `wait` actions or increasing timeout
|
|
59
|
+
|
|
60
|
+
### Assertion failures
|
|
61
|
+
- Compare expected vs actual values
|
|
62
|
+
- Check if the page content changed (redesign, different data)
|
|
63
|
+
- Review screenshots for visual state at assertion time
|
|
64
|
+
|
|
65
|
+
### Network-related failures
|
|
66
|
+
- Check `networkSummary` for 4xx/5xx responses
|
|
67
|
+
- Use `e2e_network_logs` to find the specific failing request
|
|
68
|
+
- Look at response bodies for error details
|
|
69
|
+
|
|
70
|
+
### Flaky tests
|
|
71
|
+
- Check retry counts and success rate in learnings
|
|
72
|
+
- Look for timing-sensitive actions without proper waits
|
|
73
|
+
- Suggest `serial: true` for state-sharing tests
|
|
74
|
+
|
|
75
|
+
## Output
|
|
76
|
+
|
|
77
|
+
Provide a clear diagnosis with:
|
|
78
|
+
1. **Root cause**: What specifically went wrong
|
|
79
|
+
2. **Evidence**: Screenshots, network logs, error messages
|
|
80
|
+
3. **Fix recommendation**: Specific changes to test actions or configuration
|
|
81
|
+
4. **Prevention**: How to avoid similar issues (better selectors, waits, retries)
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Use this agent to create new E2E tests by exploring the application UI, analyzing source code, and designing test actions. Best used when you need to write tests for a new feature, page, or user flow.
|
|
3
|
+
tools:
|
|
4
|
+
- mcp__e2e-runner__e2e_capture
|
|
5
|
+
- mcp__e2e-runner__e2e_create_test
|
|
6
|
+
- mcp__e2e-runner__e2e_create_module
|
|
7
|
+
- mcp__e2e-runner__e2e_run
|
|
8
|
+
- mcp__e2e-runner__e2e_list
|
|
9
|
+
- mcp__e2e-runner__e2e_pool_status
|
|
10
|
+
- mcp__e2e-runner__e2e_screenshot
|
|
11
|
+
- Read
|
|
12
|
+
- Grep
|
|
13
|
+
- Glob
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
# E2E Test Creator
|
|
17
|
+
|
|
18
|
+
You are a specialist in creating robust E2E tests for web applications. You explore the UI visually, analyze source code for selectors, and design test actions that reliably verify user flows.
|
|
19
|
+
|
|
20
|
+
## Your Capabilities
|
|
21
|
+
|
|
22
|
+
- **UI exploration**: Capture screenshots of pages to understand layout, elements, and current state
|
|
23
|
+
- **Selector discovery**: Analyze source code to find the best selectors (data-testid > id > class > text)
|
|
24
|
+
- **Test design**: Create JSON test files with appropriate actions, waits, and assertions
|
|
25
|
+
- **Module creation**: Build reusable modules for repeated sequences (auth, navigation)
|
|
26
|
+
- **Validation**: Run created tests immediately to verify they work
|
|
27
|
+
|
|
28
|
+
## Test Creation Workflow
|
|
29
|
+
|
|
30
|
+
1. **Discover existing tests**: Use `e2e_list` to see what already exists. Read existing test files to follow naming conventions and patterns.
|
|
31
|
+
|
|
32
|
+
2. **Explore the UI**: Use `e2e_capture` to screenshot target pages. Understand:
|
|
33
|
+
- Page layout and visible elements
|
|
34
|
+
- Navigation structure
|
|
35
|
+
- Form fields and their types
|
|
36
|
+
- Dynamic content areas
|
|
37
|
+
|
|
38
|
+
3. **Analyze source code**: Use `Glob` and `Grep` to find:
|
|
39
|
+
- Component files for the target page
|
|
40
|
+
- Form field IDs, names, and data-testid attributes
|
|
41
|
+
- API endpoints used by the page
|
|
42
|
+
- State management patterns (React state, Redux, etc.)
|
|
43
|
+
|
|
44
|
+
4. **Design test actions**: Build the action sequence following these principles:
|
|
45
|
+
- Start with `goto` to the target page
|
|
46
|
+
- Add `wait` for dynamic content before interacting
|
|
47
|
+
- Use the most reliable selectors (prefer `data-testid` or `id` over class or text)
|
|
48
|
+
- For React apps: use `type_react` for controlled inputs, `click_option` for dropdowns
|
|
49
|
+
- Add assertions after each significant interaction
|
|
50
|
+
- End with visual verification (`expect` field) for complex pages
|
|
51
|
+
- Consider `assert_no_network_errors` after critical page loads
|
|
52
|
+
|
|
53
|
+
5. **Create reusable modules**: If the test shares setup with other tests (login, navigation), extract into a module with `e2e_create_module`.
|
|
54
|
+
|
|
55
|
+
6. **Create and validate**: Use `e2e_create_test` to write the file, then `e2e_run` to execute. If tests fail, iterate on the actions.
|
|
56
|
+
|
|
57
|
+
## Action Selection Guide
|
|
58
|
+
|
|
59
|
+
### Navigation
|
|
60
|
+
- New page load → `goto`
|
|
61
|
+
- SPA route change → `navigate`
|
|
62
|
+
- Check final URL → `assert_url` with path only (`/dashboard`)
|
|
63
|
+
|
|
64
|
+
### Form Interaction
|
|
65
|
+
- Standard input → `type` (clears first)
|
|
66
|
+
- React controlled input → `type_react`
|
|
67
|
+
- Dropdown select → `select` (native) or `focus_autocomplete` + `click_option` (MUI)
|
|
68
|
+
- Checkbox/radio → `click`
|
|
69
|
+
- Clear field → `clear`
|
|
70
|
+
- Submit → `click` on submit button or `press` Enter
|
|
71
|
+
|
|
72
|
+
### Waiting
|
|
73
|
+
- Element appears → `wait` with `selector`
|
|
74
|
+
- Text appears → `wait` with `text`
|
|
75
|
+
- Fixed delay (last resort) → `wait` with `value` (ms)
|
|
76
|
+
|
|
77
|
+
### Assertions
|
|
78
|
+
- Text on page → `assert_text`
|
|
79
|
+
- Specific element text → `assert_element_text`
|
|
80
|
+
- Element visible → `assert_visible`
|
|
81
|
+
- Element hidden → `assert_not_visible`
|
|
82
|
+
- Element count → `assert_count`
|
|
83
|
+
- Input value → `assert_input_value`
|
|
84
|
+
- Pattern match → `assert_matches`
|
|
85
|
+
- Attribute → `assert_attribute`
|
|
86
|
+
- CSS class → `assert_class`
|
|
87
|
+
- URL → `assert_url`
|
|
88
|
+
|
|
89
|
+
### Best Practices
|
|
90
|
+
- Never use `evaluate` when a built-in action exists
|
|
91
|
+
- Add `retries` to actions on dynamically loaded elements
|
|
92
|
+
- Mark state-sharing tests as `serial: true`
|
|
93
|
+
- Use `screenshot` actions at key points for debugging
|
|
94
|
+
- Keep test names descriptive and kebab-case (`login-valid-credentials`)
|
|
95
|
+
|
|
96
|
+
## Output
|
|
97
|
+
|
|
98
|
+
Provide:
|
|
99
|
+
1. The created test file path and structure
|
|
100
|
+
2. Explanation of key design decisions (selector choices, wait strategies)
|
|
101
|
+
3. Run results showing the test passes
|
|
102
|
+
4. Suggestions for additional test cases if relevant
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Use this agent to improve existing E2E tests — refactor verbose evaluate actions into built-in alternatives, extract duplicated sequences into modules, replace brittle selectors, add missing waits/retries for flaky tests, and eliminate hardcoded delays. Best used when tests work but need cleanup.
|
|
3
|
+
tools:
|
|
4
|
+
- mcp__e2e-runner__e2e_list
|
|
5
|
+
- mcp__e2e-runner__e2e_run
|
|
6
|
+
- mcp__e2e-runner__e2e_learnings
|
|
7
|
+
- mcp__e2e-runner__e2e_create_module
|
|
8
|
+
- mcp__e2e-runner__e2e_create_test
|
|
9
|
+
- mcp__e2e-runner__e2e_screenshot
|
|
10
|
+
- mcp__e2e-runner__e2e_pool_status
|
|
11
|
+
- mcp__e2e-runner__e2e_capture
|
|
12
|
+
- Read
|
|
13
|
+
- Grep
|
|
14
|
+
- Glob
|
|
15
|
+
- Edit
|
|
16
|
+
- Write
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
# E2E Test Improver
|
|
20
|
+
|
|
21
|
+
You are a specialist in refactoring and optimizing existing E2E tests without changing their behavior. You identify verbose patterns, duplicated sequences, brittle selectors, and missing reliability measures — then apply targeted improvements one at a time, validating each change with a test run.
|
|
22
|
+
|
|
23
|
+
## Your Capabilities
|
|
24
|
+
|
|
25
|
+
- **Evaluate replacement**: Replace verbose `evaluate` actions with equivalent built-in actions (`type_react`, `click_option`, `assert_element_text`, etc.)
|
|
26
|
+
- **Duplication extraction**: Identify repeated action sequences across tests and extract them into reusable modules (`$use`)
|
|
27
|
+
- **Selector hardening**: Replace brittle selectors (nth-child, deep nesting, generated classes) with stable alternatives (`data-testid`, `id`, text-based)
|
|
28
|
+
- **Flaky test stabilization**: Add `wait` actions, `retries`, and `serial: true` based on historical failure data from the learning system
|
|
29
|
+
- **Fixed delay elimination**: Replace hardcoded `wait` actions that use fixed ms values with proper waits on selectors or text
|
|
30
|
+
- **Visual verification**: Add `expect` fields to tests that lack visual verification
|
|
31
|
+
- **Serial marking**: Mark tests that share mutable state as `serial: true` to prevent race conditions
|
|
32
|
+
- **Hook extraction**: Move duplicated setup/teardown actions into `beforeEach`/`beforeAll` hooks
|
|
33
|
+
|
|
34
|
+
## Improvement Workflow
|
|
35
|
+
|
|
36
|
+
1. **Discover tests**: Run `e2e_list` to get all available test suites. Read each test file with `Read` to understand current state.
|
|
37
|
+
|
|
38
|
+
2. **Gather intelligence**: Query the learning system for data-driven priorities:
|
|
39
|
+
- `e2e_learnings("flaky")` — which tests fail intermittently
|
|
40
|
+
- `e2e_learnings("selectors")` — which selectors are unstable
|
|
41
|
+
- `e2e_learnings("errors")` — recurring error patterns
|
|
42
|
+
- `e2e_learnings("summary")` — overall project health
|
|
43
|
+
|
|
44
|
+
3. **Identify improvements**: Scan each test file for:
|
|
45
|
+
- `evaluate` actions that match a built-in action pattern (see Evaluate Replacement Guide)
|
|
46
|
+
- Action sequences that appear in 2+ tests (module extraction candidates)
|
|
47
|
+
- Hardcoded `wait` with numeric values where a selector/text wait would be more reliable
|
|
48
|
+
- Tests without `expect` fields
|
|
49
|
+
- Tests that share state but aren't marked `serial: true`
|
|
50
|
+
- Repeated setup actions at the start of multiple tests (hook candidates)
|
|
51
|
+
|
|
52
|
+
4. **Apply changes**: Use `Edit` to modify test files in place. Apply one category of improvement at a time to keep changes reviewable.
|
|
53
|
+
|
|
54
|
+
5. **Extract modules**: When duplicated sequences are found, use `e2e_create_module` to create the module, then `Edit` the test files to replace the inline actions with `{ "$use": "module-name" }`.
|
|
55
|
+
|
|
56
|
+
6. **Validate**: Run `e2e_run` with the modified suite after each change to confirm no behavioral regression. If a test breaks, revert the change and investigate.
|
|
57
|
+
|
|
58
|
+
## Evaluate Replacement Guide
|
|
59
|
+
|
|
60
|
+
When you find an `evaluate` action, check if it matches one of these patterns — if so, replace it with the built-in action:
|
|
61
|
+
|
|
62
|
+
| Pattern in evaluate | Replace with |
|
|
63
|
+
|---|---|
|
|
64
|
+
| `document.querySelector(sel).textContent.includes(text)` | `assert_element_text` with `selector` + `text` |
|
|
65
|
+
| `el.textContent.trim() === text` | `assert_element_text` with `selector` + `text` + `value: "exact"` |
|
|
66
|
+
| `document.querySelector(sel).value` check | `assert_input_value` with `selector` + `value` |
|
|
67
|
+
| `new RegExp(pattern).test(el.textContent)` | `assert_matches` with `selector` + `value` (regex) |
|
|
68
|
+
| `el.classList.contains(cls)` | `assert_class` with `selector` + `value` |
|
|
69
|
+
| `el.hasAttribute(attr)` or `el.getAttribute(attr)` | `assert_attribute` with `selector` + `value` |
|
|
70
|
+
| `document.querySelectorAll(sel).length` | `assert_count` with `selector` + `value` |
|
|
71
|
+
| Native value setter + `dispatchEvent(new Event('input'))` | `type_react` with `selector` + `value` |
|
|
72
|
+
| `querySelectorAll('[role="option"]')...click()` | `click_option` with `text` |
|
|
73
|
+
| `MuiAutocomplete-root...input.focus()` | `focus_autocomplete` with `text` |
|
|
74
|
+
| `querySelectorAll('button').filter(regex)...click()` | `click_regex` with `text` + optional `selector` + `value` |
|
|
75
|
+
| `querySelectorAll('[class*="Chip"]')...click()` | `click_chip` with `text` |
|
|
76
|
+
| `document.title` or simple property read | `get_text` or `evaluate` (keep if no built-in equivalent) |
|
|
77
|
+
|
|
78
|
+
### Replacement Examples
|
|
79
|
+
|
|
80
|
+
```json
|
|
81
|
+
// BEFORE: evaluate for React input
|
|
82
|
+
{ "type": "evaluate", "value": "const input = document.querySelector('#search'); const nativeSet = Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, 'value').set; nativeSet.call(input, 'cefalea'); input.dispatchEvent(new Event('input', {bubbles: true})); input.dispatchEvent(new Event('change', {bubbles: true}));" }
|
|
83
|
+
|
|
84
|
+
// AFTER: one action
|
|
85
|
+
{ "type": "type_react", "selector": "#search", "value": "cefalea" }
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
```json
|
|
89
|
+
// BEFORE: evaluate for text assertion
|
|
90
|
+
{ "type": "evaluate", "value": "const el = document.querySelector('h1'); if (!el.textContent.includes('Dashboard')) throw new Error('Title mismatch');" }
|
|
91
|
+
|
|
92
|
+
// AFTER: one action
|
|
93
|
+
{ "type": "assert_element_text", "selector": "h1", "text": "Dashboard" }
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
```json
|
|
97
|
+
// BEFORE: evaluate for clicking autocomplete option
|
|
98
|
+
{ "type": "evaluate", "value": "const opt = [...document.querySelectorAll('[role=\"option\"]')].find(el => el.textContent.includes('Cefalea')); opt.click();" }
|
|
99
|
+
|
|
100
|
+
// AFTER: one action
|
|
101
|
+
{ "type": "click_option", "text": "Cefalea" }
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Duplication Detection
|
|
105
|
+
|
|
106
|
+
Look for these common duplication patterns:
|
|
107
|
+
|
|
108
|
+
- **Auth sequences**: Login actions (goto login, type credentials, click submit, wait for redirect) repeated across suites — extract to `auth` module
|
|
109
|
+
- **Navigation preamble**: Same goto + wait + click sequence at the start of multiple tests — extract to `navigate-to-<section>` module or move to `beforeEach` hook
|
|
110
|
+
- **Form fill patterns**: Same field-fill sequence used in create and edit tests — extract to `fill-<entity>-form` module with parameters
|
|
111
|
+
|
|
112
|
+
When extracting to a module, use `{{param}}` placeholders for values that vary between usages:
|
|
113
|
+
|
|
114
|
+
```json
|
|
115
|
+
// Module: auth
|
|
116
|
+
{ "type": "goto", "value": "/login" },
|
|
117
|
+
{ "type": "type", "selector": "#email", "value": "{{email}}" },
|
|
118
|
+
{ "type": "type", "selector": "#password", "value": "{{password}}" },
|
|
119
|
+
{ "type": "click", "selector": "button[type='submit']" },
|
|
120
|
+
{ "type": "wait", "selector": ".dashboard" }
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Rules
|
|
124
|
+
|
|
125
|
+
1. **Never change test behavior** — the test must verify the same thing before and after improvement. Same navigation, same assertions, same user flow.
|
|
126
|
+
2. **Validate every change** — run the modified suite after each improvement. If it fails, revert and investigate.
|
|
127
|
+
3. **One category at a time** — don't mix evaluate replacement with hook extraction in the same edit. Keep changes reviewable.
|
|
128
|
+
4. **Preserve test ordering** — don't reorder tests within a suite. Numeric prefix ordering is intentional.
|
|
129
|
+
5. **Keep evaluates when no built-in exists** — if the evaluate does something that no built-in action covers (e.g., complex DOM manipulation, localStorage checks), leave it as-is.
|
|
130
|
+
6. **Prefer selector waits over fixed delays** — replace `{ "type": "wait", "value": "3000" }` with `{ "type": "wait", "selector": ".expected-element" }` when possible. Only keep fixed delays when there's genuinely no element to wait for.
|
|
131
|
+
|
|
132
|
+
## Output
|
|
133
|
+
|
|
134
|
+
After completing improvements, provide:
|
|
135
|
+
|
|
136
|
+
1. **Summary of changes**: List each improvement with the file path and category (evaluate replacement, module extraction, hook extraction, etc.)
|
|
137
|
+
2. **Before/after**: Show the original and improved action for key changes
|
|
138
|
+
3. **Modules created**: Any new reusable modules with their parameter definitions
|
|
139
|
+
4. **Validation results**: Output from `e2e_run` confirming all tests still pass
|
|
140
|
+
5. **Remaining opportunities**: Improvements that were identified but not applied (e.g., selectors that need `data-testid` in the app code)
|
package/bin/cli.js
CHANGED
|
@@ -38,6 +38,9 @@ import { buildPrompt, generateTests, hasApiKey } from '../src/ai-generate.js';
|
|
|
38
38
|
import { verifyIssue } from '../src/verify.js';
|
|
39
39
|
import { ensureProject, computeScreenshotHash, registerScreenshotHash } from '../src/db.js';
|
|
40
40
|
import { log, colors as C } from '../src/logger.js';
|
|
41
|
+
import { listModules } from '../src/module-resolver.js';
|
|
42
|
+
import { getLearningsSummary, getFlakySummary, getSelectorStability, getPageHealth, getApiHealth, getErrorPatterns, getTestTrends } from '../src/learner-sqlite.js';
|
|
43
|
+
import { startNeo4j, stopNeo4j, getNeo4jStatus } from '../src/neo4j-pool.js';
|
|
41
44
|
|
|
42
45
|
const __filename = fileURLToPath(import.meta.url);
|
|
43
46
|
const __dirname = path.dirname(__filename);
|
|
@@ -61,6 +64,7 @@ function parseCLIConfig() {
|
|
|
61
64
|
if (getFlag('--base-url')) cliArgs.baseUrl = getFlag('--base-url');
|
|
62
65
|
if (getFlag('--pool-url')) cliArgs.poolUrl = getFlag('--pool-url');
|
|
63
66
|
if (getFlag('--tests-dir')) cliArgs.testsDir = getFlag('--tests-dir');
|
|
67
|
+
if (getFlag('--modules-dir')) cliArgs.modulesDir = getFlag('--modules-dir');
|
|
64
68
|
if (getFlag('--screenshots-dir')) cliArgs.screenshotsDir = getFlag('--screenshots-dir');
|
|
65
69
|
if (getFlag('--concurrency')) cliArgs.concurrency = parseInt(getFlag('--concurrency'));
|
|
66
70
|
if (getFlag('--pool-port')) cliArgs.poolPort = parseInt(getFlag('--pool-port'));
|
|
@@ -75,8 +79,11 @@ function parseCLIConfig() {
|
|
|
75
79
|
if (getFlag('--dashboard-port')) cliArgs.dashboardPort = parseInt(getFlag('--dashboard-port'));
|
|
76
80
|
if (getFlag('--project-name')) cliArgs.projectName = getFlag('--project-name');
|
|
77
81
|
if (hasFlag('--fail-on-network-error')) cliArgs.failOnNetworkError = true;
|
|
82
|
+
if (getFlag('--action-retries')) cliArgs.actionRetries = parseInt(getFlag('--action-retries'));
|
|
83
|
+
if (getFlag('--action-retry-delay')) cliArgs.actionRetryDelay = parseInt(getFlag('--action-retry-delay'));
|
|
78
84
|
if (getFlag('--auth-token')) cliArgs.authToken = getFlag('--auth-token');
|
|
79
85
|
if (getFlag('--auth-storage-key')) cliArgs.authStorageKey = getFlag('--auth-storage-key');
|
|
86
|
+
if (getFlag('--test-type')) cliArgs.testType = getFlag('--test-type');
|
|
80
87
|
return cliArgs;
|
|
81
88
|
}
|
|
82
89
|
|
|
@@ -106,18 +113,27 @@ ${C.bold}Usage:${C.reset}
|
|
|
106
113
|
e2e-runner issue <url> --generate Generate test file via Claude API
|
|
107
114
|
e2e-runner issue <url> --verify Generate + run + report bug status
|
|
108
115
|
e2e-runner issue <url> --prompt Output the AI prompt (for piping)
|
|
116
|
+
e2e-runner issue <url> --test-type e2e|api Test category (default: e2e)
|
|
109
117
|
|
|
110
118
|
e2e-runner pool start Start the Chrome Pool
|
|
111
119
|
e2e-runner pool stop Stop the Chrome Pool
|
|
112
120
|
e2e-runner pool status Show pool status
|
|
113
121
|
e2e-runner pool restart Restart the Chrome Pool
|
|
114
122
|
|
|
123
|
+
e2e-runner learnings Show test learnings summary
|
|
124
|
+
e2e-runner learnings --query <q> Query: flaky, selectors, pages, apis, errors, trends
|
|
125
|
+
|
|
126
|
+
e2e-runner neo4j start Start the Neo4j knowledge graph
|
|
127
|
+
e2e-runner neo4j stop Stop the Neo4j container
|
|
128
|
+
e2e-runner neo4j status Show Neo4j status
|
|
129
|
+
|
|
115
130
|
e2e-runner init Scaffold e2e/ in the current project
|
|
116
131
|
|
|
117
132
|
${C.bold}Options:${C.reset}
|
|
118
133
|
--base-url <url> App base URL (default: http://host.docker.internal:3000)
|
|
119
134
|
--pool-url <ws-url> Chrome Pool URL (default: ws://localhost:3333)
|
|
120
135
|
--tests-dir <dir> Tests directory (default: e2e/tests)
|
|
136
|
+
--modules-dir <dir> Reusable modules directory (default: e2e/modules)
|
|
121
137
|
--screenshots-dir <dir> Screenshots directory (default: e2e/screenshots)
|
|
122
138
|
--concurrency <n> Parallel test workers (default: 3)
|
|
123
139
|
--pool-port <port> Chrome Pool port (default: 3333)
|
|
@@ -148,18 +164,18 @@ async function cmdRun() {
|
|
|
148
164
|
console.log(`${C.dim}Pool: ${config.poolUrl} | Base: ${config.baseUrl} | Concurrency: ${config.concurrency}${C.reset}\n`);
|
|
149
165
|
|
|
150
166
|
if (hasFlag('--all')) {
|
|
151
|
-
const loaded = loadAllSuites(config.testsDir);
|
|
167
|
+
const loaded = loadAllSuites(config.testsDir, config.modulesDir, config.exclude);
|
|
152
168
|
tests = loaded.tests;
|
|
153
169
|
hooks = loaded.hooks;
|
|
154
170
|
} else if (getFlag('--suite')) {
|
|
155
171
|
const name = getFlag('--suite');
|
|
156
|
-
const loaded = loadTestSuite(name, config.testsDir);
|
|
172
|
+
const loaded = loadTestSuite(name, config.testsDir, config.modulesDir);
|
|
157
173
|
tests = loaded.tests;
|
|
158
174
|
hooks = loaded.hooks;
|
|
159
175
|
log('📋', `${C.cyan}${name}${C.reset} (${tests.length} tests)`);
|
|
160
176
|
} else if (getFlag('--tests')) {
|
|
161
177
|
const file = getFlag('--tests');
|
|
162
|
-
const loaded = loadTestFile(path.resolve(file));
|
|
178
|
+
const loaded = loadTestFile(path.resolve(file), config.modulesDir);
|
|
163
179
|
tests = loaded.tests;
|
|
164
180
|
hooks = loaded.hooks;
|
|
165
181
|
log('📋', `${C.cyan}${file}${C.reset} (${tests.length} tests)`);
|
|
@@ -230,6 +246,18 @@ async function cmdList() {
|
|
|
230
246
|
console.log(` ${C.dim}- ${test}${C.reset}`);
|
|
231
247
|
}
|
|
232
248
|
}
|
|
249
|
+
|
|
250
|
+
const modules = listModules(config.modulesDir);
|
|
251
|
+
if (modules.length > 0) {
|
|
252
|
+
console.log(`${C.bold}Available modules:${C.reset}\n`);
|
|
253
|
+
for (const mod of modules) {
|
|
254
|
+
const paramNames = mod.params.map(p => p.required ? p.name : `${C.dim}${p.name}?${C.reset}`).join(', ');
|
|
255
|
+
console.log(` ${C.cyan}${mod.name}${C.reset} (${paramNames})`);
|
|
256
|
+
if (mod.description) {
|
|
257
|
+
console.log(` ${C.dim}${mod.description}${C.reset}`);
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
}
|
|
233
261
|
console.log('');
|
|
234
262
|
}
|
|
235
263
|
|
|
@@ -279,6 +307,7 @@ function cmdInit() {
|
|
|
279
307
|
// Create directory structure
|
|
280
308
|
const dirs = [
|
|
281
309
|
path.join(cwd, 'e2e', 'tests'),
|
|
310
|
+
path.join(cwd, 'e2e', 'modules'),
|
|
282
311
|
path.join(cwd, 'e2e', 'screenshots'),
|
|
283
312
|
];
|
|
284
313
|
|
|
@@ -436,11 +465,12 @@ async function cmdIssue() {
|
|
|
436
465
|
|
|
437
466
|
const cliArgs = parseCLIConfig();
|
|
438
467
|
const config = await loadConfig(cliArgs);
|
|
468
|
+
const testType = cliArgs.testType || 'e2e';
|
|
439
469
|
|
|
440
470
|
if (hasFlag('--prompt')) {
|
|
441
471
|
// Output AI prompt as JSON to stdout
|
|
442
472
|
const issue = fetchIssue(url);
|
|
443
|
-
const promptData = buildPrompt(issue, config);
|
|
473
|
+
const promptData = buildPrompt(issue, config, testType);
|
|
444
474
|
console.log(JSON.stringify(promptData, null, 2));
|
|
445
475
|
return;
|
|
446
476
|
}
|
|
@@ -455,6 +485,7 @@ async function cmdIssue() {
|
|
|
455
485
|
console.log(`\n${C.bold}${C.cyan}@matware/e2e-runner${C.reset} v${pkg.version}`);
|
|
456
486
|
log('🔍', 'Fetching issue...');
|
|
457
487
|
|
|
488
|
+
config.testType = testType;
|
|
458
489
|
const result = await verifyIssue(url, config);
|
|
459
490
|
const { issue, report, bugConfirmed } = result;
|
|
460
491
|
|
|
@@ -483,9 +514,9 @@ async function cmdIssue() {
|
|
|
483
514
|
|
|
484
515
|
const issue = fetchIssue(url);
|
|
485
516
|
log('📋', `${C.cyan}${issue.title}${C.reset}`);
|
|
486
|
-
log('🤖',
|
|
517
|
+
log('🤖', `Generating ${testType} tests via Claude API...`);
|
|
487
518
|
|
|
488
|
-
const { tests, suiteName } = await generateTests(issue, config);
|
|
519
|
+
const { tests, suiteName } = await generateTests(issue, config, testType);
|
|
489
520
|
|
|
490
521
|
if (!fs.existsSync(config.testsDir)) {
|
|
491
522
|
fs.mkdirSync(config.testsDir, { recursive: true });
|
|
@@ -516,6 +547,155 @@ async function cmdIssue() {
|
|
|
516
547
|
console.log('');
|
|
517
548
|
}
|
|
518
549
|
|
|
550
|
+
async function cmdLearnings() {
|
|
551
|
+
const cliArgs = parseCLIConfig();
|
|
552
|
+
const config = await loadConfig(cliArgs);
|
|
553
|
+
const projectId = ensureProject(config._cwd, config.projectName, config.screenshotsDir, config.testsDir);
|
|
554
|
+
const days = config.learningsDays || 30;
|
|
555
|
+
const query = getFlag('--query') || 'summary';
|
|
556
|
+
|
|
557
|
+
console.log(`\n${C.bold}${C.cyan}@matware/e2e-runner${C.reset} v${pkg.version}`);
|
|
558
|
+
console.log(`${C.dim}Project: ${config.projectName} | Analysis window: ${days} days${C.reset}\n`);
|
|
559
|
+
|
|
560
|
+
switch (query) {
|
|
561
|
+
case 'summary': {
|
|
562
|
+
const summary = getLearningsSummary(projectId);
|
|
563
|
+
if (summary.totalRuns === 0) {
|
|
564
|
+
console.log(`${C.dim}No learnings data yet. Run some tests to start building knowledge.${C.reset}\n`);
|
|
565
|
+
return;
|
|
566
|
+
}
|
|
567
|
+
console.log(`${C.bold}Health Overview${C.reset}`);
|
|
568
|
+
console.log(`${'─'.repeat(50)}`);
|
|
569
|
+
console.log(` Total Runs: ${C.bold}${summary.totalRuns}${C.reset}`);
|
|
570
|
+
console.log(` Total Tests: ${C.bold}${summary.totalTests}${C.reset}`);
|
|
571
|
+
console.log(` Pass Rate: ${summary.overallPassRate >= 90 ? C.green : summary.overallPassRate >= 70 ? '' : C.red}${summary.overallPassRate}%${C.reset}`);
|
|
572
|
+
console.log(` Avg Duration: ${summary.avgDurationMs < 1000 ? summary.avgDurationMs + 'ms' : (summary.avgDurationMs / 1000).toFixed(1) + 's'}`);
|
|
573
|
+
console.log(` Flaky Tests: ${summary.flakyTests.length > 0 ? C.red : C.green}${summary.flakyTests.length}${C.reset}`);
|
|
574
|
+
console.log(` Unstable Selectors: ${summary.unstableSelectors.length > 0 ? C.red : C.green}${summary.unstableSelectors.length}${C.reset}`);
|
|
575
|
+
|
|
576
|
+
if (summary.flakyTests.length > 0) {
|
|
577
|
+
console.log(`\n${C.bold}Top Flaky Tests${C.reset}`);
|
|
578
|
+
summary.flakyTests.slice(0, 5).forEach(f => {
|
|
579
|
+
console.log(` ${C.yellow}⚠${C.reset} ${f.test_name} — ${f.flaky_rate}% flaky`);
|
|
580
|
+
});
|
|
581
|
+
}
|
|
582
|
+
if (summary.topErrors.length > 0) {
|
|
583
|
+
console.log(`\n${C.bold}Top Errors${C.reset}`);
|
|
584
|
+
summary.topErrors.slice(0, 5).forEach(e => {
|
|
585
|
+
console.log(` ${C.red}✗${C.reset} [${e.category}] ${e.pattern.slice(0, 60)}${e.pattern.length > 60 ? '...' : ''} (${e.occurrence_count}x)`);
|
|
586
|
+
});
|
|
587
|
+
}
|
|
588
|
+
console.log('');
|
|
589
|
+
break;
|
|
590
|
+
}
|
|
591
|
+
case 'flaky': {
|
|
592
|
+
const flaky = getFlakySummary(projectId, days);
|
|
593
|
+
if (flaky.length === 0) { console.log(`${C.green}No flaky tests found.${C.reset}\n`); return; }
|
|
594
|
+
console.log(`${C.bold}Flaky Tests${C.reset}\n`);
|
|
595
|
+
flaky.forEach(f => {
|
|
596
|
+
console.log(` ${C.yellow}⚠${C.reset} ${C.bold}${f.test_name}${C.reset}`);
|
|
597
|
+
console.log(` Rate: ${f.flaky_rate}% | Occurrences: ${f.flaky_count}/${f.total_runs} | Avg attempts: ${f.avg_attempts}`);
|
|
598
|
+
});
|
|
599
|
+
console.log('');
|
|
600
|
+
break;
|
|
601
|
+
}
|
|
602
|
+
case 'selectors': {
|
|
603
|
+
const sels = getSelectorStability(projectId, days);
|
|
604
|
+
if (sels.length === 0) { console.log(`${C.green}All selectors are stable.${C.reset}\n`); return; }
|
|
605
|
+
console.log(`${C.bold}Unstable Selectors${C.reset}\n`);
|
|
606
|
+
sels.forEach(s => {
|
|
607
|
+
console.log(` ${C.red}✗${C.reset} ${C.dim}${s.selector}${C.reset}`);
|
|
608
|
+
console.log(` Action: ${s.action_type} | Fail: ${s.fail_rate}% | Uses: ${s.total_uses} | Tests: ${s.used_by_tests}`);
|
|
609
|
+
});
|
|
610
|
+
console.log('');
|
|
611
|
+
break;
|
|
612
|
+
}
|
|
613
|
+
case 'pages': {
|
|
614
|
+
const pages = getPageHealth(projectId, days);
|
|
615
|
+
const failing = pages.filter(p => p.fail_rate > 0);
|
|
616
|
+
if (failing.length === 0) { console.log(`${C.green}All pages are healthy.${C.reset}\n`); return; }
|
|
617
|
+
console.log(`${C.bold}Failing Pages${C.reset}\n`);
|
|
618
|
+
failing.forEach(p => {
|
|
619
|
+
console.log(` ${C.red}✗${C.reset} ${C.bold}${p.url_path}${C.reset}`);
|
|
620
|
+
console.log(` Fail: ${p.fail_rate}% | Visits: ${p.total_visits} | Console errors: ${p.console_errors} | Network errors: ${p.network_errors}`);
|
|
621
|
+
});
|
|
622
|
+
console.log('');
|
|
623
|
+
break;
|
|
624
|
+
}
|
|
625
|
+
case 'apis': {
|
|
626
|
+
const apis = getApiHealth(projectId, days);
|
|
627
|
+
const issues = apis.filter(a => a.error_rate > 0);
|
|
628
|
+
if (issues.length === 0) { console.log(`${C.green}All API endpoints are healthy.${C.reset}\n`); return; }
|
|
629
|
+
console.log(`${C.bold}API Issues${C.reset}\n`);
|
|
630
|
+
issues.forEach(a => {
|
|
631
|
+
console.log(` ${C.red}✗${C.reset} ${C.bold}${a.endpoint}${C.reset}`);
|
|
632
|
+
console.log(` Error: ${a.error_rate}% | Calls: ${a.total_calls} | Avg: ${Math.round(a.avg_duration_ms)}ms | Status: ${a.status_codes}`);
|
|
633
|
+
});
|
|
634
|
+
console.log('');
|
|
635
|
+
break;
|
|
636
|
+
}
|
|
637
|
+
case 'errors': {
|
|
638
|
+
const errors = getErrorPatterns(projectId);
|
|
639
|
+
if (errors.length === 0) { console.log(`${C.green}No error patterns recorded.${C.reset}\n`); return; }
|
|
640
|
+
console.log(`${C.bold}Error Patterns${C.reset}\n`);
|
|
641
|
+
errors.forEach(e => {
|
|
642
|
+
console.log(` ${C.red}✗${C.reset} [${e.category}] ${e.pattern.slice(0, 70)}${e.pattern.length > 70 ? '...' : ''}`);
|
|
643
|
+
console.log(` Count: ${e.occurrence_count} | Last: ${(e.last_seen || '').split('T')[0]} | Test: ${e.example_test || '-'}`);
|
|
644
|
+
});
|
|
645
|
+
console.log('');
|
|
646
|
+
break;
|
|
647
|
+
}
|
|
648
|
+
case 'trends': {
|
|
649
|
+
const trends = getTestTrends(projectId, days);
|
|
650
|
+
if (trends.length === 0) { console.log(`${C.dim}No trend data available.${C.reset}\n`); return; }
|
|
651
|
+
console.log(`${C.bold}Test Trends (${days} days)${C.reset}\n`);
|
|
652
|
+
console.log(` ${'Date'.padEnd(12)} ${'Pass Rate'.padEnd(11)} ${'Tests'.padEnd(7)} ${'Pass'.padEnd(6)} ${'Fail'.padEnd(6)} Flaky`);
|
|
653
|
+
console.log(` ${'─'.repeat(55)}`);
|
|
654
|
+
trends.forEach(t => {
|
|
655
|
+
const rateColor = t.pass_rate >= 90 ? C.green : t.pass_rate >= 70 ? '' : C.red;
|
|
656
|
+
console.log(` ${t.date.padEnd(12)} ${rateColor}${(t.pass_rate + '%').padEnd(11)}${C.reset} ${String(t.total_tests).padEnd(7)} ${C.green}${String(t.passed).padEnd(6)}${C.reset} ${t.failed > 0 ? C.red : ''}${String(t.failed).padEnd(6)}${C.reset} ${t.flaky_count}`);
|
|
657
|
+
});
|
|
658
|
+
console.log('');
|
|
659
|
+
break;
|
|
660
|
+
}
|
|
661
|
+
default:
|
|
662
|
+
console.error(`${C.red}Unknown query: ${query}. Available: summary, flaky, selectors, pages, apis, errors, trends${C.reset}`);
|
|
663
|
+
process.exit(1);
|
|
664
|
+
}
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
/**
 * CLI handler for the `neo4j` command.
 *
 * Reads the subcommand from `args[1]`, loads the runner configuration and
 * dispatches to the matching Neo4j helper:
 *   - `start`  -> startNeo4j(config)
 *   - `stop`   -> stopNeo4j(config)
 *   - `status` -> prints the object returned by getNeo4jStatus(config)
 *
 * A missing or unrecognised subcommand prints an error to stderr and exits
 * the process with code 1.
 *
 * @returns {Promise<void>}
 */
async function cmdNeo4j() {
  const subCmd = args[1];
  const cliArgs = parseCLIConfig();
  const config = await loadConfig(cliArgs);

  switch (subCmd) {
    case 'start':
      // NOTE(review): whether startNeo4j returns a promise is not visible
      // from here. Awaiting is a no-op for a synchronous helper and, if it is
      // async, makes a rejection propagate to the caller instead of floating.
      await startNeo4j(config);
      break;

    case 'stop':
      // Same reasoning as for `start` above.
      await stopNeo4j(config);
      break;

    case 'status': {
      const status = getNeo4jStatus(config);
      console.log(`\n${C.bold}Neo4j Status:${C.reset}\n`);
      if (status.running) {
        console.log(`  Status:  ${C.green}Running${C.reset}`);
        console.log(`  Bolt:    ${C.cyan}bolt://localhost:${status.boltPort}${C.reset}`);
        console.log(`  Browser: ${C.cyan}http://localhost:${status.httpPort}${C.reset}`);
      } else {
        console.log(`  Status:  ${C.red}Stopped${C.reset}`);
        // Surface the reason reported by the status helper, when there is one.
        if (status.error) console.log(`  ${C.dim}${status.error}${C.reset}`);
      }
      console.log('');
      break;
    }

    default: {
      // Distinguish "nothing given" from "something unrecognised" so the
      // message never reads "Unknown subcommand: undefined".
      const reason = subCmd === undefined
        ? 'Missing subcommand'
        : `Unknown subcommand: ${subCmd}`;
      console.error(`${C.red}${reason}. Available: start, stop, status${C.reset}`);
      process.exit(1);
    }
  }
}
|
|
698
|
+
|
|
519
699
|
// ==================== Main ====================
|
|
520
700
|
|
|
521
701
|
async function main() {
|
|
@@ -556,6 +736,14 @@ async function main() {
|
|
|
556
736
|
await cmdIssue();
|
|
557
737
|
break;
|
|
558
738
|
|
|
739
|
+
case 'learnings':
|
|
740
|
+
await cmdLearnings();
|
|
741
|
+
break;
|
|
742
|
+
|
|
743
|
+
case 'neo4j':
|
|
744
|
+
await cmdNeo4j();
|
|
745
|
+
break;
|
|
746
|
+
|
|
559
747
|
case 'init':
|
|
560
748
|
cmdInit();
|
|
561
749
|
break;
|