@athenaflow/plugin-e2e-test-builder 2.0.9 → 2.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/.codex-plugin/plugin.json +1 -1
  3. package/dist/{2.0.8 → 2.0.10}/.agents/plugins/marketplace.json +1 -1
  4. package/dist/{2.0.9 → 2.0.10}/claude/plugin/.claude-plugin/plugin.json +1 -1
  5. package/dist/{2.0.9 → 2.0.10}/claude/plugin/package.json +8 -2
  6. package/dist/{2.0.9 → 2.0.10}/claude/plugin/skills/add-e2e-tests/SKILL.md +18 -65
  7. package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/add-e2e-tests/agents/openai.yaml +1 -1
  8. package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/add-e2e-tests/references/error-recovery.md +3 -3
  9. package/dist/{2.0.8/codex → 2.0.10/claude}/plugin/skills/add-e2e-tests/references/scaffolding.md +1 -1
  10. package/dist/{2.0.9 → 2.0.10}/claude/plugin/skills/fix-flaky-tests/SKILL.md +1 -1
  11. package/dist/{2.0.8/codex → 2.0.10/claude}/plugin/skills/fix-flaky-tests/references/fix-patterns.md +3 -2
  12. package/dist/{2.0.9 → 2.0.10}/claude/plugin/skills/generate-test-cases/SKILL.md +8 -2
  13. package/dist/{2.0.9 → 2.0.10}/claude/plugin/skills/plan-test-coverage/SKILL.md +7 -6
  14. package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/review-test-cases/SKILL.md +3 -4
  15. package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/write-test-code/SKILL.md +4 -3
  16. package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/write-test-code/references/api-setup-teardown.md +1 -1
  17. package/dist/{2.0.9 → 2.0.10}/codex/plugin/.codex-plugin/plugin.json +1 -1
  18. package/dist/{2.0.9 → 2.0.10}/codex/plugin/package.json +8 -2
  19. package/dist/{2.0.9 → 2.0.10}/codex/plugin/skills/add-e2e-tests/SKILL.md +18 -65
  20. package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/add-e2e-tests/agents/openai.yaml +1 -1
  21. package/dist/{2.0.9/claude → 2.0.10/codex}/plugin/skills/add-e2e-tests/references/error-recovery.md +3 -3
  22. package/dist/{2.0.9/claude → 2.0.10/codex}/plugin/skills/add-e2e-tests/references/scaffolding.md +1 -1
  23. package/dist/{2.0.8/claude → 2.0.10/codex}/plugin/skills/fix-flaky-tests/SKILL.md +1 -1
  24. package/dist/{2.0.9/claude → 2.0.10/codex}/plugin/skills/fix-flaky-tests/references/fix-patterns.md +3 -2
  25. package/dist/{2.0.9 → 2.0.10}/codex/plugin/skills/generate-test-cases/SKILL.md +8 -2
  26. package/dist/{2.0.9 → 2.0.10}/codex/plugin/skills/plan-test-coverage/SKILL.md +7 -6
  27. package/dist/{2.0.9/claude → 2.0.10/codex}/plugin/skills/review-test-cases/SKILL.md +3 -4
  28. package/dist/{2.0.9/claude → 2.0.10/codex}/plugin/skills/write-test-code/SKILL.md +4 -3
  29. package/dist/{2.0.9/claude → 2.0.10/codex}/plugin/skills/write-test-code/references/api-setup-teardown.md +1 -1
  30. package/dist/{2.0.9 → 2.0.10}/release.json +1 -1
  31. package/package.json +7 -1
  32. package/skills/add-e2e-tests/SKILL.md +18 -65
  33. package/skills/add-e2e-tests/agents/openai.yaml +1 -1
  34. package/skills/add-e2e-tests/references/error-recovery.md +3 -3
  35. package/skills/add-e2e-tests/references/scaffolding.md +1 -1
  36. package/skills/fix-flaky-tests/SKILL.md +1 -1
  37. package/skills/fix-flaky-tests/references/fix-patterns.md +3 -2
  38. package/skills/generate-test-cases/SKILL.md +8 -2
  39. package/skills/plan-test-coverage/SKILL.md +7 -6
  40. package/skills/review-test-cases/SKILL.md +3 -4
  41. package/skills/write-test-code/SKILL.md +4 -3
  42. package/skills/write-test-code/references/api-setup-teardown.md +1 -1
  43. package/dist/2.0.8/claude/plugin/.claude-plugin/plugin.json +0 -20
  44. package/dist/2.0.8/claude/plugin/package.json +0 -9
  45. package/dist/2.0.8/claude/plugin/skills/add-e2e-tests/SKILL.md +0 -217
  46. package/dist/2.0.8/claude/plugin/skills/add-e2e-tests/agents/claude.yaml +0 -1
  47. package/dist/2.0.8/claude/plugin/skills/add-e2e-tests/references/scaffolding.md +0 -12
  48. package/dist/2.0.8/claude/plugin/skills/add-e2e-tests/references/tracker-template.md +0 -53
  49. package/dist/2.0.8/claude/plugin/skills/fix-flaky-tests/references/fix-patterns.md +0 -91
  50. package/dist/2.0.8/claude/plugin/skills/generate-test-cases/SKILL.md +0 -184
  51. package/dist/2.0.8/claude/plugin/skills/plan-test-coverage/SKILL.md +0 -116
  52. package/dist/2.0.8/codex/plugin/.codex-plugin/plugin.json +0 -15
  53. package/dist/2.0.8/codex/plugin/package.json +0 -9
  54. package/dist/2.0.8/codex/plugin/skills/add-e2e-tests/SKILL.md +0 -217
  55. package/dist/2.0.8/codex/plugin/skills/add-e2e-tests/agents/claude.yaml +0 -1
  56. package/dist/2.0.8/codex/plugin/skills/add-e2e-tests/references/error-recovery.md +0 -43
  57. package/dist/2.0.8/codex/plugin/skills/add-e2e-tests/references/tracker-template.md +0 -53
  58. package/dist/2.0.8/codex/plugin/skills/fix-flaky-tests/SKILL.md +0 -160
  59. package/dist/2.0.8/codex/plugin/skills/generate-test-cases/SKILL.md +0 -184
  60. package/dist/2.0.8/codex/plugin/skills/plan-test-coverage/SKILL.md +0 -116
  61. package/dist/2.0.8/codex/plugin/skills/review-test-cases/SKILL.md +0 -147
  62. package/dist/2.0.8/codex/plugin/skills/write-test-code/SKILL.md +0 -227
  63. package/dist/2.0.8/codex/plugin/skills/write-test-code/references/api-setup-teardown.md +0 -83
  64. package/dist/2.0.8/release.json +0 -18
  65. package/dist/2.0.9/.agents/plugins/marketplace.json +0 -14
  66. package/dist/2.0.9/claude/plugin/skills/add-e2e-tests/agents/openai.yaml +0 -10
  67. package/dist/2.0.9/claude/plugin/skills/add-e2e-tests/references/authentication.md +0 -8
  68. package/dist/2.0.9/claude/plugin/skills/add-e2e-tests/references/tracker-template.md +0 -53
  69. package/dist/2.0.9/claude/plugin/skills/analyze-test-codebase/SKILL.md +0 -142
  70. package/dist/2.0.9/claude/plugin/skills/analyze-test-codebase/agents/claude.yaml +0 -3
  71. package/dist/2.0.9/claude/plugin/skills/analyze-test-codebase/agents/openai.yaml +0 -4
  72. package/dist/2.0.9/claude/plugin/skills/fix-flaky-tests/agents/claude.yaml +0 -3
  73. package/dist/2.0.9/claude/plugin/skills/fix-flaky-tests/agents/openai.yaml +0 -10
  74. package/dist/2.0.9/claude/plugin/skills/generate-test-cases/agents/claude.yaml +0 -3
  75. package/dist/2.0.9/claude/plugin/skills/generate-test-cases/agents/openai.yaml +0 -10
  76. package/dist/2.0.9/claude/plugin/skills/generate-test-cases/references/scenario-categories.md +0 -36
  77. package/dist/2.0.9/claude/plugin/skills/plan-test-coverage/agents/claude.yaml +0 -3
  78. package/dist/2.0.9/claude/plugin/skills/plan-test-coverage/agents/openai.yaml +0 -10
  79. package/dist/2.0.9/claude/plugin/skills/review-test-cases/agents/claude.yaml +0 -3
  80. package/dist/2.0.9/claude/plugin/skills/review-test-cases/agents/openai.yaml +0 -10
  81. package/dist/2.0.9/claude/plugin/skills/review-test-code/SKILL.md +0 -189
  82. package/dist/2.0.9/claude/plugin/skills/review-test-code/agents/claude.yaml +0 -3
  83. package/dist/2.0.9/claude/plugin/skills/review-test-code/agents/openai.yaml +0 -10
  84. package/dist/2.0.9/claude/plugin/skills/write-test-code/agents/claude.yaml +0 -3
  85. package/dist/2.0.9/claude/plugin/skills/write-test-code/agents/openai.yaml +0 -10
  86. package/dist/2.0.9/claude/plugin/skills/write-test-code/references/anti-patterns.md +0 -88
  87. package/dist/2.0.9/claude/plugin/skills/write-test-code/references/auth-patterns.md +0 -63
  88. package/dist/2.0.9/claude/plugin/skills/write-test-code/references/mapping-tables.md +0 -56
  89. package/dist/2.0.9/claude/plugin/skills/write-test-code/references/network-interception.md +0 -56
  90. package/dist/2.0.9/codex/plugin/skills/add-e2e-tests/agents/openai.yaml +0 -10
  91. package/dist/2.0.9/codex/plugin/skills/add-e2e-tests/references/authentication.md +0 -8
  92. package/dist/2.0.9/codex/plugin/skills/add-e2e-tests/references/error-recovery.md +0 -43
  93. package/dist/2.0.9/codex/plugin/skills/add-e2e-tests/references/scaffolding.md +0 -12
  94. package/dist/2.0.9/codex/plugin/skills/add-e2e-tests/references/tracker-template.md +0 -53
  95. package/dist/2.0.9/codex/plugin/skills/analyze-test-codebase/SKILL.md +0 -142
  96. package/dist/2.0.9/codex/plugin/skills/analyze-test-codebase/agents/claude.yaml +0 -3
  97. package/dist/2.0.9/codex/plugin/skills/analyze-test-codebase/agents/openai.yaml +0 -4
  98. package/dist/2.0.9/codex/plugin/skills/fix-flaky-tests/SKILL.md +0 -160
  99. package/dist/2.0.9/codex/plugin/skills/fix-flaky-tests/agents/claude.yaml +0 -3
  100. package/dist/2.0.9/codex/plugin/skills/fix-flaky-tests/agents/openai.yaml +0 -10
  101. package/dist/2.0.9/codex/plugin/skills/fix-flaky-tests/references/fix-patterns.md +0 -91
  102. package/dist/2.0.9/codex/plugin/skills/generate-test-cases/agents/claude.yaml +0 -3
  103. package/dist/2.0.9/codex/plugin/skills/generate-test-cases/agents/openai.yaml +0 -10
  104. package/dist/2.0.9/codex/plugin/skills/generate-test-cases/references/scenario-categories.md +0 -36
  105. package/dist/2.0.9/codex/plugin/skills/plan-test-coverage/agents/claude.yaml +0 -3
  106. package/dist/2.0.9/codex/plugin/skills/plan-test-coverage/agents/openai.yaml +0 -10
  107. package/dist/2.0.9/codex/plugin/skills/review-test-cases/SKILL.md +0 -147
  108. package/dist/2.0.9/codex/plugin/skills/review-test-cases/agents/claude.yaml +0 -3
  109. package/dist/2.0.9/codex/plugin/skills/review-test-cases/agents/openai.yaml +0 -10
  110. package/dist/2.0.9/codex/plugin/skills/review-test-code/SKILL.md +0 -189
  111. package/dist/2.0.9/codex/plugin/skills/review-test-code/agents/claude.yaml +0 -3
  112. package/dist/2.0.9/codex/plugin/skills/review-test-code/agents/openai.yaml +0 -10
  113. package/dist/2.0.9/codex/plugin/skills/write-test-code/SKILL.md +0 -227
  114. package/dist/2.0.9/codex/plugin/skills/write-test-code/agents/claude.yaml +0 -3
  115. package/dist/2.0.9/codex/plugin/skills/write-test-code/agents/openai.yaml +0 -10
  116. package/dist/2.0.9/codex/plugin/skills/write-test-code/references/anti-patterns.md +0 -88
  117. package/dist/2.0.9/codex/plugin/skills/write-test-code/references/api-setup-teardown.md +0 -83
  118. package/dist/2.0.9/codex/plugin/skills/write-test-code/references/auth-patterns.md +0 -63
  119. package/dist/2.0.9/codex/plugin/skills/write-test-code/references/mapping-tables.md +0 -56
  120. package/dist/2.0.9/codex/plugin/skills/write-test-code/references/network-interception.md +0 -56
  121. package/skills/add-e2e-tests/references/tracker-template.md +0 -53
  122. /package/dist/{2.0.9 → 2.0.10}/claude/plugin/skills/add-e2e-tests/agents/claude.yaml +0 -0
  123. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/add-e2e-tests/references/authentication.md +0 -0
  124. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/analyze-test-codebase/SKILL.md +0 -0
  125. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/analyze-test-codebase/agents/claude.yaml +0 -0
  126. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/analyze-test-codebase/agents/openai.yaml +0 -0
  127. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/fix-flaky-tests/agents/claude.yaml +0 -0
  128. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/fix-flaky-tests/agents/openai.yaml +0 -0
  129. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/generate-test-cases/agents/claude.yaml +0 -0
  130. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/generate-test-cases/agents/openai.yaml +0 -0
  131. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/generate-test-cases/references/scenario-categories.md +0 -0
  132. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/plan-test-coverage/agents/claude.yaml +0 -0
  133. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/plan-test-coverage/agents/openai.yaml +0 -0
  134. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/review-test-cases/agents/claude.yaml +0 -0
  135. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/review-test-cases/agents/openai.yaml +0 -0
  136. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/review-test-code/SKILL.md +0 -0
  137. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/review-test-code/agents/claude.yaml +0 -0
  138. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/review-test-code/agents/openai.yaml +0 -0
  139. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/write-test-code/agents/claude.yaml +0 -0
  140. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/write-test-code/agents/openai.yaml +0 -0
  141. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/write-test-code/references/anti-patterns.md +0 -0
  142. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/write-test-code/references/auth-patterns.md +0 -0
  143. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/write-test-code/references/mapping-tables.md +0 -0
  144. /package/dist/{2.0.8 → 2.0.10}/claude/plugin/skills/write-test-code/references/network-interception.md +0 -0
  145. /package/dist/{2.0.9 → 2.0.10}/codex/plugin/skills/add-e2e-tests/agents/claude.yaml +0 -0
  146. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/add-e2e-tests/references/authentication.md +0 -0
  147. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/analyze-test-codebase/SKILL.md +0 -0
  148. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/analyze-test-codebase/agents/claude.yaml +0 -0
  149. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/analyze-test-codebase/agents/openai.yaml +0 -0
  150. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/fix-flaky-tests/agents/claude.yaml +0 -0
  151. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/fix-flaky-tests/agents/openai.yaml +0 -0
  152. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/generate-test-cases/agents/claude.yaml +0 -0
  153. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/generate-test-cases/agents/openai.yaml +0 -0
  154. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/generate-test-cases/references/scenario-categories.md +0 -0
  155. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/plan-test-coverage/agents/claude.yaml +0 -0
  156. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/plan-test-coverage/agents/openai.yaml +0 -0
  157. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/review-test-cases/agents/claude.yaml +0 -0
  158. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/review-test-cases/agents/openai.yaml +0 -0
  159. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/review-test-code/SKILL.md +0 -0
  160. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/review-test-code/agents/claude.yaml +0 -0
  161. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/review-test-code/agents/openai.yaml +0 -0
  162. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/write-test-code/agents/claude.yaml +0 -0
  163. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/write-test-code/agents/openai.yaml +0 -0
  164. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/write-test-code/references/anti-patterns.md +0 -0
  165. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/write-test-code/references/auth-patterns.md +0 -0
  166. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/write-test-code/references/mapping-tables.md +0 -0
  167. /package/dist/{2.0.8 → 2.0.10}/codex/plugin/skills/write-test-code/references/network-interception.md +0 -0
@@ -1,160 +0,0 @@
1
- ---
2
- name: fix-flaky-tests
3
- description: >
4
- This skill should be used when a Playwright test is failing, flaky, timing out, or behaving
5
- inconsistently. It provides structured root cause analysis for: stabilizing intermittent tests,
6
- debugging timeouts ("Test timeout of 30000ms exceeded"), fixing race conditions, investigating
7
- local-vs-CI divergence, and running repeated stability checks (--repeat-each).
8
- IMPORTANT: If running tests with --repeat-each, --retries, or multiple times to check stability,
9
- STOP and load this skill first — it has structured root cause analysis that prevents brute-force
10
- approaches. Triggers: "stabilize", "intermittent", "flaky", "keeps failing", "fails in CI",
11
- "timeout on", "race condition", "run N times to check stability", "verify tests are stable".
12
- NOT for writing new tests (use write-test-code) or analyzing setup (use analyze-test-codebase).
13
- allowed-tools: Read Write Edit Bash Glob Grep Task
14
- ---
15
-
16
- # Fix Flaky Tests
17
-
18
- Systematically diagnose and fix intermittent Playwright test failures using root cause analysis. A flaky test is worse than no test — it trains teams to ignore failures.
19
-
20
- ## Input
21
-
22
- Parse the test file path or test name from: $ARGUMENTS
23
-
24
- If no argument is provided, ask: "Which test file or test name is flaky?"
25
-
26
- ## Workflow
27
-
28
- ### Step 1: Reproduce and Classify
29
-
30
- 1. **Read the test file** to understand what it tests and how
31
- 2. **Run the test multiple times** to observe the failure pattern:
32
- ```bash
33
- npx playwright test <file> --repeat-each=5 --reporter=list 2>&1
34
- ```
35
- 3. **Run with the full suite** if it passed above — it may only fail alongside other tests:
36
- ```bash
37
- npx playwright test --reporter=list 2>&1
38
- ```
39
- 4. **Classify the failure** into one of these root cause categories:
40
-
41
- | Category | Symptoms |
42
- |----------|----------|
43
- | **Timing** | Timeout errors, "element not found", "not visible yet" |
44
- | **State leakage** | Passes alone, fails when run with other tests |
45
- | **Data dependency** | Fails when expected data doesn't exist or has changed |
46
- | **Race condition** | Action fires before page is ready (hydration, animation) |
47
- | **Selector fragility** | Element found but wrong one, or `.first()` picks different element |
48
- | **Environment** | Passes locally, fails in CI (viewport, speed, resources) |
49
-
50
- ### Step 2: Root Cause Analysis
51
-
52
- Investigate based on the classification:
53
-
54
- **Timing issues:**
55
- - Look for assertions immediately after actions with no wait for the resulting state change
56
- - Check if the test asserts before an API response arrives — search for missing `waitForResponse`
57
- - Look for animations/transitions that affect element state (CSS transitions, skeleton screens)
58
- - Check for `waitForTimeout` being used as a "fix" — this is a symptom, not a cure
59
- - Check if `networkidle` or `load` waitUntil would help for navigation
60
-
61
- **State leakage:**
62
- - Run the failing test alone: `npx playwright test --grep "<test name>"`
63
- - Check if tests share mutable state: global variables, database rows, cookies, localStorage
64
- - Look for missing cleanup in `afterEach`/`afterAll`
65
- - Check if `storageState` bleeds between tests or test files
66
- - Check for test data created by one test that another test depends on
67
-
68
- **Race conditions:**
69
- - Identify the race: what two things are happening concurrently?
70
- - Check for click handlers that fire before JavaScript hydration completes
71
- - Look for optimistic UI updates that revert on API response
72
- - Check for actions during navigation transitions (click during page load)
73
- - Look for double-clicks or rapid interactions that trigger duplicate actions
74
-
75
- **Selector fragility:**
76
- - Navigate to the page in the browser and verify the selector currently matches the intended element
77
- - Check if the selector matches multiple elements — `.first()` or `.nth()` is a smell
78
- - Look for dynamically generated IDs, classes, or attributes
79
- - Check for conditional rendering that changes element order or presence
80
- - Verify locators against current DOM structure using `find` and `get_element`
81
-
82
- **Environment issues:**
83
- - Compare CI viewport size vs local — element may be off-screen in CI
84
- - Check for timezone-dependent assertions (dates, timestamps)
85
- - Check for locale-dependent formatting (numbers, currency)
86
- - Check if CI has slower network/CPU affecting timing
87
- - Look for third-party scripts (analytics, chat widgets) that load differently in CI
88
-
89
- ### Step 3: Apply the Correct Fix
90
-
91
- Use the right fix pattern for the diagnosed root cause. **Never apply a fix without understanding the cause.** See [references/fix-patterns.md](references/fix-patterns.md) for full code examples.
92
-
93
- | Category | Principle |
94
- |----------|-----------|
95
- | **Timing** | Replace sleeps with event-driven waits (`waitForResponse`, auto-retrying assertions) |
96
- | **State leakage** | Unique data per test, API-based reset in `beforeEach`, no shared mutable state |
97
- | **Race condition** | Use `Promise.all` for action + expected response; wait for hydration before interaction |
98
- | **Selector fragility** | Scope locators to containers with unique content; avoid `.first()` and position-dependent selectors |
99
- | **Environment** | Explicit viewport, timezone-agnostic assertions, block interfering third-party scripts |
100
-
101
- ### Step 4: Verify the Fix
102
-
103
- 1. **Run the test 5+ times** to confirm stability:
104
- ```bash
105
- npx playwright test <file> --repeat-each=5 --reporter=list 2>&1
106
- ```
107
- 2. **Run with the full test suite** to verify no state leakage:
108
- ```bash
109
- npx playwright test --reporter=list 2>&1
110
- ```
111
- 3. If still flaky → return to Step 2 with the new failure output. The initial classification may have been wrong.
112
- 4. **Maximum 3 fix-and-rerun cycles.** If the test is still flaky after 3 attempts, stop and report the diagnostic findings (root cause hypothesis, fixes attempted, remaining failure output) so the user can decide next steps. Do not continue looping.
113
-
114
- ### Step 5: Summarize
115
-
116
- Report:
117
- 1. **Root cause** — what made the test flaky and why
118
- 2. **Fix applied** — what changed and why this fix addresses the root cause
119
- 3. **Verification** — how many consecutive runs passed
120
- 4. **Prevention** — what pattern to follow in future tests to avoid this class of flakiness
121
-
122
- ## Flakiness Checklist (Less Obvious Causes)
123
-
124
- When the standard categories don't fit, check these:
125
-
126
- - [ ] **Viewport size** — element off-screen in CI (smaller viewport)
127
- - [ ] **Font rendering** — text matching fails due to font differences across OS
128
- - [ ] **Timezone** — date/time assertions fail in different timezones
129
- - [ ] **Locale** — number/currency formatting differs (1,000 vs 1.000)
130
- - [ ] **Third-party scripts** — analytics/chat widgets change DOM or block clicks
131
- - [ ] **Cookie consent banners** — overlay blocks click targets
132
- - [ ] **Feature flags** — different features enabled in different environments
133
- - [ ] **Database state** — shared test database with stale or conflicting data
134
- - [ ] **Parallel execution** — tests interfere when run in parallel workers
135
- - [ ] **Browser caching** — cached responses differ from fresh ones
136
- - [ ] **Service workers** — intercepting requests differently than expected
137
- - [ ] **Lazy loading** — elements not yet in DOM when test tries to interact
138
-
139
- ## Anti-Patterns: What is NOT a Fix
140
-
141
- These mask the problem. Never apply them without a real fix:
142
-
143
- | "Fix" | Why It's Wrong | Real Fix |
144
- |-------|---------------|----------|
145
- | `waitForTimeout(3000)` | Hides timing race, will break under load | Wait for the specific event |
146
- | `.first()` added | Hides selector ambiguity | Narrow the selector |
147
- | Increased timeout to 30s | Hides missing wait or slow setup | Find what you're actually waiting for |
148
- | `test.skip()` | Ignoring the problem | Diagnose and fix |
149
- | `retries: 3` without fix | Masks real failures, wastes CI time | Fix the root cause, then keep retries as safety net |
150
- | `{ force: true }` | Bypasses actionability checks, hides overlapping elements or disabled state | Find and fix the actionability issue: wait for overlay to disappear, scroll element into view, or wait for enabled state |
151
- | `try/catch` swallowing errors | Test passes but doesn't verify anything | Fix the assertion |
152
-
153
- ## Multiple Flaky Tests
154
-
155
- When a suite has several flaky tests:
156
-
157
- 1. **Triage first.** Run the full suite once and group failures by root cause category (timing, state leakage, etc.). Shared root causes (broken fixture, leaking state) should be fixed once, not per-test.
158
- 2. **Fix shared infrastructure issues first.** A bad `beforeEach`, a leaking `storageState`, or a missing cleanup can cause many tests to fail. One fix resolves many failures.
159
- 3. **Split independent fixes across subagents** when the fix scopes do not overlap (different test files, no shared fixtures). Pass each subagent the test file path, this diagnostic workflow, and the root cause classification table.
160
- 4. The 3 fix-and-rerun cycle limit applies **per test**, not globally.
@@ -1,3 +0,0 @@
1
- frontmatter:
2
- argument-hint: "<path to flaky test file or test name>"
3
- user-invocable: true
@@ -1,10 +0,0 @@
1
- interface:
2
- display_name: "Fix Flaky Playwright Tests"
3
- short_description: "Diagnose unstable Playwright tests and fix the root cause"
4
- default_prompt: "Diagnose this flaky Playwright test, reproduce the failure, and fix the root cause."
5
-
6
- dependencies:
7
- tools:
8
- - type: "mcp"
9
- value: "agent-web-interface"
10
- description: "Browser automation MCP used to reproduce and inspect unstable flows"
@@ -1,91 +0,0 @@
1
- # Fix Patterns by Root Cause
2
-
3
- Code examples for each root cause category. Apply only after diagnosing the cause in Step 2.
4
-
5
- ## Timing Fixes — Replace Sleeps with Event-Driven Waits
6
-
7
- ```typescript
8
- // BAD: arbitrary sleep
9
- await page.waitForTimeout(2000);
10
- await expect(element).toBeVisible();
11
-
12
- // GOOD: wait for the network event that loads the content
13
- await page.waitForResponse(resp => resp.url().includes('/api/data'));
14
- await expect(element).toBeVisible();
15
-
16
- // GOOD: wait for loading indicator to disappear
17
- await expect(page.getByRole('progressbar')).toBeHidden();
18
- await expect(element).toBeVisible();
19
-
20
- // GOOD: wait for navigation to complete
21
- await page.goto('/page', { waitUntil: 'networkidle' });
22
-
23
- // GOOD: use auto-retrying assertion (retries until timeout)
24
- await expect(page.getByText(/loaded/i)).toBeVisible({ timeout: 10000 });
25
- ```
26
-
27
- ## State Isolation Fixes
28
-
29
- ```typescript
30
- // Unique data per test
31
- const uniqueEmail = `test-${Date.now()}@example.com`;
32
-
33
- // Reset state via API before each test
34
- test.beforeEach(async ({ request }) => {
35
- await request.post('/api/test/reset');
36
- });
37
-
38
- // Use fresh browser context (default in Playwright, but verify)
39
- // Do NOT share page or context between tests
40
- ```
41
-
42
- ## Race Condition Fixes
43
-
44
- ```typescript
45
- // Wait for hydration/framework readiness
46
- await page.waitForFunction(() =>
47
- document.querySelector('[data-hydrated="true"]')
48
- );
49
-
50
- // Use Promise.all for action + expected response
51
- const [response] = await Promise.all([
52
- page.waitForResponse('**/api/submit'),
53
- submitButton.click(),
54
- ]);
55
-
56
- // Wait for animation/transition to complete
57
- await expect(modal).toBeVisible();
58
- await page.waitForFunction(() =>
59
- !document.querySelector('.modal-animating')
60
- );
61
- ```
62
-
63
- ## Selector Fixes
64
-
65
- ```typescript
66
- // BAD: position-dependent, matches wrong element if order changes
67
- page.locator('.item').first();
68
-
69
- // GOOD: scoped to container with unique content
70
- page.getByRole('listitem').filter({ hasText: 'Specific Item' });
71
-
72
- // GOOD: use test IDs for ambiguous elements
73
- page.getByTestId('cart-item-sku-123');
74
-
75
- // GOOD: scope to a region first, then find within
76
- page.locator('main').getByRole('button', { name: /submit/i });
77
- ```
78
-
79
- ## Environment Fixes
80
-
81
- ```typescript
82
- // Set explicit viewport in test or config
83
- test.use({ viewport: { width: 1280, height: 720 } });
84
-
85
- // Use timezone-agnostic assertions
86
- await expect(dateElement).toContainText(/\d{4}/); // year, not full date string
87
-
88
- // Block third-party scripts that interfere
89
- await page.route('**/analytics/**', route => route.abort());
90
- await page.route('**/chat-widget/**', route => route.abort());
91
- ```
@@ -1,3 +0,0 @@
1
- frontmatter:
2
- argument-hint: "<url> <user journey description>"
3
- user-invocable: true
@@ -1,10 +0,0 @@
1
- interface:
2
- display_name: "Write TC-ID Specs"
3
- short_description: "Explore the feature and write detailed TC-ID test case specs"
4
- default_prompt: "Explore this feature and generate detailed TC-ID test case specifications."
5
-
6
- dependencies:
7
- tools:
8
- - type: "mcp"
9
- value: "agent-web-interface"
10
- description: "Browser automation MCP used to inspect the live flow before writing specs"
@@ -1,36 +0,0 @@
1
- # Scenario Categories — Detailed Checklists
2
-
3
- These checklists support Step 4 of the generate-test-cases skill. Each category covers scenarios that may not be directly triggerable during browser exploration but must be included in comprehensive test specifications.
4
-
5
- ## Network & Performance
6
-
7
- - Network failure during form submission (mock 500, timeout)
8
- - Slow API response (loading states, skeleton screens, spinners)
9
- - Large data sets (pagination, infinite scroll, 100+ items)
10
- - Offline behavior (if PWA or service worker is present)
11
-
12
- ## Accessibility (WCAG 2.1 AA)
13
-
14
- - Keyboard-only navigation through the entire flow (Tab, Enter, Escape)
15
- - Screen reader announcements for dynamic content (ARIA live regions)
16
- - Focus management after modal open/close, page transitions
17
- - Color contrast for error states and disabled elements
18
- - Form error association (`aria-describedby` linking errors to fields)
19
-
20
- ## Visual Consistency
21
-
22
- - Layout stability (no unexpected content shifts after load)
23
- - Responsive behavior at standard breakpoints (mobile 375px, tablet 768px, desktop 1280px)
24
- - Dark mode rendering if supported
25
-
26
- ## Cross-browser Considerations
27
-
28
- - Safari-specific behavior (date inputs, smooth scrolling, storage quirks)
29
- - Firefox form validation differences
30
- - Mobile browser touch targets and gestures
31
-
32
- ## Concurrent & Session
33
-
34
- - Session expiry mid-flow (cookie cleared during multi-step)
35
- - Concurrent access (two tabs, same user)
36
- - Race conditions (double-click submit, rapid navigation)
@@ -1,3 +0,0 @@
1
- frontmatter:
2
- argument-hint: "<url> <feature or area to test>"
3
- user-invocable: true
@@ -1,10 +0,0 @@
1
- interface:
2
- display_name: "Plan Coverage Priorities"
3
- short_description: "Identify coverage gaps and prioritize what to test first"
4
- default_prompt: "Review this feature and produce a prioritized E2E coverage plan without writing specs or code."
5
-
6
- dependencies:
7
- tools:
8
- - type: "mcp"
9
- value: "agent-web-interface"
10
- description: "Browser automation MCP used for lightweight site inspection"
@@ -1,147 +0,0 @@
1
- ---
2
- name: review-test-cases
3
- description: >
4
- This skill should be used when a quality review of TC-ID test case specifications is needed before writing executable
5
- test code. It reviews the spec artifact only; it does not implement or rewrite tests.
6
- Triggers: "review test cases", "check test specs", "review TC-IDs", "audit test coverage",
7
- "are my test cases good", "validate test specs", "review test-cases/*.md",
8
- "check for gaps in test cases", "review before writing tests", "quality check test specs".
9
- Inserted as a quality gate between generate-test-cases and write-test-code — catches
10
- gaps, duplication, weak assertions, missing error paths, and invented scenarios before they get
11
- encoded into test code. Review-only — does NOT modify the spec file, does NOT write test code.
12
- The write-test-code skill should be used for implementation.
13
- allowed-tools: Read Glob Grep Task
14
- ---
15
-
16
- # Review Test Cases
17
-
18
- Review TC-ID test case specifications for completeness, accuracy, and quality before they are implemented as executable Playwright tests. This is a quality gate — catch problems in the spec, not in the code.
19
-
20
- ## Input
21
-
22
- Parse the spec file path from: $ARGUMENTS
23
-
24
- If no argument provided, search for `test-cases/*.md` files and review the most recently modified one.
25
-
26
- ## Workflow
27
-
28
- ### Step 1: Load the Spec and Context
29
-
30
- 1. Read the test case spec file
31
- 2. Read any related files for context:
32
- - `e2e-plan/conventions.md` or `e2e-plan/coverage-plan.md` if they exist
33
- - `e2e-tracker.md` if it exists (to understand what was explored)
34
- 3. Extract the target URL from the spec header
35
-
36
- ### Step 2: Run the Review Checklist
37
-
38
- Evaluate every test case against each criterion. Track findings by severity:
39
-
40
- - **BLOCKER** — must fix before writing tests (missing critical paths, invented behavior, wrong URL)
41
- - **WARNING** — should fix, will cause problems in implementation (vague steps, weak assertions, duplication)
42
- - **SUGGESTION** — optional improvement (priority adjustment, better categorization, additional edge case)
43
-
44
- #### 2a. Coverage Completeness
45
-
46
- | Check | What to Look For |
47
- |-------|-----------------|
48
- | Happy path present | At least one Critical-priority test covers the primary success flow end-to-end |
49
- | Error paths covered (MINIMUM) | Every spec MUST have at least: (1) one server error test (500), (2) one network failure test (timeout/offline), (3) one empty state test. If auth is involved: (4) one session expiry test. Missing any of these is a BLOCKER, not a suggestion |
50
- | Boundary conditions | Min/max values, empty inputs, special characters, long strings |
51
- | Authentication edge cases | Session expiry, unauthorized access, role-based differences (if applicable) |
52
- | Navigation edge cases | Back/forward, direct URL access, refresh mid-flow |
53
- | Missing user actions | Every interactive element on the page should appear in at least one test case |
54
-
55
- #### 2b. Specification Quality
56
-
57
- | Check | What to Look For |
58
- |-------|-----------------|
59
- | Steps are concrete | "Click the Submit button" not "submit the form"; "Enter 'test@example.com' in Email field" not "enter email" |
60
- | Expected results are observable | Specific text, URL change, element state — not "page updates" or "works correctly" |
61
- | Preconditions are explicit | Auth state, test data, feature flags, starting URL — nothing assumed |
62
- | TC-IDs are sequential | No gaps, no duplicates, correct feature prefix |
63
- | Priority is justified | Critical = blocks core journey; not everything is Critical |
64
- | Categories are accurate | Happy Path vs Validation vs Edge Case — correctly classified |
65
-
66
- #### 2c. Invented vs Observed
67
-
68
- This is the most important check. Test cases should trace back to behavior that was actually observed or deliberately triggered during exploration, not assumed.
69
-
70
- Red flags for invented scenarios:
71
- - Specific error message text that wasn't observed (e.g., "Please enter a valid email" when the actual message might differ)
72
- - Assumptions about validation rules without exploration evidence (e.g., "minimum 8 characters" without trying it)
73
- - Test cases for UI elements that may not exist (e.g., "retry button" on error page without visiting the error page)
74
- - Server-side behavior assumptions (e.g., "rate limit after 5 attempts" without evidence)
75
-
76
- When suspicious: delegate a spot-check to a subagent with browser access (Task tool). Pass it the target URL, the specific TC-IDs under suspicion, and the claims to verify (element existence, error message text, validation behavior). The subagent should return structured evidence: what it found, what matched, what differed.
77
-
78
- #### 2d. Duplication and Overlap
79
-
80
- - Flag test cases that test the same behavior with trivially different inputs
81
- - Flag test cases where the steps are identical but expected results differ only cosmetically
82
- - Flag merging candidates: cases that could be combined into a single parameterized test without losing coverage
83
-
84
- #### 2e. Implementability
85
-
86
- - Flag steps that cannot be automated with Playwright (e.g., "verify email arrives", "check database directly")
87
- - Flag preconditions that require manual setup with no automation path
88
- - Flag assertions that require visual comparison without specifying tolerance
89
- - Flag test cases that depend on third-party services (payment processors, OAuth providers) without a mock strategy
90
-
91
- ### Step 3: Produce the Review Report
92
-
93
- Output a structured review with this format:
94
-
95
- ```markdown
96
- # Test Case Review: <feature>
97
-
98
- **Spec file:** <path>
99
- **Total test cases:** <count>
100
- **Review date:** <date>
101
-
102
- ## Verdict: PASS | PASS WITH WARNINGS | NEEDS REVISION
103
-
104
- ## Blockers (<count>)
105
- - **TC-<ID>**: <issue description>
106
-
107
- ## Warnings (<count>)
108
- - **TC-<ID>**: <issue description>
109
-
110
- ## Suggestions (<count>)
111
- - **TC-<ID>**: <issue description>
112
-
113
- ## Coverage Gaps
114
- - <Missing scenario that should be added>
115
-
116
- ## Duplication
117
- - **TC-<ID>** and **TC-<ID>**: <overlap description>
118
-
119
- ## Summary
120
- <2-3 sentences on overall spec quality and what to address before implementation>
121
- ```
122
-
123
- ### Step 4: Verdict Rules
124
-
125
- - **PASS** — no blockers, 2 or fewer warnings. Proceed to write-test-code.
126
- - **PASS WITH WARNINGS** — no blockers, 3+ warnings. Can proceed but should address warnings.
127
- - **NEEDS REVISION** — 1+ blockers. Do not proceed to write-test-code until blockers are resolved.
128
-
129
- Example: 0 blockers + 2 warnings = PASS. 0 blockers + 3 warnings = PASS WITH WARNINGS. 1+ blockers = NEEDS REVISION regardless of warning count.
130
-
131
- ## Principles
132
-
133
- - **Review-only** — never modify the spec file; report findings for the author to act on
134
- - **Evidence over opinion** — cite specific TC-IDs and quote specific steps/assertions when flagging issues
135
- - **Spot-check against live site** — delegate to a subagent with browser access to verify 2-3 suspicious claims rather than trusting all text at face value
136
- - **Bounded output** — the review report should be actionable and finite, not an exhaustive rewrite
137
- - **Severity matters** — distinguish blockers from suggestions; not every imperfection is worth fixing before implementation
138
-
139
- ## Example Usage
140
-
141
- ```
142
- Claude Code: /review-test-cases test-cases/login.md
143
- Codex: $review-test-cases test-cases/login.md
144
-
145
- Claude Code: /review-test-cases test-cases/checkout.md
146
- Codex: $review-test-cases test-cases/checkout.md
147
- ```
@@ -1,3 +0,0 @@
1
- frontmatter:
2
- argument-hint: "<path to test-cases/*.md spec file>"
3
- user-invocable: true
@@ -1,10 +0,0 @@
1
- interface:
2
- display_name: "Review TC-ID Specs"
3
- short_description: "Review TC-ID specs for gaps, duplication, and weak assertions"
4
- default_prompt: "Review these TC-ID test case specifications before implementation and flag quality issues."
5
-
6
- dependencies:
7
- tools:
8
- - type: "mcp"
9
- value: "agent-web-interface"
10
- description: "Browser automation MCP used to spot-check observed behavior claims against live site"