@supatest/cli 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/dist/index.js +6586 -153
  2. package/package.json +4 -3
  3. package/dist/agent-runner.js +0 -589
  4. package/dist/commands/login.js +0 -392
  5. package/dist/commands/setup.js +0 -234
  6. package/dist/config.js +0 -29
  7. package/dist/core/agent.js +0 -270
  8. package/dist/modes/headless.js +0 -117
  9. package/dist/modes/interactive.js +0 -430
  10. package/dist/presenters/composite.js +0 -32
  11. package/dist/presenters/console.js +0 -163
  12. package/dist/presenters/react.js +0 -220
  13. package/dist/presenters/types.js +0 -1
  14. package/dist/presenters/web.js +0 -78
  15. package/dist/prompts/builder.js +0 -181
  16. package/dist/prompts/fixer.js +0 -148
  17. package/dist/prompts/headless.md +0 -97
  18. package/dist/prompts/index.js +0 -3
  19. package/dist/prompts/interactive.md +0 -43
  20. package/dist/prompts/plan.md +0 -41
  21. package/dist/prompts/planner.js +0 -70
  22. package/dist/prompts/prompts/builder.md +0 -97
  23. package/dist/prompts/prompts/fixer.md +0 -100
  24. package/dist/prompts/prompts/plan.md +0 -41
  25. package/dist/prompts/prompts/planner.md +0 -41
  26. package/dist/services/api-client.js +0 -244
  27. package/dist/services/event-streamer.js +0 -130
  28. package/dist/types.js +0 -1
  29. package/dist/ui/App.js +0 -322
  30. package/dist/ui/components/AuthBanner.js +0 -20
  31. package/dist/ui/components/AuthDialog.js +0 -32
  32. package/dist/ui/components/Banner.js +0 -12
  33. package/dist/ui/components/ExpandableSection.js +0 -17
  34. package/dist/ui/components/Header.js +0 -49
  35. package/dist/ui/components/HelpMenu.js +0 -89
  36. package/dist/ui/components/InputPrompt.js +0 -292
  37. package/dist/ui/components/MessageList.js +0 -42
  38. package/dist/ui/components/QueuedMessageDisplay.js +0 -31
  39. package/dist/ui/components/Scrollable.js +0 -103
  40. package/dist/ui/components/SessionSelector.js +0 -196
  41. package/dist/ui/components/StatusBar.js +0 -45
  42. package/dist/ui/components/messages/AssistantMessage.js +0 -20
  43. package/dist/ui/components/messages/ErrorMessage.js +0 -26
  44. package/dist/ui/components/messages/LoadingMessage.js +0 -28
  45. package/dist/ui/components/messages/ThinkingMessage.js +0 -17
  46. package/dist/ui/components/messages/TodoMessage.js +0 -44
  47. package/dist/ui/components/messages/ToolMessage.js +0 -218
  48. package/dist/ui/components/messages/UserMessage.js +0 -14
  49. package/dist/ui/contexts/KeypressContext.js +0 -527
  50. package/dist/ui/contexts/MouseContext.js +0 -98
  51. package/dist/ui/contexts/SessionContext.js +0 -131
  52. package/dist/ui/hooks/useAnimatedScrollbar.js +0 -83
  53. package/dist/ui/hooks/useBatchedScroll.js +0 -22
  54. package/dist/ui/hooks/useBracketedPaste.js +0 -31
  55. package/dist/ui/hooks/useFocus.js +0 -50
  56. package/dist/ui/hooks/useKeypress.js +0 -26
  57. package/dist/ui/hooks/useModeToggle.js +0 -25
  58. package/dist/ui/types/auth.js +0 -13
  59. package/dist/ui/utils/file-completion.js +0 -56
  60. package/dist/ui/utils/input.js +0 -50
  61. package/dist/ui/utils/markdown.js +0 -376
  62. package/dist/ui/utils/mouse.js +0 -189
  63. package/dist/ui/utils/theme.js +0 -59
  64. package/dist/utils/banner.js +0 -9
  65. package/dist/utils/encryption.js +0 -71
  66. package/dist/utils/events.js +0 -36
  67. package/dist/utils/keychain-storage.js +0 -120
  68. package/dist/utils/logger.js +0 -209
  69. package/dist/utils/node-version.js +0 -89
  70. package/dist/utils/plan-file.js +0 -75
  71. package/dist/utils/project-instructions.js +0 -23
  72. package/dist/utils/rich-logger.js +0 -208
  73. package/dist/utils/stdin.js +0 -25
  74. package/dist/utils/stdio.js +0 -80
  75. package/dist/utils/summary.js +0 -94
  76. package/dist/utils/token-storage.js +0 -242
@@ -1,220 +0,0 @@
1
- import { getToolDisplayName } from "shared";
2
/**
 * Produce the short summary text shown for a tool call.
 *
 * @param {string} toolName - Name of the tool being invoked.
 * @param {object} [input] - Tool input; may be null/undefined.
 * @returns {string} A per-tool description, or `toolName` itself when the
 *   tool has no dedicated description.
 */
function getToolDescription(toolName, input) {
  // File-oriented tools all describe themselves by the target path.
  if (toolName === "Read" || toolName === "Write" || toolName === "Edit") {
    return input?.file_path || "file";
  }

  if (toolName === "Bash") {
    const command = input?.command || "";
    // Long commands are cut to 60 characters and marked with an ellipsis.
    if (command.length > 60) {
      return `${command.substring(0, 60)}...`;
    }
    return command;
  }

  if (toolName === "Glob") {
    const globPattern = input?.pattern || "files";
    return `pattern: "${globPattern}"`;
  }

  if (toolName === "Grep") {
    const searchPattern = input?.pattern || "code";
    const searchPath = input?.path;
    if (searchPath) {
      return `"${searchPattern}" (in ${searchPath})`;
    }
    return `"${searchPattern}"`;
  }

  if (toolName === "Task") {
    return input?.subagent_type || "task";
  }

  if (toolName === "TodoWrite") {
    return "Updated todos";
  }

  if (toolName === "BashOutput" || toolName === "Command Output") {
    return input?.bash_id || "shell output";
  }

  // Unknown tools fall back to their own name.
  return toolName;
}
35
/**
 * Presenter that reflects agent activity in the UI through the supplied
 * `callbacks` and forwards every event to `apiClient.streamEvent` for the
 * given session.
 *
 * Consecutive text (or thinking) chunks are merged into one UI message by
 * patching the most recently added message. Because `updateLastMessage`
 * targets whatever message was added last, the combining state of one kind is
 * cleared whenever a message of another kind is added; otherwise a later
 * chunk would overwrite an unrelated message.
 */
export class ReactPresenter {
  callbacks;
  apiClient;
  sessionId;
  verbose;
  // Track message state for combining consecutive blocks
  hasAssistantMessage = false;
  currentAssistantText = "";
  hasThinkingMessage = false;
  currentThinkingText = "";

  /**
   * @param {object} callbacks - UI hooks used here: addMessage,
   *   updateLastMessage, updateMessageByToolId, updateStats, setTodos, onComplete.
   * @param {object} apiClient - Must expose `streamEvent(sessionId, event)`.
   * @param {string} sessionId - Session identifier passed to every streamed event.
   * @param {boolean} [verbose=false] - When true, `onLog` writes to stderr.
   */
  constructor(callbacks, apiClient, sessionId, verbose = false) {
    this.callbacks = callbacks;
    this.apiClient = apiClient;
    this.sessionId = sessionId;
    this.verbose = verbose;
  }

  /** Stream the initial user message (the task text) to the API. */
  async onStart(config) {
    const userMessageEvent = {
      type: "user_message",
      content: [{ type: "text", text: config.task }],
    };
    await this.apiClient.streamEvent(this.sessionId, userMessageEvent);
  }

  /** Print a debug message to stderr, but only in verbose mode. */
  onLog(message) {
    if (this.verbose) {
      console.error(message);
    }
  }

  /**
   * Show an assistant text chunk, merging it into the current assistant
   * message when one is still the last message, then stream the delta.
   */
  async onAssistantText(text) {
    if (!this.hasAssistantMessage) {
      // First text block - create new message
      this.callbacks.addMessage({
        type: "assistant",
        content: text,
        isPending: false,
      });
      this.hasAssistantMessage = true;
      this.currentAssistantText = text;
      // The new assistant message is now last, so any open thinking message
      // can no longer be appended to.
      this.hasThinkingMessage = false;
      this.currentThinkingText = "";
    }
    else {
      // Append to current message
      this.currentAssistantText += text;
      this.callbacks.updateLastMessage({
        content: this.currentAssistantText,
      });
    }
    // Stream to API
    const textEvent = {
      type: "assistant_text",
      delta: text,
    };
    await this.apiClient.streamEvent(this.sessionId, textEvent);
  }

  /**
   * Show a thinking chunk, merging it into the current thinking message when
   * one is still the last message, then stream the delta.
   */
  async onThinking(text) {
    if (!this.hasThinkingMessage) {
      // Create new thinking message
      this.callbacks.addMessage({
        type: "thinking",
        content: text,
        isExpanded: false,
      });
      this.hasThinkingMessage = true;
      this.currentThinkingText = text;
      // The new thinking message is now last, so any open assistant message
      // can no longer be appended to.
      this.hasAssistantMessage = false;
      this.currentAssistantText = "";
    }
    else {
      // Append to existing thinking message
      this.currentThinkingText += text;
      this.callbacks.updateLastMessage({
        content: this.currentThinkingText,
      });
    }
    // Stream to API
    const thinkingEvent = {
      type: "assistant_thinking",
      delta: text,
    };
    await this.apiClient.streamEvent(this.sessionId, thinkingEvent);
  }

  /**
   * Add a tool message to the UI, update stats/todos for the tools that carry
   * them, and stream the tool_use event.
   */
  async onToolUse(tool, input, toolId) {
    // Add tool message to UI
    const message = {
      type: "tool",
      content: getToolDescription(tool, input),
      toolName: getToolDisplayName(tool),
      toolInput: input,
      toolResult: undefined,
      isExpanded: false,
      toolUseId: toolId,
    };
    this.callbacks.addMessage(message);
    // Reset message state - next text/thinking creates new messages
    this.hasAssistantMessage = false;
    this.hasThinkingMessage = false;
    this.currentAssistantText = "";
    this.currentThinkingText = "";
    // Update stats for file modifications
    if (tool === "Write" || tool === "Edit") {
      const filePath = input?.file_path;
      if (filePath) {
        this.callbacks.updateStats({
          filesModified: new Set([filePath]),
        });
      }
    }
    else if (tool === "Bash") {
      const command = input?.command;
      if (command) {
        this.callbacks.updateStats({
          commandsRun: [command],
        });
      }
    }
    else if (tool === "TodoWrite") {
      const todos = input?.todos;
      if (todos && Array.isArray(todos)) {
        this.callbacks.setTodos(todos);
        this.callbacks.addMessage({
          type: "todo",
          content: "",
          todos: todos,
        });
      }
    }
    // Stream tool use event to API
    const toolUseEvent = {
      type: "tool_use",
      id: toolId,
      name: tool,
      input: input || {},
    };
    await this.apiClient.streamEvent(this.sessionId, toolUseEvent);
  }

  /** Attach a result to the matching tool message and stream it. */
  async onToolResult(toolId, result) {
    // Update the tool message with the result
    this.callbacks.updateMessageByToolId(toolId, {
      toolResult: result,
    });
    // Stream tool result to API
    const toolResultEvent = {
      type: "tool_result",
      tool_use_id: toolId,
      content: result,
    };
    await this.apiClient.streamEvent(this.sessionId, toolResultEvent);
  }

  /** Push the latest token total into the UI stats (nothing is streamed). */
  async onUsageUpdate(totalTokens) {
    this.callbacks.updateStats({ totalTokens });
  }

  /** Stream the completed assistant message for this turn. */
  async onTurnComplete(content) {
    const messageCompleteEvent = {
      type: "message_complete",
      message: {
        role: "assistant",
        content: content,
      },
    };
    await this.apiClient.streamEvent(this.sessionId, messageCompleteEvent);
  }

  /** Show an error message in the UI and stream a session_error event. */
  async onError(error) {
    this.callbacks.addMessage({
      type: "error",
      content: error,
      errorType: "error",
    });
    // The error message is now last; later text/thinking must start fresh
    // messages instead of patching the error.
    this.hasAssistantMessage = false;
    this.hasThinkingMessage = false;
    this.currentAssistantText = "";
    this.currentThinkingText = "";
    await this.apiClient.streamEvent(this.sessionId, {
      type: "session_error",
      error: error,
    });
  }

  /**
   * Stream the terminal session event (complete or error) and then notify the
   * UI with the success flag.
   */
  async onComplete(result) {
    if (result.success) {
      await this.apiClient.streamEvent(this.sessionId, {
        type: "session_complete",
      });
    }
    else {
      await this.apiClient.streamEvent(this.sessionId, {
        type: "session_error",
        error: result.error || "Unknown error",
      });
    }
    this.callbacks.onComplete(result.success);
  }
}
@@ -1 +0,0 @@
1
- export {};
@@ -1,78 +0,0 @@
1
- import { EventStreamer } from "../services/event-streamer";
2
/**
 * Presenter that forwards agent activity to the web side by queueing events
 * on an EventStreamer built from the given API client and session id.
 */
export class WebPresenter {
  streamer;

  constructor(apiClient, sessionId) {
    this.streamer = new EventStreamer(apiClient, sessionId);
  }

  /** Queue the initial user message (the task text) to establish context. */
  async onStart(config) {
    const textBlock = { type: "text", text: config.task };
    await this.streamer.queueEvent({
      type: "user_message",
      content: [textBlock],
    });
  }

  /** Intentionally a no-op: debug logs are not streamed to web. */
  onLog(_message) {}

  /** Queue an assistant text delta. */
  async onAssistantText(text) {
    await this.streamer.queueEvent({ type: "assistant_text", delta: text });
  }

  /** Queue an assistant thinking delta. */
  async onThinking(text) {
    await this.streamer.queueEvent({ type: "assistant_thinking", delta: text });
  }

  /** Queue a tool_use event; a missing input is sent as an empty object. */
  async onToolUse(tool, input, toolId) {
    await this.streamer.queueEvent({
      type: "tool_use",
      id: toolId,
      name: tool,
      input: input || {},
    });
  }

  /** Flush anything pending, then queue the completed assistant message. */
  async onTurnComplete(content) {
    // Pending events go out first, before the completion event is queued.
    await this.streamer.flush();
    const completedMessage = { role: "assistant", content: content };
    await this.streamer.queueEvent({
      type: "message_complete",
      message: completedMessage,
    });
  }

  /** Queue a session_error event and shut the streamer down. */
  async onError(error) {
    await this.streamer.queueEvent({ type: "session_error", error: error });
    await this.streamer.shutdown();
  }

  /** Queue the terminal session event for the result, then shut down. */
  async onComplete(result) {
    const terminalEvent = result.success
      ? { type: "session_complete" }
      : { type: "session_error", error: result.error || "Unknown error" };
    await this.streamer.queueEvent(terminalEvent);
    await this.streamer.shutdown();
  }
}
@@ -1,181 +0,0 @@
1
/**
 * Prompt text for the E2E test-builder agent, exported as one template literal.
 * The text is organised into XML-style sections (role, core_workflow, discovery, ...).
 * Backticks inside the text are backslash-escaped so they do not terminate the
 * literal, and the regex in the sample test doubles its backslash so that a
 * single backslash remains in the resulting string.
 */
- export const builderPrompt = `<role>
2
- You are an E2E Test Builder Agent that iteratively creates, runs, and fixes Playwright tests until they pass. You have access to Playwright MCP tools for browser automation and debugging.
3
- </role>
4
-
5
- <core_workflow>
6
- Follow this iterative build loop for each test:
7
-
8
- 1. **Discover** - Understand project setup before writing (see discovery section)
9
- 2. **Understand** - Read the test spec or user flow requirements
10
- 3. **Write** - Create or update the Playwright test file
11
- 4. **Run** - Execute the test using the correct command
12
- 5. **Verify** - Check results; if passing, move to next test
13
- 6. **Debug** - If failing, use Playwright MCP tools to investigate
14
- 7. **Fix** - Update test based on findings, return to step 4
15
-
16
- Continue until all tests pass. Do NOT stop after first failure. Max 5 attempts per test.
17
- </core_workflow>
18
-
19
- <discovery>
20
- Before writing tests, understand the project setup:
21
-
22
- **Test infrastructure:**
23
- - Check package.json for test scripts and playwright dependency
24
- - Look for playwright.config.ts or playwright.config.js
25
- - Find existing test directory (tests/, e2e/, __tests__/)
26
- - Note any existing test patterns or fixtures
27
-
28
- **Application structure:**
29
- - Identify the base URL (from config or package.json scripts)
30
- - Find main routes/pages in the app
31
- - Check for authentication requirements
32
-
33
- **Existing patterns:**
34
- - Look at existing tests for selector conventions
35
- - Check for shared fixtures or page objects
36
- - Note any custom test utilities
37
-
38
- **If no Playwright setup exists:**
39
- - Initialize with \`npm init playwright@latest\`
40
- - Use defaults unless user specifies otherwise
41
-
42
- **If existing tests exist:**
43
- - Follow their patterns and conventions
44
- - Use the same directory structure
45
- - Reuse existing fixtures and utilities
46
- </discovery>
47
-
48
- <test_data_strategy>
49
- **Prefer API setup when available, fall back to UI otherwise.**
50
-
51
- - API setup is faster and more reliable for creating test data
52
- - Use UI setup when no API is available
53
- - Each test should create its own data
54
- - Clean up after tests when possible
55
- - Use unique identifiers (timestamps, random strings) to avoid collisions
56
- </test_data_strategy>
57
-
58
- <playwright_execution>
59
- CRITICAL: Always run Playwright tests correctly to ensure clean exits.
60
-
61
- **Correct test commands:**
62
- - Single test: \`npx playwright test tests/example.spec.ts --reporter=list\`
63
- - All tests: \`npx playwright test --reporter=list\`
64
- - Headed mode (debugging): \`npx playwright test --headed --reporter=list\`
65
-
66
- **NEVER use:**
67
- - \`--ui\` flag (opens interactive UI that blocks)
68
- - \`--reporter=html\` without \`--reporter=list\` (may open server)
69
- - Commands without \`--reporter=list\` in CI/headless mode
70
-
71
- **Process management:**
72
- - Always use \`--reporter=list\` or \`--reporter=dot\` for clean output
73
- - Tests should exit automatically after completion
74
- - If a process hangs, kill it and retry with correct flags
75
- </playwright_execution>
76
-
77
- <debugging_with_mcp>
78
- When tests fail, use Playwright MCP tools to investigate:
79
-
80
- 1. **Navigate**: Use \`mcp__playwright__playwright_navigate\` to load the failing page
81
- 2. **Inspect DOM**: Use \`mcp__playwright__playwright_get_visible_html\` to see actual elements
82
- 3. **Screenshot**: Use \`mcp__playwright__playwright_screenshot\` to capture current state
83
- 4. **Console logs**: Use \`mcp__playwright__playwright_console_logs\` to check for JS errors
84
- 5. **Interact**: Use click/fill tools to manually reproduce the flow
85
-
86
- **Workflow**: Navigate → inspect HTML → verify selectors → check console → fix
87
- </debugging_with_mcp>
88
-
89
- <selector_strategy>
90
- Prioritize resilient selectors:
91
- 1. \`getByRole()\` - accessibility-focused, most stable
92
- 2. \`getByLabel()\` - form elements
93
- 3. \`getByText()\` - user-visible content
94
- 4. \`getByTestId()\` - explicit test markers
95
- 5. CSS selectors - last resort, avoid class-based
96
-
97
- When selectors fail:
98
- - Use MCP to inspect actual DOM structure
99
- - Check if element exists but has different text/role
100
- - Verify element is visible and not hidden
101
- </selector_strategy>
102
-
103
- <test_structure>
104
- Use Arrange-Act-Assert pattern:
105
- \`\`\`typescript
106
- test('should complete checkout', async ({ page }) => {
107
- // Arrange - Setup preconditions
108
- await page.goto('/cart');
109
-
110
- // Act - Perform the action
111
- await page.getByRole('button', { name: 'Checkout' }).click();
112
- await page.getByLabel('Card number').fill('4242424242424242');
113
- await page.getByRole('button', { name: 'Pay' }).click();
114
-
115
- // Assert - Verify outcomes
116
- await expect(page).toHaveURL(/\\/confirmation/);
117
- await expect(page.getByText('Order confirmed')).toBeVisible();
118
- });
119
- \`\`\`
120
- </test_structure>
121
-
122
- <anti_patterns>
123
- Avoid these common mistakes:
124
-
125
- - \`waitForTimeout()\` - use explicit element waits instead
126
- - Brittle CSS class selectors - use role/label/testid
127
- - Tests depending on execution order - each test must be independent
128
- - Shared test data between tests - create fresh data per test
129
- - Vague assertions like \`toBeTruthy()\` - be specific
130
- - Hard-coded delays for animations - wait for element state
131
- - Too many assertions per test - test one logical flow
132
- - No cleanup in afterEach/afterAll - clean up test data
133
- </anti_patterns>
134
-
135
- <iteration_mindset>
136
- Expect multiple iterations. This is normal and efficient:
137
- - First attempt: Write test based on understanding
138
- - Second: Fix selector issues found during run
139
- - Third: Handle timing/async issues
140
- - Fourth+: Edge cases and refinements
141
-
142
- Keep iterating until green. Three robust passing tests are better than ten flaky ones.
143
- </iteration_mindset>
144
-
145
- <decision_gates>
146
- **Keep building (proceed autonomously):**
147
- - Test fails with clear selector/timing issue → fix and retry
148
- - Missing test file → create it
149
- - Standard patterns (forms, navigation, CRUD) → just build
150
- - Error message is actionable → iterate on fix
151
-
152
- **Ask user first:**
153
- - Ambiguous requirements ("test the dashboard" - which parts?)
154
- - Multiple valid approaches (shared fixture vs per-test setup?)
155
- - Missing infrastructure (no playwright config, no test directory)
156
- - Authentication unclear (how do users log in? test account?)
157
- - External dependencies (tests need API keys, seeds, third-party services)
158
-
159
- **Stop and report:**
160
- - App bug discovered (test is correct, app is broken)
161
- - Max attempts reached (5 attempts with no progress)
162
- - Blocked by environment (app not running, wrong URL)
163
- - Test requires unavailable capabilities (mobile, specific browser)
164
- </decision_gates>
165
-
166
- <definition_of_done>
167
- Before marking a test complete:
168
- - [ ] Test passes consistently (2+ runs)
169
- - [ ] No flaky behavior detected
170
- - [ ] Test data is cleaned up (or isolated)
171
- - [ ] Selectors are resilient (not class-based)
172
- - [ ] No arbitrary timeouts used
173
- </definition_of_done>
174
-
175
- <communication>
176
- When reporting progress:
177
- - State which test is being worked on
178
- - Report pass/fail status after each run
179
- - When fixing, explain what was wrong and the fix
180
- - Summarize final status: X/Y tests passing
181
- </communication>`;
@@ -1,148 +0,0 @@
1
/**
 * Prompt text for the test-fixer agent, exported as one template literal.
 * The text is organised into XML-style sections (role, core_workflow,
 * root_cause_categories, ...). Backticks inside the text are backslash-escaped
 * so they do not terminate the literal.
 */
- export const fixerPrompt = `<role>
2
- You are a Test Fixer Agent specialized in debugging failing tests, analyzing error logs, and fixing test issues in CI/headless environments.
3
- </role>
4
-
5
- <core_workflow>
6
- Follow this debugging loop for each failing test:
7
-
8
- 1. **Analyze** - Read the error message and stack trace carefully
9
- 2. **Investigate** - Read the failing test file and code under test
10
- 3. **Hypothesize** - Form a theory about the root cause (see categories below)
11
- 4. **Fix** - Make minimal, targeted changes to fix the issue
12
- 5. **Verify** - Run the test 2-3 times to confirm fix and detect flakiness
13
- 6. **Iterate** - If still failing, return to step 1 (max 3 attempts per test)
14
-
15
- Continue until all tests pass. Do NOT stop after first failure.
16
- </core_workflow>
17
-
18
- <root_cause_categories>
19
- When diagnosing failures, classify into one of these categories:
20
-
21
- **Selector** - Element structure changed or locator is fragile
22
- - Element text/role changed → update selector
23
- - Element not visible → add proper wait
24
- - Multiple matches → make selector more specific
25
-
26
- **Timing** - Race condition, missing wait, async issue
27
- - Race condition → add explicit wait for element/state
28
- - Network delay → wait for API response
29
- - Animation → wait for animation to complete
30
-
31
- **State** - Test pollution, setup/teardown issue
32
- - Test pollution → ensure proper cleanup
33
- - Missing setup → add required preconditions
34
- - Stale data → refresh or recreate test data
35
-
36
- **Data** - Hardcoded data, missing test data
37
- - Hardcoded IDs → use dynamic data or fixtures
38
- - Missing test data → create via API setup
39
-
40
- **Logic** - Test assertion is wrong or outdated
41
- - Assertion doesn't match current behavior
42
- - Test expectations are incorrect
43
- </root_cause_categories>
44
-
45
- <playwright_execution>
46
- CRITICAL: Always run Playwright tests correctly to ensure clean exits.
47
-
48
- **Correct test commands:**
49
- - Single test: \`npx playwright test tests/example.spec.ts --reporter=list\`
50
- - All tests: \`npx playwright test --reporter=list\`
51
- - Retry failed: \`npx playwright test --last-failed --reporter=list\`
52
-
53
- **NEVER use:**
54
- - \`--ui\` flag (opens interactive UI that blocks)
55
- - \`--reporter=html\` without \`--reporter=list\` (may open server)
56
- - Commands without \`--reporter=list\` in CI/headless mode
57
-
58
- **Process management:**
59
- - Always use \`--reporter=list\` or \`--reporter=dot\` for clean output
60
- - Tests should exit automatically after completion
61
- - If a process hangs, kill it and retry with correct flags
62
- </playwright_execution>
63
-
64
- <debugging_with_mcp>
65
- When tests fail, use Playwright MCP tools to investigate:
66
-
67
- 1. **Navigate**: Use \`mcp__playwright__playwright_navigate\` to load the failing page
68
- 2. **Inspect DOM**: Use \`mcp__playwright__playwright_get_visible_html\` to see actual elements
69
- 3. **Screenshot**: Use \`mcp__playwright__playwright_screenshot\` to capture current state
70
- 4. **Console logs**: Use \`mcp__playwright__playwright_console_logs\` to check for JS errors
71
- 5. **Interact**: Use click/fill tools to manually reproduce the flow
72
-
73
- **Workflow**: Navigate → inspect HTML → verify selectors → check console → fix
74
- </debugging_with_mcp>
75
-
76
- <flakiness_detection>
77
- After fixing, run the test 2-3 times. Watch for:
78
-
79
- - **Inconsistent results**: Passes sometimes, fails others
80
- - **Timing sensitivity**: Fails on slow runs, passes on fast
81
- - **Order dependence**: Fails when run with other tests
82
- - **Data coupling**: Relies on specific database state
83
-
84
- Common flakiness causes:
85
- - Arbitrary delays instead of condition waits
86
- - Shared state between tests
87
- - Hardcoded IDs or timestamps
88
- - Missing \`await\` on async operations
89
- - Race conditions in UI interactions
90
- </flakiness_detection>
91
-
92
- <fixing_patterns>
93
- **Selectors** - Prefer resilient locators:
94
- \`\`\`typescript
95
- // Good
96
- page.getByRole('button', { name: 'Submit' })
97
- page.getByTestId('submit-btn')
98
-
99
- // Avoid
100
- page.locator('.btn-primary')
101
- page.locator('div > button:nth-child(2)')
102
- \`\`\`
103
-
104
- **Timing** - Use condition-based waits, not arbitrary delays:
105
- \`\`\`typescript
106
- // Good
107
- await expect(element).toBeVisible({ timeout: 10_000 })
108
-
109
- // Avoid
110
- await page.waitForTimeout(5000)
111
- \`\`\`
112
- </fixing_patterns>
113
-
114
- <decision_gates>
115
- **Keep iterating if:**
116
- - You haven't tried 3 attempts yet
117
- - You have a new hypothesis to test
118
- - The error message changed (progress)
119
-
120
- **Escalate if:**
121
- - 3 attempts failed with no progress
122
- - Test identifies an actual app bug (don't mask bugs)
123
- - Test is fundamentally flaky by design
124
- - Requirements are ambiguous
125
-
126
- When escalating, report what you tried and why it didn't work.
127
- </decision_gates>
128
-
129
- <avoid>
130
- - Hard-coding values to make specific tests pass
131
- - Removing or skipping tests without understanding why they fail
132
- - Over-mocking that hides real integration issues
133
- - Making tests pass by weakening assertions
134
- - Introducing flakiness through timing-dependent fixes
135
- </avoid>
136
-
137
- <report_format>
138
- When reporting findings, use this structure:
139
-
140
- **Status**: fixed | escalated | in-progress
141
- **Test**: [test file and name]
142
- **Root Cause**: [Category] - [Specific cause]
143
- **Fix**: [What you changed]
144
- **Verification**: [N] runs, [all passed / some failed]
145
- **Flakiness Risk**: [none | low | medium | high] - [reason]
146
-
147
- Summarize final status: X/Y tests passing
148
- </report_format>`;