@supatest/cli 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,589 +0,0 @@
1
- import { createRequire } from "node:module";
2
- import { dirname, join } from "node:path";
3
- import { query } from "@anthropic-ai/claude-agent-sdk";
4
- import chalk from "chalk";
5
- import ora from "ora";
6
- import { config as envConfig } from "./config";
7
- import { ApiClient } from "./services/api-client";
8
- import { EventStreamer } from "./services/event-streamer";
9
- import { logger } from "./utils/logger";
10
- import { generateSummary } from "./utils/summary";
11
/** Version string shown in the banner and reported when a backend session is created. */
const CLI_VERSION = "0.0.1";

/**
 * Playful status texts for the progress spinner.
 * One entry is picked at random each time a spinner is started.
 */
const SPINNER_MESSAGES = [
  "Brainstorming...",
  "Brewing coffee...",
  "Sipping espresso...",
  "Testing theories...",
  "Making magic...",
  "Multiplying matrices...",
];
21
/**
 * Pick one of the predefined spinner messages uniformly at random.
 *
 * @returns {string} An entry of SPINNER_MESSAGES.
 */
function getRandomSpinnerMessage() {
  const pick = Math.floor(Math.random() * SPINNER_MESSAGES.length);
  return SPINNER_MESSAGES[pick];
}
24
/**
 * Build the animation frames for a "shimmer" spinner.
 *
 * Each frame is a cyan spinner glyph followed by the text, rendered in white
 * with one character highlighted in bold cyan. The highlight moves one
 * position to the right per frame; the final frame (position === text.length)
 * has no highlighted character.
 *
 * @param {string} text - Message to animate.
 * @returns {string[]} text.length + 1 pre-rendered frames.
 */
function createShimmerFrames(text) {
  // Ora's default dots spinner frames
  const spinnerGlyphs = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
  return Array.from({ length: text.length + 1 }, (_, position) => {
    const glyph = spinnerGlyphs[position % spinnerGlyphs.length];
    const leading = chalk.white(text.slice(0, position));
    const highlighted = chalk.cyan.bold(text[position] || '');
    const trailing = chalk.white(text.slice(position + 1));
    return `${chalk.cyan(glyph)} ${leading}${highlighted}${trailing}`;
  });
}
41
/**
 * Extract a printable message from anything that can be thrown.
 *
 * @param {unknown} error - Caught value.
 * @returns {string} `error.message` for Error instances, otherwise `String(error)`.
 */
function getErrorMessage(error) {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Start a new shimmer spinner with a random message.
 *
 * @returns {object|null} The running ora spinner, or null when the logger is silent.
 */
function startShimmerSpinner() {
  if (logger.isSilent()) {
    return null;
  }
  const spinner = ora({
    spinner: {
      interval: 80,
      frames: createShimmerFrames(getRandomSpinnerMessage()),
    },
  });
  spinner.start();
  return spinner;
}

/**
 * Log Node.js version, platform, working directory, git status and free disk
 * space at debug level. Each probe is best-effort and never throws.
 *
 * @returns {Promise<void>}
 */
async function logEnvironmentInfo() {
  logger.raw("");
  logger.debug("Environment & System Info:");
  logger.debug(`  Node.js: ${process.version}`);
  const os = await import("node:os");
  logger.debug(`  Platform: ${os.platform()} ${os.arch()} (${os.type()} ${os.release()})`);
  logger.debug(`  Working Dir: ${process.cwd()}`);
  try {
    const { execSync } = await import("node:child_process");
    const gitStatus = execSync("git status --porcelain", {
      encoding: "utf8",
      stdio: ["pipe", "pipe", "ignore"],
    }).trim();
    const statusText = gitStatus ? "dirty (uncommitted changes)" : "clean";
    logger.debug(`  Git Status: ${statusText}`);
  } catch {
    logger.debug(`  Git Status: not a git repository`);
  }
  try {
    const fs = await import("node:fs");
    // Named fsStats so it cannot be confused with the run statistics object.
    const fsStats = fs.statfsSync(process.cwd());
    const availableGB = ((fsStats.bavail * fsStats.bsize) / (1024 ** 3)).toFixed(2);
    logger.debug(`  Available Disk: ${availableGB} GB`);
  } catch {
    logger.debug(`  Available Disk: unable to determine`);
  }
}

/**
 * Resolve the path of the Claude Code CLI script handed to the SDK.
 *
 * Order: explicit override from the environment config, then a file next to
 * the compiled binary, then the `cli.js` bundled with the SDK in node_modules.
 * Non-override paths are checked for existence.
 *
 * @param {boolean} verbose - Whether to emit debug logs about the decision.
 * @returns {Promise<string>} Path to the executable script.
 * @throws {Error} When the resolved (non-override) path is not accessible.
 */
async function resolveClaudeCodePath(verbose) {
  // Allow override via environment variable for testing/debugging
  if (envConfig.claudeCodeExecutablePath) {
    if (verbose) {
      logger.debug(`Using CLAUDE_CODE_EXECUTABLE_PATH: ${envConfig.claudeCodeExecutablePath}`);
    }
    return envConfig.claudeCodeExecutablePath;
  }
  let claudeCodePath;
  // Heuristic: a compiled binary's execPath does not contain "node".
  const isCompiledBinary = process.execPath && !process.execPath.includes("node");
  if (isCompiledBinary) {
    // Production: claude-code-cli.js should be next to the binary
    claudeCodePath = join(dirname(process.execPath), "claude-code-cli.js");
    if (verbose) {
      logger.debug(`Production mode: ${claudeCodePath}`);
    }
  } else {
    // Development: use SDK's cli.js from node_modules
    const require = createRequire(import.meta.url);
    const sdkPath = require.resolve("@anthropic-ai/claude-agent-sdk/sdk.mjs");
    claudeCodePath = join(dirname(sdkPath), "cli.js");
    if (verbose) {
      logger.debug(`Development mode: ${claudeCodePath}`);
    }
  }
  const fs = await import("node:fs/promises");
  try {
    await fs.access(claudeCodePath);
  } catch {
    throw new Error(`Claude Code executable not found at: ${claudeCodePath}\n` +
      "For compiled binaries, ensure claude-code-cli.js is in the same directory as the binary.\n" +
      "Set CLAUDE_CODE_EXECUTABLE_PATH environment variable to override.");
  }
  if (verbose) {
    logger.debug(`✓ Claude Code CLI found: ${claudeCodePath}`);
  }
  return claudeCodePath;
}

/**
 * Show a tool invocation to the user and record it in the run statistics.
 *
 * @param {string} toolName - Name of the tool from the `tool_use` block.
 * @param {object|undefined} input - Tool input from the `tool_use` block.
 * @param {{filesModified: Set<string>, commandsRun: string[]}} stats - Run statistics (mutated).
 * @returns {void}
 */
function reportToolUse(toolName, input, stats) {
  switch (toolName) {
    case "Read":
      logger.toolRead(input?.file_path || 'file');
      break;
    case "Write": {
      const filePath = input?.file_path;
      if (filePath) {
        stats.filesModified.add(filePath);
        logger.toolWrite(filePath);
      }
      break;
    }
    case "Edit": {
      const filePath = input?.file_path;
      if (filePath) {
        stats.filesModified.add(filePath);
        logger.toolEdit(filePath);
      }
      break;
    }
    case "Bash": {
      const command = input?.command;
      if (command) {
        stats.commandsRun.push(command);
        const shortCmd = command.length > 60 ? `${command.substring(0, 60)}...` : command;
        logger.toolBash(shortCmd);
      }
      break;
    }
    case "Glob":
      logger.toolSearch("files", input?.pattern || '');
      break;
    case "Grep":
      logger.toolSearch("code", input?.pattern || '');
      break;
    case "Task":
      logger.toolAgent(input?.subagent_type || 'task');
      break;
    case "TodoWrite": {
      const todos = input?.todos;
      if (Array.isArray(todos)) {
        logger.todoUpdate(todos);
      } else {
        logger.info("📝 Updated todos");
      }
      break;
    }
    default:
      // Other tools are streamed to the backend but not echoed locally.
      break;
  }
}

/**
 * Print the hints shown when the Claude Code process dies with exit code 1
 * or an unknown exit code.
 *
 * @returns {void}
 */
function logProcessErrorGuidance() {
  logger.warn("\nPossible causes:");
  logger.warn("  • Task may not be actionable (Claude Code works with code-related tasks)");
  logger.warn("  • Task description may be too vague or not code-focused");
  logger.warn("  • API authentication or rate limiting issue");
  logger.warn("\nTry tasks like:");
  logger.warn("  • 'Fix the failing tests in calculator.test.js'");
  logger.warn("  • 'Add error handling to the divide function'");
  logger.warn("  • 'Create a new file hello.js with a greeting function'");
  logger.warn("  • 'Update imports to use the new package structure'");
}

/**
 * Run one agent task through the Claude Agent SDK, mirroring progress to the
 * terminal and, when a backend session can be created, to the event streamer.
 *
 * Side effects: sets `process.env.ANTHROPIC_BASE_URL` and
 * `process.env.ANTHROPIC_API_KEY` for the current process.
 *
 * @param {object} config - Run configuration.
 * @param {string} config.task - Task description sent as the prompt.
 * @param {string} [config.logs] - Optional logs appended to the prompt.
 * @param {boolean} [config.verbose] - Enables debug logging.
 * @param {number} [config.maxIterations] - Passed to the SDK as `maxTurns`.
 * @param {string} [config.supatestApiUrl] - Backend/proxy base URL (defaults to https://api.supatest.ai).
 * @param {string} [config.supatestApiKey] - API key used for the backend and the proxy.
 * @returns {Promise<{success: boolean, summary: string, filesModified: string[], iterations: number, error: (string|undefined)}>}
 *   Outcome of the run. Failures inside the agent run are reported through
 *   `success: false` rather than by rejecting.
 */
export async function runAgent(config) {
  const stats = {
    startTime: Date.now(),
    iterations: 0,
    filesModified: new Set(),
    commandsRun: [],
    errors: [],
  };
  let claudeCodeStderr = "";
  logger.setVerbose(config.verbose);
  // Display metadata
  logger.raw("");
  // Get git branch if available
  let gitBranch = "";
  try {
    const { execSync } = await import("node:child_process");
    gitBranch = execSync("git rev-parse --abbrev-ref HEAD", {
      encoding: "utf8",
      stdio: ["pipe", "pipe", "ignore"],
    }).trim();
  } catch {
    // Not in a git repo or git not available
  }
  const metadataParts = [
    chalk.dim("Supatest AI ") + chalk.cyan(`v${CLI_VERSION}`),
    chalk.dim("Model: ") + chalk.cyan(envConfig.anthropicModelName),
  ];
  if (gitBranch) {
    metadataParts.push(chalk.dim("Branch: ") + chalk.cyan(gitBranch));
  }
  logger.raw(metadataParts.join(chalk.dim(" • ")));
  logger.divider();
  // Show environment info in verbose mode
  if (config.verbose) {
    await logEnvironmentInfo();
  }
  logger.raw("");
  logger.raw("");
  logger.raw(chalk.white.bold("Task:") + " " + chalk.cyan(config.task));
  if (config.logs) {
    logger.info("Processing provided logs...");
  }
  logger.raw("");
  // Create session on backend and initialize event streaming
  const apiUrl = config.supatestApiUrl || "https://api.supatest.ai";
  const apiClient = new ApiClient(apiUrl, config.supatestApiKey);
  let sessionId;
  let webUrl;
  let eventStreamer;
  try {
    const session = await apiClient.createSession(config.task, {
      cliVersion: CLI_VERSION,
      cwd: process.cwd(),
    });
    sessionId = session.sessionId;
    webUrl = session.webUrl;
    eventStreamer = new EventStreamer(apiClient, sessionId);
    logger.raw("");
    logger.divider();
    logger.raw(chalk.white.bold("View session live: ") +
      chalk.cyan.underline(webUrl));
    logger.divider();
    logger.raw("");
  } catch (error) {
    // Web streaming is optional; the task still runs locally.
    logger.warn(`Failed to create session on backend: ${getErrorMessage(error)}`);
    logger.warn("Continuing without web streaming...");
  }
  logger.raw("");
  // Created right before the SDK query starts; stays null in silent mode.
  let spinner = null;
  try {
    // Build the prompt
    let prompt = config.task;
    if (config.logs) {
      prompt = `${config.task}\n\nHere are the logs to analyze:\n\`\`\`\n${config.logs}\n\`\`\``;
    }
    const proxyUrl = apiUrl;
    // Build base URL with session ID embedded in the path for message tracking
    // Format: {proxyUrl}/v1/sessions/{sessionId}/anthropic
    let baseUrl = `${proxyUrl}/public`;
    if (sessionId) {
      baseUrl = `${proxyUrl}/v1/sessions/${sessionId}/anthropic`;
      if (config.verbose) {
        logger.debug(`Using session-based proxy URL: ${baseUrl}`);
      }
    }
    process.env.ANTHROPIC_BASE_URL = baseUrl;
    // process.env coerces values to strings, so assigning undefined would
    // store the literal "undefined"; fall back to an empty string instead.
    process.env.ANTHROPIC_API_KEY = config.supatestApiKey ?? "";
    if (config.verbose) {
      logger.debug(`Using Supatest proxy: ${proxyUrl}/public`);
      logger.debug(`Supatest API key: ${config.supatestApiKey?.substring(0, 15)}...`);
    }
    const claudeCodePath = await resolveClaudeCodePath(config.verbose);
    let resultText = "";
    let hasError = false;
    // Log SDK configuration for debugging
    if (config.verbose) {
      logger.debug(`\nSDK Configuration:`);
      logger.debug(`  Prompt: ${prompt.substring(0, 100)}${prompt.length > 100 ? '...' : ''}`);
      logger.debug(`  Max turns: ${config.maxIterations}`);
      logger.debug(`  Working directory: ${process.cwd()}`);
      logger.debug(`  Model: ${envConfig.anthropicModelName}`);
      logger.debug(`  Claude Code executable: ${claudeCodePath}`);
      logger.debug(`  Supatest API Key: ${config.supatestApiKey?.substring(0, 10)}...`);
      logger.debug(`  Environment ANTHROPIC_API_KEY: ${process.env.ANTHROPIC_API_KEY ? 'set' : 'not set'}`);
    }
    // Stream initial user message and capture assistant message ID for usage tracking
    let assistantMessageId;
    if (eventStreamer && sessionId) {
      const userMessageEvent = {
        type: "user_message",
        content: [{ type: "text", text: prompt }],
      };
      const eventResponse = await apiClient.streamEvent(sessionId, userMessageEvent);
      assistantMessageId = eventResponse.assistantMessageId;
      if (assistantMessageId && config.verbose) {
        logger.debug(`Assistant message ID for tracking: ${assistantMessageId}`);
      }
    }
    // Run the agent using the SDK
    const queryOptions = {
      maxTurns: config.maxIterations,
      cwd: process.cwd(),
      model: envConfig.anthropicModelName,
      permissionMode: "bypassPermissions",
      allowDangerouslySkipPermissions: true,
      pathToClaudeCodeExecutable: claudeCodePath,
      // Enable streaming delta events for real-time updates
      includePartialMessages: true,
      // Force Node.js runtime even when running from a Bun binary
      // The claude-code-cli.js is a large minified JS file that Bun can't parse correctly
      executable: 'node',
      // Explicitly pass environment variables to the subprocess
      env: {
        ...process.env,
        ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY || '',
        ANTHROPIC_BASE_URL: process.env.ANTHROPIC_BASE_URL || '',
        ANTHROPIC_AUTH_TOKEN: '', // Clear stored OAuth token
        CLAUDE_CODE_AUTH_TOKEN: '', // Clear any other auth tokens
      },
      stderr: (msg) => {
        // Buffered so it can be printed if the run ends in a fatal error.
        claudeCodeStderr += msg + "\n";
        if (config.verbose) {
          logger.debug(`[Claude Code stderr] ${msg}`);
        }
      },
    };
    // Start initial spinner while waiting for first assistant message (skip in silent mode)
    spinner = startShimmerSpinner();
    for await (const msg of query({ prompt, options: queryOptions })) {
      if (config.verbose) {
        logger.debug(`Received SDK message: ${msg.type}`);
      }
      if (msg.type === "assistant") {
        stats.iterations++;
        if (spinner) {
          spinner.stop();
        }
        // Extract text content and tool uses
        const content = msg.message.content;
        if (Array.isArray(content)) {
          for (const block of content) {
            if (block.type === "text") {
              logger.raw(block.text);
              resultText += block.text + "\n";
              // WORKAROUND: Since SDK doesn't emit stream_event with includePartialMessages,
              // send the complete text as an assistant_text event immediately
              if (eventStreamer && block.text) {
                await eventStreamer.queueEvent({
                  type: "assistant_text",
                  delta: block.text,
                });
              }
            } else if (block.type === "thinking") {
              // Send thinking blocks as well
              if (eventStreamer && block.thinking) {
                await eventStreamer.queueEvent({
                  type: "assistant_thinking",
                  delta: block.thinking,
                });
              }
            } else if (block.type === "tool_use") {
              // Stream tool use event
              if (eventStreamer) {
                await eventStreamer.queueEvent({
                  type: "tool_use",
                  id: block.id,
                  name: block.name,
                  input: block.input || {},
                });
              }
              // Display tool calls to user
              reportToolUse(block.name, block.input, stats);
            }
          }
        }
        // Stream message_complete event with full content
        if (eventStreamer) {
          // Flush any pending delta events first to ensure they arrive before message_complete
          await eventStreamer.flush();
          await eventStreamer.queueEvent({
            type: "message_complete",
            message: {
              role: "assistant",
              content: content,
            },
          });
        }
        logger.raw("");
        // Stop and clear previous spinner if it exists
        if (spinner) {
          spinner.stop();
          spinner.clear();
        }
        // New spinner with a fresh random message; keep the old reference in silent mode
        spinner = startShimmerSpinner() ?? spinner;
      } else if (msg.type === "stream_event") {
        // NOTE: This code path is currently not triggered due to an SDK issue with includePartialMessages
        // We've implemented a workaround above to send text immediately when assistant messages arrive
        // Keeping this code in case future SDK versions fix the streaming support
        const event = msg.event;
        if (event.type === "content_block_delta") {
          const delta = event.delta;
          if (delta.type === "text_delta" && eventStreamer) {
            await eventStreamer.queueEvent({
              type: "assistant_text",
              delta: delta.text,
            });
          } else if (delta.type === "thinking_delta" && eventStreamer) {
            await eventStreamer.queueEvent({
              type: "assistant_thinking",
              delta: delta.thinking,
            });
          }
        }
      } else if (msg.type === "tool_progress") {
        // spinner is null in silent mode; nothing to update then.
        if (spinner) {
          spinner.text = `Using ${msg.tool_name}... (${msg.elapsed_time_seconds.toFixed(1)}s)`;
        }
      } else if (msg.type === "result") {
        // spinner is null in silent mode; an unguarded stop() would turn every
        // silent run into a fatal error.
        if (spinner) {
          spinner.stop();
        }
        stats.iterations = msg.num_turns;
        if (msg.subtype === "success") {
          resultText = msg.result || resultText;
        } else {
          hasError = true;
          if ("errors" in msg && Array.isArray(msg.errors)) {
            stats.errors.push(...msg.errors);
            for (const error of msg.errors) {
              logger.error(error);
            }
          }
        }
      }
    }
    if (spinner) {
      spinner.stop();
    }
    stats.endTime = Date.now();
    // Complete usage tracking for this message turn
    if (assistantMessageId && apiClient) {
      try {
        await apiClient.completeUsage(assistantMessageId);
      } catch (error) {
        // Don't fail the task if usage tracking fails
        logger.warn(`Failed to complete usage tracking: ${getErrorMessage(error)}`);
      }
    }
    // Generate result
    const result = {
      success: !hasError && stats.errors.length === 0,
      summary: resultText || "Task completed",
      filesModified: Array.from(stats.filesModified),
      iterations: stats.iterations,
      error: stats.errors.length > 0 ? stats.errors.join("; ") : undefined,
    };
    // Stream session completion or error
    if (eventStreamer) {
      if (result.success) {
        await eventStreamer.queueEvent({ type: "session_complete" });
      } else {
        await eventStreamer.queueEvent({
          type: "session_error",
          error: result.error || "Unknown error",
        });
      }
      // Flush and shutdown event streamer
      await eventStreamer.shutdown();
    }
    // Print summary
    logger.raw(generateSummary(stats, result, config.verbose));
    // Display web URL again at the end if available
    if (webUrl) {
      logger.raw("");
      logger.divider();
      logger.raw(chalk.white.bold("Continue on web: ") +
        chalk.cyan.underline(webUrl));
      logger.divider();
    }
    return result;
  } catch (error) {
    if (spinner) {
      spinner.stop();
    }
    stats.endTime = Date.now();
    const errorMessage = getErrorMessage(error);
    // Check if this is a Claude Code process error and extract details
    const isProcessError = errorMessage.includes("Claude Code process exited");
    let exitCode;
    // Try to extract exit code from error message (e.g., "exited with code 1")
    const exitCodeMatch = errorMessage.match(/exited with code (\d+)/i);
    if (exitCodeMatch) {
      exitCode = Number.parseInt(exitCodeMatch[1], 10);
    }
    if (error && typeof error === "object") {
      // Numeric properties on the error object take precedence over the parsed message
      if ("exitCode" in error && typeof error.exitCode === "number") {
        exitCode = error.exitCode;
      }
      if ("code" in error && typeof error.code === "number") {
        exitCode = error.code;
      }
    }
    logger.error(`Fatal error: ${errorMessage}`);
    // Show captured stderr if available
    if (claudeCodeStderr && claudeCodeStderr.trim()) {
      logger.error("\nClaude Code stderr output:");
      logger.error(claudeCodeStderr);
    }
    if (isProcessError) {
      if (exitCode !== undefined) {
        logger.error(`Claude Code process exited with code ${exitCode}`);
      } else {
        logger.error("Claude Code process exited with an error");
      }
      // Show guidance for exit code 1 or if exit code is unknown
      if (exitCode === 1 || exitCode === undefined) {
        logProcessErrorGuidance();
      }
    }
    if (config.verbose && error instanceof Error && error.stack) {
      logger.error(error.stack);
    }
    const result = {
      success: false,
      summary: `Failed: ${errorMessage}`,
      filesModified: Array.from(stats.filesModified),
      iterations: stats.iterations,
      error: errorMessage,
    };
    // Stream session error and shutdown event streamer
    if (eventStreamer) {
      try {
        await eventStreamer.queueEvent({
          type: "session_error",
          error: errorMessage,
        });
        await eventStreamer.shutdown();
      } catch (streamError) {
        // Reporting the failure must not mask the failure result itself.
        logger.warn(`Failed to stream session error: ${getErrorMessage(streamError)}`);
      }
    }
    logger.raw(generateSummary(stats, result, config.verbose));
    // Display web URL at the end if available
    if (webUrl) {
      logger.raw("");
      logger.divider();
      logger.raw(chalk.white.bold("View session: ") +
        chalk.cyan.underline(webUrl));
      logger.divider();
    }
    return result;
  }
}
@@ -1,97 +0,0 @@
1
- <role>
2
- You are an E2E Test Builder Agent that iteratively creates, runs, and fixes Playwright tests until they pass. You have access to Playwright MCP tools for browser automation and debugging.
3
- </role>
4
-
5
- <core_workflow>
6
- Follow this iterative build loop for each test:
7
-
8
- 1. **Understand** - Read the test spec or user flow requirements
9
- 2. **Write** - Create or update the Playwright test file
10
- 3. **Run** - Execute the test using the correct command
11
- 4. **Verify** - Check results; if passing, move to next test
12
- 5. **Debug** - If failing, use Playwright MCP tools to investigate
13
- 6. **Fix** - Update test based on findings, return to step 3
14
-
15
- Continue until all tests pass. Do NOT stop after the first failure.
16
- </core_workflow>
17
-
18
- <playwright_execution>
19
- CRITICAL: Always run Playwright tests correctly to ensure clean exits.
20
-
21
- **Correct test commands:**
22
- - Single test: `npx playwright test tests/example.spec.ts --reporter=list`
23
- - All tests: `npx playwright test --reporter=list`
24
- - Headed mode (debugging): `npx playwright test --headed --reporter=list`
25
-
26
- **NEVER use:**
27
- - `--ui` flag (opens interactive UI that blocks)
28
- - `--reporter=html` without `--reporter=list` (may open server)
29
- - Commands without `--reporter=list` or `--reporter=dot` in CI/headless mode
30
-
31
- **Process management:**
32
- - Always use `--reporter=list` or `--reporter=dot` for clean output
33
- - Tests should exit automatically after completion
34
- - If a process hangs, kill it and retry with correct flags
35
- </playwright_execution>
36
-
37
- <debugging_with_mcp>
38
- When tests fail, use Playwright MCP tools to investigate:
39
-
40
- 1. **Navigate**: Use `mcp__playwright__playwright_navigate` to load the failing page
41
- 2. **Inspect DOM**: Use `mcp__playwright__playwright_get_visible_html` to see actual elements
42
- 3. **Screenshot**: Use `mcp__playwright__playwright_screenshot` to capture current state
43
- 4. **Console logs**: Use `mcp__playwright__playwright_console_logs` to check for JS errors
44
- 5. **Interact**: Use click/fill tools to manually reproduce the flow
45
-
46
- This visual debugging helps identify:
47
- - Missing or changed selectors
48
- - Timing issues (element not ready)
49
- - Network/API failures
50
- - JavaScript errors preventing interactions
51
- </debugging_with_mcp>
52
-
53
- <selector_strategy>
54
- Prioritize resilient selectors:
55
- 1. `getByRole()` - accessibility-focused, most stable
56
- 2. `getByLabel()` - form elements
57
- 3. `getByText()` - user-visible content
58
- 4. `getByTestId()` - explicit test markers
59
- 5. CSS selectors - last resort, avoid class-based
60
-
61
- When selectors fail:
62
- - Use MCP to inspect actual DOM structure
63
- - Check if element exists but has different text/role
64
- - Verify element is visible and not hidden
65
- </selector_strategy>
66
-
67
- <test_quality>
68
- Write production-ready tests:
69
- - **Arrange-Act-Assert** structure for clarity
70
- - **Explicit waits** over arbitrary timeouts
71
- - **Independent tests** that don't share state
72
- - **Meaningful assertions** that verify outcomes
73
-
74
- Avoid:
75
- - `waitForTimeout()` - use explicit element waits
76
- - Brittle selectors based on CSS classes
77
- - Tests that depend on execution order
78
- - Vague assertions like `toBeTruthy()`
79
- </test_quality>
80
-
81
- <iteration_mindset>
82
- Expect multiple iterations. This is normal and efficient:
83
- - First attempt: Write test based on understanding
84
- - Second: Fix selector issues found during run
85
- - Third: Handle timing/async issues
86
- - Fourth+: Edge cases and refinements
87
-
88
- Keep iterating until green. Three robust passing tests are better than ten flaky ones.
89
- </iteration_mindset>
90
-
91
- <communication>
92
- When reporting progress:
93
- - State which test is being worked on
94
- - Report pass/fail status after each run
95
- - When fixing, explain what was wrong and the fix
96
- - Summarize final status: X/Y tests passing
97
- </communication>