@supatest/cli 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -315
- package/dist/agent-runner.js +224 -52
- package/dist/commands/login.js +392 -0
- package/dist/commands/setup.js +234 -0
- package/dist/config.js +29 -0
- package/dist/core/agent.js +270 -0
- package/dist/index.js +118 -31
- package/dist/modes/headless.js +117 -0
- package/dist/modes/interactive.js +430 -0
- package/dist/presenters/composite.js +32 -0
- package/dist/presenters/console.js +163 -0
- package/dist/presenters/react.js +220 -0
- package/dist/presenters/types.js +1 -0
- package/dist/presenters/web.js +78 -0
- package/dist/prompts/builder.js +181 -0
- package/dist/prompts/fixer.js +148 -0
- package/dist/prompts/headless.md +97 -0
- package/dist/prompts/index.js +3 -0
- package/dist/prompts/interactive.md +43 -0
- package/dist/prompts/plan.md +41 -0
- package/dist/prompts/planner.js +70 -0
- package/dist/prompts/prompts/builder.md +97 -0
- package/dist/prompts/prompts/fixer.md +100 -0
- package/dist/prompts/prompts/plan.md +41 -0
- package/dist/prompts/prompts/planner.md +41 -0
- package/dist/services/api-client.js +244 -0
- package/dist/services/event-streamer.js +130 -0
- package/dist/ui/App.js +322 -0
- package/dist/ui/components/AuthBanner.js +20 -0
- package/dist/ui/components/AuthDialog.js +32 -0
- package/dist/ui/components/Banner.js +12 -0
- package/dist/ui/components/ExpandableSection.js +17 -0
- package/dist/ui/components/Header.js +49 -0
- package/dist/ui/components/HelpMenu.js +89 -0
- package/dist/ui/components/InputPrompt.js +292 -0
- package/dist/ui/components/MessageList.js +42 -0
- package/dist/ui/components/QueuedMessageDisplay.js +31 -0
- package/dist/ui/components/Scrollable.js +103 -0
- package/dist/ui/components/SessionSelector.js +196 -0
- package/dist/ui/components/StatusBar.js +45 -0
- package/dist/ui/components/messages/AssistantMessage.js +20 -0
- package/dist/ui/components/messages/ErrorMessage.js +26 -0
- package/dist/ui/components/messages/LoadingMessage.js +28 -0
- package/dist/ui/components/messages/ThinkingMessage.js +17 -0
- package/dist/ui/components/messages/TodoMessage.js +44 -0
- package/dist/ui/components/messages/ToolMessage.js +218 -0
- package/dist/ui/components/messages/UserMessage.js +14 -0
- package/dist/ui/contexts/KeypressContext.js +527 -0
- package/dist/ui/contexts/MouseContext.js +98 -0
- package/dist/ui/contexts/SessionContext.js +131 -0
- package/dist/ui/hooks/useAnimatedScrollbar.js +83 -0
- package/dist/ui/hooks/useBatchedScroll.js +22 -0
- package/dist/ui/hooks/useBracketedPaste.js +31 -0
- package/dist/ui/hooks/useFocus.js +50 -0
- package/dist/ui/hooks/useKeypress.js +26 -0
- package/dist/ui/hooks/useModeToggle.js +25 -0
- package/dist/ui/types/auth.js +13 -0
- package/dist/ui/utils/file-completion.js +56 -0
- package/dist/ui/utils/input.js +50 -0
- package/dist/ui/utils/markdown.js +376 -0
- package/dist/ui/utils/mouse.js +189 -0
- package/dist/ui/utils/theme.js +59 -0
- package/dist/utils/banner.js +7 -14
- package/dist/utils/encryption.js +71 -0
- package/dist/utils/events.js +36 -0
- package/dist/utils/keychain-storage.js +120 -0
- package/dist/utils/logger.js +103 -1
- package/dist/utils/node-version.js +1 -3
- package/dist/utils/plan-file.js +75 -0
- package/dist/utils/project-instructions.js +23 -0
- package/dist/utils/rich-logger.js +1 -1
- package/dist/utils/stdio.js +80 -0
- package/dist/utils/summary.js +1 -5
- package/dist/utils/token-storage.js +242 -0
- package/package.json +35 -15
package/dist/presenters/react.js
@@ -0,0 +1,220 @@
+import { getToolDisplayName } from "shared";
+/**
+ * Get human-readable description for tool call
+ */
+function getToolDescription(toolName, input) {
+    switch (toolName) {
+        case "Read":
+            return input?.file_path || "file";
+        case "Write":
+            return input?.file_path || "file";
+        case "Edit":
+            return input?.file_path || "file";
+        case "Bash": {
+            const cmd = input?.command || "";
+            return cmd.length > 60 ? `${cmd.substring(0, 60)}...` : cmd;
+        }
+        case "Glob":
+            return `pattern: "${input?.pattern || "files"}"`;
+        case "Grep": {
+            const pattern = input?.pattern || "code";
+            const path = input?.path;
+            return path ? `"${pattern}" (in ${path})` : `"${pattern}"`;
+        }
+        case "Task":
+            return input?.subagent_type || "task";
+        case "TodoWrite":
+            return "Updated todos";
+        case "BashOutput":
+        case "Command Output":
+            return input?.bash_id || "shell output";
+        default:
+            return toolName;
+    }
+}
+export class ReactPresenter {
+    callbacks;
+    apiClient;
+    sessionId;
+    verbose;
+    // Track message state for combining consecutive blocks
+    hasAssistantMessage = false;
+    currentAssistantText = "";
+    hasThinkingMessage = false;
+    currentThinkingText = "";
+    constructor(callbacks, apiClient, sessionId, verbose = false) {
+        this.callbacks = callbacks;
+        this.apiClient = apiClient;
+        this.sessionId = sessionId;
+        this.verbose = verbose;
+    }
+    async onStart(config) {
+        // Send initial user message event to API
+        const userMessageEvent = {
+            type: "user_message",
+            content: [{ type: "text", text: config.task }],
+        };
+        await this.apiClient.streamEvent(this.sessionId, userMessageEvent);
+    }
+    onLog(message) {
+        if (this.verbose) {
+            console.error(message);
+        }
+    }
+    async onAssistantText(text) {
+        if (!this.hasAssistantMessage) {
+            // First text block - create new message
+            this.callbacks.addMessage({
+                type: "assistant",
+                content: text,
+                isPending: false,
+            });
+            this.hasAssistantMessage = true;
+            this.currentAssistantText = text;
+        }
+        else {
+            // Append to current message
+            this.currentAssistantText += text;
+            this.callbacks.updateLastMessage({
+                content: this.currentAssistantText,
+            });
+        }
+        // Stream to API
+        const textEvent = {
+            type: "assistant_text",
+            delta: text,
+        };
+        await this.apiClient.streamEvent(this.sessionId, textEvent);
+    }
+    async onThinking(text) {
+        if (!this.hasThinkingMessage) {
+            // Create new thinking message
+            this.callbacks.addMessage({
+                type: "thinking",
+                content: text,
+                isExpanded: false,
+            });
+            this.hasThinkingMessage = true;
+            this.currentThinkingText = text;
+        }
+        else {
+            // Append to existing thinking message
+            this.currentThinkingText += text;
+            this.callbacks.updateLastMessage({
+                content: this.currentThinkingText,
+            });
+        }
+        // Stream to API
+        const thinkingEvent = {
+            type: "assistant_thinking",
+            delta: text,
+        };
+        await this.apiClient.streamEvent(this.sessionId, thinkingEvent);
+    }
+    async onToolUse(tool, input, toolId) {
+        // Add tool message to UI
+        const message = {
+            type: "tool",
+            content: getToolDescription(tool, input),
+            toolName: getToolDisplayName(tool),
+            toolInput: input,
+            toolResult: undefined,
+            isExpanded: false,
+            toolUseId: toolId,
+        };
+        this.callbacks.addMessage(message);
+        // Reset message state - next text/thinking creates new messages
+        this.hasAssistantMessage = false;
+        this.hasThinkingMessage = false;
+        this.currentAssistantText = "";
+        this.currentThinkingText = "";
+        // Update stats for file modifications
+        if (tool === "Write" || tool === "Edit") {
+            const filePath = input?.file_path;
+            if (filePath) {
+                this.callbacks.updateStats({
+                    filesModified: new Set([filePath]),
+                });
+            }
+        }
+        else if (tool === "Bash") {
+            const command = input?.command;
+            if (command) {
+                this.callbacks.updateStats({
+                    commandsRun: [command],
+                });
+            }
+        }
+        else if (tool === "TodoWrite") {
+            const todos = input?.todos;
+            if (todos && Array.isArray(todos)) {
+                this.callbacks.setTodos(todos);
+                this.callbacks.addMessage({
+                    type: "todo",
+                    content: "",
+                    todos: todos,
+                });
+            }
+        }
+        // Stream tool use event to API
+        const toolUseEvent = {
+            type: "tool_use",
+            id: toolId,
+            name: tool,
+            input: input || {},
+        };
+        await this.apiClient.streamEvent(this.sessionId, toolUseEvent);
+    }
+    async onToolResult(toolId, result) {
+        // Update the tool message with the result
+        this.callbacks.updateMessageByToolId(toolId, {
+            toolResult: result,
+        });
+        // Stream tool result to API
+        const toolResultEvent = {
+            type: "tool_result",
+            tool_use_id: toolId,
+            content: result,
+        };
+        await this.apiClient.streamEvent(this.sessionId, toolResultEvent);
+    }
+    async onUsageUpdate(totalTokens) {
+        this.callbacks.updateStats({ totalTokens });
+    }
+    async onTurnComplete(content) {
+        // Stream message complete to API
+        const messageCompleteEvent = {
+            type: "message_complete",
+            message: {
+                role: "assistant",
+                content: content,
+            },
+        };
+        await this.apiClient.streamEvent(this.sessionId, messageCompleteEvent);
+    }
+    async onError(error) {
+        this.callbacks.addMessage({
+            type: "error",
+            content: error,
+            errorType: "error",
+        });
+        await this.apiClient.streamEvent(this.sessionId, {
+            type: "session_error",
+            error: error,
+        });
+    }
+    async onComplete(result) {
+        if (result.success) {
+            await this.apiClient.streamEvent(this.sessionId, {
+                type: "session_complete",
+            });
+        }
+        else {
+            await this.apiClient.streamEvent(this.sessionId, {
+                type: "session_error",
+                error: result.error || "Unknown error",
+            });
+        }
+        this.callbacks.onComplete(result.success);
+    }
+}
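The `callbacks` object that `ReactPresenter` drives is defined elsewhere (likely under `dist/ui`), so its shape is not visible in this diff. As a reading aid, here is a minimal sketch of the contract implied by the calls above; the method names come straight from the usage, but the parameter types are assumptions, not the package's published definitions:

```typescript
// Hypothetical shape inferred from ReactPresenter's usage above -- not the package's actual types.
interface PresenterCallbacks {
  addMessage(message: Record<string, unknown>): void;      // append a message to the UI list
  updateLastMessage(patch: Record<string, unknown>): void; // merge fields into the newest message
  updateMessageByToolId(toolUseId: string, patch: Record<string, unknown>): void;
  updateStats(stats: {
    totalTokens?: number;
    filesModified?: Set<string>;
    commandsRun?: string[];
  }): void;
  setTodos(todos: unknown[]): void;                        // replace the current todo list
  onComplete(success: boolean): void;                      // signal that the session finished
}
```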
package/dist/presenters/types.js
@@ -0,0 +1 @@
+export {};
package/dist/presenters/web.js
@@ -0,0 +1,78 @@
+import { EventStreamer } from "../services/event-streamer";
+export class WebPresenter {
+    streamer;
+    constructor(apiClient, sessionId) {
+        this.streamer = new EventStreamer(apiClient, sessionId);
+    }
+    async onStart(config) {
+        // Send the initial user message to establish context
+        const event = {
+            type: "user_message",
+            content: [
+                {
+                    type: "text",
+                    text: config.task,
+                },
+            ],
+        };
+        await this.streamer.queueEvent(event);
+    }
+    onLog(_message) {
+        // Debug logs are not streamed to web
+    }
+    async onAssistantText(text) {
+        const event = {
+            type: "assistant_text",
+            delta: text,
+        };
+        await this.streamer.queueEvent(event);
+    }
+    async onThinking(text) {
+        const event = {
+            type: "assistant_thinking",
+            delta: text,
+        };
+        await this.streamer.queueEvent(event);
+    }
+    async onToolUse(tool, input, toolId) {
+        const event = {
+            type: "tool_use",
+            id: toolId,
+            name: tool,
+            input: input || {},
+        };
+        await this.streamer.queueEvent(event);
+    }
+    async onTurnComplete(content) {
+        // Flush pending events first
+        await this.streamer.flush();
+        const event = {
+            type: "message_complete",
+            message: {
+                role: "assistant",
+                content: content,
+            },
+        };
+        await this.streamer.queueEvent(event);
+    }
+    async onError(error) {
+        const event = {
+            type: "session_error",
+            error: error,
+        };
+        await this.streamer.queueEvent(event);
+        await this.streamer.shutdown();
+    }
+    async onComplete(result) {
+        if (result.success) {
+            await this.streamer.queueEvent({ type: "session_complete" });
+        }
+        else {
+            await this.streamer.queueEvent({
+                type: "session_error",
+                error: result.error || "Unknown error",
+            });
+        }
+        await this.streamer.shutdown();
+    }
+}
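`WebPresenter` handles the same lifecycle events as `ReactPresenter` but routes them through an `EventStreamer` (which, judging by `queueEvent`/`flush`/`shutdown`, batches events before posting) instead of calling `apiClient.streamEvent` directly. The overlap between the two classes suggests a shared presenter contract roughly like the following; again an inferred sketch, not the package's exported type:

```typescript
// Inferred common interface; ReactPresenter additionally implements
// onToolResult and onUsageUpdate, so those are marked optional here.
interface Presenter {
  onStart(config: { task: string }): Promise<void> | void;
  onLog(message: string): void;
  onAssistantText(text: string): Promise<void>;
  onThinking(text: string): Promise<void>;
  onToolUse(tool: string, input: unknown, toolId: string): Promise<void>;
  onTurnComplete(content: unknown): Promise<void>;
  onError(error: string): Promise<void>;
  onComplete(result: { success: boolean; error?: string }): Promise<void>;
  onToolResult?(toolId: string, result: unknown): Promise<void>;
  onUsageUpdate?(totalTokens: number): Promise<void>;
}
```

The `presenters/composite.js` file in the list above presumably fans a single agent run out to several such presenters at once.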
package/dist/prompts/builder.js
@@ -0,0 +1,181 @@
+export const builderPrompt = `<role>
+You are an E2E Test Builder Agent that iteratively creates, runs, and fixes Playwright tests until they pass. You have access to Playwright MCP tools for browser automation and debugging.
+</role>
+
+<core_workflow>
+Follow this iterative build loop for each test:
+
+1. **Discover** - Understand project setup before writing (see discovery section)
+2. **Understand** - Read the test spec or user flow requirements
+3. **Write** - Create or update the Playwright test file
+4. **Run** - Execute the test using the correct command
+5. **Verify** - Check results; if passing, move to next test
+6. **Debug** - If failing, use Playwright MCP tools to investigate
+7. **Fix** - Update test based on findings, return to step 4
+
+Continue until all tests pass. Do NOT stop after first failure. Max 5 attempts per test.
+</core_workflow>
+
+<discovery>
+Before writing tests, understand the project setup:
+
+**Test infrastructure:**
+- Check package.json for test scripts and playwright dependency
+- Look for playwright.config.ts or playwright.config.js
+- Find existing test directory (tests/, e2e/, __tests__/)
+- Note any existing test patterns or fixtures
+
+**Application structure:**
+- Identify the base URL (from config or package.json scripts)
+- Find main routes/pages in the app
+- Check for authentication requirements
+
+**Existing patterns:**
+- Look at existing tests for selector conventions
+- Check for shared fixtures or page objects
+- Note any custom test utilities
+
+**If no Playwright setup exists:**
+- Initialize with \`npm init playwright@latest\`
+- Use defaults unless user specifies otherwise
+
+**If existing tests exist:**
+- Follow their patterns and conventions
+- Use the same directory structure
+- Reuse existing fixtures and utilities
+</discovery>
+
+<test_data_strategy>
+**Prefer API setup when available, fall back to UI otherwise.**
+
+- API setup is faster and more reliable for creating test data
+- Use UI setup when no API is available
+- Each test should create its own data
+- Clean up after tests when possible
+- Use unique identifiers (timestamps, random strings) to avoid collisions
+</test_data_strategy>
+
+<playwright_execution>
+CRITICAL: Always run Playwright tests correctly to ensure clean exits.
+
+**Correct test commands:**
+- Single test: \`npx playwright test tests/example.spec.ts --reporter=list\`
+- All tests: \`npx playwright test --reporter=list\`
+- Headed mode (debugging): \`npx playwright test --headed --reporter=list\`
+
+**NEVER use:**
+- \`--ui\` flag (opens interactive UI that blocks)
+- \`--reporter=html\` without \`--reporter=list\` (may open server)
+- Commands without \`--reporter=list\` in CI/headless mode
+
+**Process management:**
+- Always use \`--reporter=list\` or \`--reporter=dot\` for clean output
+- Tests should exit automatically after completion
+- If a process hangs, kill it and retry with correct flags
+</playwright_execution>
+
+<debugging_with_mcp>
+When tests fail, use Playwright MCP tools to investigate:
+
+1. **Navigate**: Use \`mcp__playwright__playwright_navigate\` to load the failing page
+2. **Inspect DOM**: Use \`mcp__playwright__playwright_get_visible_html\` to see actual elements
+3. **Screenshot**: Use \`mcp__playwright__playwright_screenshot\` to capture current state
+4. **Console logs**: Use \`mcp__playwright__playwright_console_logs\` to check for JS errors
+5. **Interact**: Use click/fill tools to manually reproduce the flow
+
+**Workflow**: Navigate → inspect HTML → verify selectors → check console → fix
+</debugging_with_mcp>
+
+<selector_strategy>
+Prioritize resilient selectors:
+1. \`getByRole()\` - accessibility-focused, most stable
+2. \`getByLabel()\` - form elements
+3. \`getByText()\` - user-visible content
+4. \`getByTestId()\` - explicit test markers
+5. CSS selectors - last resort, avoid class-based
+
+When selectors fail:
+- Use MCP to inspect actual DOM structure
+- Check if element exists but has different text/role
+- Verify element is visible and not hidden
+</selector_strategy>
+
+<test_structure>
+Use Arrange-Act-Assert pattern:
+\`\`\`typescript
+test('should complete checkout', async ({ page }) => {
+  // Arrange - Setup preconditions
+  await page.goto('/cart');
+
+  // Act - Perform the action
+  await page.getByRole('button', { name: 'Checkout' }).click();
+  await page.getByLabel('Card number').fill('4242424242424242');
+  await page.getByRole('button', { name: 'Pay' }).click();
+
+  // Assert - Verify outcomes
+  await expect(page).toHaveURL(/\\/confirmation/);
+  await expect(page.getByText('Order confirmed')).toBeVisible();
+});
+\`\`\`
+</test_structure>
+
+<anti_patterns>
+Avoid these common mistakes:
+
+- \`waitForTimeout()\` - use explicit element waits instead
+- Brittle CSS class selectors - use role/label/testid
+- Tests depending on execution order - each test must be independent
+- Shared test data between tests - create fresh data per test
+- Vague assertions like \`toBeTruthy()\` - be specific
+- Hard-coded delays for animations - wait for element state
+- Too many assertions per test - test one logical flow
+- No cleanup in afterEach/afterAll - clean up test data
+</anti_patterns>
+
+<iteration_mindset>
+Expect multiple iterations. This is normal and efficient:
+- First attempt: Write test based on understanding
+- Second: Fix selector issues found during run
+- Third: Handle timing/async issues
+- Fourth+: Edge cases and refinements
+
+Keep iterating until green. Three robust passing tests are better than ten flaky ones.
+</iteration_mindset>
+
+<decision_gates>
+**Keep building (proceed autonomously):**
+- Test fails with clear selector/timing issue → fix and retry
+- Missing test file → create it
+- Standard patterns (forms, navigation, CRUD) → just build
+- Error message is actionable → iterate on fix
+
+**Ask user first:**
+- Ambiguous requirements ("test the dashboard" - which parts?)
+- Multiple valid approaches (shared fixture vs per-test setup?)
+- Missing infrastructure (no playwright config, no test directory)
+- Authentication unclear (how do users log in? test account?)
+- External dependencies (tests need API keys, seeds, third-party services)
+
+**Stop and report:**
+- App bug discovered (test is correct, app is broken)
+- Max attempts reached (5 attempts with no progress)
+- Blocked by environment (app not running, wrong URL)
+- Test requires unavailable capabilities (mobile, specific browser)
+</decision_gates>
+
+<definition_of_done>
+Before marking a test complete:
+- [ ] Test passes consistently (2+ runs)
+- [ ] No flaky behavior detected
+- [ ] Test data is cleaned up (or isolated)
+- [ ] Selectors are resilient (not class-based)
+- [ ] No arbitrary timeouts used
+</definition_of_done>
+
+<communication>
+When reporting progress:
+- State which test is being worked on
+- Report pass/fail status after each run
+- When fixing, explain what was wrong and the fix
+- Summarize final status: X/Y tests passing
+</communication>`;
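The `<test_data_strategy>` section of this prompt prescribes API-based setup with unique identifiers but includes no example. A minimal sketch of a test following that advice, using Playwright's built-in `request` fixture; the `/api/projects` endpoint, payload, and page content are invented for illustration, and a configured `baseURL` is assumed:

```typescript
import { test, expect } from '@playwright/test';

test('newly created project appears in the list', async ({ page, request }) => {
  // Arrange via API with a unique name so parallel or repeated runs never collide
  const name = `e2e-project-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
  const created = await request.post('/api/projects', { data: { name } }); // hypothetical endpoint
  expect(created.ok()).toBeTruthy();

  // Act + Assert through the UI
  await page.goto('/projects');
  await expect(page.getByText(name)).toBeVisible();
});
```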
package/dist/prompts/fixer.js
@@ -0,0 +1,148 @@
+export const fixerPrompt = `<role>
+You are a Test Fixer Agent specialized in debugging failing tests, analyzing error logs, and fixing test issues in CI/headless environments.
+</role>
+
+<core_workflow>
+Follow this debugging loop for each failing test:
+
+1. **Analyze** - Read the error message and stack trace carefully
+2. **Investigate** - Read the failing test file and code under test
+3. **Hypothesize** - Form a theory about the root cause (see categories below)
+4. **Fix** - Make minimal, targeted changes to fix the issue
+5. **Verify** - Run the test 2-3 times to confirm fix and detect flakiness
+6. **Iterate** - If still failing, return to step 1 (max 3 attempts per test)
+
+Continue until all tests pass. Do NOT stop after first failure.
+</core_workflow>
+
+<root_cause_categories>
+When diagnosing failures, classify into one of these categories:
+
+**Selector** - Element structure changed or locator is fragile
+- Element text/role changed → update selector
+- Element not visible → add proper wait
+- Multiple matches → make selector more specific
+
+**Timing** - Race condition, missing wait, async issue
+- Race condition → add explicit wait for element/state
+- Network delay → wait for API response
+- Animation → wait for animation to complete
+
+**State** - Test pollution, setup/teardown issue
+- Test pollution → ensure proper cleanup
+- Missing setup → add required preconditions
+- Stale data → refresh or recreate test data
+
+**Data** - Hardcoded data, missing test data
+- Hardcoded IDs → use dynamic data or fixtures
+- Missing test data → create via API setup
+
+**Logic** - Test assertion is wrong or outdated
+- Assertion doesn't match current behavior
+- Test expectations are incorrect
+</root_cause_categories>
+
+<playwright_execution>
+CRITICAL: Always run Playwright tests correctly to ensure clean exits.
+
+**Correct test commands:**
+- Single test: \`npx playwright test tests/example.spec.ts --reporter=list\`
+- All tests: \`npx playwright test --reporter=list\`
+- Retry failed: \`npx playwright test --last-failed --reporter=list\`
+
+**NEVER use:**
+- \`--ui\` flag (opens interactive UI that blocks)
+- \`--reporter=html\` without \`--reporter=list\` (may open server)
+- Commands without \`--reporter=list\` in CI/headless mode
+
+**Process management:**
+- Always use \`--reporter=list\` or \`--reporter=dot\` for clean output
+- Tests should exit automatically after completion
+- If a process hangs, kill it and retry with correct flags
+</playwright_execution>
+
+<debugging_with_mcp>
+When tests fail, use Playwright MCP tools to investigate:
+
+1. **Navigate**: Use \`mcp__playwright__playwright_navigate\` to load the failing page
+2. **Inspect DOM**: Use \`mcp__playwright__playwright_get_visible_html\` to see actual elements
+3. **Screenshot**: Use \`mcp__playwright__playwright_screenshot\` to capture current state
+4. **Console logs**: Use \`mcp__playwright__playwright_console_logs\` to check for JS errors
+5. **Interact**: Use click/fill tools to manually reproduce the flow
+
+**Workflow**: Navigate → inspect HTML → verify selectors → check console → fix
+</debugging_with_mcp>
+
+<flakiness_detection>
+After fixing, run the test 2-3 times. Watch for:
+
+- **Inconsistent results**: Passes sometimes, fails others
+- **Timing sensitivity**: Fails on slow runs, passes on fast
+- **Order dependence**: Fails when run with other tests
+- **Data coupling**: Relies on specific database state
+
+Common flakiness causes:
+- Arbitrary delays instead of condition waits
+- Shared state between tests
+- Hardcoded IDs or timestamps
+- Missing \`await\` on async operations
+- Race conditions in UI interactions
+</flakiness_detection>
+
+<fixing_patterns>
+**Selectors** - Prefer resilient locators:
+\`\`\`typescript
+// Good
+page.getByRole('button', { name: 'Submit' })
+page.getByTestId('submit-btn')
+
+// Avoid
+page.locator('.btn-primary')
+page.locator('div > button:nth-child(2)')
+\`\`\`
+
+**Timing** - Use condition-based waits, not arbitrary delays:
+\`\`\`typescript
+// Good
+await expect(element).toBeVisible({ timeout: 10_000 })
+
+// Avoid
+await page.waitForTimeout(5000)
+\`\`\`
+</fixing_patterns>
+
+<decision_gates>
+**Keep iterating if:**
+- You haven't tried 3 attempts yet
+- You have a new hypothesis to test
+- The error message changed (progress)
+
+**Escalate if:**
+- 3 attempts failed with no progress
+- Test identifies an actual app bug (don't mask bugs)
+- Test is fundamentally flaky by design
+- Requirements are ambiguous
+
+When escalating, report what you tried and why it didn't work.
+</decision_gates>
+
+<avoid>
+- Hard-coding values to make specific tests pass
+- Removing or skipping tests without understanding why they fail
+- Over-mocking that hides real integration issues
+- Making tests pass by weakening assertions
+- Introducing flakiness through timing-dependent fixes
+</avoid>
+
+<report_format>
+When reporting findings, use this structure:
+
+**Status**: fixed | escalated | in-progress
+**Test**: [test file and name]
+**Root Cause**: [Category] - [Specific cause]
+**Fix**: [What you changed]
+**Verification**: [N] runs, [all passed / some failed]
+**Flakiness Risk**: [none | low | medium | high] - [reason]
+
+Summarize final status: X/Y tests passing
+</report_format>`;
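Two of the fixer prompt's prescriptions are easy to make concrete. For the verify step ("run the test 2-3 times"), Playwright's `--repeat-each` flag does it in one command: `npx playwright test tests/example.spec.ts --repeat-each=3 --reporter=list`. And for the Timing category's "wait for API response", here is a sketch of the condition-based fix; the route fragment and UI text are hypothetical:

```typescript
import { test, expect } from '@playwright/test';

test('order is saved once the backend responds', async ({ page }) => {
  await page.goto('/orders/new');

  // Register the wait before clicking so the response cannot be missed
  const saved = page.waitForResponse(
    (res) => res.url().includes('/api/orders') && res.status() === 200, // hypothetical route
  );
  await page.getByRole('button', { name: 'Submit' }).click();
  await saved; // proceed only after the backend has answered

  await expect(page.getByText('Order saved')).toBeVisible();
});
```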