npm - wiggum-cli - Versions diffs - 0.17.2 → 0.18.3 - Mend

wiggum-cli 0.17.2 → 0.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71)

package/README.md +58 -14
package/dist/agent/orchestrator.d.ts +21 -3
package/dist/agent/orchestrator.js +394 -187
package/dist/agent/resolve-config.js +1 -1
package/dist/agent/scheduler.d.ts +29 -0
package/dist/agent/scheduler.js +1149 -0
package/dist/agent/tools/backlog.d.ts +6 -0
package/dist/agent/tools/backlog.js +23 -4
package/dist/agent/tools/execution.js +1 -1
package/dist/agent/tools/introspection.js +26 -4
package/dist/agent/types.d.ts +113 -0
package/dist/ai/conversation/url-fetcher.js +46 -13
package/dist/ai/enhancer.js +1 -2
package/dist/ai/providers.js +4 -4
package/dist/commands/agent.d.ts +1 -0
package/dist/commands/agent.js +53 -1
package/dist/commands/config.js +100 -6
package/dist/commands/run.d.ts +2 -0
package/dist/commands/run.js +47 -2
package/dist/commands/sync.js +2 -2
package/dist/generator/config.js +13 -2
package/dist/index.js +11 -3
package/dist/repl/command-parser.d.ts +1 -1
package/dist/repl/command-parser.js +1 -1
package/dist/templates/config/ralph.config.cjs.tmpl +9 -2
package/dist/templates/prompts/PROMPT_e2e.md.tmpl +16 -89
package/dist/templates/prompts/PROMPT_e2e_fix.md.tmpl +55 -0
package/dist/templates/prompts/PROMPT_feature.md.tmpl +12 -98
package/dist/templates/prompts/PROMPT_review_auto.md.tmpl +52 -49
package/dist/templates/prompts/PROMPT_review_manual.md.tmpl +30 -2
package/dist/templates/prompts/PROMPT_review_merge.md.tmpl +59 -69
package/dist/templates/prompts/PROMPT_verify.md.tmpl +7 -0
package/dist/templates/root/README.md.tmpl +2 -3
package/dist/templates/scripts/feature-loop.sh.tmpl +835 -93
package/dist/templates/scripts/loop.sh.tmpl +5 -1
package/dist/templates/scripts/ralph-monitor.sh.tmpl +0 -2
package/dist/tui/app.d.ts +5 -1
package/dist/tui/app.js +22 -3
package/dist/tui/components/HeaderContent.d.ts +4 -1
package/dist/tui/components/HeaderContent.js +4 -2
package/dist/tui/hooks/useAgentOrchestrator.d.ts +2 -1
package/dist/tui/hooks/useAgentOrchestrator.js +86 -33
package/dist/tui/hooks/useInit.d.ts +5 -1
package/dist/tui/hooks/useInit.js +20 -2
package/dist/tui/screens/AgentScreen.js +3 -1
package/dist/tui/screens/InitScreen.js +12 -1
package/dist/tui/screens/MainShell.js +70 -6
package/dist/tui/screens/RunScreen.d.ts +6 -2
package/dist/tui/screens/RunScreen.js +48 -6
package/dist/tui/utils/loop-status.d.ts +15 -0
package/dist/tui/utils/loop-status.js +89 -27
package/dist/tui/utils/polishGoal.js +14 -1
package/dist/utils/config.d.ts +7 -0
package/dist/utils/config.js +14 -0
package/dist/utils/env.js +7 -1
package/dist/utils/github.d.ts +13 -0
package/dist/utils/github.js +63 -4
package/dist/utils/logger.js +1 -1
package/package.json +9 -7
package/src/templates/config/ralph.config.cjs.tmpl +9 -2
package/src/templates/prompts/PROMPT_e2e.md.tmpl +16 -89
package/src/templates/prompts/PROMPT_e2e_fix.md.tmpl +55 -0
package/src/templates/prompts/PROMPT_feature.md.tmpl +12 -98
package/src/templates/prompts/PROMPT_review_auto.md.tmpl +52 -49
package/src/templates/prompts/PROMPT_review_manual.md.tmpl +30 -2
package/src/templates/prompts/PROMPT_review_merge.md.tmpl +59 -69
package/src/templates/prompts/PROMPT_verify.md.tmpl +7 -0
package/src/templates/root/README.md.tmpl +2 -3
package/src/templates/scripts/feature-loop.sh.tmpl +835 -93
package/src/templates/scripts/loop.sh.tmpl +5 -1
package/src/templates/scripts/ralph-monitor.sh.tmpl +0 -2

package/dist/commands/run.js CHANGED Viewed

@@ -3,11 +3,34 @@
  * Executes the feature development loop for a specific feature
  */
 import { spawn } from 'node:child_process';
-import { existsSync } from 'node:fs';
+import { existsSync, readFileSync } from 'node:fs';
 import { join, dirname } from 'node:path';
 import { logger } from '../utils/logger.js';
 import { loadConfigWithDefaults, hasConfig, } from '../utils/config.js';
 import pc from 'picocolors';
+const SUPPORTED_LOOP_CLIS = ['claude', 'codex'];
+const DEFAULT_CODEX_LOOP_MODEL = 'gpt-5.3-codex';
+function isSupportedLoopCli(value) {
+    return SUPPORTED_LOOP_CLIS.includes(value);
+}
+function scriptSupportsCliFlags(scriptPath) {
+    try {
+        const script = readFileSync(scriptPath, 'utf-8');
+        return script.includes('--cli') && script.includes('--review-cli');
+    }
+    catch {
+        return false;
+    }
+}
+function getModelDisplayLabel(modelOverride, codingCli, reviewCli, config) {
+    if (modelOverride)
+        return modelOverride;
+    if (codingCli === 'codex' && reviewCli === 'codex')
+        return DEFAULT_CODEX_LOOP_MODEL;
+    if (codingCli === 'claude' && reviewCli === 'claude')
+        return config.loop.defaultModel;
+    return `${config.loop.defaultModel} (claude) / ${DEFAULT_CODEX_LOOP_MODEL} (codex)`;
+}
 /**
  * Find the feature-loop.sh script
  * Checks: 1) .ralph/scripts/ 2) ralph/ (parent ralph repo)
@@ -120,6 +143,26 @@ export async function runCommand(feature, options = {}) {
     if (options.model) {
         args.push('--model', options.model);
     }
+    // Resolve and validate coding CLI
+    const codingCli = options.cli ?? config.loop.codingCli ?? 'claude';
+    if (!isSupportedLoopCli(codingCli)) {
+        logger.error(`Invalid CLI '${codingCli}'. Allowed values are 'claude' or 'codex'.`);
+        process.exit(1);
+    }
+    // Resolve and validate review CLI
+    const reviewCli = options.reviewCli ?? config.loop.reviewCli ?? codingCli;
+    if (!isSupportedLoopCli(reviewCli)) {
+        logger.error(`Invalid review CLI '${reviewCli}'. Allowed values are 'claude' or 'codex'.`);
+        process.exit(1);
+    }
+    // Guard against stale generated scripts that don't support CLI flags.
+    if ((codingCli !== 'claude' || reviewCli !== 'claude') && !scriptSupportsCliFlags(scriptPath)) {
+        logger.error('The current feature-loop.sh does not support --cli/--review-cli flags.');
+        logger.info('Regenerate scripts with "wiggum init" (or re-run /init), then retry.');
+        process.exit(1);
+    }
+    args.push('--cli', codingCli);
+    args.push('--review-cli', reviewCli);
     // Resolve and validate reviewMode
     const reviewMode = options.reviewMode ?? config.loop.reviewMode ?? 'manual';
     if (reviewMode !== 'manual' && reviewMode !== 'auto' && reviewMode !== 'merge') {
@@ -133,7 +176,9 @@ export async function runCommand(feature, options = {}) {
     console.log(`  Spec: ${specFile ?? '(on feature branch)'}`);
     console.log(`  Max Iterations: ${maxIterations}`);
     console.log(`  Max E2E Attempts: ${maxE2eAttempts}`);
-    console.log(`  Model: ${options.model || config.loop.defaultModel}`);
+    console.log(`  Model: ${getModelDisplayLabel(options.model, codingCli, reviewCli, config)}`);
+    console.log(`  Implementation CLI: ${codingCli}`);
+    console.log(`  Review CLI: ${reviewCli}`);
     console.log(`  Review Mode: ${reviewMode}`);
     console.log(`  Worktree: ${options.worktree ? 'enabled' : 'disabled'}`);
     console.log(`  Resume: ${options.resume ? 'enabled' : 'disabled'}`);

package/dist/commands/sync.js CHANGED Viewed

@@ -18,7 +18,7 @@ export async function syncProjectContext(projectRoot) {
     // Detect provider
     const provider = getAvailableProvider();
     if (!provider) {
-        throw new Error('No AI provider available. Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or OPENROUTER_API_KEY.');
+        throw new Error('No AI provider available. Configure a supported provider API key and try again.');
     }
     // Resolve model
     const recommendedModel = AVAILABLE_MODELS[provider].find((m) => m.hint?.includes('recommended'));
@@ -59,7 +59,7 @@ export async function syncCommand() {
         contextPath = await syncProjectContext(process.cwd());
     }
     catch (err) {
-        console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
+        logger.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
         process.exit(1);
         return; // unreachable, but satisfies TS control flow
     }

package/dist/generator/config.js CHANGED Viewed

@@ -3,13 +3,17 @@
  * Generates ralph.config.cjs file from scan results
  */
 import { extractVariables } from './templates.js';
+const DEFAULT_CODEX_MODEL = 'gpt-5.3-codex';
 /**
  * Generate ralph config object from scan result
  */
 export function generateConfig(scanResult, customVars = {}) {
     const vars = extractVariables(scanResult, customVars);
-    const defaultModel = customVars.defaultModel || 'sonnet';
-    const planningModel = customVars.planningModel || 'opus';
+    const codingCli = customVars.codingCli === 'codex' ? 'codex' : 'claude';
+    const reviewCli = customVars.reviewCli === 'codex' ? 'codex' : codingCli;
+    const codexEverywhere = codingCli === 'codex' && reviewCli === 'codex';
+    const defaultModel = customVars.defaultModel || (codexEverywhere ? 'gpt-5.3-codex' : 'sonnet');
+    const planningModel = customVars.planningModel || (codexEverywhere ? 'gpt-5.3-codex' : 'opus');
     const agentProvider = customVars.agentProvider || 'anthropic';
     const agentModel = customVars.agentModel || 'claude-sonnet-4-6';
     return {
@@ -48,7 +52,14 @@ export function generateConfig(scanResult, customVars = {}) {
             maxE2eAttempts: 5,
             defaultModel,
             planningModel,
+            codexModel: DEFAULT_CODEX_MODEL,
+            codingCli,
+            reviewCli,
             reviewMode: 'manual',
+            claudePermissionMode: 'default',
+            codexSandbox: 'workspace-write',
+            codexApprovalPolicy: 'never',
+            disableMcpInAutomatedRuns: true,
         },
         agent: {
             defaultProvider: agentProvider,

package/dist/index.js CHANGED Viewed

@@ -38,6 +38,8 @@ export function parseCliArgs(argv) {
     // Flags that consume the next argument as their value
     const valueFlagSet = new Set([
         '--model',
+        '--cli',
+        '--review-cli',
         '--max-iterations',
         '--max-e2e-attempts',
         '--interval',
@@ -233,6 +235,8 @@ Options for run:
   --worktree                Use git worktree isolation
   --resume                  Resume from last checkpoint
   --model <model>           AI model to use
+  --cli <cli>               Implementation CLI: claude, codex
+  --review-cli <cli>        Review CLI: claude, codex
   --max-iterations <n>      Maximum loop iterations
   --max-e2e-attempts <n>    Maximum E2E test attempts
   --review-mode <mode>      Review mode: manual, auto, merge
@@ -266,6 +270,7 @@ Options for agent:
   --review-mode <mode>      Review mode: 'manual', 'auto', or 'merge' (default: manual)
   --dry-run                 Plan what would be done without executing
   --stream                  Stream output in real-time (default: wait for completion)
+  --diagnose-gh             Run GitHub connectivity checks for agent backlog access
 In the TUI:
   /init                     Initialize or reconfigure project
@@ -338,13 +343,15 @@ Press Esc to cancel any operation.
             const feature = parsed.positionalArgs[0];
             if (!feature) {
                 console.error('Error: <feature> is required for "run"');
-                console.error('Usage: wiggum run <feature> [--worktree] [--resume] [--model <model>] [--max-iterations <n>] [--max-e2e-attempts <n>]');
+                console.error('Usage: wiggum run <feature> [--worktree] [--resume] [--model <model>] [--cli <claude|codex>] [--review-cli <claude|codex>] [--max-iterations <n>] [--max-e2e-attempts <n>]');
                 process.exit(1);
             }
             const runOptions = {
                 worktree: parsed.flags.worktree === true,
                 resume: parsed.flags.resume === true,
                 model: typeof parsed.flags.model === 'string' ? parsed.flags.model : undefined,
+                cli: typeof parsed.flags.cli === 'string' ? parsed.flags.cli : undefined,
+                reviewCli: typeof parsed.flags.reviewCli === 'string' ? parsed.flags.reviewCli : undefined,
                 maxIterations: typeof parsed.flags.maxIterations === 'string' ? parseIntFlag(parsed.flags.maxIterations, '--max-iterations') : undefined,
                 maxE2eAttempts: typeof parsed.flags.maxE2eAttempts === 'string' ? parseIntFlag(parsed.flags.maxE2eAttempts, '--max-e2e-attempts') : undefined,
                 reviewMode: typeof parsed.flags.reviewMode === 'string' ? parsed.flags.reviewMode : undefined,
@@ -425,9 +432,10 @@ Press Esc to cancel any operation.
                 reviewMode: reviewModeFlag,
                 dryRun: parsed.flags.dryRun === true,
                 stream: parsed.flags.stream === true,
+                diagnoseGh: parsed.flags.diagnoseGh === true,
             };
-            if (agentOpts.stream === true) {
-                // Explicit --stream: always headless
+            if (agentOpts.stream === true || agentOpts.diagnoseGh === true) {
+                // Explicit headless modes
                 const { agentCommand } = await import('./commands/agent.js');
                 await agentCommand(agentOpts);
             }

package/dist/repl/command-parser.d.ts CHANGED Viewed

@@ -61,7 +61,7 @@ export declare const REPL_COMMANDS: {
     };
     readonly agent: {
         readonly description: "Start the autonomous backlog agent";
-        readonly usage: "/agent [--dry-run] [--max-items <n>]";
+        readonly usage: "/agent [--dry-run] [--max-items <n>] [--max-steps <n>] [--review-mode manual|auto|merge] [--labels <l1,l2>] [--issues <n1,n2,...>]";
         readonly aliases: readonly ["a"];
     };
     readonly config: {

package/dist/repl/command-parser.js CHANGED Viewed

@@ -38,7 +38,7 @@ export const REPL_COMMANDS = {
     },
     agent: {
         description: 'Start the autonomous backlog agent',
-        usage: '/agent [--dry-run] [--max-items <n>]',
+        usage: '/agent [--dry-run] [--max-items <n>] [--max-steps <n>] [--review-mode manual|auto|merge] [--labels <l1,l2>] [--issues <n1,n2,...>]',
         aliases: ['a'],
     },
     config: {

package/dist/templates/config/ralph.config.cjs.tmpl CHANGED Viewed

@@ -32,8 +32,15 @@ module.exports = {
   loop: {
     maxIterations: 10,
     maxE2eAttempts: 5,
-    defaultModel: 'sonnet',
-    planningModel: 'opus',
+    defaultModel: 'sonnet', // Claude implementation/e2e default
+    planningModel: 'opus',  // Claude planning/review default
+    codexModel: 'gpt-5.3-codex', // Codex model default for codex phases
+    codingCli: 'claude',   // Implementation CLI: 'claude' | 'codex'
+    reviewCli: 'claude',   // Review CLI: 'claude' | 'codex'
     reviewMode: 'manual', // 'manual' = stop at PR, 'auto' = review (no merge), 'merge' = review + auto-merge
+    claudePermissionMode: 'default', // Claude permission mode: default|auto|dontAsk|acceptEdits|plan|bypassPermissions
+    codexSandbox: 'workspace-write', // Codex sandbox: read-only|workspace-write|danger-full-access
+    codexApprovalPolicy: 'never', // Codex approvals: untrusted|on-failure|on-request|never
+    disableMcpInAutomatedRuns: true, // Disable MCP servers when RALPH_AUTOMATED=1
   },
 };

package/dist/templates/prompts/PROMPT_e2e.md.tmpl CHANGED Viewed

@@ -28,8 +28,14 @@ Check the bridge is running:
 ```bash
 curl -s http://localhost:3999/health || (cd {{projectRoot}} && npm run e2e:bridge &)
 sleep 3
+curl -s http://localhost:3999/health
 ```
+If bridge health still fails (for example `listen EPERM` / socket permission errors), do not keep retrying commands in this run:
+- Mark each unchecked `- [ ] E2E:` scenario as `- [ ] E2E: ... - FAILED: bridge unavailable in sandbox`
+- Add the concrete error text under each failed scenario
+- Continue to Step 4 and record a clear blocked summary
 ### Step 2: Parse E2E Test Scenarios
 Read E2E test scenarios from @.ralph/specs/$FEATURE-implementation-plan.md.
 Each scenario is marked with `- [ ] E2E:` prefix and follows this format:
@@ -88,32 +94,27 @@ CONTENT=$(agent-browser eval "document.getElementById('terminal-mirror').textCon
 # Verify CONTENT contains expected strings
 ```
-6. **Reset between scenarios:**
+6. **Reset between scenarios (without ending the browser session):**
 ```bash
-agent-browser close
+# Re-open the next scenario URL directly in the same session.
+agent-browser open "http://localhost:3999?cmd=<next-command>&cwd=<next-path>"
 ```
 ### TUI Interaction Cheatsheet
 | Action | Command |
 |--------|---------|
-| Open TUI | `agent-browser open "http://localhost:3999?cmd=init&cwd=/path"` |
+| Open TUI | `agent-browser open "http://localhost:3999?cmd=<cmd>&cwd=<path>"` |
 | Read screen | `agent-browser eval "document.getElementById('terminal-mirror').textContent"` |
-| Take snapshot | `agent-browser snapshot -i` |
-| Click element | `agent-browser click @ref` |
-| Type text | `agent-browser type @ref "text"` |
-| Press Enter | `agent-browser key Enter` |
-| Arrow down | `agent-browser key ArrowDown` |
-| Escape | `agent-browser key Escape` |
-| Screenshot | `agent-browser screenshot e2e-failure.png` |
-| Wait for text | `agent-browser wait --text "expected" --timeout 10000` |
+| Type/key press | `agent-browser type @ref "text"` / `agent-browser key Enter` |
 | Close session | `agent-browser close` |
 ### Key Rules
 - Always wait for expected text before asserting (TUI renders async via React)
 - Use `agent-browser eval` with `terminal-mirror` for reliable text reading
 - Take screenshots on failures for debugging
-- Each scenario navigates to a fresh URL (clean state)
+- Keep one browser session across all scenarios; do not call `agent-browser close` until all scenarios are done
+- Each scenario should navigate to a fresh URL (clean state)
 - Wait 500ms after key presses before reading (Ink re-render delay)
 ### Step 4: Report Results
@@ -158,14 +159,10 @@ When all scenarios are executed:
    ```
 5. If all passed: signal ready for PR phase
 6. If any failed: failures documented, loop will retry after fix iteration
+7. Close browser once at the end: `agent-browser close`
 ## Learning Capture
-If E2E testing revealed issues worth remembering, append to @.ralph/LEARNINGS.md:
-- Flaky test patterns -> Add under "## Anti-Patterns" > "E2E Pitfalls"
-- TUI timing issues -> Add under "## Anti-Patterns"
-- Useful agent-browser techniques -> Add under "## Tool Usage"
-Format: `- [YYYY-MM-DD] [$FEATURE] Brief description`
+If useful E2E patterns found, append to @.ralph/LEARNINGS.md
 {{else}}
 ## Task
 Execute automated E2E tests for the completed feature using Playwright MCP tools.
@@ -282,56 +279,6 @@ Update @.ralph/specs/$FEATURE-implementation-plan.md for each scenario:
   - Fix needed: [suggested action]
 ```
-## Playwright MCP Tool Reference
-| Tool | Purpose | Key Parameters |
-|------|---------|----------------|
-| `browser_navigate` | Go to URL | `url` |
-| `browser_snapshot` | Get page state (use for assertions) | - |
-| `browser_click` | Click element | `element`, `ref` |
-| `browser_type` | Type into element | `element`, `ref`, `text`, `submit` |
-| `browser_fill_form` | Fill multiple fields | `fields[]` with name, type, ref, value |
-| `browser_select_option` | Select dropdown | `element`, `ref`, `values[]` |
-| `browser_wait_for` | Wait for text/time | `text`, `textGone`, `time` |
-| `browser_take_screenshot` | Capture visual state | `filename` (optional) |
-| `browser_console_messages` | Get JS console output | `level` (error/warning/info) |
-| `browser_press_key` | Press keyboard key | `key` (e.g., "Enter", "Tab") |
-| `browser_close` | Close browser/reset state | - |
-## Assertion Patterns
-### Text Verification
-```
-1. browser_snapshot -> get page content
-2. Check snapshot output for expected text strings
-3. If text not found, scenario fails
-```
-### Element State
-```
-1. browser_snapshot -> accessibility tree shows element states
-2. Check for: enabled/disabled, checked/unchecked, visible
-```
-### URL Verification
-```
-1. After navigation/action, snapshot shows current URL
-2. Verify URL contains expected path/params
-```
-{{#if hasSupabase}}### Database State
-```
-1. mcp__plugin_supabase_supabase__execute_sql with SELECT query
-2. Verify row count, column values match expectations
-```
-{{/if}}
-## Browser State Management
-- Use `browser_close` between unrelated scenarios to reset localStorage/cookies
-- Keep browser open for scenarios that test state persistence (e.g., duplicate submission)
-- Fresh browser state = clean localStorage, no prior submissions tracked
 ## Error Recovery
 If a scenario fails:
@@ -358,21 +305,6 @@ When all scenarios are executed:
 5. If all passed: signal ready for PR phase
 6. If any failed: failures documented, loop will retry after fix iteration
-## Troubleshooting
-### UI Changes Not Visible
-If code changes don't appear in the browser:
-1. Stop the dev server
-2. Clear cache: `rm -rf {{appDir}}/.next`
-3. Restart: `cd {{appDir}} && {{devCommand}}`
-4. Wait for full rebuild before testing
-### Stale Data
-- Clear browser storage: Use `browser_close` between scenarios
-{{#if hasSupabase}}- Check Supabase for stale test data from previous runs
-- Delete test data: `DELETE FROM table WHERE data->>'_test' = 'true'`
-{{/if}}
 ## Rules
 - Always get a fresh `browser_snapshot` after actions before making assertions
 - Use `browser_wait_for` when waiting for async operations (form submission, API calls)
@@ -382,10 +314,5 @@ If code changes don't appear in the browser:
 - Document failures clearly so fix iteration knows what to address
 ## Learning Capture
-If E2E testing revealed issues worth remembering, append to @.ralph/LEARNINGS.md:
-- Flaky test patterns -> Add under "## Anti-Patterns" > "E2E Pitfalls"
-- Useful Playwright techniques -> Add under "## Tool Usage"
-- Timing issues or race conditions -> Add under "## Anti-Patterns"
-Format: `- [YYYY-MM-DD] [$FEATURE] Brief description`
+If useful E2E patterns found, append to @.ralph/LEARNINGS.md
 {{/if}}

package/dist/templates/prompts/PROMPT_e2e_fix.md.tmpl ADDED Viewed

@@ -0,0 +1,55 @@
+## Context
+If @.ralph/guides/AGENTS.md exists, study it for commands and patterns.
+Study @.ralph/specs/$FEATURE-implementation-plan.md for E2E failure details.
+Study @.ralph/specs/$FEATURE.md for behavioral constraints and acceptance criteria.
+{{#if frameworkVariant}}For detailed architecture, see @{{appDir}}/.claude/CLAUDE.md{{/if}}
+## Learnings
+Read @.ralph/LEARNINGS.md — pay attention to the "E2E Pitfalls" section to avoid repeating known issues.
+## Task
+Fix all E2E test scenarios marked `- [ ] E2E: ... - FAILED` in @.ralph/specs/$FEATURE-implementation-plan.md.
+1. Read the implementation plan and identify every line matching `- [ ] E2E: ... - FAILED`.
+2. For each failure, read the `Error:` and `Fix needed:` fields to understand what went wrong.
+3. Cross-reference @.ralph/specs/$FEATURE.md to ensure fixes respect behavioral constraints.
+4. Apply the minimal code change needed to make the failing scenario pass.
+5. Do NOT touch passing scenarios or non-E2E implementation code unless required by the fix.
+{{#if isTui}}
+### TUI E2E Fix Notes
+- Fixes typically involve Ink component state, key handling, or terminal output formatting.
+- Verify fix with the agent-browser bridge: `curl -s http://localhost:3999/health` to confirm it is running.
+- After fixing, re-run the failed scenario manually via agent-browser to confirm it passes before updating the plan.
+- If bridge/agent-browser cannot start due to sandbox socket restrictions (e.g. `listen EPERM`, daemon socket startup failure), treat scenarios as blocked infrastructure:
+  - Keep each affected scenario as `- [ ] E2E: ... - FAILED: bridge unavailable in sandbox`
+  - Record exact error output
+  - Do not loop on repeated identical retries in this run
+{{else}}
+### Web E2E Fix Notes
+- Fixes typically involve DOM structure, async timing, or data state issues.
+- Verify fix with the dev server running at http://localhost:3000.
+- After fixing, re-run the failed scenario via Playwright MCP to confirm it passes before updating the plan.
+{{/if}}
+## Validation
+After applying fixes, run ONLY the E2E validation command:
+```bash
+cd {{appDir}} && {{testCommand}}
+```
+Unit tests and build are already passing — do not re-run lint, typecheck, or build unless the fix touches non-E2E source files.
+## Completion
+When fixes are applied and validation passes:
+1. Update @.ralph/specs/$FEATURE-implementation-plan.md — for each fixed scenario, change `- [ ] E2E: ... - FAILED` to `- [x] E2E: ... - PASSED`.
+2. `git -C {{appDir}} add -A`
+3. `git -C {{appDir}} commit -m "fix($FEATURE): resolve failing E2E scenarios"`
+4. `git -C {{appDir}} push origin feat/$FEATURE`
+## Learning Capture
+If this fix iteration revealed E2E-specific patterns worth remembering, append to @.ralph/LEARNINGS.md:
+- Flaky test patterns or timing issues -> Add under "## Anti-Patterns (What to Avoid)" > "E2E Pitfalls"
+- Useful debugging techniques -> Add under "## Tool Usage"
+Format: `- [YYYY-MM-DD] [$FEATURE] Brief description`

package/dist/templates/prompts/PROMPT_feature.md.tmpl CHANGED Viewed

@@ -14,106 +14,20 @@ Study @.ralph/specs/$FEATURE.md for feature specification.
 3. Create @.ralph/specs/$FEATURE-implementation-plan.md with tasks
 ## Implementation Plan Format
-```markdown
-# $FEATURE Implementation Plan
-**Spec:** .ralph/specs/$FEATURE.md
-**Branch:** feat/$FEATURE
-**Status:** Planning | In Progress | PR Review | Completed
-## Tasks
-### Phase 1: Setup
-- [ ] Task 1 - [complexity: S/M/L]
-- [ ] Task 2
-### Phase 2: Core Implementation
-- [ ] Task 3
-- [ ] Task 4
-### Phase 3: Tests (Unit/Integration)
-- [ ] Write tests for [component]
-- [ ] Write tests for [feature]
-### Phase 4: Polish & Design
-- [ ] Run design checklist from @.ralph/guides/FRONTEND.md (if UI changes)
-- [ ] Verify responsive design (mobile/tablet/desktop)
-- [ ] Add loading/empty/error states
-- [ ] Verify hover/focus states on interactive elements
-{{#if styling}}- [ ] For charts: use ChartContainer pattern with tooltips + legends{{/if}}
-- [ ] Task N (additional polish)
-### Phase 5: E2E Testing
-{{#if isTui}}
-TUI E2E tests executed via xterm.js bridge + agent-browser.
-Fixture projects in `e2e/fixtures/`. Bridge at `http://localhost:3999`.
-- [ ] E2E: [Scenario name] - [brief description]
-  - **Command:** [wiggum command, e.g., init, new auth-flow]
-  - **CWD:** [working directory, e.g., e2e/fixtures/bare-project]
-  - **Steps:**
-    1. [Action] -> [expected terminal output]
-    2. [Action] -> [expected terminal output]
-  - **Verify:** [text that should appear in terminal]
-Example TUI E2E scenario:
-- [ ] E2E: Init in bare project - happy path
-  - **Command:** init
-  - **CWD:** e2e/fixtures/bare-project
-  - **Steps:**
-    1. Open bridge with init command -> Welcome screen renders
-    2. Arrow down to select option -> Option highlighted
-    3. Press Enter -> Next screen appears
-  - **Verify:** "initialized" text visible in terminal
-{{else}}
-Browser-based tests executed via Playwright MCP tools.
-- [ ] E2E: [Scenario name] - [brief description]
-  - **URL:** [starting URL, use {placeholders} for dynamic IDs]
-  - **Preconditions:** [setup requirements, e.g., "Survey must be published"]
-  - **Steps:**
-    1. [Action] -> [expected result]
-    2. [Action] -> [expected result]
-  - **Verify:** [final assertion text to check]
-  - **Database check:** [optional SQL query to verify data]
-Example E2E scenario:
-- [ ] E2E: Submit survey response - happy path
-  - **URL:** /survey/{surveyId}?utm_source=e2e_test
-  - **Preconditions:** Published survey with required questions
-  - **Steps:**
-    1. Navigate to survey URL -> Form displays with all questions
-    2. Fill required text field -> No validation error
-    3. Select rating 4 -> Button shows selected state
-    4. Click "Submit Survey" -> Loading state appears
-    5. Wait for "Thank You!" -> Success card displays
-  - **Verify:** "successfully submitted" text visible
-  - **Database check:** SELECT * FROM survey_responses WHERE survey_id = '{surveyId}'
-{{/if}}
-## Done
-- [x] Completed task - [commit hash]
-- [x] E2E: Scenario name - PASSED
-```
+Create `@.ralph/specs/$FEATURE-implementation-plan.md` with:
+- **Header:** feature name, spec path (`$FEATURE.md`), branch (`feat/$FEATURE`), status
+- **Phases:** Setup | Core Implementation | Tests (Unit/Integration) | Polish & Design | E2E Testing
+- **Tasks:** `- [ ] Task description [S/M/L]` — every task MUST use `- [ ]` checkbox syntax
+- **E2E tasks:** `- [ ] E2E: Scenario name` with required fields:
+{{#if isTui}}  - Command, CWD, Steps (action → expected terminal output), Verify text{{else}}  - URL, Preconditions, Steps (action → expected result), Verify text, optional Database check{{/if}}
+- **Done section:** `- [x] Completed task - [commit hash]`
 ## CRITICAL CONSTRAINT — PLANNING ONLY
-**You are in the PLANNING phase. Your ONLY job is to produce an implementation plan.**
-- Do NOT write any source code, test code, or configuration files
-- Do NOT create, modify, or touch any files outside `.ralph/specs/`
-- Do NOT run build, test, or lint commands
-- Do NOT make git commits
-- If you feel the urge to "just implement a small piece", STOP — that is a phase violation
-- The implementation phase runs AFTER this session ends, in a separate session
-- Violation of this constraint wastes tokens and breaks the harness automation
+Planning only. Do NOT write code, tests, configs, or run builds/commits.
+The implementation phase runs in a separate session after planning ends.
+Violation wastes tokens and breaks harness automation.
 ## Rules
-- You MUST use `- [ ]` checkbox syntax for every task in the plan
-- Do NOT use heading-based task formats (e.g., `#### Task 1:`) for individual tasks
-- The harness parses `- [ ]` lines to track progress — other formats will break automation
-- Use `### Phase N:` headings only for phase grouping, not for individual tasks
-- One task = one commit-sized unit of work (but tasks can be grouped into phases for batch implementation)
+- MUST use `- [ ]` checkbox syntax for every task — harness parses this for progress tracking
+- Use `### Phase N:` headings for phases; one task = one commit-sized unit of work
 - Every implementation task needs a corresponding test task
-- Use Supabase MCP to check existing schema
-- Use PostHog MCP to check existing analytics setup
-- For UI-heavy features (new pages, dashboards, analytics, marketing pages), reference @.ralph/guides/FRONTEND.md
-- Consider `/frontend-design` skill for features needing distinctive aesthetics