wiggum-cli 0.17.2 → 0.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/README.md +58 -14
  2. package/dist/agent/orchestrator.d.ts +21 -3
  3. package/dist/agent/orchestrator.js +394 -187
  4. package/dist/agent/resolve-config.js +1 -1
  5. package/dist/agent/scheduler.d.ts +29 -0
  6. package/dist/agent/scheduler.js +1149 -0
  7. package/dist/agent/tools/backlog.d.ts +6 -0
  8. package/dist/agent/tools/backlog.js +23 -4
  9. package/dist/agent/tools/execution.js +1 -1
  10. package/dist/agent/tools/introspection.js +26 -4
  11. package/dist/agent/types.d.ts +113 -0
  12. package/dist/ai/conversation/url-fetcher.js +46 -13
  13. package/dist/ai/enhancer.js +1 -2
  14. package/dist/ai/providers.js +4 -4
  15. package/dist/commands/agent.d.ts +1 -0
  16. package/dist/commands/agent.js +53 -1
  17. package/dist/commands/config.js +100 -6
  18. package/dist/commands/run.d.ts +2 -0
  19. package/dist/commands/run.js +47 -2
  20. package/dist/commands/sync.js +2 -2
  21. package/dist/generator/config.js +13 -2
  22. package/dist/index.js +11 -3
  23. package/dist/repl/command-parser.d.ts +1 -1
  24. package/dist/repl/command-parser.js +1 -1
  25. package/dist/templates/config/ralph.config.cjs.tmpl +9 -2
  26. package/dist/templates/prompts/PROMPT_e2e.md.tmpl +16 -89
  27. package/dist/templates/prompts/PROMPT_e2e_fix.md.tmpl +55 -0
  28. package/dist/templates/prompts/PROMPT_feature.md.tmpl +12 -98
  29. package/dist/templates/prompts/PROMPT_review_auto.md.tmpl +52 -49
  30. package/dist/templates/prompts/PROMPT_review_manual.md.tmpl +30 -2
  31. package/dist/templates/prompts/PROMPT_review_merge.md.tmpl +59 -69
  32. package/dist/templates/prompts/PROMPT_verify.md.tmpl +7 -0
  33. package/dist/templates/root/README.md.tmpl +2 -3
  34. package/dist/templates/scripts/feature-loop.sh.tmpl +835 -93
  35. package/dist/templates/scripts/loop.sh.tmpl +5 -1
  36. package/dist/templates/scripts/ralph-monitor.sh.tmpl +0 -2
  37. package/dist/tui/app.d.ts +5 -1
  38. package/dist/tui/app.js +22 -3
  39. package/dist/tui/components/HeaderContent.d.ts +4 -1
  40. package/dist/tui/components/HeaderContent.js +4 -2
  41. package/dist/tui/hooks/useAgentOrchestrator.d.ts +2 -1
  42. package/dist/tui/hooks/useAgentOrchestrator.js +86 -33
  43. package/dist/tui/hooks/useInit.d.ts +5 -1
  44. package/dist/tui/hooks/useInit.js +20 -2
  45. package/dist/tui/screens/AgentScreen.js +3 -1
  46. package/dist/tui/screens/InitScreen.js +12 -1
  47. package/dist/tui/screens/MainShell.js +70 -6
  48. package/dist/tui/screens/RunScreen.d.ts +6 -2
  49. package/dist/tui/screens/RunScreen.js +48 -6
  50. package/dist/tui/utils/loop-status.d.ts +15 -0
  51. package/dist/tui/utils/loop-status.js +89 -27
  52. package/dist/tui/utils/polishGoal.js +14 -1
  53. package/dist/utils/config.d.ts +7 -0
  54. package/dist/utils/config.js +14 -0
  55. package/dist/utils/env.js +7 -1
  56. package/dist/utils/github.d.ts +13 -0
  57. package/dist/utils/github.js +63 -4
  58. package/dist/utils/logger.js +1 -1
  59. package/package.json +9 -7
  60. package/src/templates/config/ralph.config.cjs.tmpl +9 -2
  61. package/src/templates/prompts/PROMPT_e2e.md.tmpl +16 -89
  62. package/src/templates/prompts/PROMPT_e2e_fix.md.tmpl +55 -0
  63. package/src/templates/prompts/PROMPT_feature.md.tmpl +12 -98
  64. package/src/templates/prompts/PROMPT_review_auto.md.tmpl +52 -49
  65. package/src/templates/prompts/PROMPT_review_manual.md.tmpl +30 -2
  66. package/src/templates/prompts/PROMPT_review_merge.md.tmpl +59 -69
  67. package/src/templates/prompts/PROMPT_verify.md.tmpl +7 -0
  68. package/src/templates/root/README.md.tmpl +2 -3
  69. package/src/templates/scripts/feature-loop.sh.tmpl +835 -93
  70. package/src/templates/scripts/loop.sh.tmpl +5 -1
  71. package/src/templates/scripts/ralph-monitor.sh.tmpl +0 -2
@@ -3,11 +3,34 @@
3
3
  * Executes the feature development loop for a specific feature
4
4
  */
5
5
  import { spawn } from 'node:child_process';
6
- import { existsSync } from 'node:fs';
6
+ import { existsSync, readFileSync } from 'node:fs';
7
7
  import { join, dirname } from 'node:path';
8
8
  import { logger } from '../utils/logger.js';
9
9
  import { loadConfigWithDefaults, hasConfig, } from '../utils/config.js';
10
10
  import pc from 'picocolors';
11
+ const SUPPORTED_LOOP_CLIS = ['claude', 'codex'];
12
+ const DEFAULT_CODEX_LOOP_MODEL = 'gpt-5.3-codex';
13
+ function isSupportedLoopCli(value) {
14
+ return SUPPORTED_LOOP_CLIS.includes(value);
15
+ }
16
+ function scriptSupportsCliFlags(scriptPath) {
17
+ try {
18
+ const script = readFileSync(scriptPath, 'utf-8');
19
+ return script.includes('--cli') && script.includes('--review-cli');
20
+ }
21
+ catch {
22
+ return false;
23
+ }
24
+ }
25
+ function getModelDisplayLabel(modelOverride, codingCli, reviewCli, config) {
26
+ if (modelOverride)
27
+ return modelOverride;
28
+ if (codingCli === 'codex' && reviewCli === 'codex')
29
+ return DEFAULT_CODEX_LOOP_MODEL;
30
+ if (codingCli === 'claude' && reviewCli === 'claude')
31
+ return config.loop.defaultModel;
32
+ return `${config.loop.defaultModel} (claude) / ${DEFAULT_CODEX_LOOP_MODEL} (codex)`;
33
+ }
11
34
  /**
12
35
  * Find the feature-loop.sh script
13
36
  * Checks: 1) .ralph/scripts/ 2) ralph/ (parent ralph repo)
@@ -120,6 +143,26 @@ export async function runCommand(feature, options = {}) {
120
143
  if (options.model) {
121
144
  args.push('--model', options.model);
122
145
  }
146
+ // Resolve and validate coding CLI
147
+ const codingCli = options.cli ?? config.loop.codingCli ?? 'claude';
148
+ if (!isSupportedLoopCli(codingCli)) {
149
+ logger.error(`Invalid CLI '${codingCli}'. Allowed values are 'claude' or 'codex'.`);
150
+ process.exit(1);
151
+ }
152
+ // Resolve and validate review CLI
153
+ const reviewCli = options.reviewCli ?? config.loop.reviewCli ?? codingCli;
154
+ if (!isSupportedLoopCli(reviewCli)) {
155
+ logger.error(`Invalid review CLI '${reviewCli}'. Allowed values are 'claude' or 'codex'.`);
156
+ process.exit(1);
157
+ }
158
+ // Guard against stale generated scripts that don't support CLI flags.
159
+ if ((codingCli !== 'claude' || reviewCli !== 'claude') && !scriptSupportsCliFlags(scriptPath)) {
160
+ logger.error('The current feature-loop.sh does not support --cli/--review-cli flags.');
161
+ logger.info('Regenerate scripts with "wiggum init" (or re-run /init), then retry.');
162
+ process.exit(1);
163
+ }
164
+ args.push('--cli', codingCli);
165
+ args.push('--review-cli', reviewCli);
123
166
  // Resolve and validate reviewMode
124
167
  const reviewMode = options.reviewMode ?? config.loop.reviewMode ?? 'manual';
125
168
  if (reviewMode !== 'manual' && reviewMode !== 'auto' && reviewMode !== 'merge') {
@@ -133,7 +176,9 @@ export async function runCommand(feature, options = {}) {
133
176
  console.log(` Spec: ${specFile ?? '(on feature branch)'}`);
134
177
  console.log(` Max Iterations: ${maxIterations}`);
135
178
  console.log(` Max E2E Attempts: ${maxE2eAttempts}`);
136
- console.log(` Model: ${options.model || config.loop.defaultModel}`);
179
+ console.log(` Model: ${getModelDisplayLabel(options.model, codingCli, reviewCli, config)}`);
180
+ console.log(` Implementation CLI: ${codingCli}`);
181
+ console.log(` Review CLI: ${reviewCli}`);
137
182
  console.log(` Review Mode: ${reviewMode}`);
138
183
  console.log(` Worktree: ${options.worktree ? 'enabled' : 'disabled'}`);
139
184
  console.log(` Resume: ${options.resume ? 'enabled' : 'disabled'}`);
@@ -18,7 +18,7 @@ export async function syncProjectContext(projectRoot) {
18
18
  // Detect provider
19
19
  const provider = getAvailableProvider();
20
20
  if (!provider) {
21
- throw new Error('No AI provider available. Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or OPENROUTER_API_KEY.');
21
+ throw new Error('No AI provider available. Configure a supported provider API key and try again.');
22
22
  }
23
23
  // Resolve model
24
24
  const recommendedModel = AVAILABLE_MODELS[provider].find((m) => m.hint?.includes('recommended'));
@@ -59,7 +59,7 @@ export async function syncCommand() {
59
59
  contextPath = await syncProjectContext(process.cwd());
60
60
  }
61
61
  catch (err) {
62
- console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
62
+ logger.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
63
63
  process.exit(1);
64
64
  return; // unreachable, but satisfies TS control flow
65
65
  }
@@ -3,13 +3,17 @@
3
3
  * Generates ralph.config.cjs file from scan results
4
4
  */
5
5
  import { extractVariables } from './templates.js';
6
+ const DEFAULT_CODEX_MODEL = 'gpt-5.3-codex';
6
7
  /**
7
8
  * Generate ralph config object from scan result
8
9
  */
9
10
  export function generateConfig(scanResult, customVars = {}) {
10
11
  const vars = extractVariables(scanResult, customVars);
11
- const defaultModel = customVars.defaultModel || 'sonnet';
12
- const planningModel = customVars.planningModel || 'opus';
12
+ const codingCli = customVars.codingCli === 'codex' ? 'codex' : 'claude';
13
+ const reviewCli = customVars.reviewCli === 'codex' ? 'codex' : codingCli;
14
+ const codexEverywhere = codingCli === 'codex' && reviewCli === 'codex';
15
+ const defaultModel = customVars.defaultModel || (codexEverywhere ? 'gpt-5.3-codex' : 'sonnet');
16
+ const planningModel = customVars.planningModel || (codexEverywhere ? 'gpt-5.3-codex' : 'opus');
13
17
  const agentProvider = customVars.agentProvider || 'anthropic';
14
18
  const agentModel = customVars.agentModel || 'claude-sonnet-4-6';
15
19
  return {
@@ -48,7 +52,14 @@ export function generateConfig(scanResult, customVars = {}) {
48
52
  maxE2eAttempts: 5,
49
53
  defaultModel,
50
54
  planningModel,
55
+ codexModel: DEFAULT_CODEX_MODEL,
56
+ codingCli,
57
+ reviewCli,
51
58
  reviewMode: 'manual',
59
+ claudePermissionMode: 'default',
60
+ codexSandbox: 'workspace-write',
61
+ codexApprovalPolicy: 'never',
62
+ disableMcpInAutomatedRuns: true,
52
63
  },
53
64
  agent: {
54
65
  defaultProvider: agentProvider,
package/dist/index.js CHANGED
@@ -38,6 +38,8 @@ export function parseCliArgs(argv) {
38
38
  // Flags that consume the next argument as their value
39
39
  const valueFlagSet = new Set([
40
40
  '--model',
41
+ '--cli',
42
+ '--review-cli',
41
43
  '--max-iterations',
42
44
  '--max-e2e-attempts',
43
45
  '--interval',
@@ -233,6 +235,8 @@ Options for run:
233
235
  --worktree Use git worktree isolation
234
236
  --resume Resume from last checkpoint
235
237
  --model <model> AI model to use
238
+ --cli <cli> Implementation CLI: claude, codex
239
+ --review-cli <cli> Review CLI: claude, codex
236
240
  --max-iterations <n> Maximum loop iterations
237
241
  --max-e2e-attempts <n> Maximum E2E test attempts
238
242
  --review-mode <mode> Review mode: manual, auto, merge
@@ -266,6 +270,7 @@ Options for agent:
266
270
  --review-mode <mode> Review mode: 'manual', 'auto', or 'merge' (default: manual)
267
271
  --dry-run Plan what would be done without executing
268
272
  --stream Stream output in real-time (default: wait for completion)
273
+ --diagnose-gh Run GitHub connectivity checks for agent backlog access
269
274
 
270
275
  In the TUI:
271
276
  /init Initialize or reconfigure project
@@ -338,13 +343,15 @@ Press Esc to cancel any operation.
338
343
  const feature = parsed.positionalArgs[0];
339
344
  if (!feature) {
340
345
  console.error('Error: <feature> is required for "run"');
341
- console.error('Usage: wiggum run <feature> [--worktree] [--resume] [--model <model>] [--max-iterations <n>] [--max-e2e-attempts <n>]');
346
+ console.error('Usage: wiggum run <feature> [--worktree] [--resume] [--model <model>] [--cli <claude|codex>] [--review-cli <claude|codex>] [--max-iterations <n>] [--max-e2e-attempts <n>]');
342
347
  process.exit(1);
343
348
  }
344
349
  const runOptions = {
345
350
  worktree: parsed.flags.worktree === true,
346
351
  resume: parsed.flags.resume === true,
347
352
  model: typeof parsed.flags.model === 'string' ? parsed.flags.model : undefined,
353
+ cli: typeof parsed.flags.cli === 'string' ? parsed.flags.cli : undefined,
354
+ reviewCli: typeof parsed.flags.reviewCli === 'string' ? parsed.flags.reviewCli : undefined,
348
355
  maxIterations: typeof parsed.flags.maxIterations === 'string' ? parseIntFlag(parsed.flags.maxIterations, '--max-iterations') : undefined,
349
356
  maxE2eAttempts: typeof parsed.flags.maxE2eAttempts === 'string' ? parseIntFlag(parsed.flags.maxE2eAttempts, '--max-e2e-attempts') : undefined,
350
357
  reviewMode: typeof parsed.flags.reviewMode === 'string' ? parsed.flags.reviewMode : undefined,
@@ -425,9 +432,10 @@ Press Esc to cancel any operation.
425
432
  reviewMode: reviewModeFlag,
426
433
  dryRun: parsed.flags.dryRun === true,
427
434
  stream: parsed.flags.stream === true,
435
+ diagnoseGh: parsed.flags.diagnoseGh === true,
428
436
  };
429
- if (agentOpts.stream === true) {
430
- // Explicit --stream: always headless
437
+ if (agentOpts.stream === true || agentOpts.diagnoseGh === true) {
438
+ // Explicit headless modes
431
439
  const { agentCommand } = await import('./commands/agent.js');
432
440
  await agentCommand(agentOpts);
433
441
  }
@@ -61,7 +61,7 @@ export declare const REPL_COMMANDS: {
61
61
  };
62
62
  readonly agent: {
63
63
  readonly description: "Start the autonomous backlog agent";
64
- readonly usage: "/agent [--dry-run] [--max-items <n>]";
64
+ readonly usage: "/agent [--dry-run] [--max-items <n>] [--max-steps <n>] [--review-mode manual|auto|merge] [--labels <l1,l2>] [--issues <n1,n2,...>]";
65
65
  readonly aliases: readonly ["a"];
66
66
  };
67
67
  readonly config: {
@@ -38,7 +38,7 @@ export const REPL_COMMANDS = {
38
38
  },
39
39
  agent: {
40
40
  description: 'Start the autonomous backlog agent',
41
- usage: '/agent [--dry-run] [--max-items <n>]',
41
+ usage: '/agent [--dry-run] [--max-items <n>] [--max-steps <n>] [--review-mode manual|auto|merge] [--labels <l1,l2>] [--issues <n1,n2,...>]',
42
42
  aliases: ['a'],
43
43
  },
44
44
  config: {
@@ -32,8 +32,15 @@ module.exports = {
32
32
  loop: {
33
33
  maxIterations: 10,
34
34
  maxE2eAttempts: 5,
35
- defaultModel: 'sonnet',
36
- planningModel: 'opus',
35
+ defaultModel: 'sonnet', // Claude implementation/e2e default
36
+ planningModel: 'opus', // Claude planning/review default
37
+ codexModel: 'gpt-5.3-codex', // Codex model default for codex phases
38
+ codingCli: 'claude', // Implementation CLI: 'claude' | 'codex'
39
+ reviewCli: 'claude', // Review CLI: 'claude' | 'codex'
37
40
  reviewMode: 'manual', // 'manual' = stop at PR, 'auto' = review (no merge), 'merge' = review + auto-merge
41
+ claudePermissionMode: 'default', // Claude permission mode: default|auto|dontAsk|acceptEdits|plan|bypassPermissions
42
+ codexSandbox: 'workspace-write', // Codex sandbox: read-only|workspace-write|danger-full-access
43
+ codexApprovalPolicy: 'never', // Codex approvals: untrusted|on-failure|on-request|never
44
+ disableMcpInAutomatedRuns: true, // Disable MCP servers when RALPH_AUTOMATED=1
38
45
  },
39
46
  };
@@ -28,8 +28,14 @@ Check the bridge is running:
28
28
  ```bash
29
29
  curl -s http://localhost:3999/health || (cd {{projectRoot}} && npm run e2e:bridge &)
30
30
  sleep 3
31
+ curl -s http://localhost:3999/health
31
32
  ```
32
33
 
34
+ If bridge health still fails (for example `listen EPERM` / socket permission errors), do not keep retrying commands in this run:
35
+ - Mark each unchecked `- [ ] E2E:` scenario as `- [ ] E2E: ... - FAILED: bridge unavailable in sandbox`
36
+ - Add the concrete error text under each failed scenario
37
+ - Continue to Step 4 and record a clear blocked summary
38
+
33
39
  ### Step 2: Parse E2E Test Scenarios
34
40
  Read E2E test scenarios from @.ralph/specs/$FEATURE-implementation-plan.md.
35
41
  Each scenario is marked with `- [ ] E2E:` prefix and follows this format:
@@ -88,32 +94,27 @@ CONTENT=$(agent-browser eval "document.getElementById('terminal-mirror').textCon
88
94
  # Verify CONTENT contains expected strings
89
95
  ```
90
96
 
91
- 6. **Reset between scenarios:**
97
+ 6. **Reset between scenarios (without ending the browser session):**
92
98
  ```bash
93
- agent-browser close
99
+ # Re-open the next scenario URL directly in the same session.
100
+ agent-browser open "http://localhost:3999?cmd=<next-command>&cwd=<next-path>"
94
101
  ```
95
102
 
96
103
  ### TUI Interaction Cheatsheet
97
104
 
98
105
  | Action | Command |
99
106
  |--------|---------|
100
- | Open TUI | `agent-browser open "http://localhost:3999?cmd=init&cwd=/path"` |
107
+ | Open TUI | `agent-browser open "http://localhost:3999?cmd=<cmd>&cwd=<path>"` |
101
108
  | Read screen | `agent-browser eval "document.getElementById('terminal-mirror').textContent"` |
102
- | Take snapshot | `agent-browser snapshot -i` |
103
- | Click element | `agent-browser click @ref` |
104
- | Type text | `agent-browser type @ref "text"` |
105
- | Press Enter | `agent-browser key Enter` |
106
- | Arrow down | `agent-browser key ArrowDown` |
107
- | Escape | `agent-browser key Escape` |
108
- | Screenshot | `agent-browser screenshot e2e-failure.png` |
109
- | Wait for text | `agent-browser wait --text "expected" --timeout 10000` |
109
+ | Type/key press | `agent-browser type @ref "text"` / `agent-browser key Enter` |
110
110
  | Close session | `agent-browser close` |
111
111
 
112
112
  ### Key Rules
113
113
  - Always wait for expected text before asserting (TUI renders async via React)
114
114
  - Use `agent-browser eval` with `terminal-mirror` for reliable text reading
115
115
  - Take screenshots on failures for debugging
116
- - Each scenario navigates to a fresh URL (clean state)
116
+ - Keep one browser session across all scenarios; do not call `agent-browser close` until all scenarios are done
117
+ - Each scenario should navigate to a fresh URL (clean state)
117
118
  - Wait 500ms after key presses before reading (Ink re-render delay)
118
119
 
119
120
  ### Step 4: Report Results
@@ -158,14 +159,10 @@ When all scenarios are executed:
158
159
  ```
159
160
  5. If all passed: signal ready for PR phase
160
161
  6. If any failed: failures documented, loop will retry after fix iteration
162
+ 7. Close browser once at the end: `agent-browser close`
161
163
 
162
164
  ## Learning Capture
163
- If E2E testing revealed issues worth remembering, append to @.ralph/LEARNINGS.md:
164
- - Flaky test patterns -> Add under "## Anti-Patterns" > "E2E Pitfalls"
165
- - TUI timing issues -> Add under "## Anti-Patterns"
166
- - Useful agent-browser techniques -> Add under "## Tool Usage"
167
-
168
- Format: `- [YYYY-MM-DD] [$FEATURE] Brief description`
165
+ If useful E2E patterns were found, append them to @.ralph/LEARNINGS.md
169
166
  {{else}}
170
167
  ## Task
171
168
  Execute automated E2E tests for the completed feature using Playwright MCP tools.
@@ -282,56 +279,6 @@ Update @.ralph/specs/$FEATURE-implementation-plan.md for each scenario:
282
279
  - Fix needed: [suggested action]
283
280
  ```
284
281
 
285
- ## Playwright MCP Tool Reference
286
-
287
- | Tool | Purpose | Key Parameters |
288
- |------|---------|----------------|
289
- | `browser_navigate` | Go to URL | `url` |
290
- | `browser_snapshot` | Get page state (use for assertions) | - |
291
- | `browser_click` | Click element | `element`, `ref` |
292
- | `browser_type` | Type into element | `element`, `ref`, `text`, `submit` |
293
- | `browser_fill_form` | Fill multiple fields | `fields[]` with name, type, ref, value |
294
- | `browser_select_option` | Select dropdown | `element`, `ref`, `values[]` |
295
- | `browser_wait_for` | Wait for text/time | `text`, `textGone`, `time` |
296
- | `browser_take_screenshot` | Capture visual state | `filename` (optional) |
297
- | `browser_console_messages` | Get JS console output | `level` (error/warning/info) |
298
- | `browser_press_key` | Press keyboard key | `key` (e.g., "Enter", "Tab") |
299
- | `browser_close` | Close browser/reset state | - |
300
-
301
- ## Assertion Patterns
302
-
303
- ### Text Verification
304
- ```
305
- 1. browser_snapshot -> get page content
306
- 2. Check snapshot output for expected text strings
307
- 3. If text not found, scenario fails
308
- ```
309
-
310
- ### Element State
311
- ```
312
- 1. browser_snapshot -> accessibility tree shows element states
313
- 2. Check for: enabled/disabled, checked/unchecked, visible
314
- ```
315
-
316
- ### URL Verification
317
- ```
318
- 1. After navigation/action, snapshot shows current URL
319
- 2. Verify URL contains expected path/params
320
- ```
321
-
322
- {{#if hasSupabase}}### Database State
323
- ```
324
- 1. mcp__plugin_supabase_supabase__execute_sql with SELECT query
325
- 2. Verify row count, column values match expectations
326
- ```
327
- {{/if}}
328
-
329
- ## Browser State Management
330
-
331
- - Use `browser_close` between unrelated scenarios to reset localStorage/cookies
332
- - Keep browser open for scenarios that test state persistence (e.g., duplicate submission)
333
- - Fresh browser state = clean localStorage, no prior submissions tracked
334
-
335
282
  ## Error Recovery
336
283
 
337
284
  If a scenario fails:
@@ -358,21 +305,6 @@ When all scenarios are executed:
358
305
  5. If all passed: signal ready for PR phase
359
306
  6. If any failed: failures documented, loop will retry after fix iteration
360
307
 
361
- ## Troubleshooting
362
-
363
- ### UI Changes Not Visible
364
- If code changes don't appear in the browser:
365
- 1. Stop the dev server
366
- 2. Clear cache: `rm -rf {{appDir}}/.next`
367
- 3. Restart: `cd {{appDir}} && {{devCommand}}`
368
- 4. Wait for full rebuild before testing
369
-
370
- ### Stale Data
371
- - Clear browser storage: Use `browser_close` between scenarios
372
- {{#if hasSupabase}}- Check Supabase for stale test data from previous runs
373
- - Delete test data: `DELETE FROM table WHERE data->>'_test' = 'true'`
374
- {{/if}}
375
-
376
308
  ## Rules
377
309
  - Always get a fresh `browser_snapshot` after actions before making assertions
378
310
  - Use `browser_wait_for` when waiting for async operations (form submission, API calls)
@@ -382,10 +314,5 @@ If code changes don't appear in the browser:
382
314
  - Document failures clearly so fix iteration knows what to address
383
315
 
384
316
  ## Learning Capture
385
- If E2E testing revealed issues worth remembering, append to @.ralph/LEARNINGS.md:
386
- - Flaky test patterns -> Add under "## Anti-Patterns" > "E2E Pitfalls"
387
- - Useful Playwright techniques -> Add under "## Tool Usage"
388
- - Timing issues or race conditions -> Add under "## Anti-Patterns"
389
-
390
- Format: `- [YYYY-MM-DD] [$FEATURE] Brief description`
317
+ If useful E2E patterns were found, append them to @.ralph/LEARNINGS.md
391
318
  {{/if}}
@@ -0,0 +1,55 @@
1
+ ## Context
2
+ If @.ralph/guides/AGENTS.md exists, study it for commands and patterns.
3
+ Study @.ralph/specs/$FEATURE-implementation-plan.md for E2E failure details.
4
+ Study @.ralph/specs/$FEATURE.md for behavioral constraints and acceptance criteria.
5
+ {{#if frameworkVariant}}For detailed architecture, see @{{appDir}}/.claude/CLAUDE.md{{/if}}
6
+
7
+ ## Learnings
8
+ Read @.ralph/LEARNINGS.md — pay attention to the "E2E Pitfalls" section to avoid repeating known issues.
9
+
10
+ ## Task
11
+ Fix all E2E test scenarios marked `- [ ] E2E: ... - FAILED` in @.ralph/specs/$FEATURE-implementation-plan.md.
12
+
13
+ 1. Read the implementation plan and identify every line matching `- [ ] E2E: ... - FAILED`.
14
+ 2. For each failure, read the `Error:` and `Fix needed:` fields to understand what went wrong.
15
+ 3. Cross-reference @.ralph/specs/$FEATURE.md to ensure fixes respect behavioral constraints.
16
+ 4. Apply the minimal code change needed to make the failing scenario pass.
17
+ 5. Do NOT touch passing scenarios or non-E2E implementation code unless required by the fix.
18
+
19
+ {{#if isTui}}
20
+ ### TUI E2E Fix Notes
21
+ - Fixes typically involve Ink component state, key handling, or terminal output formatting.
22
+ - Verify the fix through the agent-browser bridge; first run `curl -s http://localhost:3999/health` to confirm the bridge is running.
23
+ - After fixing, re-run the failed scenario manually via agent-browser to confirm it passes before updating the plan.
24
+ - If bridge/agent-browser cannot start due to sandbox socket restrictions (e.g. `listen EPERM`, daemon socket startup failure), treat scenarios as blocked infrastructure:
25
+ - Keep each affected scenario as `- [ ] E2E: ... - FAILED: bridge unavailable in sandbox`
26
+ - Record exact error output
27
+ - Do not loop on repeated identical retries in this run
28
+ {{else}}
29
+ ### Web E2E Fix Notes
30
+ - Fixes typically involve DOM structure, async timing, or data state issues.
31
+ - Verify fix with the dev server running at http://localhost:3000.
32
+ - After fixing, re-run the failed scenario via Playwright MCP to confirm it passes before updating the plan.
33
+ {{/if}}
34
+
35
+ ## Validation
36
+ After applying fixes, run ONLY the E2E validation command:
37
+ ```bash
38
+ cd {{appDir}} && {{testCommand}}
39
+ ```
40
+
41
+ Unit tests and build are already passing — do not re-run lint, typecheck, or build unless the fix touches non-E2E source files.
42
+
43
+ ## Completion
44
+ When fixes are applied and validation passes:
45
+ 1. Update @.ralph/specs/$FEATURE-implementation-plan.md — for each fixed scenario, change `- [ ] E2E: ... - FAILED` to `- [x] E2E: ... - PASSED`.
46
+ 2. `git -C {{appDir}} add -A`
47
+ 3. `git -C {{appDir}} commit -m "fix($FEATURE): resolve failing E2E scenarios"`
48
+ 4. `git -C {{appDir}} push origin feat/$FEATURE`
49
+
50
+ ## Learning Capture
51
+ If this fix iteration revealed E2E-specific patterns worth remembering, append to @.ralph/LEARNINGS.md:
52
+ - Flaky test patterns or timing issues -> Add under "## Anti-Patterns (What to Avoid)" > "E2E Pitfalls"
53
+ - Useful debugging techniques -> Add under "## Tool Usage"
54
+
55
+ Format: `- [YYYY-MM-DD] [$FEATURE] Brief description`
@@ -14,106 +14,20 @@ Study @.ralph/specs/$FEATURE.md for feature specification.
14
14
  3. Create @.ralph/specs/$FEATURE-implementation-plan.md with tasks
15
15
 
16
16
  ## Implementation Plan Format
17
- ```markdown
18
- # $FEATURE Implementation Plan
19
-
20
- **Spec:** .ralph/specs/$FEATURE.md
21
- **Branch:** feat/$FEATURE
22
- **Status:** Planning | In Progress | PR Review | Completed
23
-
24
- ## Tasks
25
-
26
- ### Phase 1: Setup
27
- - [ ] Task 1 - [complexity: S/M/L]
28
- - [ ] Task 2
29
-
30
- ### Phase 2: Core Implementation
31
- - [ ] Task 3
32
- - [ ] Task 4
33
-
34
- ### Phase 3: Tests (Unit/Integration)
35
- - [ ] Write tests for [component]
36
- - [ ] Write tests for [feature]
37
-
38
- ### Phase 4: Polish & Design
39
- - [ ] Run design checklist from @.ralph/guides/FRONTEND.md (if UI changes)
40
- - [ ] Verify responsive design (mobile/tablet/desktop)
41
- - [ ] Add loading/empty/error states
42
- - [ ] Verify hover/focus states on interactive elements
43
- {{#if styling}}- [ ] For charts: use ChartContainer pattern with tooltips + legends{{/if}}
44
- - [ ] Task N (additional polish)
45
-
46
- ### Phase 5: E2E Testing
47
- {{#if isTui}}
48
- TUI E2E tests executed via xterm.js bridge + agent-browser.
49
- Fixture projects in `e2e/fixtures/`. Bridge at `http://localhost:3999`.
50
-
51
- - [ ] E2E: [Scenario name] - [brief description]
52
- - **Command:** [wiggum command, e.g., init, new auth-flow]
53
- - **CWD:** [working directory, e.g., e2e/fixtures/bare-project]
54
- - **Steps:**
55
- 1. [Action] -> [expected terminal output]
56
- 2. [Action] -> [expected terminal output]
57
- - **Verify:** [text that should appear in terminal]
58
-
59
- Example TUI E2E scenario:
60
- - [ ] E2E: Init in bare project - happy path
61
- - **Command:** init
62
- - **CWD:** e2e/fixtures/bare-project
63
- - **Steps:**
64
- 1. Open bridge with init command -> Welcome screen renders
65
- 2. Arrow down to select option -> Option highlighted
66
- 3. Press Enter -> Next screen appears
67
- - **Verify:** "initialized" text visible in terminal
68
- {{else}}
69
- Browser-based tests executed via Playwright MCP tools.
70
-
71
- - [ ] E2E: [Scenario name] - [brief description]
72
- - **URL:** [starting URL, use {placeholders} for dynamic IDs]
73
- - **Preconditions:** [setup requirements, e.g., "Survey must be published"]
74
- - **Steps:**
75
- 1. [Action] -> [expected result]
76
- 2. [Action] -> [expected result]
77
- - **Verify:** [final assertion text to check]
78
- - **Database check:** [optional SQL query to verify data]
79
-
80
- Example E2E scenario:
81
- - [ ] E2E: Submit survey response - happy path
82
- - **URL:** /survey/{surveyId}?utm_source=e2e_test
83
- - **Preconditions:** Published survey with required questions
84
- - **Steps:**
85
- 1. Navigate to survey URL -> Form displays with all questions
86
- 2. Fill required text field -> No validation error
87
- 3. Select rating 4 -> Button shows selected state
88
- 4. Click "Submit Survey" -> Loading state appears
89
- 5. Wait for "Thank You!" -> Success card displays
90
- - **Verify:** "successfully submitted" text visible
91
- - **Database check:** SELECT * FROM survey_responses WHERE survey_id = '{surveyId}'
92
- {{/if}}
93
-
94
- ## Done
95
- - [x] Completed task - [commit hash]
96
- - [x] E2E: Scenario name - PASSED
97
- ```
17
+ Create `@.ralph/specs/$FEATURE-implementation-plan.md` with:
18
+ - **Header:** feature name, spec path (`.ralph/specs/$FEATURE.md`), branch (`feat/$FEATURE`), status
19
+ - **Phases:** Setup | Core Implementation | Tests (Unit/Integration) | Polish & Design | E2E Testing
20
+ - **Tasks:** `- [ ] Task description [S/M/L]` — every task MUST use `- [ ]` checkbox syntax
21
+ - **E2E tasks:** `- [ ] E2E: Scenario name` with required fields:
22
+ {{#if isTui}} - Command, CWD, Steps (action → expected terminal output), Verify text{{else}} - URL, Preconditions, Steps (action → expected result), Verify text, optional Database check{{/if}}
23
+ - **Done section:** `- [x] Completed task - [commit hash]`
98
24
 
99
25
  ## CRITICAL CONSTRAINT — PLANNING ONLY
100
- **You are in the PLANNING phase. Your ONLY job is to produce an implementation plan.**
101
- - Do NOT write any source code, test code, or configuration files
102
- - Do NOT create, modify, or touch any files outside `.ralph/specs/`
103
- - Do NOT run build, test, or lint commands
104
- - Do NOT make git commits
105
- - If you feel the urge to "just implement a small piece", STOP — that is a phase violation
106
- - The implementation phase runs AFTER this session ends, in a separate session
107
- - Violation of this constraint wastes tokens and breaks the harness automation
26
+ Planning only. Do NOT write code, tests, configs, or run builds/commits.
27
+ The implementation phase runs in a separate session after planning ends.
28
+ Violation wastes tokens and breaks harness automation.
108
29
 
109
30
  ## Rules
110
- - You MUST use `- [ ]` checkbox syntax for every task in the plan
111
- - Do NOT use heading-based task formats (e.g., `#### Task 1:`) for individual tasks
112
- - The harness parses `- [ ]` lines to track progress — other formats will break automation
113
- - Use `### Phase N:` headings only for phase grouping, not for individual tasks
114
- - One task = one commit-sized unit of work (but tasks can be grouped into phases for batch implementation)
31
+ - MUST use `- [ ]` checkbox syntax for every task — the harness parses this for progress tracking
32
+ - Use `### Phase N:` headings for phases; one task = one commit-sized unit of work
115
33
  - Every implementation task needs a corresponding test task
116
- - Use Supabase MCP to check existing schema
117
- - Use PostHog MCP to check existing analytics setup
118
- - For UI-heavy features (new pages, dashboards, analytics, marketing pages), reference @.ralph/guides/FRONTEND.md
119
- - Consider `/frontend-design` skill for features needing distinctive aesthetics