wiggum-cli 0.17.2 → 0.17.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/README.md +8 -2
  2. package/dist/agent/orchestrator.d.ts +1 -1
  3. package/dist/agent/orchestrator.js +19 -4
  4. package/dist/agent/tools/backlog.js +8 -4
  5. package/dist/agent/tools/execution.js +1 -1
  6. package/dist/agent/tools/introspection.js +26 -4
  7. package/dist/commands/config.js +96 -2
  8. package/dist/commands/run.d.ts +2 -0
  9. package/dist/commands/run.js +47 -2
  10. package/dist/generator/config.js +13 -2
  11. package/dist/index.js +7 -1
  12. package/dist/repl/command-parser.d.ts +1 -1
  13. package/dist/repl/command-parser.js +1 -1
  14. package/dist/templates/config/ralph.config.cjs.tmpl +9 -2
  15. package/dist/templates/prompts/PROMPT_e2e.md.tmpl +16 -89
  16. package/dist/templates/prompts/PROMPT_e2e_fix.md.tmpl +55 -0
  17. package/dist/templates/prompts/PROMPT_feature.md.tmpl +12 -98
  18. package/dist/templates/prompts/PROMPT_review_auto.md.tmpl +52 -49
  19. package/dist/templates/prompts/PROMPT_review_manual.md.tmpl +30 -2
  20. package/dist/templates/prompts/PROMPT_review_merge.md.tmpl +59 -69
  21. package/dist/templates/prompts/PROMPT_verify.md.tmpl +7 -0
  22. package/dist/templates/root/README.md.tmpl +2 -3
  23. package/dist/templates/scripts/feature-loop.sh.tmpl +777 -90
  24. package/dist/templates/scripts/loop.sh.tmpl +5 -1
  25. package/dist/templates/scripts/ralph-monitor.sh.tmpl +0 -2
  26. package/dist/tui/app.d.ts +5 -1
  27. package/dist/tui/app.js +12 -2
  28. package/dist/tui/hooks/useAgentOrchestrator.js +16 -7
  29. package/dist/tui/hooks/useInit.d.ts +5 -1
  30. package/dist/tui/hooks/useInit.js +20 -2
  31. package/dist/tui/screens/InitScreen.js +12 -1
  32. package/dist/tui/screens/MainShell.js +70 -6
  33. package/dist/tui/screens/RunScreen.d.ts +6 -2
  34. package/dist/tui/screens/RunScreen.js +48 -6
  35. package/dist/tui/utils/loop-status.d.ts +15 -0
  36. package/dist/tui/utils/loop-status.js +89 -27
  37. package/dist/utils/config.d.ts +7 -0
  38. package/dist/utils/config.js +14 -0
  39. package/package.json +1 -1
  40. package/src/templates/config/ralph.config.cjs.tmpl +9 -2
  41. package/src/templates/prompts/PROMPT_e2e.md.tmpl +16 -89
  42. package/src/templates/prompts/PROMPT_e2e_fix.md.tmpl +55 -0
  43. package/src/templates/prompts/PROMPT_feature.md.tmpl +12 -98
  44. package/src/templates/prompts/PROMPT_review_auto.md.tmpl +52 -49
  45. package/src/templates/prompts/PROMPT_review_manual.md.tmpl +30 -2
  46. package/src/templates/prompts/PROMPT_review_merge.md.tmpl +59 -69
  47. package/src/templates/prompts/PROMPT_verify.md.tmpl +7 -0
  48. package/src/templates/root/README.md.tmpl +2 -3
  49. package/src/templates/scripts/feature-loop.sh.tmpl +777 -90
  50. package/src/templates/scripts/loop.sh.tmpl +5 -1
  51. package/src/templates/scripts/ralph-monitor.sh.tmpl +0 -2
package/README.md CHANGED
@@ -231,13 +231,19 @@ Run the autonomous development loop.
231
231
  |------|-------------|
232
232
  | `--worktree` | Git worktree isolation (parallel features) |
233
233
  | `--resume` | Resume an interrupted loop |
234
- | `--model <model>` | Claude model (`opus`, `sonnet`) |
234
+ | `--model <model>` | Model id override (applied per CLI; Codex defaults to `gpt-5.3-codex`) |
235
+ | `--cli <cli>` | Implementation CLI: `claude` or `codex` |
236
+ | `--review-cli <cli>` | Review CLI: `claude` or `codex` |
235
237
  | `--max-iterations <n>` | Max iterations (default: 10) |
236
238
  | `--max-e2e-attempts <n>` | Max E2E retries (default: 5) |
237
239
  | `--review-mode <mode>` | `manual` (stop at PR), `auto` (review, no merge), or `merge` (review + merge). Default: `manual` |
238
240
 
239
241
  </details>
240
242
 
243
+ For loop models:
244
+ - Claude CLI phases use `defaultModel` / `planningModel` (defaults: `sonnet` / `opus`).
245
+ - Codex CLI phases default to `gpt-5.3-codex` across all phases.
246
+
241
247
  <details>
242
248
  <summary><code>wiggum monitor &lt;feature&gt; [options]</code></summary>
243
249
 
@@ -304,7 +310,7 @@ Keys are stored in `.ralph/.env.local` and never leave your machine.
304
310
  - **Node.js** >= 18.0.0
305
311
  - **Git** (for worktree features)
306
312
  - An AI provider API key (Anthropic, OpenAI, or OpenRouter)
307
- - [Claude Code](https://docs.anthropic.com/en/docs/claude-code) or another coding agent (for `wiggum run`)
313
+ - A supported coding CLI for loop execution: [Claude Code](https://docs.anthropic.com/en/docs/claude-code) and/or [Codex CLI](https://github.com/openai/codex)
308
314
 
309
315
  ---
310
316
 
package/dist/agent/orchestrator.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import { ToolLoopAgent } from 'ai';
2
2
  import type { AgentConfig } from './types.js';
3
- export declare const AGENT_SYSTEM_PROMPT = "You are wiggum's autonomous development agent. You work through the GitHub issue backlog, shipping features one at a time.\n\n## Workflow\n\n1. Read memory to recall previous work and context\n - Use listStrategicDocs to see available project documentation\n - Use readStrategicDoc to read full documents relevant to the current task (architecture, design, implementation plans)\n2. List open issues and cross-reference with memory\n - Consider: PM priority labels (P0 > P1 > P2), dependencies, strategic context\n - **Housekeeping:** If memory says an issue was already completed (outcome \"success\" or \"skipped\") but it's still open:\n 1. Call assessFeatureState with the featureName and issueNumber\n 2. If recommendation is \"pr_merged\" or \"linked_pr_merged\": close it with closeIssue. Reflect with outcome \"skipped\". Does NOT count against maxItems.\n 3. If recommendation is anything else (e.g., \"resume_implementation\", \"start_fresh\", \"resume_pr_phase\"): the issue was NOT actually shipped. Do NOT close it. Instead, prioritize it as your next work item and follow the Feature State Decision Tree. This counts against maxItems.\n - **Retry:** If memory records a previous attempt at an issue with outcome \"failure\" or \"partial\", and it's still open, prioritize it over new issues. Bugs that caused the failure may have been fixed, and existing work (branch, spec, plan) should not be abandoned. Call assessFeatureState to determine the right action \u2014 usually resume_implementation. This counts against maxItems.\n3. For the chosen issue (one NOT already completed):\n a. Read the full issue details\n b. Derive a featureName from the issue title (lowercase, hyphens, no spaces)\n c. **Assess feature state** using assessFeatureState \u2014 MANDATORY before any action\n d. Follow the Feature State Decision Tree based on the recommendation field\n e. Monitor progress with checkLoopStatus and readLoopLog\n f. 
Report results by commenting on the issue\n\n## Feature State Decision Tree\n\nAfter calling assessFeatureState, follow the recommendation:\n\n| recommendation | action |\n|---|---|\n| start_fresh | generateSpec \u2192 runLoop (fresh) |\n| generate_plan | runLoop without resume (spec exists, needs planning) |\n| resume_implementation | runLoop with resume: true (plan has pending tasks) |\n| resume_pr_phase | runLoop with resume: true (all tasks done, needs PR) |\n| pr_exists_open | Comment on issue, do NOT re-run loop |\n| pr_merged | Verify PR is merged, close issue with closeIssue, reflect with outcome \"skipped\", move on |\n| pr_closed | Decide: restart from scratch or skip |\n| linked_pr_merged | Verify the linked PR is merged, close issue with closeIssue (comment \"shipped via PR #N\"), reflect with outcome \"skipped\", move on |\n| linked_pr_open | Work in progress under a different branch \u2014 comment \"in progress via PR #N\", do NOT re-run loop |\n\n**Critical:**\n- When recommendation is resume_implementation or resume_pr_phase, you MUST pass resume: true to runLoop\n- When recommendation is generate_plan, do NOT pass resume (fresh branch needed)\n- When recommendation is start_fresh, generate a spec first, then run the loop without resume\n- ALWAYS pass issueNumber to assessFeatureState so it can detect work shipped under a different branch name\n- Derive short, stable feature names (2-4 words, kebab-case) from the issue title \u2014 e.g. \"config-module\" not \"config-module-toml-read-write-with-secret-masking\"\n4. After the loop completes (successfully or with failure) \u2014 MANDATORY for EVERY issue, including subsequent ones:\n a. Call readLoopLog to get the actual log content\n b. Call assessFeatureState to check the actual state \u2014 do NOT rely solely on loop log output\n c. **Blocker detection (MANDATORY):** Scan the log for pre-existing test failures (lines like \"All N test failure(s) are pre-existing\"). If found:\n 1. 
Call listIssues with labels [\"bug\"] to check for existing bug issues covering these failures\n 2. If no existing issue covers them, you MUST call createIssue with title \"Fix N pre-existing test failures\", body listing the failing files, and labels [\"bug\"]. If a \"P0\" label exists on the repo you may add it; if not, just use [\"bug\"].\n 3. Do NOT skip this step just because the loop succeeded \u2014 pre-existing failures degrade CI and must be tracked\n d. Only close the issue if assessFeatureState confirms a PR was merged (recommendation: \"pr_merged\" or \"linked_pr_merged\")\n e. When closing: check off acceptance criteria with checkAllBoxes, then close with closeIssue\n f. If the loop produced code but no PR was created/merged, run the loop again with resume: true to trigger the PR phase\n g. If the loop failed and code exists on the branch without a PR, this is incomplete work \u2014 do NOT close the issue\n h. Steps 4\u20136 are MANDATORY after every runLoop \u2014 including the 2nd, 3rd, etc. issue. Do NOT summarize or stop after runLoop returns. The next tool call must be readLoopLog.\n5. Reflect on the outcome:\n - Call reflectOnWork with structured observations\n - Use outcome \"skipped\" for issues that were already complete (no real work done) \u2014 these do NOT count against maxItems\n - Use outcome \"success\"/\"partial\"/\"failure\" for issues where real work was performed\n - Note what worked, what failed, any patterns discovered\n6. Continue to next issue \u2014 MANDATORY tool call sequence:\n a. Call listIssues (with NO label filter) to get the full backlog\n b. Cross-reference with memory to avoid re-doing completed work\n c. If actionable issues remain and no stop condition is met, immediately call assessFeatureState for the next priority issue \u2014 do NOT generate text\n d. When assessFeatureState returns, follow the Feature State Decision Tree (step 3d) for that issue \u2014 e.g. start_fresh \u2192 generateSpec \u2192 runLoop. 
This begins a full new work cycle (steps 3\u20136). Do NOT stop after assessFeatureState.\n e. Only produce a text-only response (final summary) when the backlog is empty or a stop condition is met\n f. ANY text without a tool call terminates the session \u2014 there is no \"ask for permission\" step\n\n## Model forwarding\n\nWhen calling generateSpec, ALWAYS forward the model and provider so the spec generation uses the same AI model as this agent session. The values are provided in the Runtime Config section below.\n\nDo NOT forward model/provider to runLoop \u2014 the development loop uses Claude Code internally, which has its own model configuration (opus for planning, sonnet for implementation). Passing a non-Claude model would break the loop.\n\nWhen calling runLoop, pass the reviewMode from the Runtime Config below (if configured). This controls how the loop handles the PR phase:\n- 'manual': stop at PR creation (default)\n- 'auto': create PR + run automated review (no merge)\n- 'merge': create PR + review + merge if approved\n\n## Prioritization\n\nUse hybrid reasoning: respect PM labels (P0 > P1 > P2) but apply your own judgment for ordering within the same priority tier.\n\n**Ordering rules (in priority order):**\n1. PM priority labels: P0 > P1 > P2 > unlabeled\n2. Explicit dependencies: if readIssue returns a `dependsOn` array (parsed from \"depends on #N\" / \"blocked by #N\" in the issue body), complete those issues first\n3. Lower-numbered issues first: within the same priority tier, prefer lower issue numbers \u2014 they are typically more foundational (scaffolding, setup, core infrastructure)\n4. Prefer issues with existing branches: if assessFeatureState shows a branch exists with commits ahead, prefer that issue over one without a branch \u2014 existing branches diverge further from main with every merge, increasing conflict risk\n5. 
Strategic context from memory and what you learned from previous iterations\n\n## When to stop\n\nStop the loop when:\n- Backlog has no more actionable open issues\n- You've completed the maximum number of items (if configured)\n- A critical failure requires human attention\n- The user has signaled to stop\n\nIMPORTANT: Generating text without tool calls terminates the session immediately. After completing an issue, you MUST call listIssues (step 6) \u2014 never ask \"should I continue?\" or summarize before checking. After assessFeatureState returns for the next issue, you MUST follow the Feature State Decision Tree and call the next tool (e.g. generateSpec for start_fresh). Stopping after assessFeatureState is a bug \u2014 the result tells you what to do next. After runLoop returns, you MUST execute steps 4\u20136 (readLoopLog \u2192 assessFeatureState \u2192 close/comment \u2192 reflectOnWork \u2192 listIssues). Stopping after runLoop is a bug \u2014 there is always post-loop work to do. Your only text-only response is the final summary when ALL issues are processed or a stop condition is met.\n\n## Learning\n\nAfter each issue, always call reflectOnWork. Your memory entries make you progressively better at this specific codebase. Be specific and narrative in what you record. Focus on: what patterns work here, what gotchas exist, which approaches produce better specs and fewer loop iterations.\n\n## Error recovery\n\nIf spec generation fails: retry once with simplified goals. If it fails again, skip the issue and comment explaining why.\nIf a loop fails:\n1. ALWAYS call readLoopLog to get the actual log content\n2. Your issue comment MUST quote or summarize what the log says \u2014 do NOT speculate or guess the cause\n3. Call assessFeatureState to check if a PR was merged despite the loop failure\n4. If assessFeatureState shows \"pr_merged\" or \"linked_pr_merged\" \u2192 close the issue (the work shipped)\n5. 
If assessFeatureState shows \"resume_pr_phase\" \u2192 the code exists but no PR was created. Run the loop again with resume: true to create and merge the PR. Do NOT close the issue yet.\n6. If the log says \"already complete\" but no PR is merged, the work is stranded on a branch \u2014 resume the loop to ship it\n7. If runLoop returns status \"already_complete\", verify with assessFeatureState before closing\n8. Reflect on what happened, then move to the next issue\nNever close an issue without verifying the code is merged to main. Loop log evidence alone is not sufficient.\n\n## Blocker detection (additional)\n\nBesides the mandatory check in step 4c, also create bug issues for systemic blockers you discover (broken CI, missing infrastructure, flaky tests). Always check with listIssues(labels: [\"bug\"]) before creating to avoid duplicates. After creating blocker issues, continue processing the backlog \u2014 never stop due to blockers alone.";
3
+ export declare const AGENT_SYSTEM_PROMPT = "You are wiggum's autonomous development agent. You work through the GitHub issue backlog, shipping features one at a time.\n\n## Workflow\n\n1. Read memory to recall previous work and context\n - Use listStrategicDocs to see available project documentation\n - Use readStrategicDoc to read full documents relevant to the current task (architecture, design, implementation plans)\n2. List open issues and cross-reference with memory\n - Consider: PM priority labels (P0 > P1 > P2), dependencies, strategic context\n - **Housekeeping:** If memory says an issue was already completed (outcome \"success\" or \"skipped\") but it's still open:\n 1. Call assessFeatureState with the featureName and issueNumber\n 2. If recommendation is \"pr_merged\" or \"linked_pr_merged\": close it with closeIssue. Reflect with outcome \"skipped\". Does NOT count against maxItems.\n 3. If recommendation is anything else (e.g., \"resume_implementation\", \"start_fresh\", \"resume_pr_phase\"): the issue was NOT actually shipped. Do NOT close it. Instead, prioritize it as your next work item and follow the Feature State Decision Tree. This counts against maxItems.\n - **Retry:** If memory records a previous attempt at an issue with outcome \"failure\" or \"partial\", and it's still open, prioritize it over new issues. Bugs that caused the failure may have been fixed, and existing work (branch, spec, plan) should not be abandoned. Call assessFeatureState to determine the right action \u2014 usually resume_implementation. This counts against maxItems.\n3. For the chosen issue (one NOT already completed):\n a. Read the full issue details\n b. Derive a featureName from the issue title (lowercase, hyphens, no spaces)\n c. **Assess feature state** using assessFeatureState \u2014 MANDATORY before any action\n d. Follow the Feature State Decision Tree based on the recommendation field\n e. Monitor progress with checkLoopStatus and readLoopLog\n f. 
Report results by commenting on the issue\n\n## Feature State Decision Tree\n\nAfter calling assessFeatureState, follow the recommendation:\n\n| recommendation | action |\n|---|---|\n| start_fresh | generateSpec \u2192 runLoop (fresh) |\n| generate_plan | runLoop without resume (spec exists, needs planning) |\n| resume_implementation | runLoop with resume: true (plan has pending tasks) |\n| resume_pr_phase | runLoop with resume: true (all tasks done, needs PR) |\n| pr_exists_open | Comment on issue, do NOT re-run loop |\n| pr_merged | Verify PR is merged, close issue with closeIssue, reflect with outcome \"skipped\", move on |\n| pr_closed | Decide: restart from scratch or skip |\n| linked_pr_merged | Verify the linked PR is merged, close issue with closeIssue (comment \"shipped via PR #N\"), reflect with outcome \"skipped\", move on |\n| linked_pr_open | Work in progress under a different branch \u2014 comment \"in progress via PR #N\", do NOT re-run loop |\n\n**Critical:**\n- When recommendation is resume_implementation or resume_pr_phase, you MUST pass resume: true to runLoop\n- When recommendation is generate_plan, do NOT pass resume (fresh branch needed)\n- When recommendation is start_fresh, generate a spec first, then run the loop without resume\n- ALWAYS pass issueNumber to assessFeatureState so it can detect work shipped under a different branch name\n- Derive short, stable feature names (2-4 words, kebab-case) from the issue title \u2014 e.g. \"config-module\" not \"config-module-toml-read-write-with-secret-masking\"\n4. After the loop completes (successfully or with failure) \u2014 MANDATORY for EVERY issue, including subsequent ones:\n a. Call readLoopLog to get the actual log content\n b. Call assessFeatureState to check the actual state \u2014 do NOT rely solely on loop log output\n c. **Blocker detection (MANDATORY):** Scan the log for pre-existing test failures (lines like \"All N test failure(s) are pre-existing\"). If found:\n 1. 
Call listIssues with labels [\"bug\"] to check for existing bug issues covering these failures\n 2. If no existing issue covers them, you MUST call createIssue with title \"Fix N pre-existing test failures\", body listing the failing files, and labels [\"bug\"]. If a \"P0\" label exists on the repo you may add it; if not, just use [\"bug\"].\n 3. Do NOT skip this step just because the loop succeeded \u2014 pre-existing failures degrade CI and must be tracked\n d. Only close the issue if assessFeatureState confirms a PR was merged (recommendation: \"pr_merged\" or \"linked_pr_merged\")\n e. When closing: check off acceptance criteria with checkAllBoxes, then close with closeIssue\n f. If the loop produced code but no PR was created/merged, run the loop again with resume: true to trigger the PR phase\n g. If the loop failed and code exists on the branch without a PR, this is incomplete work \u2014 do NOT close the issue\n h. Steps 4\u20136 are MANDATORY after every runLoop \u2014 including the 2nd, 3rd, etc. issue. Do NOT summarize or stop after runLoop returns. The next tool call must be readLoopLog.\n5. Reflect on the outcome:\n - Call reflectOnWork with structured observations\n - Use outcome \"skipped\" for issues that were already complete (no real work done) \u2014 these do NOT count against maxItems\n - Use outcome \"success\"/\"partial\"/\"failure\" for issues where real work was performed\n - Note what worked, what failed, any patterns discovered\n6. Continue to next issue \u2014 MANDATORY tool call sequence:\n a. Call listIssues (with NO label filter) to get the full backlog\n b. Cross-reference with memory to avoid re-doing completed work\n c. If actionable issues remain and no stop condition is met, immediately call assessFeatureState for the next priority issue \u2014 do NOT generate text\n d. When assessFeatureState returns, follow the Feature State Decision Tree (step 3d) for that issue \u2014 e.g. start_fresh \u2192 generateSpec \u2192 runLoop. 
This begins a full new work cycle (steps 3\u20136). Do NOT stop after assessFeatureState.\n e. Only produce a text-only response (final summary) when the backlog is empty or a stop condition is met\n f. ANY text without a tool call terminates the session \u2014 there is no \"ask for permission\" step\n\n## Model forwarding\n\nWhen calling generateSpec, ALWAYS forward the model and provider so the spec generation uses the same AI model as this agent session. The values are provided in the Runtime Config section below.\n\nDo NOT forward model/provider to runLoop \u2014 the development loop resolves its own coding/review CLI and model configuration from project config.\n\nWhen calling runLoop, pass the reviewMode from the Runtime Config below (if configured). This controls how the loop handles the PR phase:\n- 'manual': stop at PR creation (default)\n- 'auto': create PR + run automated review (no merge)\n- 'merge': create PR + review + merge if approved\n\n## Prioritization\n\nUse hybrid reasoning: respect PM labels (P0 > P1 > P2) but apply your own judgment for ordering within the same priority tier.\n\n**Ordering rules (in priority order):**\n1. PM priority labels: P0 > P1 > P2 > unlabeled\n2. Explicit dependencies: if readIssue returns a `dependsOn` array (parsed from \"depends on #N\" / \"blocked by #N\" in the issue body), complete those issues first\n3. Lower-numbered issues first: within the same priority tier, prefer lower issue numbers \u2014 they are typically more foundational (scaffolding, setup, core infrastructure)\n4. Prefer issues with existing branches: if assessFeatureState shows a branch exists with commits ahead, prefer that issue over one without a branch \u2014 existing branches diverge further from main with every merge, increasing conflict risk\n5. 
Strategic context from memory and what you learned from previous iterations\n\n## When to stop\n\nStop the loop when:\n- Backlog has no more actionable open issues\n- You've completed the maximum number of items (if configured)\n- A critical failure requires human attention\n- The user has signaled to stop\n\nIMPORTANT: Generating text without tool calls terminates the session immediately. After completing an issue, you MUST call listIssues (step 6) \u2014 never ask \"should I continue?\" or summarize before checking. After listIssues returns, scan the results for issues matching your constraints (if any). If actionable issues remain, immediately call assessFeatureState \u2014 do NOT generate a summary. After assessFeatureState returns for the next issue, you MUST follow the Feature State Decision Tree and call the next tool (e.g. generateSpec for start_fresh). Stopping after assessFeatureState is a bug \u2014 the result tells you what to do next. After runLoop returns, you MUST execute steps 4\u20136 (readLoopLog \u2192 assessFeatureState \u2192 close/comment \u2192 reflectOnWork \u2192 listIssues). Stopping after runLoop is a bug \u2014 there is always post-loop work to do. Your only text-only response is the final summary when ALL constrained issues are processed or a stop condition is met. If you were given specific issue numbers to work on, you MUST process ALL of them before stopping.\n\n## Learning\n\nAfter each issue, always call reflectOnWork. Your memory entries make you progressively better at this specific codebase. Be specific and narrative in what you record. Focus on: what patterns work here, what gotchas exist, which approaches produce better specs and fewer loop iterations.\n\n## Error recovery\n\nIf spec generation fails: retry once with simplified goals. If it fails again, skip the issue and comment explaining why.\nIf a loop fails:\n1. ALWAYS call readLoopLog to get the actual log content\n2. 
Your issue comment MUST quote or summarize what the log says \u2014 do NOT speculate or guess the cause\n3. Call assessFeatureState to check if a PR was merged despite the loop failure\n4. If assessFeatureState shows \"pr_merged\" or \"linked_pr_merged\" \u2192 close the issue (the work shipped)\n5. If assessFeatureState shows \"resume_pr_phase\" \u2192 the code exists but no PR was created. Run the loop again with resume: true to create and merge the PR. Do NOT close the issue yet.\n6. If the log says \"already complete\" but no PR is merged, the work is stranded on a branch \u2014 resume the loop to ship it\n7. If runLoop returns status \"already_complete\", verify with assessFeatureState before closing\n8. Reflect on what happened, then move to the next issue\nNever close an issue without verifying the code is merged to main. Loop log evidence alone is not sufficient.\n\n## Blocker detection (additional)\n\nBesides the mandatory check in step 4c, also create bug issues for systemic blockers you discover (broken CI, missing infrastructure, flaky tests). Always check with listIssues(labels: [\"bug\"]) before creating to avoid duplicates. After creating blocker issues, continue processing the backlog \u2014 never stop due to blockers alone.";
4
4
  export type AgentOrchestrator = ToolLoopAgent<never, any, any>;
5
5
  export declare function buildRuntimeConfig(config: AgentConfig): string;
6
6
  export declare function buildConstraints(config: AgentConfig): string;
package/dist/agent/orchestrator.js CHANGED
@@ -83,7 +83,7 @@ After calling assessFeatureState, follow the recommendation:
83
83
 
84
84
  When calling generateSpec, ALWAYS forward the model and provider so the spec generation uses the same AI model as this agent session. The values are provided in the Runtime Config section below.
85
85
 
86
- Do NOT forward model/provider to runLoop — the development loop uses Claude Code internally, which has its own model configuration (opus for planning, sonnet for implementation). Passing a non-Claude model would break the loop.
86
+ Do NOT forward model/provider to runLoop — the development loop resolves its own coding/review CLI and model configuration from project config.
87
87
 
88
88
  When calling runLoop, pass the reviewMode from the Runtime Config below (if configured). This controls how the loop handles the PR phase:
89
89
  - 'manual': stop at PR creation (default)
@@ -109,7 +109,7 @@ Stop the loop when:
109
109
  - A critical failure requires human attention
110
110
  - The user has signaled to stop
111
111
 
112
- IMPORTANT: Generating text without tool calls terminates the session immediately. After completing an issue, you MUST call listIssues (step 6) — never ask "should I continue?" or summarize before checking. After assessFeatureState returns for the next issue, you MUST follow the Feature State Decision Tree and call the next tool (e.g. generateSpec for start_fresh). Stopping after assessFeatureState is a bug — the result tells you what to do next. After runLoop returns, you MUST execute steps 4–6 (readLoopLog → assessFeatureState → close/comment → reflectOnWork → listIssues). Stopping after runLoop is a bug — there is always post-loop work to do. Your only text-only response is the final summary when ALL issues are processed or a stop condition is met.
112
+ IMPORTANT: Generating text without tool calls terminates the session immediately. After completing an issue, you MUST call listIssues (step 6) — never ask "should I continue?" or summarize before checking. After listIssues returns, scan the results for issues matching your constraints (if any). If actionable issues remain, immediately call assessFeatureState — do NOT generate a summary. After assessFeatureState returns for the next issue, you MUST follow the Feature State Decision Tree and call the next tool (e.g. generateSpec for start_fresh). Stopping after assessFeatureState is a bug — the result tells you what to do next. After runLoop returns, you MUST execute steps 4–6 (readLoopLog → assessFeatureState → close/comment → reflectOnWork → listIssues). Stopping after runLoop is a bug — there is always post-loop work to do. Your only text-only response is the final summary when ALL constrained issues are processed or a stop condition is met. If you were given specific issue numbers to work on, you MUST process ALL of them before stopping.
113
113
 
114
114
  ## Learning
115
115
 
@@ -153,7 +153,7 @@ export function buildConstraints(config) {
153
153
  lines.push(`- Only work on issues with these labels: ${config.labels.join(', ')}. Ignore all others.`);
154
154
  }
155
155
  if (config.issues?.length) {
156
- lines.push(`- ONLY work on these specific issues: ${config.issues.map(n => `#${n}`).join(', ')}. Ignore all others.`);
156
+ lines.push(`- ONLY work on these specific issues: ${config.issues.map(n => `#${n}`).join(', ')}. Ignore all others. You MUST process ALL of these issues before stopping — do not stop after the first one.`);
157
157
  }
158
158
  if (config.dryRun) {
159
159
  lines.push('- DRY RUN MODE: Plan what you would do but do NOT execute. Execution and reporting tools return simulated results.');
@@ -195,6 +195,7 @@ export function createAgentOrchestrator(config) {
195
195
  const runtimeConfig = buildRuntimeConfig(config);
196
196
  const fullPrompt = AGENT_SYSTEM_PROMPT + runtimeConfig + constraints;
197
197
  const completedIssues = new Set();
198
+ const issueNumberSet = config.issues?.length ? new Set(config.issues) : undefined;
198
199
  const maxSteps = config.maxSteps ?? 200;
199
200
  // Use traced ToolLoopAgent so Braintrust automatically captures
200
201
  // all LLM calls, tool executions, and agent steps.
@@ -258,9 +259,23 @@ export function createAgentOrchestrator(config) {
258
259
  }
259
260
  }
260
261
  }
262
+ // Filter listIssues results to configured issues before reaching the TUI.
263
+ // The tool's closure-based filter should handle this, but Braintrust's
264
+ // wrapAISDK Proxy chain can bypass tool closures in some edge cases.
265
+ const mappedResults = toolResults.map((tr) => {
266
+ const output = tr.output;
267
+ if (tr.toolName === 'listIssues' && issueNumberSet && output != null && typeof output === 'object') {
268
+ const raw = output;
269
+ if (Array.isArray(raw.issues)) {
270
+ const filtered = raw.issues.filter((i) => issueNumberSet.has(Number(i.number)));
271
+ return { toolName: tr.toolName, result: { ...raw, issues: filtered } };
272
+ }
273
+ }
274
+ return { toolName: tr.toolName, result: output };
275
+ });
261
276
  config.onStepUpdate?.({
262
277
  toolCalls: toolCalls.map((tc) => ({ toolName: tc.toolName, args: tc.input })),
263
- toolResults: toolResults.map((tr) => ({ toolName: tr.toolName, result: tr.output })),
278
+ toolResults: mappedResults,
264
279
  completedItems: completedIssues.size,
265
280
  });
266
281
  }
@@ -8,6 +8,9 @@ function extractDependencyHints(body) {
8
8
  return [...new Set(numbers)].sort((a, b) => a - b);
9
9
  }
10
10
  export function createBacklogTools(owner, repo, options = {}) {
11
+ const issueNumberSet = options.issueNumbers?.length
12
+ ? new Set(options.issueNumbers)
13
+ : undefined;
11
14
  const listIssues = tool({
12
15
  description: 'List open GitHub issues from the backlog, optionally filtered by labels or milestone.',
13
16
  inputSchema: zodSchema(z.object({
@@ -30,10 +33,11 @@ export function createBacklogTools(owner, repo, options = {}) {
30
33
  return { issues: [], error: result.error };
31
34
  // Sort by issue number ascending — lower numbers are typically more foundational
32
35
  const sorted = [...result.issues].sort((a, b) => a.number - b.number);
33
- const filtered = options.issueNumbers?.length
34
- ? sorted.filter(i => options.issueNumbers.includes(i.number))
35
- : sorted;
36
- return { issues: filtered };
36
+ if (issueNumberSet) {
37
+ const filtered = sorted.filter(i => issueNumberSet.has(Number(i.number)));
38
+ return { issues: filtered };
39
+ }
40
+ return { issues: sorted };
37
41
  },
38
42
  });
39
43
  const readIssue = tool({
@@ -106,7 +106,7 @@ export function createExecutionTools(projectRoot, options) {
106
106
  },
107
107
  });
108
108
  const runLoop = tool({
109
- description: 'Run the development loop for a feature. Spawns a background process and returns when complete. The loop uses Claude Code internally with its own model config do NOT forward the agent model here.',
109
+ description: 'Run the development loop for a feature. Spawns a background process and returns when complete. The loop uses the coding CLI configured in ralph.config.cjs; do NOT forward the agent model here.',
110
110
  inputSchema: zodSchema(z.object({
111
111
  featureName: FEATURE_NAME_SCHEMA,
112
112
  worktree: z.boolean().default(true).describe('Use git worktree isolation'),
@@ -1,9 +1,9 @@
1
1
  import { tool, zodSchema } from 'ai';
2
2
  import { z } from 'zod';
3
- import { existsSync } from 'node:fs';
4
- import { readFile } from 'node:fs/promises';
3
+ import { readFile, stat, open } from 'node:fs/promises';
5
4
  import { join } from 'node:path';
6
5
  import { FEATURE_NAME_SCHEMA } from './schemas.js';
6
+ const MAX_LOG_BYTES = 1_048_576; // 1 MB
7
7
  export function createIntrospectionTools(projectRoot) {
8
8
  const readLoopLog = tool({
9
9
  description: 'Read the stdout/stderr log of a development loop (running or completed).',
@@ -13,10 +13,32 @@ export function createIntrospectionTools(projectRoot) {
13
13
  })),
14
14
  execute: async ({ featureName, tailLines }) => {
15
15
  const logPath = join('/tmp', `ralph-loop-${featureName}.log`);
16
- if (!existsSync(logPath)) {
16
+ let fileSize;
17
+ try {
18
+ const fileStat = await stat(logPath);
19
+ fileSize = fileStat.size;
20
+ }
21
+ catch {
17
22
  return { error: `No log found at ${logPath} — verify featureName matches exactly what runLoop used` };
18
23
  }
19
- const content = await readFile(logPath, 'utf-8');
24
+ let content;
25
+ if (fileSize <= MAX_LOG_BYTES) {
26
+ content = await readFile(logPath, 'utf-8');
27
+ }
28
+ else {
29
+ // For large files, read only the last MAX_LOG_BYTES to bound memory usage.
30
+ // totalLines will reflect lines in the chunk, not the full file.
31
+ const offset = fileSize - MAX_LOG_BYTES;
32
+ const fd = await open(logPath, 'r');
33
+ try {
34
+ const buffer = Buffer.allocUnsafe(MAX_LOG_BYTES);
35
+ const { bytesRead } = await fd.read(buffer, 0, MAX_LOG_BYTES, offset);
36
+ content = buffer.subarray(0, bytesRead).toString('utf-8');
37
+ }
38
+ finally {
39
+ await fd.close();
40
+ }
41
+ }
20
42
  const allLines = content.split('\n');
21
43
  const lines = allLines.slice(-tailLines);
22
44
  return { lines, totalLines: allLines.length };
@@ -9,6 +9,7 @@ import { logger } from '../utils/logger.js';
9
9
  import { simpson } from '../utils/colors.js';
10
10
  import { getAvailableProvider, AVAILABLE_MODELS } from '../ai/providers.js';
11
11
  import { writeKeysToEnvFile } from '../utils/env.js';
12
+ import { loadConfigWithDefaults } from '../utils/config.js';
12
13
  /**
13
14
  * Supported services for API key configuration
14
15
  */
@@ -26,6 +27,11 @@ const CONFIGURABLE_SERVICES = {
26
27
  description: 'AI tracing and analytics',
27
28
  },
28
29
  };
30
+ const LOOP_CLI_SETTINGS = ['cli', 'review-cli'];
31
+ const LOOP_CLI_VALUES = ['claude', 'codex'];
32
+ const DEFAULT_CLAUDE_IMPL_MODEL = 'sonnet';
33
+ const DEFAULT_CLAUDE_REVIEW_MODEL = 'opus';
34
+ const DEFAULT_CODEX_MODEL = 'gpt-5.3-codex';
29
35
  /**
30
36
  * Check if a service API key is configured
31
37
  */
@@ -45,6 +51,69 @@ function saveKeyToEnvLocal(projectRoot, envVar, value) {
45
51
  const envLocalPath = path.join(ralphDir, '.env.local');
46
52
  writeKeysToEnvFile(envLocalPath, { [envVar]: value });
47
53
  }
54
/**
 * Serialize a config object as the source text of a CommonJS module
 * (`module.exports = { ... };` followed by a newline).
 *
 * The object is pretty-printed with JSON.stringify (2-space indent) and then
 * rewritten one line at a time:
 *   - property keys that are plain identifiers are unquoted;
 *   - non-empty string property values are switched to single quotes, with
 *     any apostrophe inside the value escaped.
 * Everything else (array elements, empty strings, keys that are not plain
 * identifiers) is left exactly as JSON.stringify produced it, which is
 * already valid JavaScript.
 *
 * Working per line with escape-aware patterns avoids the failure mode of a
 * global search/replace, where a value containing `'` or `"` produced a file
 * that no longer parses.
 *
 * @param {object} config - JSON-serializable configuration object.
 * @returns {string} Source text suitable for writing to a `.cjs` file.
 */
function toConfigFileContent(config) {
    // A key that can safely appear unquoted in an object literal.
    const IDENTIFIER_KEY = /^[A-Za-z_]\w*$/;
    // `<indent>"<key>": <rest>` — the key body honours JSON backslash escapes.
    const PROPERTY_LINE = /^(\s*)("(?:[^"\\]|\\.)*"): (.*)$/s;
    // A complete, non-empty JSON string literal with an optional trailing comma.
    const STRING_VALUE = /^"((?:[^"\\]|\\.)+)"(,?)$/s;
    // JSON.stringify never emits a raw newline inside a string literal, so
    // every property sits on exactly one line of the pretty-printed output.
    const json = String(JSON.stringify(config, null, 2));
    const rewritten = json.split('\n').map((line) => {
        const property = PROPERTY_LINE.exec(line);
        if (!property) {
            return line;
        }
        const [, indent, quotedKey, rest] = property;
        const key = IDENTIFIER_KEY.test(JSON.parse(quotedKey)) ? quotedKey.slice(1, -1) : quotedKey;
        const stringValue = STRING_VALUE.exec(rest);
        if (!stringValue) {
            return `${indent}${key}: ${rest}`;
        }
        // JSON escape sequences are also valid inside a single-quoted JS
        // string; only a bare apostrophe needs extra escaping.
        const body = stringValue[1].replace(/'/g, "\\'");
        return `${indent}${key}: '${body}'${stringValue[2]}`;
    });
    return `module.exports = ${rewritten.join('\n')};\n`;
}
61
/**
 * Map a user-supplied `/config set` target onto a canonical loop CLI setting.
 *
 * Matching is case-insensitive. The command handler lower-cases its argument
 * before calling this function, so the camelCase alias `reviewCli` arrives as
 * `reviewcli`; comparing against the lower-cased form keeps that alias usable.
 *
 * @param {string} raw - Setting name as typed by the user.
 * @returns {'cli' | 'review-cli' | null} Canonical setting name, or null when
 *   `raw` does not name a loop CLI setting.
 */
function normalizeLoopCliSetting(raw) {
    if (typeof raw !== 'string') {
        return null;
    }
    const name = raw.toLowerCase();
    if (name === 'cli') {
        return 'cli';
    }
    if (name === 'review-cli' || name === 'reviewcli') {
        return 'review-cli';
    }
    return null;
}
68
/**
 * Tell whether `value` is one of the entries of LOOP_CLI_VALUES.
 *
 * @param {unknown} value - Candidate CLI name.
 * @returns {boolean} True when `value` is listed in LOOP_CLI_VALUES.
 */
function isLoopCliValue(value) {
    return LOOP_CLI_VALUES.some((allowed) => allowed === value);
}
71
/**
 * Work out which `defaultModel` / `planningModel` values go with a coding/review
 * CLI selection.
 *
 * Only values that still equal a built-in default are swapped; any other
 * model string passes through untouched.
 *   - If either CLI is 'claude', a model equal to the Codex default is
 *     replaced by the matching Claude default (implementation or review).
 *   - If both CLIs are 'codex', a model equal to the matching Claude default
 *     is replaced by the Codex default.
 *   - Otherwise both models are returned unchanged.
 *
 * @param {{ defaultModel: string, planningModel: string }} loop - Current loop settings.
 * @param {string} codingCli - CLI selected for implementation.
 * @param {string} reviewCli - CLI selected for review.
 * @returns {{ defaultModel: string, planningModel: string }} Reconciled models.
 */
function reconcileLoopModelsForCliSelection(loop, codingCli, reviewCli) {
    const swapIfEqual = (model, from, to) => (model === from ? to : model);
    const { defaultModel, planningModel } = loop;
    const selected = [codingCli, reviewCli];
    if (selected.includes('claude')) {
        return {
            defaultModel: swapIfEqual(defaultModel, DEFAULT_CODEX_MODEL, DEFAULT_CLAUDE_IMPL_MODEL),
            planningModel: swapIfEqual(planningModel, DEFAULT_CODEX_MODEL, DEFAULT_CLAUDE_REVIEW_MODEL),
        };
    }
    if (selected.every((cli) => cli === 'codex')) {
        return {
            defaultModel: swapIfEqual(defaultModel, DEFAULT_CLAUDE_IMPL_MODEL, DEFAULT_CODEX_MODEL),
            planningModel: swapIfEqual(planningModel, DEFAULT_CLAUDE_REVIEW_MODEL, DEFAULT_CODEX_MODEL),
        };
    }
    return { defaultModel, planningModel };
}
94
/**
 * Persist a loop CLI choice to `<projectRoot>/ralph.config.cjs`.
 *
 * Loads the project config through loadConfigWithDefaults, applies the new
 * coding or review CLI, reconciles the loop models for that selection, and
 * rewrites the config file with the result.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {'cli' | 'review-cli'} setting - Which loop CLI setting to change.
 * @param {string} value - New CLI value for that setting.
 * @returns {Promise<void>}
 * @throws {Error} When `<projectRoot>/.ralph` is missing or is not a directory.
 */
async function saveLoopCliToConfig(projectRoot, setting, value) {
    // A `.ralph/` directory is what marks the project as initialized.
    const ralphDir = path.join(projectRoot, '.ralph');
    const isInitialized = fs.existsSync(ralphDir) && fs.statSync(ralphDir).isDirectory();
    if (!isInitialized) {
        throw new Error('This project is not initialized. Run \'wiggum init\' before using loop CLI settings.');
    }
    const currentConfig = await loadConfigWithDefaults(projectRoot);
    // The setting being changed takes `value`; the other keeps its current value.
    const codingCli = setting === 'cli' ? value : currentConfig.loop.codingCli;
    const reviewCli = setting === 'review-cli' ? value : currentConfig.loop.reviewCli;
    const models = reconcileLoopModelsForCliSelection(currentConfig.loop, codingCli, reviewCli);
    const updatedLoop = {
        ...currentConfig.loop,
        defaultModel: models.defaultModel,
        planningModel: models.planningModel,
        codingCli,
        reviewCli,
    };
    const serialized = toConfigFileContent({ ...currentConfig, loop: updatedLoop });
    fs.writeFileSync(path.join(projectRoot, 'ralph.config.cjs'), serialized, 'utf-8');
}
48
117
  /**
49
118
  * Display current configuration status
50
119
  */
@@ -75,6 +144,8 @@ function displayConfigStatus(state) {
75
144
  console.log(` ${simpson.yellow('/config set tavily')} ${pc.dim('<api-key>')}`);
76
145
  console.log(` ${simpson.yellow('/config set context7')} ${pc.dim('<api-key>')}`);
77
146
  console.log(` ${simpson.yellow('/config set braintrust')} ${pc.dim('<api-key>')}`);
147
+ console.log(` ${simpson.yellow('/config set cli')} ${pc.dim('<claude|codex>')}`);
148
+ console.log(` ${simpson.yellow('/config set review-cli')} ${pc.dim('<claude|codex>')}`);
78
149
  console.log('');
79
150
  }
80
151
  /**
@@ -93,17 +164,37 @@ export async function handleConfigCommand(args, state) {
93
164
  }
94
165
  // /config set <service> <key>
95
166
  if (args.length < 3) {
96
- logger.error('Usage: /config set <service> <api-key>');
167
+ logger.error('Usage: /config set <service> <value>');
97
168
  console.log('');
98
169
  console.log('Available services:');
99
170
  for (const [service, config] of Object.entries(CONFIGURABLE_SERVICES)) {
100
171
  console.log(` ${service.padEnd(12)} ${pc.dim(config.description)}`);
101
172
  }
173
+ for (const setting of LOOP_CLI_SETTINGS) {
174
+ console.log(` ${setting.padEnd(12)} ${pc.dim('Loop CLI setting')}`);
175
+ }
102
176
  console.log('');
103
177
  return state;
104
178
  }
105
- const service = args[1]?.toLowerCase();
179
+ const rawService = args[1]?.toLowerCase() ?? '';
106
180
  const apiKey = args[2];
181
+ const loopCliSetting = normalizeLoopCliSetting(rawService);
182
+ if (loopCliSetting) {
183
+ if (!isLoopCliValue(apiKey)) {
184
+ logger.error(`Invalid ${loopCliSetting} value: '${apiKey}'. Allowed values: ${LOOP_CLI_VALUES.join(', ')}`);
185
+ return state;
186
+ }
187
+ try {
188
+ await saveLoopCliToConfig(state.projectRoot, loopCliSetting, apiKey);
189
+ logger.success(`${loopCliSetting} saved to ralph.config.cjs (${apiKey})`);
190
+ console.log('');
191
+ }
192
+ catch (error) {
193
+ logger.error(`Failed to save ${loopCliSetting}: ${error instanceof Error ? error.message : String(error)}`);
194
+ }
195
+ return state;
196
+ }
197
+ const service = rawService;
107
198
  if (!(service in CONFIGURABLE_SERVICES)) {
108
199
  logger.error(`Unknown service: ${service}`);
109
200
  console.log('');
@@ -111,6 +202,9 @@ export async function handleConfigCommand(args, state) {
111
202
  for (const [svc, config] of Object.entries(CONFIGURABLE_SERVICES)) {
112
203
  console.log(` ${svc.padEnd(12)} ${pc.dim(config.description)}`);
113
204
  }
205
+ for (const setting of LOOP_CLI_SETTINGS) {
206
+ console.log(` ${setting.padEnd(12)} ${pc.dim('Loop CLI setting')}`);
207
+ }
114
208
  console.log('');
115
209
  return state;
116
210
  }
@@ -6,6 +6,8 @@ export interface RunOptions {
6
6
  worktree?: boolean;
7
7
  resume?: boolean;
8
8
  model?: string;
9
+ cli?: 'claude' | 'codex';
10
+ reviewCli?: 'claude' | 'codex';
9
11
  maxIterations?: number;
10
12
  maxE2eAttempts?: number;
11
13
  reviewMode?: 'manual' | 'auto' | 'merge';
@@ -3,11 +3,34 @@
3
3
  * Executes the feature development loop for a specific feature
4
4
  */
5
5
  import { spawn } from 'node:child_process';
6
- import { existsSync } from 'node:fs';
6
+ import { existsSync, readFileSync } from 'node:fs';
7
7
  import { join, dirname } from 'node:path';
8
8
  import { logger } from '../utils/logger.js';
9
9
  import { loadConfigWithDefaults, hasConfig, } from '../utils/config.js';
10
10
  import pc from 'picocolors';
11
+ const SUPPORTED_LOOP_CLIS = ['claude', 'codex'];
12
+ const DEFAULT_CODEX_LOOP_MODEL = 'gpt-5.3-codex';
13
/**
 * Tell whether `value` is one of the entries of SUPPORTED_LOOP_CLIS.
 *
 * @param {unknown} value - Candidate CLI name.
 * @returns {boolean} True when `value` is listed in SUPPORTED_LOOP_CLIS.
 */
function isSupportedLoopCli(value) {
    return SUPPORTED_LOOP_CLIS.some((cli) => cli === value);
}
16
/**
 * Check whether the script at `scriptPath` mentions both the `--cli` and
 * `--review-cli` flags anywhere in its text.
 *
 * @param {string} scriptPath - Path of the script file to inspect.
 * @returns {boolean} True when both flag names occur in the file; false when
 *   either is absent or the file cannot be read.
 */
function scriptSupportsCliFlags(scriptPath) {
    let contents;
    try {
        contents = readFileSync(scriptPath, 'utf-8');
    }
    catch {
        // An unreadable script is treated the same as one lacking the flags.
        return false;
    }
    return ['--cli', '--review-cli'].every((flag) => contents.includes(flag));
}
25
/**
 * Build the text shown on the "Model:" line of the run summary.
 *
 * An explicit override always wins. Otherwise the label depends on which CLIs
 * are selected:
 *   - both 'codex'  -> the Codex model;
 *   - both 'claude' -> `config.loop.defaultModel`;
 *   - anything else -> both models, each tagged with its CLI.
 *
 * The Codex model is taken from `config.loop.codexModel` when present and
 * falls back to DEFAULT_CODEX_LOOP_MODEL, so a customised config is reflected
 * in the summary instead of always showing the built-in default.
 * NOTE(review): assumes the loop script reads the same `loop.codexModel` key — confirm.
 *
 * @param {string | undefined} modelOverride - Model passed on the command line, if any.
 * @param {string} codingCli - CLI used for implementation.
 * @param {string} reviewCli - CLI used for review.
 * @param {{ loop: { defaultModel: string, codexModel?: string } }} config - Loaded project config.
 * @returns {string} Label to print.
 */
function getModelDisplayLabel(modelOverride, codingCli, reviewCli, config) {
    if (modelOverride) {
        return modelOverride;
    }
    const codexModel = config.loop.codexModel ?? DEFAULT_CODEX_LOOP_MODEL;
    if (codingCli === 'codex' && reviewCli === 'codex') {
        return codexModel;
    }
    if (codingCli === 'claude' && reviewCli === 'claude') {
        return config.loop.defaultModel;
    }
    return `${config.loop.defaultModel} (claude) / ${codexModel} (codex)`;
}
11
34
  /**
12
35
  * Find the feature-loop.sh script
13
36
  * Checks: 1) .ralph/scripts/ 2) ralph/ (parent ralph repo)
@@ -120,6 +143,26 @@ export async function runCommand(feature, options = {}) {
120
143
  if (options.model) {
121
144
  args.push('--model', options.model);
122
145
  }
146
+ // Resolve and validate coding CLI
147
+ const codingCli = options.cli ?? config.loop.codingCli ?? 'claude';
148
+ if (!isSupportedLoopCli(codingCli)) {
149
+ logger.error(`Invalid CLI '${codingCli}'. Allowed values are 'claude' or 'codex'.`);
150
+ process.exit(1);
151
+ }
152
+ // Resolve and validate review CLI
153
+ const reviewCli = options.reviewCli ?? config.loop.reviewCli ?? codingCli;
154
+ if (!isSupportedLoopCli(reviewCli)) {
155
+ logger.error(`Invalid review CLI '${reviewCli}'. Allowed values are 'claude' or 'codex'.`);
156
+ process.exit(1);
157
+ }
158
+ // Guard against stale generated scripts that don't support CLI flags.
159
+ if ((codingCli !== 'claude' || reviewCli !== 'claude') && !scriptSupportsCliFlags(scriptPath)) {
160
+ logger.error('The current feature-loop.sh does not support --cli/--review-cli flags.');
161
+ logger.info('Regenerate scripts with "wiggum init" (or re-run /init), then retry.');
162
+ process.exit(1);
163
+ }
164
+ args.push('--cli', codingCli);
165
+ args.push('--review-cli', reviewCli);
123
166
  // Resolve and validate reviewMode
124
167
  const reviewMode = options.reviewMode ?? config.loop.reviewMode ?? 'manual';
125
168
  if (reviewMode !== 'manual' && reviewMode !== 'auto' && reviewMode !== 'merge') {
@@ -133,7 +176,9 @@ export async function runCommand(feature, options = {}) {
133
176
  console.log(` Spec: ${specFile ?? '(on feature branch)'}`);
134
177
  console.log(` Max Iterations: ${maxIterations}`);
135
178
  console.log(` Max E2E Attempts: ${maxE2eAttempts}`);
136
- console.log(` Model: ${options.model || config.loop.defaultModel}`);
179
+ console.log(` Model: ${getModelDisplayLabel(options.model, codingCli, reviewCli, config)}`);
180
+ console.log(` Implementation CLI: ${codingCli}`);
181
+ console.log(` Review CLI: ${reviewCli}`);
137
182
  console.log(` Review Mode: ${reviewMode}`);
138
183
  console.log(` Worktree: ${options.worktree ? 'enabled' : 'disabled'}`);
139
184
  console.log(` Resume: ${options.resume ? 'enabled' : 'disabled'}`);
@@ -3,13 +3,17 @@
3
3
  * Generates ralph.config.cjs file from scan results
4
4
  */
5
5
  import { extractVariables } from './templates.js';
6
+ const DEFAULT_CODEX_MODEL = 'gpt-5.3-codex';
6
7
  /**
7
8
  * Generate ralph config object from scan result
8
9
  */
9
10
  export function generateConfig(scanResult, customVars = {}) {
10
11
  const vars = extractVariables(scanResult, customVars);
11
- const defaultModel = customVars.defaultModel || 'sonnet';
12
- const planningModel = customVars.planningModel || 'opus';
12
+ const codingCli = customVars.codingCli === 'codex' ? 'codex' : 'claude';
13
+ const reviewCli = customVars.reviewCli === 'codex' ? 'codex' : codingCli;
14
+ const codexEverywhere = codingCli === 'codex' && reviewCli === 'codex';
15
+ const defaultModel = customVars.defaultModel || (codexEverywhere ? 'gpt-5.3-codex' : 'sonnet');
16
+ const planningModel = customVars.planningModel || (codexEverywhere ? 'gpt-5.3-codex' : 'opus');
13
17
  const agentProvider = customVars.agentProvider || 'anthropic';
14
18
  const agentModel = customVars.agentModel || 'claude-sonnet-4-6';
15
19
  return {
@@ -48,7 +52,14 @@ export function generateConfig(scanResult, customVars = {}) {
48
52
  maxE2eAttempts: 5,
49
53
  defaultModel,
50
54
  planningModel,
55
+ codexModel: DEFAULT_CODEX_MODEL,
56
+ codingCli,
57
+ reviewCli,
51
58
  reviewMode: 'manual',
59
+ claudePermissionMode: 'default',
60
+ codexSandbox: 'workspace-write',
61
+ codexApprovalPolicy: 'never',
62
+ disableMcpInAutomatedRuns: true,
52
63
  },
53
64
  agent: {
54
65
  defaultProvider: agentProvider,
package/dist/index.js CHANGED
@@ -38,6 +38,8 @@ export function parseCliArgs(argv) {
38
38
  // Flags that consume the next argument as their value
39
39
  const valueFlagSet = new Set([
40
40
  '--model',
41
+ '--cli',
42
+ '--review-cli',
41
43
  '--max-iterations',
42
44
  '--max-e2e-attempts',
43
45
  '--interval',
@@ -233,6 +235,8 @@ Options for run:
233
235
  --worktree Use git worktree isolation
234
236
  --resume Resume from last checkpoint
235
237
  --model <model> AI model to use
238
+ --cli <cli> Implementation CLI: claude, codex
239
+ --review-cli <cli> Review CLI: claude, codex
236
240
  --max-iterations <n> Maximum loop iterations
237
241
  --max-e2e-attempts <n> Maximum E2E test attempts
238
242
  --review-mode <mode> Review mode: manual, auto, merge
@@ -338,13 +342,15 @@ Press Esc to cancel any operation.
338
342
  const feature = parsed.positionalArgs[0];
339
343
  if (!feature) {
340
344
  console.error('Error: <feature> is required for "run"');
341
- console.error('Usage: wiggum run <feature> [--worktree] [--resume] [--model <model>] [--max-iterations <n>] [--max-e2e-attempts <n>]');
345
+ console.error('Usage: wiggum run <feature> [--worktree] [--resume] [--model <model>] [--cli <claude|codex>] [--review-cli <claude|codex>] [--max-iterations <n>] [--max-e2e-attempts <n>]');
342
346
  process.exit(1);
343
347
  }
344
348
  const runOptions = {
345
349
  worktree: parsed.flags.worktree === true,
346
350
  resume: parsed.flags.resume === true,
347
351
  model: typeof parsed.flags.model === 'string' ? parsed.flags.model : undefined,
352
+ cli: typeof parsed.flags.cli === 'string' ? parsed.flags.cli : undefined,
353
+ reviewCli: typeof parsed.flags.reviewCli === 'string' ? parsed.flags.reviewCli : undefined,
348
354
  maxIterations: typeof parsed.flags.maxIterations === 'string' ? parseIntFlag(parsed.flags.maxIterations, '--max-iterations') : undefined,
349
355
  maxE2eAttempts: typeof parsed.flags.maxE2eAttempts === 'string' ? parseIntFlag(parsed.flags.maxE2eAttempts, '--max-e2e-attempts') : undefined,
350
356
  reviewMode: typeof parsed.flags.reviewMode === 'string' ? parsed.flags.reviewMode : undefined,
@@ -61,7 +61,7 @@ export declare const REPL_COMMANDS: {
61
61
  };
62
62
  readonly agent: {
63
63
  readonly description: "Start the autonomous backlog agent";
64
- readonly usage: "/agent [--dry-run] [--max-items <n>]";
64
+ readonly usage: "/agent [--dry-run] [--max-items <n>] [--max-steps <n>] [--review-mode manual|auto|merge] [--labels <l1,l2>] [--issues <n1,n2,...>]";
65
65
  readonly aliases: readonly ["a"];
66
66
  };
67
67
  readonly config: {
@@ -38,7 +38,7 @@ export const REPL_COMMANDS = {
38
38
  },
39
39
  agent: {
40
40
  description: 'Start the autonomous backlog agent',
41
- usage: '/agent [--dry-run] [--max-items <n>]',
41
+ usage: '/agent [--dry-run] [--max-items <n>] [--max-steps <n>] [--review-mode manual|auto|merge] [--labels <l1,l2>] [--issues <n1,n2,...>]',
42
42
  aliases: ['a'],
43
43
  },
44
44
  config: {
@@ -32,8 +32,15 @@ module.exports = {
32
32
  loop: {
33
33
  maxIterations: 10,
34
34
  maxE2eAttempts: 5,
35
- defaultModel: 'sonnet',
36
- planningModel: 'opus',
35
+ defaultModel: 'sonnet', // Claude implementation/e2e default
36
+ planningModel: 'opus', // Claude planning/review default
37
+ codexModel: 'gpt-5.3-codex', // Codex model default for codex phases
38
+ codingCli: 'claude', // Implementation CLI: 'claude' | 'codex'
39
+ reviewCli: 'claude', // Review CLI: 'claude' | 'codex'
37
40
  reviewMode: 'manual', // 'manual' = stop at PR, 'auto' = review (no merge), 'merge' = review + auto-merge
41
+ claudePermissionMode: 'default', // Claude permission mode: default|auto|dontAsk|acceptEdits|plan|bypassPermissions
42
+ codexSandbox: 'workspace-write', // Codex sandbox: read-only|workspace-write|danger-full-access
43
+ codexApprovalPolicy: 'never', // Codex approvals: untrusted|on-failure|on-request|never
44
+ disableMcpInAutomatedRuns: true, // Disable MCP servers when RALPH_AUTOMATED=1
38
45
  },
39
46
  };