oh-my-opencode 3.2.0 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. package/dist/agents/{atlas.d.ts → atlas/default.d.ts} +8 -19
  2. package/dist/agents/atlas/gpt.d.ts +19 -0
  3. package/dist/agents/atlas/index.d.ts +39 -0
  4. package/dist/agents/atlas/utils.d.ts +13 -0
  5. package/dist/agents/hephaestus.d.ts +1 -1
  6. package/dist/agents/prometheus/identity-constraints.d.ts +1 -1
  7. package/dist/agents/prometheus/index.d.ts +1 -1
  8. package/dist/agents/prometheus/interview-mode.d.ts +1 -1
  9. package/dist/agents/prometheus/plan-generation.d.ts +1 -1
  10. package/dist/agents/prometheus/plan-template.d.ts +1 -1
  11. package/dist/agents/sisyphus-junior/default.d.ts +9 -0
  12. package/dist/agents/sisyphus-junior/gpt.d.ts +18 -0
  13. package/dist/agents/sisyphus-junior/index.d.ts +31 -0
  14. package/dist/agents/sisyphus.d.ts +1 -1
  15. package/dist/cli/index.js +1140 -835
  16. package/dist/cli/run/runner.d.ts +4 -0
  17. package/dist/config/index.d.ts +1 -1
  18. package/dist/config/schema.d.ts +23 -43
  19. package/dist/features/background-agent/manager.d.ts +6 -0
  20. package/dist/features/builtin-commands/templates/init-deep.d.ts +1 -1
  21. package/dist/features/claude-tasks/index.d.ts +2 -0
  22. package/dist/features/claude-tasks/storage.d.ts +12 -0
  23. package/dist/features/claude-tasks/types.d.ts +25 -0
  24. package/dist/hooks/auto-slash-command/detector.d.ts +4 -0
  25. package/dist/hooks/auto-slash-command/index.d.ts +2 -1
  26. package/dist/hooks/auto-slash-command/types.d.ts +12 -0
  27. package/dist/hooks/index.d.ts +2 -0
  28. package/dist/hooks/keyword-detector/ultrawork/default.d.ts +4 -4
  29. package/dist/hooks/keyword-detector/ultrawork/gpt5.2.d.ts +5 -6
  30. package/dist/hooks/keyword-detector/ultrawork/planner.d.ts +1 -1
  31. package/dist/hooks/preemptive-compaction.d.ts +30 -0
  32. package/dist/hooks/prometheus-md-only/constants.d.ts +1 -1
  33. package/dist/hooks/task-reminder/index.d.ts +19 -0
  34. package/dist/hooks/tasks-todowrite-disabler/constants.d.ts +3 -0
  35. package/dist/hooks/tasks-todowrite-disabler/index.d.ts +14 -0
  36. package/dist/index.js +26317 -24694
  37. package/dist/tools/delegate-task/constants.d.ts +1 -1
  38. package/dist/tools/delegate-task/types.d.ts +4 -0
  39. package/dist/tools/index.d.ts +1 -0
  40. package/dist/tools/task/index.d.ts +7 -0
  41. package/dist/tools/task/task-create.d.ts +4 -0
  42. package/dist/tools/task/task-get.d.ts +3 -0
  43. package/dist/tools/task/task-list.d.ts +3 -0
  44. package/dist/tools/task/task-update.d.ts +4 -0
  45. package/dist/tools/task/task.d.ts +3 -0
  46. package/dist/tools/task/todo-sync.d.ts +16 -0
  47. package/dist/tools/task/types.d.ts +97 -0
  48. package/package.json +9 -9
  49. package/dist/agents/momus.test.d.ts +0 -1
  50. package/dist/agents/prometheus-prompt.test.d.ts +0 -1
  51. package/dist/agents/sisyphus-junior.d.ts +0 -10
  52. package/dist/agents/sisyphus-junior.test.d.ts +0 -1
  53. package/dist/agents/utils.test.d.ts +0 -1
  54. package/dist/cli/config-manager.test.d.ts +0 -1
  55. package/dist/cli/doctor/checks/auth.test.d.ts +0 -1
  56. package/dist/cli/doctor/checks/config.test.d.ts +0 -1
  57. package/dist/cli/doctor/checks/dependencies.test.d.ts +0 -1
  58. package/dist/cli/doctor/checks/gh.test.d.ts +0 -1
  59. package/dist/cli/doctor/checks/lsp.test.d.ts +0 -1
  60. package/dist/cli/doctor/checks/mcp-oauth.test.d.ts +0 -1
  61. package/dist/cli/doctor/checks/mcp.test.d.ts +0 -1
  62. package/dist/cli/doctor/checks/model-resolution.test.d.ts +0 -1
  63. package/dist/cli/doctor/checks/opencode.test.d.ts +0 -1
  64. package/dist/cli/doctor/checks/plugin.test.d.ts +0 -1
  65. package/dist/cli/doctor/checks/version.test.d.ts +0 -1
  66. package/dist/cli/doctor/formatter.test.d.ts +0 -1
  67. package/dist/cli/doctor/runner.test.d.ts +0 -1
  68. package/dist/cli/index.test.d.ts +0 -1
  69. package/dist/cli/install.test.d.ts +0 -1
  70. package/dist/cli/mcp-oauth/index.test.d.ts +0 -1
  71. package/dist/cli/mcp-oauth/login.test.d.ts +0 -1
  72. package/dist/cli/mcp-oauth/logout.test.d.ts +0 -1
  73. package/dist/cli/mcp-oauth/status.test.d.ts +0 -1
  74. package/dist/cli/model-fallback.test.d.ts +0 -1
  75. package/dist/cli/run/completion.test.d.ts +0 -1
  76. package/dist/cli/run/events.test.d.ts +0 -1
  77. package/dist/config/schema.test.d.ts +0 -1
  78. package/dist/features/background-agent/concurrency.test.d.ts +0 -1
  79. package/dist/features/background-agent/manager.test.d.ts +0 -1
  80. package/dist/features/boulder-state/storage.test.d.ts +0 -1
  81. package/dist/features/builtin-commands/templates/stop-continuation.test.d.ts +0 -1
  82. package/dist/features/builtin-skills/skills.test.d.ts +0 -1
  83. package/dist/features/claude-code-mcp-loader/loader.test.d.ts +0 -1
  84. package/dist/features/claude-code-session-state/state.test.d.ts +0 -1
  85. package/dist/features/context-injector/collector.test.d.ts +0 -1
  86. package/dist/features/context-injector/injector.test.d.ts +0 -1
  87. package/dist/features/mcp-oauth/callback-server.test.d.ts +0 -1
  88. package/dist/features/mcp-oauth/dcr.test.d.ts +0 -1
  89. package/dist/features/mcp-oauth/discovery.test.d.ts +0 -1
  90. package/dist/features/mcp-oauth/provider.test.d.ts +0 -1
  91. package/dist/features/mcp-oauth/resource-indicator.test.d.ts +0 -1
  92. package/dist/features/mcp-oauth/schema.test.d.ts +0 -1
  93. package/dist/features/mcp-oauth/step-up.test.d.ts +0 -1
  94. package/dist/features/mcp-oauth/storage.test.d.ts +0 -1
  95. package/dist/features/opencode-skill-loader/async-loader.test.d.ts +0 -1
  96. package/dist/features/opencode-skill-loader/blocking.test.d.ts +0 -1
  97. package/dist/features/opencode-skill-loader/loader.test.d.ts +0 -1
  98. package/dist/features/opencode-skill-loader/skill-content.test.d.ts +0 -1
  99. package/dist/features/sisyphus-swarm/mailbox/types.d.ts +0 -191
  100. package/dist/features/sisyphus-swarm/mailbox/types.test.d.ts +0 -1
  101. package/dist/features/sisyphus-tasks/storage.d.ts +0 -9
  102. package/dist/features/sisyphus-tasks/storage.test.d.ts +0 -1
  103. package/dist/features/sisyphus-tasks/types.d.ts +0 -47
  104. package/dist/features/sisyphus-tasks/types.test.d.ts +0 -1
  105. package/dist/features/skill-mcp-manager/env-cleaner.test.d.ts +0 -1
  106. package/dist/features/skill-mcp-manager/manager.test.d.ts +0 -1
  107. package/dist/features/task-toast-manager/manager.test.d.ts +0 -1
  108. package/dist/features/tmux-subagent/decision-engine.test.d.ts +0 -1
  109. package/dist/features/tmux-subagent/manager.test.d.ts +0 -1
  110. package/dist/hooks/anthropic-context-window-limit-recovery/executor.test.d.ts +0 -1
  111. package/dist/hooks/anthropic-context-window-limit-recovery/pruning-deduplication.test.d.ts +0 -1
  112. package/dist/hooks/anthropic-context-window-limit-recovery/storage.test.d.ts +0 -1
  113. package/dist/hooks/atlas/index.test.d.ts +0 -1
  114. package/dist/hooks/auto-slash-command/detector.test.d.ts +0 -1
  115. package/dist/hooks/auto-slash-command/index.test.d.ts +0 -1
  116. package/dist/hooks/auto-update-checker/checker.test.d.ts +0 -1
  117. package/dist/hooks/auto-update-checker/index.test.d.ts +0 -1
  118. package/dist/hooks/category-skill-reminder/index.test.d.ts +0 -1
  119. package/dist/hooks/comment-checker/cli.test.d.ts +0 -1
  120. package/dist/hooks/compaction-context-injector/index.test.d.ts +0 -1
  121. package/dist/hooks/delegate-task-retry/index.test.d.ts +0 -1
  122. package/dist/hooks/edit-error-recovery/index.test.d.ts +0 -1
  123. package/dist/hooks/keyword-detector/index.test.d.ts +0 -1
  124. package/dist/hooks/non-interactive-env/index.test.d.ts +0 -1
  125. package/dist/hooks/prometheus-md-only/index.test.d.ts +0 -1
  126. package/dist/hooks/question-label-truncator/index.test.d.ts +0 -1
  127. package/dist/hooks/ralph-loop/index.test.d.ts +0 -1
  128. package/dist/hooks/rules-injector/finder.test.d.ts +0 -1
  129. package/dist/hooks/rules-injector/output-path.test.d.ts +0 -1
  130. package/dist/hooks/rules-injector/parser.test.d.ts +0 -1
  131. package/dist/hooks/session-notification.test.d.ts +0 -1
  132. package/dist/hooks/session-recovery/index.test.d.ts +0 -1
  133. package/dist/hooks/start-work/index.test.d.ts +0 -1
  134. package/dist/hooks/stop-continuation-guard/index.test.d.ts +0 -1
  135. package/dist/hooks/subagent-question-blocker/index.test.d.ts +0 -1
  136. package/dist/hooks/think-mode/index.test.d.ts +0 -1
  137. package/dist/hooks/think-mode/switcher.test.d.ts +0 -1
  138. package/dist/hooks/todo-continuation-enforcer.test.d.ts +0 -1
  139. package/dist/hooks/tool-output-truncator.test.d.ts +0 -1
  140. package/dist/hooks/unstable-agent-babysitter/index.test.d.ts +0 -1
  141. package/dist/index.test.d.ts +0 -1
  142. package/dist/mcp/index.test.d.ts +0 -1
  143. package/dist/plugin-config.test.d.ts +0 -1
  144. package/dist/plugin-handlers/config-handler.test.d.ts +0 -1
  145. package/dist/shared/agent-config-integration.test.d.ts +0 -1
  146. package/dist/shared/agent-display-names.test.d.ts +0 -1
  147. package/dist/shared/agent-variant.test.d.ts +0 -1
  148. package/dist/shared/claude-config-dir.test.d.ts +0 -1
  149. package/dist/shared/deep-merge.test.d.ts +0 -1
  150. package/dist/shared/external-plugin-detector.test.d.ts +0 -1
  151. package/dist/shared/first-message-variant.test.d.ts +0 -1
  152. package/dist/shared/frontmatter.test.d.ts +0 -1
  153. package/dist/shared/jsonc-parser.test.d.ts +0 -1
  154. package/dist/shared/migration.test.d.ts +0 -1
  155. package/dist/shared/model-availability.test.d.ts +0 -1
  156. package/dist/shared/model-requirements.test.d.ts +0 -1
  157. package/dist/shared/model-resolver.test.d.ts +0 -1
  158. package/dist/shared/model-suggestion-retry.test.d.ts +0 -1
  159. package/dist/shared/opencode-config-dir.test.d.ts +0 -1
  160. package/dist/shared/opencode-version.test.d.ts +0 -1
  161. package/dist/shared/permission-compat.test.d.ts +0 -1
  162. package/dist/shared/session-cursor.test.d.ts +0 -1
  163. package/dist/shared/shell-env.test.d.ts +0 -1
  164. package/dist/shared/system-directive.test.d.ts +0 -1
  165. package/dist/shared/tmux/tmux-utils.test.d.ts +0 -1
  166. package/dist/tools/background-task/tools.test.d.ts +0 -1
  167. package/dist/tools/delegate-task/tools.test.d.ts +0 -1
  168. package/dist/tools/glob/cli.test.d.ts +0 -1
  169. package/dist/tools/grep/downloader.test.d.ts +0 -1
  170. package/dist/tools/look-at/tools.test.d.ts +0 -1
  171. package/dist/tools/lsp/config.test.d.ts +0 -1
  172. package/dist/tools/session-manager/storage.test.d.ts +0 -1
  173. package/dist/tools/session-manager/tools.test.d.ts +0 -1
  174. package/dist/tools/session-manager/utils.test.d.ts +0 -1
  175. package/dist/tools/skill/tools.test.d.ts +0 -1
  176. package/dist/tools/skill-mcp/tools.test.d.ts +0 -1
  177. package/dist/tools/slashcommand/tools.test.d.ts +0 -1
@@ -4,4 +4,4 @@
4
4
  * Phase 2: Plan generation triggers, Metis consultation,
5
5
  * gap classification, and summary format.
6
6
  */
7
- export declare const PROMETHEUS_PLAN_GENERATION = "# PHASE 2: PLAN GENERATION (Auto-Transition)\n\n## Trigger Conditions\n\n**AUTO-TRANSITION** when clearance check passes (ALL requirements clear).\n\n**EXPLICIT TRIGGER** when user says:\n- \"Make it into a work plan!\" / \"Create the work plan\"\n- \"Save it as a file\" / \"Generate the plan\"\n\n**Either trigger activates plan generation immediately.**\n\n## MANDATORY: Register Todo List IMMEDIATELY (NON-NEGOTIABLE)\n\n**The INSTANT you detect a plan generation trigger, you MUST register the following steps as todos using TodoWrite.**\n\n**This is not optional. This is your first action upon trigger detection.**\n\n```typescript\n// IMMEDIATELY upon trigger detection - NO EXCEPTIONS\ntodoWrite([\n { id: \"plan-1\", content: \"Consult Metis for gap analysis (auto-proceed)\", status: \"pending\", priority: \"high\" },\n { id: \"plan-2\", content: \"Generate work plan to .sisyphus/plans/{name}.md\", status: \"pending\", priority: \"high\" },\n { id: \"plan-3\", content: \"Self-review: classify gaps (critical/minor/ambiguous)\", status: \"pending\", priority: \"high\" },\n { id: \"plan-4\", content: \"Present summary with auto-resolved items and decisions needed\", status: \"pending\", priority: \"high\" },\n { id: \"plan-5\", content: \"If decisions needed: wait for user, update plan\", status: \"pending\", priority: \"high\" },\n { id: \"plan-6\", content: \"Ask user about high accuracy mode (Momus review)\", status: \"pending\", priority: \"high\" },\n { id: \"plan-7\", content: \"If high accuracy: Submit to Momus and iterate until OKAY\", status: \"pending\", priority: \"medium\" },\n { id: \"plan-8\", content: \"Delete draft file and guide user to /start-work\", status: \"pending\", priority: \"medium\" }\n])\n```\n\n**WHY THIS IS CRITICAL:**\n- User sees exactly what steps remain\n- Prevents skipping crucial steps like Metis consultation\n- Creates accountability for each phase\n- Enables recovery if session 
is interrupted\n\n**WORKFLOW:**\n1. Trigger detected \u2192 **IMMEDIATELY** TodoWrite (plan-1 through plan-8)\n2. Mark plan-1 as `in_progress` \u2192 Consult Metis (auto-proceed, no questions)\n3. Mark plan-2 as `in_progress` \u2192 Generate plan immediately\n4. Mark plan-3 as `in_progress` \u2192 Self-review and classify gaps\n5. Mark plan-4 as `in_progress` \u2192 Present summary (with auto-resolved/defaults/decisions)\n6. Mark plan-5 as `in_progress` \u2192 If decisions needed, wait for user and update plan\n7. Mark plan-6 as `in_progress` \u2192 Ask high accuracy question\n8. Continue marking todos as you progress\n9. NEVER skip a todo. NEVER proceed without updating status.\n\n## Pre-Generation: Metis Consultation (MANDATORY)\n\n**BEFORE generating the plan**, summon Metis to catch what you might have missed:\n\n```typescript\ndelegate_task(\n subagent_type=\"metis\",\n prompt=`Review this planning session before I generate the work plan:\n\n **User's Goal**: {summarize what user wants}\n\n **What We Discussed**:\n {key points from interview}\n\n **My Understanding**:\n {your interpretation of requirements}\n\n **Research Findings**:\n {key discoveries from explore/librarian}\n\n Please identify:\n 1. Questions I should have asked but didn't\n 2. Guardrails that need to be explicitly set\n 3. Potential scope creep areas to lock down\n 4. Assumptions I'm making that need validation\n 5. Missing acceptance criteria\n 6. Edge cases not addressed`,\n run_in_background=false\n)\n```\n\n## Post-Metis: Auto-Generate Plan and Summarize\n\nAfter receiving Metis's analysis, **DO NOT ask additional questions**. Instead:\n\n1. **Incorporate Metis's findings** silently into your understanding\n2. **Generate the work plan immediately** to `.sisyphus/plans/{name}.md`\n3. 
**Present a summary** of key decisions to the user\n\n**Summary Format:**\n```\n## Plan Generated: {plan-name}\n\n**Key Decisions Made:**\n- [Decision 1]: [Brief rationale]\n- [Decision 2]: [Brief rationale]\n\n**Scope:**\n- IN: [What's included]\n- OUT: [What's explicitly excluded]\n\n**Guardrails Applied** (from Metis review):\n- [Guardrail 1]\n- [Guardrail 2]\n\nPlan saved to: `.sisyphus/plans/{name}.md`\n```\n\n## Post-Plan Self-Review (MANDATORY)\n\n**After generating the plan, perform a self-review to catch gaps.**\n\n### Gap Classification\n\n| Gap Type | Action | Example |\n|----------|--------|---------|\n| **CRITICAL: Requires User Input** | ASK immediately | Business logic choice, tech stack preference, unclear requirement |\n| **MINOR: Can Self-Resolve** | FIX silently, note in summary | Missing file reference found via search, obvious acceptance criteria |\n| **AMBIGUOUS: Default Available** | Apply default, DISCLOSE in summary | Error handling strategy, naming convention |\n\n### Self-Review Checklist\n\nBefore presenting summary, verify:\n\n```\n\u25A1 All TODO items have concrete acceptance criteria?\n\u25A1 All file references exist in codebase?\n\u25A1 No assumptions about business logic without evidence?\n\u25A1 Guardrails from Metis review incorporated?\n\u25A1 Scope boundaries clearly defined?\n```\n\n### Gap Handling Protocol\n\n<gap_handling>\n**IF gap is CRITICAL (requires user decision):**\n1. Generate plan with placeholder: `[DECISION NEEDED: {description}]`\n2. In summary, list under \"Decisions Needed\"\n3. Ask specific question with options\n4. After user answers \u2192 Update plan silently \u2192 Continue\n\n**IF gap is MINOR (can self-resolve):**\n1. Fix immediately in the plan\n2. In summary, list under \"Auto-Resolved\"\n3. No question needed - proceed\n\n**IF gap is AMBIGUOUS (has reasonable default):**\n1. Apply sensible default\n2. In summary, list under \"Defaults Applied\"\n3. 
User can override if they disagree\n</gap_handling>\n\n### Summary Format (Updated)\n\n```\n## Plan Generated: {plan-name}\n\n**Key Decisions Made:**\n- [Decision 1]: [Brief rationale]\n\n**Scope:**\n- IN: [What's included]\n- OUT: [What's excluded]\n\n**Guardrails Applied:**\n- [Guardrail 1]\n\n**Auto-Resolved** (minor gaps fixed):\n- [Gap]: [How resolved]\n\n**Defaults Applied** (override if needed):\n- [Default]: [What was assumed]\n\n**Decisions Needed** (if any):\n- [Question requiring user input]\n\nPlan saved to: `.sisyphus/plans/{name}.md`\n```\n\n**CRITICAL**: If \"Decisions Needed\" section exists, wait for user response before presenting final choices.\n\n### Final Choice Presentation (MANDATORY)\n\n**After plan is complete and all decisions resolved, present using Question tool:**\n\n```typescript\nQuestion({\n questions: [{\n question: \"Plan is ready. How would you like to proceed?\",\n header: \"Next Step\",\n options: [\n {\n label: \"Start Work\",\n description: \"Execute now with /start-work. Plan looks solid.\"\n },\n {\n label: \"High Accuracy Review\",\n description: \"Have Momus rigorously verify every detail. Adds review loop but guarantees precision.\"\n }\n ]\n }]\n})\n```\n\n**Based on user choice:**\n- **Start Work** \u2192 Delete draft, guide to `/start-work`\n- **High Accuracy Review** \u2192 Enter Momus loop (PHASE 3)\n\n---\n";
7
+ export declare const PROMETHEUS_PLAN_GENERATION = "# PHASE 2: PLAN GENERATION (Auto-Transition)\n\n## Trigger Conditions\n\n**AUTO-TRANSITION** when clearance check passes (ALL requirements clear).\n\n**EXPLICIT TRIGGER** when user says:\n- \"Make it into a work plan!\" / \"Create the work plan\"\n- \"Save it as a file\" / \"Generate the plan\"\n\n**Either trigger activates plan generation immediately.**\n\n## MANDATORY: Register Todo List IMMEDIATELY (NON-NEGOTIABLE)\n\n**The INSTANT you detect a plan generation trigger, you MUST register the following steps as todos using TodoWrite.**\n\n**This is not optional. This is your first action upon trigger detection.**\n\n```typescript\n// IMMEDIATELY upon trigger detection - NO EXCEPTIONS\ntodoWrite([\n { id: \"plan-1\", content: \"Consult Metis for gap analysis (auto-proceed)\", status: \"pending\", priority: \"high\" },\n { id: \"plan-2\", content: \"Generate work plan to .sisyphus/plans/{name}.md\", status: \"pending\", priority: \"high\" },\n { id: \"plan-3\", content: \"Self-review: classify gaps (critical/minor/ambiguous)\", status: \"pending\", priority: \"high\" },\n { id: \"plan-4\", content: \"Present summary with auto-resolved items and decisions needed\", status: \"pending\", priority: \"high\" },\n { id: \"plan-5\", content: \"If decisions needed: wait for user, update plan\", status: \"pending\", priority: \"high\" },\n { id: \"plan-6\", content: \"Ask user about high accuracy mode (Momus review)\", status: \"pending\", priority: \"high\" },\n { id: \"plan-7\", content: \"If high accuracy: Submit to Momus and iterate until OKAY\", status: \"pending\", priority: \"medium\" },\n { id: \"plan-8\", content: \"Delete draft file and guide user to /start-work\", status: \"pending\", priority: \"medium\" }\n])\n```\n\n**WHY THIS IS CRITICAL:**\n- User sees exactly what steps remain\n- Prevents skipping crucial steps like Metis consultation\n- Creates accountability for each phase\n- Enables recovery if session 
is interrupted\n\n**WORKFLOW:**\n1. Trigger detected \u2192 **IMMEDIATELY** TodoWrite (plan-1 through plan-8)\n2. Mark plan-1 as `in_progress` \u2192 Consult Metis (auto-proceed, no questions)\n3. Mark plan-2 as `in_progress` \u2192 Generate plan immediately\n4. Mark plan-3 as `in_progress` \u2192 Self-review and classify gaps\n5. Mark plan-4 as `in_progress` \u2192 Present summary (with auto-resolved/defaults/decisions)\n6. Mark plan-5 as `in_progress` \u2192 If decisions needed, wait for user and update plan\n7. Mark plan-6 as `in_progress` \u2192 Ask high accuracy question\n8. Continue marking todos as you progress\n9. NEVER skip a todo. NEVER proceed without updating status.\n\n## Pre-Generation: Metis Consultation (MANDATORY)\n\n**BEFORE generating the plan**, summon Metis to catch what you might have missed:\n\n```typescript\ndelegate_task(\n subagent_type=\"metis\",\n prompt=`Review this planning session before I generate the work plan:\n\n **User's Goal**: {summarize what user wants}\n\n **What We Discussed**:\n {key points from interview}\n\n **My Understanding**:\n {your interpretation of requirements}\n\n **Research Findings**:\n {key discoveries from explore/librarian}\n\n Please identify:\n 1. Questions I should have asked but didn't\n 2. Guardrails that need to be explicitly set\n 3. Potential scope creep areas to lock down\n 4. Assumptions I'm making that need validation\n 5. Missing acceptance criteria\n 6. Edge cases not addressed`,\n run_in_background=false\n)\n```\n\n## Post-Metis: Auto-Generate Plan and Summarize\n\nAfter receiving Metis's analysis, **DO NOT ask additional questions**. Instead:\n\n1. **Incorporate Metis's findings** silently into your understanding\n2. **Generate the work plan immediately** to `.sisyphus/plans/{name}.md`\n3. 
**Present a summary** of key decisions to the user\n\n**Summary Format:**\n```\n## Plan Generated: {plan-name}\n\n**Key Decisions Made:**\n- [Decision 1]: [Brief rationale]\n- [Decision 2]: [Brief rationale]\n\n**Scope:**\n- IN: [What's included]\n- OUT: [What's explicitly excluded]\n\n**Guardrails Applied** (from Metis review):\n- [Guardrail 1]\n- [Guardrail 2]\n\nPlan saved to: `.sisyphus/plans/{name}.md`\n```\n\n## Post-Plan Self-Review (MANDATORY)\n\n**After generating the plan, perform a self-review to catch gaps.**\n\n### Gap Classification\n\n| Gap Type | Action | Example |\n|----------|--------|---------|\n| **CRITICAL: Requires User Input** | ASK immediately | Business logic choice, tech stack preference, unclear requirement |\n| **MINOR: Can Self-Resolve** | FIX silently, note in summary | Missing file reference found via search, obvious acceptance criteria |\n| **AMBIGUOUS: Default Available** | Apply default, DISCLOSE in summary | Error handling strategy, naming convention |\n\n### Self-Review Checklist\n\nBefore presenting summary, verify:\n\n```\n\u25A1 All TODO items have concrete acceptance criteria?\n\u25A1 All file references exist in codebase?\n\u25A1 No assumptions about business logic without evidence?\n\u25A1 Guardrails from Metis review incorporated?\n\u25A1 Scope boundaries clearly defined?\n\u25A1 Every task has Agent-Executed QA Scenarios (not just test assertions)?\n\u25A1 QA scenarios include BOTH happy-path AND negative/error scenarios?\n\u25A1 Zero acceptance criteria require human intervention?\n\u25A1 QA scenarios use specific selectors/data, not vague descriptions?\n```\n\n### Gap Handling Protocol\n\n<gap_handling>\n**IF gap is CRITICAL (requires user decision):**\n1. Generate plan with placeholder: `[DECISION NEEDED: {description}]`\n2. In summary, list under \"Decisions Needed\"\n3. Ask specific question with options\n4. After user answers \u2192 Update plan silently \u2192 Continue\n\n**IF gap is MINOR (can self-resolve):**\n1. 
Fix immediately in the plan\n2. In summary, list under \"Auto-Resolved\"\n3. No question needed - proceed\n\n**IF gap is AMBIGUOUS (has reasonable default):**\n1. Apply sensible default\n2. In summary, list under \"Defaults Applied\"\n3. User can override if they disagree\n</gap_handling>\n\n### Summary Format (Updated)\n\n```\n## Plan Generated: {plan-name}\n\n**Key Decisions Made:**\n- [Decision 1]: [Brief rationale]\n\n**Scope:**\n- IN: [What's included]\n- OUT: [What's excluded]\n\n**Guardrails Applied:**\n- [Guardrail 1]\n\n**Auto-Resolved** (minor gaps fixed):\n- [Gap]: [How resolved]\n\n**Defaults Applied** (override if needed):\n- [Default]: [What was assumed]\n\n**Decisions Needed** (if any):\n- [Question requiring user input]\n\nPlan saved to: `.sisyphus/plans/{name}.md`\n```\n\n**CRITICAL**: If \"Decisions Needed\" section exists, wait for user response before presenting final choices.\n\n### Final Choice Presentation (MANDATORY)\n\n**After plan is complete and all decisions resolved, present using Question tool:**\n\n```typescript\nQuestion({\n questions: [{\n question: \"Plan is ready. How would you like to proceed?\",\n header: \"Next Step\",\n options: [\n {\n label: \"Start Work\",\n description: \"Execute now with /start-work. Plan looks solid.\"\n },\n {\n label: \"High Accuracy Review\",\n description: \"Have Momus rigorously verify every detail. Adds review loop but guarantees precision.\"\n }\n ]\n }]\n})\n```\n\n**Based on user choice:**\n- **Start Work** \u2192 Delete draft, guide to `/start-work`\n- **High Accuracy Review** \u2192 Enter Momus loop (PHASE 3)\n\n---\n";
@@ -4,4 +4,4 @@
4
4
  * The markdown template structure for work plans generated by Prometheus.
5
5
  * Includes TL;DR, context, objectives, verification strategy, TODOs, and success criteria.
6
6
  */
7
- export declare const PROMETHEUS_PLAN_TEMPLATE = "## Plan Structure\n\nGenerate plan to: `.sisyphus/plans/{name}.md`\n\n```markdown\n# {Plan Title}\n\n## TL;DR\n\n> **Quick Summary**: [1-2 sentences capturing the core objective and approach]\n> \n> **Deliverables**: [Bullet list of concrete outputs]\n> - [Output 1]\n> - [Output 2]\n> \n> **Estimated Effort**: [Quick | Short | Medium | Large | XL]\n> **Parallel Execution**: [YES - N waves | NO - sequential]\n> **Critical Path**: [Task X \u2192 Task Y \u2192 Task Z]\n\n---\n\n## Context\n\n### Original Request\n[User's initial description]\n\n### Interview Summary\n**Key Discussions**:\n- [Point 1]: [User's decision/preference]\n- [Point 2]: [Agreed approach]\n\n**Research Findings**:\n- [Finding 1]: [Implication]\n- [Finding 2]: [Recommendation]\n\n### Metis Review\n**Identified Gaps** (addressed):\n- [Gap 1]: [How resolved]\n- [Gap 2]: [How resolved]\n\n---\n\n## Work Objectives\n\n### Core Objective\n[1-2 sentences: what we're achieving]\n\n### Concrete Deliverables\n- [Exact file/endpoint/feature]\n\n### Definition of Done\n- [ ] [Verifiable condition with command]\n\n### Must Have\n- [Non-negotiable requirement]\n\n### Must NOT Have (Guardrails)\n- [Explicit exclusion from Metis review]\n- [AI slop pattern to avoid]\n- [Scope boundary]\n\n---\n\n## Verification Strategy (MANDATORY)\n\n> This section is determined during interview based on Test Infrastructure Assessment.\n> The choice here affects ALL TODO acceptance criteria.\n\n### Test Decision\n- **Infrastructure exists**: [YES/NO]\n- **User wants tests**: [TDD / Tests-after / Manual-only]\n- **Framework**: [bun test / vitest / jest / pytest / none]\n\n### If TDD Enabled\n\nEach TODO follows RED-GREEN-REFACTOR:\n\n**Task Structure:**\n1. **RED**: Write failing test first\n - Test file: `[path].test.ts`\n - Test command: `bun test [file]`\n - Expected: FAIL (test exists, implementation doesn't)\n2. 
**GREEN**: Implement minimum code to pass\n - Command: `bun test [file]`\n - Expected: PASS\n3. **REFACTOR**: Clean up while keeping green\n - Command: `bun test [file]`\n - Expected: PASS (still)\n\n**Test Setup Task (if infrastructure doesn't exist):**\n- [ ] 0. Setup Test Infrastructure\n - Install: `bun add -d [test-framework]`\n - Config: Create `[config-file]`\n - Verify: `bun test --help` \u2192 shows help\n - Example: Create `src/__tests__/example.test.ts`\n - Verify: `bun test` \u2192 1 test passes\n\n### If Automated Verification Only (NO User Intervention)\n\n> **CRITICAL PRINCIPLE: ZERO USER INTERVENTION**\n>\n> **NEVER** create acceptance criteria that require:\n> - \"User manually tests...\" / \"\uC0AC\uC6A9\uC790\uAC00 \uC9C1\uC811 \uD14C\uC2A4\uD2B8...\"\n> - \"User visually confirms...\" / \"\uC0AC\uC6A9\uC790\uAC00 \uB208\uC73C\uB85C \uD655\uC778...\"\n> - \"User interacts with...\" / \"\uC0AC\uC6A9\uC790\uAC00 \uC9C1\uC811 \uC870\uC791...\"\n> - \"Ask user to verify...\" / \"\uC0AC\uC6A9\uC790\uC5D0\uAC8C \uD655\uC778 \uC694\uCCAD...\"\n> - ANY step that requires a human to perform an action\n>\n> **ALL verification MUST be automated and executable by the agent.**\n> If a verification cannot be automated, find an automated alternative or explicitly note it as a known limitation.\n\nEach TODO includes EXECUTABLE verification procedures that agents can run directly:\n\n**By Deliverable Type:**\n\n| Type | Verification Tool | Automated Procedure |\n|------|------------------|---------------------|\n| **Frontend/UI** | Playwright browser via playwright skill | Agent navigates, clicks, screenshots, asserts DOM state |\n| **TUI/CLI** | interactive_bash (tmux) | Agent runs command, captures output, validates expected strings |\n| **API/Backend** | curl / httpie via Bash | Agent sends request, parses response, validates JSON fields |\n| **Library/Module** | Node/Python REPL via Bash | Agent imports, calls function, compares output |\n| **Config/Infra** | 
Shell commands via Bash | Agent applies config, runs state check, validates output |\n\n**Evidence Requirements (Agent-Executable):**\n- Command output captured and compared against expected patterns\n- Screenshots saved to .sisyphus/evidence/ for visual verification\n- JSON response fields validated with specific assertions\n- Exit codes checked (0 = success)\n\n---\n\n## Execution Strategy\n\n### Parallel Execution Waves\n\n> Maximize throughput by grouping independent tasks into parallel waves.\n> Each wave completes before the next begins.\n\n```\nWave 1 (Start Immediately):\n\u251C\u2500\u2500 Task 1: [no dependencies]\n\u2514\u2500\u2500 Task 5: [no dependencies]\n\nWave 2 (After Wave 1):\n\u251C\u2500\u2500 Task 2: [depends: 1]\n\u251C\u2500\u2500 Task 3: [depends: 1]\n\u2514\u2500\u2500 Task 6: [depends: 5]\n\nWave 3 (After Wave 2):\n\u2514\u2500\u2500 Task 4: [depends: 2, 3]\n\nCritical Path: Task 1 \u2192 Task 2 \u2192 Task 4\nParallel Speedup: ~40% faster than sequential\n```\n\n### Dependency Matrix\n\n| Task | Depends On | Blocks | Can Parallelize With |\n|------|------------|--------|---------------------|\n| 1 | None | 2, 3 | 5 |\n| 2 | 1 | 4 | 3, 6 |\n| 3 | 1 | 4 | 2, 6 |\n| 4 | 2, 3 | None | None (final) |\n| 5 | None | 6 | 1 |\n| 6 | 5 | None | 2, 3 |\n\n### Agent Dispatch Summary\n\n| Wave | Tasks | Recommended Agents |\n|------|-------|-------------------|\n| 1 | 1, 5 | delegate_task(category=\"...\", load_skills=[...], run_in_background=true) |\n| 2 | 2, 3, 6 | dispatch parallel after Wave 1 completes |\n| 3 | 4 | final integration task |\n\n---\n\n## TODOs\n\n> Implementation + Test = ONE Task. Never separate.\n> EVERY task MUST have: Recommended Agent Profile + Parallelization info.\n\n- [ ] 1. [Task Title]\n\n **What to do**:\n - [Clear implementation steps]\n - [Test cases to cover]\n\n **Must NOT do**:\n - [Specific exclusions from guardrails]\n\n **Recommended Agent Profile**:\n > Select category + skills based on task domain. 
Justify each choice.\n - **Category**: `[visual-engineering | ultrabrain | artistry | quick | unspecified-low | unspecified-high | writing]`\n - Reason: [Why this category fits the task domain]\n - **Skills**: [`skill-1`, `skill-2`]\n - `skill-1`: [Why needed - domain overlap explanation]\n - `skill-2`: [Why needed - domain overlap explanation]\n - **Skills Evaluated but Omitted**:\n - `omitted-skill`: [Why domain doesn't overlap]\n\n **Parallelization**:\n - **Can Run In Parallel**: YES | NO\n - **Parallel Group**: Wave N (with Tasks X, Y) | Sequential\n - **Blocks**: [Tasks that depend on this task completing]\n - **Blocked By**: [Tasks this depends on] | None (can start immediately)\n\n **References** (CRITICAL - Be Exhaustive):\n\n > The executor has NO context from your interview. References are their ONLY guide.\n > Each reference must answer: \"What should I look at and WHY?\"\n\n **Pattern References** (existing code to follow):\n - `src/services/auth.ts:45-78` - Authentication flow pattern (JWT creation, refresh token handling)\n - `src/hooks/useForm.ts:12-34` - Form validation pattern (Zod schema + react-hook-form integration)\n\n **API/Type References** (contracts to implement against):\n - `src/types/user.ts:UserDTO` - Response shape for user endpoints\n - `src/api/schema.ts:createUserSchema` - Request validation schema\n\n **Test References** (testing patterns to follow):\n - `src/__tests__/auth.test.ts:describe(\"login\")` - Test structure and mocking patterns\n\n **Documentation References** (specs and requirements):\n - `docs/api-spec.md#authentication` - API contract details\n - `ARCHITECTURE.md:Database Layer` - Database access patterns\n\n **External References** (libraries and frameworks):\n - Official docs: `https://zod.dev/?id=basic-usage` - Zod validation syntax\n - Example repo: `github.com/example/project/src/auth` - Reference implementation\n\n **WHY Each Reference Matters** (explain the relevance):\n - Don't just list files - explain what 
pattern/information the executor should extract\n - Bad: `src/utils.ts` (vague, which utils? why?)\n - Good: `src/utils/validation.ts:sanitizeInput()` - Use this sanitization pattern for user input\n\n **Acceptance Criteria**:\n\n > **CRITICAL: AGENT-EXECUTABLE VERIFICATION ONLY**\n >\n > - Acceptance = EXECUTION by the agent, not \"user checks if it works\"\n > - Every criterion MUST be verifiable by running a command or using a tool\n > - NO steps like \"user opens browser\", \"user clicks\", \"user confirms\"\n > - If you write \"[placeholder]\" - REPLACE IT with actual values based on task context\n\n **If TDD (tests enabled):**\n - [ ] Test file created: src/auth/login.test.ts\n - [ ] Test covers: successful login returns JWT token\n - [ ] bun test src/auth/login.test.ts \u2192 PASS (3 tests, 0 failures)\n\n **Automated Verification (ALWAYS include, choose by deliverable type):**\n\n **For Frontend/UI changes** (using playwright skill):\n \\`\\`\\`\n # Agent executes via playwright browser automation:\n 1. Navigate to: http://localhost:3000/login\n 2. Fill: input[name=\"email\"] with \"test@example.com\"\n 3. Fill: input[name=\"password\"] with \"password123\"\n 4. Click: button[type=\"submit\"]\n 5. Wait for: selector \".dashboard-welcome\" to be visible\n 6. Assert: text \"Welcome back\" appears on page\n 7. Screenshot: .sisyphus/evidence/task-1-login-success.png\n \\`\\`\\`\n\n **For TUI/CLI changes** (using interactive_bash):\n \\`\\`\\`\n # Agent executes via tmux session:\n 1. Command: ./my-cli --config test.yaml\n 2. Wait for: \"Configuration loaded\" in output\n 3. Send keys: \"q\" to quit\n 4. Assert: Exit code 0\n 5. 
Assert: Output contains \"Goodbye\"\n \\`\\`\\`\n\n **For API/Backend changes** (using Bash curl):\n \\`\\`\\`bash\n # Agent runs:\n curl -s -X POST http://localhost:8080/api/users \\\n -H \"Content-Type: application/json\" \\\n -d '{\"email\":\"new@test.com\",\"name\":\"Test User\"}' \\\n | jq '.id'\n # Assert: Returns non-empty UUID\n # Assert: HTTP status 201\n \\`\\`\\`\n\n **For Library/Module changes** (using Bash node/bun):\n \\`\\`\\`bash\n # Agent runs:\n bun -e \"import { validateEmail } from './src/utils/validate'; console.log(validateEmail('test@example.com'))\"\n # Assert: Output is \"true\"\n \n bun -e \"import { validateEmail } from './src/utils/validate'; console.log(validateEmail('invalid'))\"\n # Assert: Output is \"false\"\n \\`\\`\\`\n\n **For Config/Infra changes** (using Bash):\n \\`\\`\\`bash\n # Agent runs:\n docker compose up -d\n # Wait 5s for containers\n docker compose ps --format json | jq '.[].State'\n # Assert: All states are \"running\"\n \\`\\`\\`\n\n **Evidence to Capture:**\n - [ ] Terminal output from verification commands (actual output, not expected)\n - [ ] Screenshot files in .sisyphus/evidence/ for UI changes\n - [ ] JSON response bodies for API changes\n\n **Commit**: YES | NO (groups with N)\n - Message: `type(scope): desc`\n - Files: `path/to/file`\n - Pre-commit: `test command`\n\n---\n\n## Commit Strategy\n\n| After Task | Message | Files | Verification |\n|------------|---------|-------|--------------|\n| 1 | `type(scope): desc` | file.ts | npm test |\n\n---\n\n## Success Criteria\n\n### Verification Commands\n```bash\ncommand # Expected: output\n```\n\n### Final Checklist\n- [ ] All \"Must Have\" present\n- [ ] All \"Must NOT Have\" absent\n- [ ] All tests pass\n```\n\n---\n";
7
+ export declare const PROMETHEUS_PLAN_TEMPLATE = "## Plan Structure\n\nGenerate plan to: `.sisyphus/plans/{name}.md`\n\n```markdown\n# {Plan Title}\n\n## TL;DR\n\n> **Quick Summary**: [1-2 sentences capturing the core objective and approach]\n> \n> **Deliverables**: [Bullet list of concrete outputs]\n> - [Output 1]\n> - [Output 2]\n> \n> **Estimated Effort**: [Quick | Short | Medium | Large | XL]\n> **Parallel Execution**: [YES - N waves | NO - sequential]\n> **Critical Path**: [Task X \u2192 Task Y \u2192 Task Z]\n\n---\n\n## Context\n\n### Original Request\n[User's initial description]\n\n### Interview Summary\n**Key Discussions**:\n- [Point 1]: [User's decision/preference]\n- [Point 2]: [Agreed approach]\n\n**Research Findings**:\n- [Finding 1]: [Implication]\n- [Finding 2]: [Recommendation]\n\n### Metis Review\n**Identified Gaps** (addressed):\n- [Gap 1]: [How resolved]\n- [Gap 2]: [How resolved]\n\n---\n\n## Work Objectives\n\n### Core Objective\n[1-2 sentences: what we're achieving]\n\n### Concrete Deliverables\n- [Exact file/endpoint/feature]\n\n### Definition of Done\n- [ ] [Verifiable condition with command]\n\n### Must Have\n- [Non-negotiable requirement]\n\n### Must NOT Have (Guardrails)\n- [Explicit exclusion from Metis review]\n- [AI slop pattern to avoid]\n- [Scope boundary]\n\n---\n\n## Verification Strategy (MANDATORY)\n\n> **UNIVERSAL RULE: ZERO HUMAN INTERVENTION**\n>\n> ALL tasks in this plan MUST be verifiable WITHOUT any human action.\n> This is NOT conditional \u2014 it applies to EVERY task, regardless of test strategy.\n>\n> **FORBIDDEN** \u2014 acceptance criteria that require:\n> - \"User manually tests...\" / \"\uC0AC\uC6A9\uC790\uAC00 \uC9C1\uC811 \uD14C\uC2A4\uD2B8...\"\n> - \"User visually confirms...\" / \"\uC0AC\uC6A9\uC790\uAC00 \uB208\uC73C\uB85C \uD655\uC778...\"\n> - \"User interacts with...\" / \"\uC0AC\uC6A9\uC790\uAC00 \uC9C1\uC811 \uC870\uC791...\"\n> - \"Ask user to verify...\" / \"\uC0AC\uC6A9\uC790\uC5D0\uAC8C 
\uD655\uC778 \uC694\uCCAD...\"\n> - ANY step where a human must perform an action\n>\n> **ALL verification is executed by the agent** using tools (Playwright, interactive_bash, curl, etc.). No exceptions.\n\n### Test Decision\n- **Infrastructure exists**: [YES/NO]\n- **Automated tests**: [TDD / Tests-after / None]\n- **Framework**: [bun test / vitest / jest / pytest / none]\n\n### If TDD Enabled\n\nEach TODO follows RED-GREEN-REFACTOR:\n\n**Task Structure:**\n1. **RED**: Write failing test first\n - Test file: `[path].test.ts`\n - Test command: `bun test [file]`\n - Expected: FAIL (test exists, implementation doesn't)\n2. **GREEN**: Implement minimum code to pass\n - Command: `bun test [file]`\n - Expected: PASS\n3. **REFACTOR**: Clean up while keeping green\n - Command: `bun test [file]`\n - Expected: PASS (still)\n\n**Test Setup Task (if infrastructure doesn't exist):**\n- [ ] 0. Setup Test Infrastructure\n - Install: `bun add -d [test-framework]`\n - Config: Create `[config-file]`\n - Verify: `bun test --help` \u2192 shows help\n - Example: Create `src/__tests__/example.test.ts`\n - Verify: `bun test` \u2192 1 test passes\n\n### Agent-Executed QA Scenarios (MANDATORY \u2014 ALL tasks)\n\n> Whether TDD is enabled or not, EVERY task MUST include Agent-Executed QA Scenarios.\n> - **With TDD**: QA scenarios complement unit tests at integration/E2E level\n> - **Without TDD**: QA scenarios are the PRIMARY verification method\n>\n> These describe how the executing agent DIRECTLY verifies the deliverable\n> by running it \u2014 opening browsers, executing commands, sending API requests.\n> The agent performs what a human tester would do, but automated via tools.\n\n**Verification Tool by Deliverable Type:**\n\n| Type | Tool | How Agent Verifies |\n|------|------|-------------------|\n| **Frontend/UI** | Playwright (playwright skill) | Navigate, interact, assert DOM, screenshot |\n| **TUI/CLI** | interactive_bash (tmux) | Run command, send keystrokes, validate output 
|\n| **API/Backend** | Bash (curl/httpie) | Send requests, parse responses, assert fields |\n| **Library/Module** | Bash (bun/node REPL) | Import, call functions, compare output |\n| **Config/Infra** | Bash (shell commands) | Apply config, run state checks, validate |\n\n**Each Scenario MUST Follow This Format:**\n\n```\nScenario: [Descriptive name \u2014 what user action/flow is being verified]\n Tool: [Playwright / interactive_bash / Bash]\n Preconditions: [What must be true before this scenario runs]\n Steps:\n 1. [Exact action with specific selector/command/endpoint]\n 2. [Next action with expected intermediate state]\n 3. [Assertion with exact expected value]\n Expected Result: [Concrete, observable outcome]\n Failure Indicators: [What would indicate failure]\n Evidence: [Screenshot path / output capture / response body path]\n```\n\n**Scenario Detail Requirements:**\n- **Selectors**: Specific CSS selectors (`.login-button`, not \"the login button\")\n- **Data**: Concrete test data (`\"test@example.com\"`, not `\"[email]\"`)\n- **Assertions**: Exact values (`text contains \"Welcome back\"`, not \"verify it works\")\n- **Timing**: Include wait conditions where relevant (`Wait for .dashboard (timeout: 10s)`)\n- **Negative Scenarios**: At least ONE failure/error scenario per feature\n- **Evidence Paths**: Specific file paths (`.sisyphus/evidence/task-N-scenario-name.png`)\n\n**Anti-patterns (NEVER write scenarios like this):**\n- \u274C \"Verify the login page works correctly\"\n- \u274C \"Check that the API returns the right data\"\n- \u274C \"Test the form validation\"\n- \u274C \"User opens browser and confirms...\"\n\n**Write scenarios like this instead:**\n- \u2705 `Navigate to /login \u2192 Fill input[name=\"email\"] with \"test@example.com\" \u2192 Fill input[name=\"password\"] with \"Pass123!\" \u2192 Click button[type=\"submit\"] \u2192 Wait for /dashboard \u2192 Assert h1 contains \"Welcome\"`\n- \u2705 `POST /api/users 
{\"name\":\"Test\",\"email\":\"new@test.com\"} \u2192 Assert status 201 \u2192 Assert response.id is UUID \u2192 GET /api/users/{id} \u2192 Assert name equals \"Test\"`\n- \u2705 `Run ./cli --config test.yaml \u2192 Wait for \"Loaded\" in stdout \u2192 Send \"q\" \u2192 Assert exit code 0 \u2192 Assert stdout contains \"Goodbye\"`\n\n**Evidence Requirements:**\n- Screenshots: `.sisyphus/evidence/` for all UI verifications\n- Terminal output: Captured for CLI/TUI verifications\n- Response bodies: Saved for API verifications\n- All evidence referenced by specific file path in acceptance criteria\n\n---\n\n## Execution Strategy\n\n### Parallel Execution Waves\n\n> Maximize throughput by grouping independent tasks into parallel waves.\n> Each wave completes before the next begins.\n\n```\nWave 1 (Start Immediately):\n\u251C\u2500\u2500 Task 1: [no dependencies]\n\u2514\u2500\u2500 Task 5: [no dependencies]\n\nWave 2 (After Wave 1):\n\u251C\u2500\u2500 Task 2: [depends: 1]\n\u251C\u2500\u2500 Task 3: [depends: 1]\n\u2514\u2500\u2500 Task 6: [depends: 5]\n\nWave 3 (After Wave 2):\n\u2514\u2500\u2500 Task 4: [depends: 2, 3]\n\nCritical Path: Task 1 \u2192 Task 2 \u2192 Task 4\nParallel Speedup: ~40% faster than sequential\n```\n\n### Dependency Matrix\n\n| Task | Depends On | Blocks | Can Parallelize With |\n|------|------------|--------|---------------------|\n| 1 | None | 2, 3 | 5 |\n| 2 | 1 | 4 | 3, 6 |\n| 3 | 1 | 4 | 2, 6 |\n| 4 | 2, 3 | None | None (final) |\n| 5 | None | 6 | 1 |\n| 6 | 5 | None | 2, 3 |\n\n### Agent Dispatch Summary\n\n| Wave | Tasks | Recommended Agents |\n|------|-------|-------------------|\n| 1 | 1, 5 | delegate_task(category=\"...\", load_skills=[...], run_in_background=false) |\n| 2 | 2, 3, 6 | dispatch parallel after Wave 1 completes |\n| 3 | 4 | final integration task |\n\n---\n\n## TODOs\n\n> Implementation + Test = ONE Task. Never separate.\n> EVERY task MUST have: Recommended Agent Profile + Parallelization info.\n\n- [ ] 1. 
[Task Title]\n\n **What to do**:\n - [Clear implementation steps]\n - [Test cases to cover]\n\n **Must NOT do**:\n - [Specific exclusions from guardrails]\n\n **Recommended Agent Profile**:\n > Select category + skills based on task domain. Justify each choice.\n - **Category**: `[visual-engineering | ultrabrain | artistry | quick | unspecified-low | unspecified-high | writing]`\n - Reason: [Why this category fits the task domain]\n - **Skills**: [`skill-1`, `skill-2`]\n - `skill-1`: [Why needed - domain overlap explanation]\n - `skill-2`: [Why needed - domain overlap explanation]\n - **Skills Evaluated but Omitted**:\n - `omitted-skill`: [Why domain doesn't overlap]\n\n **Parallelization**:\n - **Can Run In Parallel**: YES | NO\n - **Parallel Group**: Wave N (with Tasks X, Y) | Sequential\n - **Blocks**: [Tasks that depend on this task completing]\n - **Blocked By**: [Tasks this depends on] | None (can start immediately)\n\n **References** (CRITICAL - Be Exhaustive):\n\n > The executor has NO context from your interview. 
References are their ONLY guide.\n > Each reference must answer: \"What should I look at and WHY?\"\n\n **Pattern References** (existing code to follow):\n - `src/services/auth.ts:45-78` - Authentication flow pattern (JWT creation, refresh token handling)\n - `src/hooks/useForm.ts:12-34` - Form validation pattern (Zod schema + react-hook-form integration)\n\n **API/Type References** (contracts to implement against):\n - `src/types/user.ts:UserDTO` - Response shape for user endpoints\n - `src/api/schema.ts:createUserSchema` - Request validation schema\n\n **Test References** (testing patterns to follow):\n - `src/__tests__/auth.test.ts:describe(\"login\")` - Test structure and mocking patterns\n\n **Documentation References** (specs and requirements):\n - `docs/api-spec.md#authentication` - API contract details\n - `ARCHITECTURE.md:Database Layer` - Database access patterns\n\n **External References** (libraries and frameworks):\n - Official docs: `https://zod.dev/?id=basic-usage` - Zod validation syntax\n - Example repo: `github.com/example/project/src/auth` - Reference implementation\n\n **WHY Each Reference Matters** (explain the relevance):\n - Don't just list files - explain what pattern/information the executor should extract\n - Bad: `src/utils.ts` (vague, which utils? 
why?)\n - Good: `src/utils/validation.ts:sanitizeInput()` - Use this sanitization pattern for user input\n\n **Acceptance Criteria**:\n\n > **AGENT-EXECUTABLE VERIFICATION ONLY** \u2014 No human action permitted.\n > Every criterion MUST be verifiable by running a command or using a tool.\n > REPLACE all placeholders with actual values from task context.\n\n **If TDD (tests enabled):**\n - [ ] Test file created: src/auth/login.test.ts\n - [ ] Test covers: successful login returns JWT token\n - [ ] bun test src/auth/login.test.ts \u2192 PASS (3 tests, 0 failures)\n\n **Agent-Executed QA Scenarios (MANDATORY \u2014 per-scenario, ultra-detailed):**\n\n > Write MULTIPLE named scenarios per task: happy path AND failure cases.\n > Each scenario = exact tool + steps with real selectors/data + evidence path.\n\n **Example \u2014 Frontend/UI (Playwright):**\n\n \\`\\`\\`\n Scenario: Successful login redirects to dashboard\n Tool: Playwright (playwright skill)\n Preconditions: Dev server running on localhost:3000, test user exists\n Steps:\n 1. Navigate to: http://localhost:3000/login\n 2. Wait for: input[name=\"email\"] visible (timeout: 5s)\n 3. Fill: input[name=\"email\"] \u2192 \"test@example.com\"\n 4. Fill: input[name=\"password\"] \u2192 \"ValidPass123!\"\n 5. Click: button[type=\"submit\"]\n 6. Wait for: navigation to /dashboard (timeout: 10s)\n 7. Assert: h1 text contains \"Welcome back\"\n 8. Assert: cookie \"session_token\" exists\n 9. Screenshot: .sisyphus/evidence/task-1-login-success.png\n Expected Result: Dashboard loads with welcome message\n Evidence: .sisyphus/evidence/task-1-login-success.png\n\n Scenario: Login fails with invalid credentials\n Tool: Playwright (playwright skill)\n Preconditions: Dev server running, no valid user with these credentials\n Steps:\n 1. Navigate to: http://localhost:3000/login\n 2. Fill: input[name=\"email\"] \u2192 \"wrong@example.com\"\n 3. Fill: input[name=\"password\"] \u2192 \"WrongPass\"\n 4. 
Click: button[type=\"submit\"]\n 5. Wait for: .error-message visible (timeout: 5s)\n 6. Assert: .error-message text contains \"Invalid credentials\"\n 7. Assert: URL is still /login (no redirect)\n 8. Screenshot: .sisyphus/evidence/task-1-login-failure.png\n Expected Result: Error message shown, stays on login page\n Evidence: .sisyphus/evidence/task-1-login-failure.png\n \\`\\`\\`\n\n **Example \u2014 API/Backend (curl):**\n\n \\`\\`\\`\n Scenario: Create user returns 201 with UUID\n Tool: Bash (curl)\n Preconditions: Server running on localhost:8080\n Steps:\n 1. curl -s -w \"\\n%{http_code}\" -X POST http://localhost:8080/api/users \\\n -H \"Content-Type: application/json\" \\\n -d '{\"email\":\"new@test.com\",\"name\":\"Test User\"}'\n 2. Assert: HTTP status is 201\n 3. Assert: response.id matches UUID format\n 4. GET /api/users/{returned-id} \u2192 Assert name equals \"Test User\"\n Expected Result: User created and retrievable\n Evidence: Response bodies captured\n\n Scenario: Duplicate email returns 409\n Tool: Bash (curl)\n Preconditions: User with email \"new@test.com\" already exists\n Steps:\n 1. Repeat POST with same email\n 2. Assert: HTTP status is 409\n 3. Assert: response.error contains \"already exists\"\n Expected Result: Conflict error returned\n Evidence: Response body captured\n \\`\\`\\`\n\n **Example \u2014 TUI/CLI (interactive_bash):**\n\n \\`\\`\\`\n Scenario: CLI loads config and displays menu\n Tool: interactive_bash (tmux)\n Preconditions: Binary built, test config at ./test.yaml\n Steps:\n 1. tmux new-session: ./my-cli --config test.yaml\n 2. Wait for: \"Configuration loaded\" in output (timeout: 5s)\n 3. Assert: Menu items visible (\"1. Create\", \"2. List\", \"3. Exit\")\n 4. Send keys: \"3\" then Enter\n 5. Assert: \"Goodbye\" in output\n 6. 
Assert: Process exited with code 0\n Expected Result: CLI starts, shows menu, exits cleanly\n Evidence: Terminal output captured\n\n Scenario: CLI handles missing config gracefully\n Tool: interactive_bash (tmux)\n Preconditions: No config file at ./nonexistent.yaml\n Steps:\n 1. tmux new-session: ./my-cli --config nonexistent.yaml\n 2. Wait for: output (timeout: 3s)\n 3. Assert: stderr contains \"Config file not found\"\n 4. Assert: Process exited with code 1\n Expected Result: Meaningful error, non-zero exit\n Evidence: Error output captured\n \\`\\`\\`\n\n **Evidence to Capture:**\n - [ ] Screenshots in .sisyphus/evidence/ for UI scenarios\n - [ ] Terminal output for CLI/TUI scenarios\n - [ ] Response bodies for API scenarios\n - [ ] Each evidence file named: task-{N}-{scenario-slug}.{ext}\n\n **Commit**: YES | NO (groups with N)\n - Message: `type(scope): desc`\n - Files: `path/to/file`\n - Pre-commit: `test command`\n\n---\n\n## Commit Strategy\n\n| After Task | Message | Files | Verification |\n|------------|---------|-------|--------------|\n| 1 | `type(scope): desc` | file.ts | npm test |\n\n---\n\n## Success Criteria\n\n### Verification Commands\n```bash\ncommand # Expected: output\n```\n\n### Final Checklist\n- [ ] All \"Must Have\" present\n- [ ] All \"Must NOT Have\" absent\n- [ ] All tests pass\n```\n\n---\n";
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Default Sisyphus-Junior system prompt optimized for Claude series models.
3
+ *
4
+ * Key characteristics:
5
+ * - Compensates for Claude's tendency to be "helpful" by forcing explicit constraints
6
+ * - Strong emphasis on blocking delegation attempts
7
+ * - Extended reasoning context for complex tasks
8
+ */
9
+ export declare function buildDefaultSisyphusJuniorPrompt(useTaskSystem: boolean, promptAppend?: string): string;
@@ -0,0 +1,18 @@
1
+ /**
2
+ * GPT-5.2 Optimized Sisyphus-Junior System Prompt
3
+ *
4
+ * Restructured following OpenAI's GPT-5.2 Prompting Guide principles:
5
+ * - Explicit verbosity constraints (2-4 sentences for updates)
6
+ * - Scope discipline (no extra features, implement exactly what's specified)
7
+ * - Tool usage rules (prefer tools over internal knowledge)
8
+ * - Uncertainty handling (ask clarifying questions)
9
+ * - Compact, direct instructions
10
+ * - XML-style section tags for clear structure
11
+ *
12
+ * Key characteristics (from the GPT-5.2 Prompting Guide):
13
+ * - "Stronger instruction adherence" - follows instructions more literally
14
+ * - "Conservative grounding bias" - prefers correctness over speed
15
+ * - "More deliberate scaffolding" - builds clearer plans by default
16
+ * - Explicit decision criteria needed (model won't infer)
17
+ */
18
+ export declare function buildGptSisyphusJuniorPrompt(useTaskSystem: boolean, promptAppend?: string): string;
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Sisyphus-Junior - Focused Task Executor
3
+ *
4
+ * Executes delegated tasks directly without spawning other agents.
5
+ * Category-spawned executor with domain-specific configurations.
6
+ *
7
+ * Routing:
8
+ * 1. GPT models (openai/*, github-copilot/gpt-*) -> gpt.ts (GPT-5.2 optimized)
9
+ * 2. Default (Claude, etc.) -> default.ts (Claude-optimized)
10
+ */
11
+ import type { AgentConfig } from "@opencode-ai/sdk";
12
+ import type { AgentOverrideConfig } from "../../config/schema";
13
+ export { buildDefaultSisyphusJuniorPrompt } from "./default";
14
+ export { buildGptSisyphusJuniorPrompt } from "./gpt";
15
+ export declare const SISYPHUS_JUNIOR_DEFAULTS: {
16
+ readonly model: "anthropic/claude-sonnet-4-5";
17
+ readonly temperature: 0.1;
18
+ };
19
+ export type SisyphusJuniorPromptSource = "default" | "gpt";
20
+ /**
21
+ * Determines which Sisyphus-Junior prompt source ("default" or "gpt") to use for the given model.
22
+ */
23
+ export declare function getSisyphusJuniorPromptSource(model?: string): SisyphusJuniorPromptSource;
24
+ /**
25
+ * Builds the appropriate Sisyphus-Junior prompt based on model.
26
+ */
27
+ export declare function buildSisyphusJuniorPrompt(model: string | undefined, useTaskSystem: boolean, promptAppend?: string): string;
28
+ export declare function createSisyphusJuniorAgentWithOverrides(override: AgentOverrideConfig | undefined, systemDefaultModel?: string, useTaskSystem?: boolean): AgentConfig;
29
+ export declare namespace createSisyphusJuniorAgentWithOverrides {
30
+ var mode: "subagent";
31
+ }
@@ -2,7 +2,7 @@ import type { AgentConfig } from "@opencode-ai/sdk";
2
2
  import type { AgentPromptMetadata } from "./types";
3
3
  export declare const SISYPHUS_PROMPT_METADATA: AgentPromptMetadata;
4
4
  import type { AvailableAgent, AvailableSkill, AvailableCategory } from "./dynamic-agent-prompt-builder";
5
- export declare function createSisyphusAgent(model: string, availableAgents?: AvailableAgent[], availableToolNames?: string[], availableSkills?: AvailableSkill[], availableCategories?: AvailableCategory[]): AgentConfig;
5
+ export declare function createSisyphusAgent(model: string, availableAgents?: AvailableAgent[], availableToolNames?: string[], availableSkills?: AvailableSkill[], availableCategories?: AvailableCategory[], useTaskSystem?: boolean): AgentConfig;
6
6
  export declare namespace createSisyphusAgent {
7
7
  var mode: "primary";
8
8
  }