oh-my-opencode 3.2.4 → 3.3.0

This diff shows the changes between publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only.
Files changed (60)
  1. package/dist/agents/atlas/default.d.ts +1 -1
  2. package/dist/agents/atlas/gpt.d.ts +1 -1
  3. package/dist/agents/atlas/index.d.ts +1 -1
  4. package/dist/agents/prometheus/high-accuracy-mode.d.ts +1 -1
  5. package/dist/agents/prometheus/index.d.ts +1 -1
  6. package/dist/agents/prometheus/interview-mode.d.ts +1 -1
  7. package/dist/agents/prometheus/plan-generation.d.ts +1 -1
  8. package/dist/agents/prometheus/plan-template.d.ts +1 -1
  9. package/dist/cli/index.js +2248 -2062
  10. package/dist/cli/run/agent-resolver.d.ts +5 -0
  11. package/dist/cli/run/events.d.ts +2 -0
  12. package/dist/cli/run/index.d.ts +6 -1
  13. package/dist/cli/run/json-output.d.ts +12 -0
  14. package/dist/cli/run/on-complete-hook.d.ts +7 -0
  15. package/dist/cli/run/runner.d.ts +2 -4
  16. package/dist/cli/run/server-connection.d.ts +6 -0
  17. package/dist/cli/run/session-resolver.d.ts +5 -0
  18. package/dist/cli/run/types.d.ts +17 -0
  19. package/dist/config/schema.d.ts +156 -2
  20. package/dist/features/background-agent/manager.d.ts +2 -1
  21. package/dist/features/builtin-commands/templates/init-deep.d.ts +1 -1
  22. package/dist/features/tool-metadata-store/index.d.ts +39 -0
  23. package/dist/hooks/agent-usage-reminder/constants.d.ts +1 -1
  24. package/dist/hooks/anthropic-context-window-limit-recovery/deduplication-recovery.d.ts +3 -0
  25. package/dist/hooks/anthropic-context-window-limit-recovery/index.d.ts +2 -13
  26. package/dist/hooks/anthropic-context-window-limit-recovery/pruning-tool-output-truncation.d.ts +3 -0
  27. package/dist/hooks/anthropic-context-window-limit-recovery/recovery-hook.d.ts +13 -0
  28. package/dist/hooks/anthropic-effort/index.d.ts +26 -0
  29. package/dist/hooks/atlas/index.d.ts +1 -0
  30. package/dist/hooks/compaction-todo-preserver/index.d.ts +11 -0
  31. package/dist/hooks/index.d.ts +1 -0
  32. package/dist/hooks/keyword-detector/ultrawork/default.d.ts +2 -2
  33. package/dist/hooks/keyword-detector/ultrawork/gpt5.2.d.ts +1 -1
  34. package/dist/hooks/keyword-detector/ultrawork/planner.d.ts +1 -1
  35. package/dist/hooks/tasks-todowrite-disabler/constants.d.ts +1 -1
  36. package/dist/hooks/unstable-agent-babysitter/index.d.ts +19 -0
  37. package/dist/index.js +1864 -1214
  38. package/dist/shared/index.d.ts +2 -0
  39. package/dist/shared/migration/agent-category.d.ts +19 -0
  40. package/dist/shared/migration/agent-names.d.ts +6 -0
  41. package/dist/shared/migration/config-migration.d.ts +1 -0
  42. package/dist/shared/migration/hook-names.d.ts +6 -0
  43. package/dist/shared/migration/model-versions.d.ts +13 -0
  44. package/dist/shared/migration.d.ts +5 -44
  45. package/dist/shared/safe-create-hook.d.ts +5 -0
  46. package/dist/shared/truncate-description.d.ts +1 -0
  47. package/dist/tools/ast-grep/constants.d.ts +1 -1
  48. package/dist/tools/ast-grep/index.d.ts +1 -4
  49. package/dist/tools/ast-grep/tools.d.ts +2 -2
  50. package/dist/tools/call-omo-agent/constants.d.ts +1 -1
  51. package/dist/tools/delegate-task/constants.d.ts +1 -1
  52. package/dist/tools/delegate-task/types.d.ts +6 -1
  53. package/dist/tools/glob/index.d.ts +1 -2
  54. package/dist/tools/glob/tools.d.ts +2 -1
  55. package/dist/tools/grep/index.d.ts +1 -2
  56. package/dist/tools/grep/tools.d.ts +2 -1
  57. package/dist/tools/index.d.ts +4 -0
  58. package/dist/tools/session-manager/index.d.ts +1 -1
  59. package/dist/tools/session-manager/tools.d.ts +2 -4
  60. package/package.json +8 -8
@@ -4,4 +4,4 @@
  * The markdown template structure for work plans generated by Prometheus.
  * Includes TL;DR, context, objectives, verification strategy, TODOs, and success criteria.
  */
- export declare const PROMETHEUS_PLAN_TEMPLATE = "## Plan Structure\n\nGenerate plan to: `.sisyphus/plans/{name}.md`\n\n```markdown\n# {Plan Title}\n\n## TL;DR\n\n> **Quick Summary**: [1-2 sentences capturing the core objective and approach]\n> \n> **Deliverables**: [Bullet list of concrete outputs]\n> - [Output 1]\n> - [Output 2]\n> \n> **Estimated Effort**: [Quick | Short | Medium | Large | XL]\n> **Parallel Execution**: [YES - N waves | NO - sequential]\n> **Critical Path**: [Task X \u2192 Task Y \u2192 Task Z]\n\n---\n\n## Context\n\n### Original Request\n[User's initial description]\n\n### Interview Summary\n**Key Discussions**:\n- [Point 1]: [User's decision/preference]\n- [Point 2]: [Agreed approach]\n\n**Research Findings**:\n- [Finding 1]: [Implication]\n- [Finding 2]: [Recommendation]\n\n### Metis Review\n**Identified Gaps** (addressed):\n- [Gap 1]: [How resolved]\n- [Gap 2]: [How resolved]\n\n---\n\n## Work Objectives\n\n### Core Objective\n[1-2 sentences: what we're achieving]\n\n### Concrete Deliverables\n- [Exact file/endpoint/feature]\n\n### Definition of Done\n- [ ] [Verifiable condition with command]\n\n### Must Have\n- [Non-negotiable requirement]\n\n### Must NOT Have (Guardrails)\n- [Explicit exclusion from Metis review]\n- [AI slop pattern to avoid]\n- [Scope boundary]\n\n---\n\n## Verification Strategy (MANDATORY)\n\n> **UNIVERSAL RULE: ZERO HUMAN INTERVENTION**\n>\n> ALL tasks in this plan MUST be verifiable WITHOUT any human action.\n> This is NOT conditional \u2014 it applies to EVERY task, regardless of test strategy.\n>\n> **FORBIDDEN** \u2014 acceptance criteria that require:\n> - \"User manually tests...\" / \"\uC0AC\uC6A9\uC790\uAC00 \uC9C1\uC811 \uD14C\uC2A4\uD2B8...\"\n> - \"User visually confirms...\" / \"\uC0AC\uC6A9\uC790\uAC00 \uB208\uC73C\uB85C \uD655\uC778...\"\n> - \"User interacts with...\" / \"\uC0AC\uC6A9\uC790\uAC00 \uC9C1\uC811 \uC870\uC791...\"\n> - \"Ask user to verify...\" / \"\uC0AC\uC6A9\uC790\uC5D0\uAC8C 
\uD655\uC778 \uC694\uCCAD...\"\n> - ANY step where a human must perform an action\n>\n> **ALL verification is executed by the agent** using tools (Playwright, interactive_bash, curl, etc.). No exceptions.\n\n### Test Decision\n- **Infrastructure exists**: [YES/NO]\n- **Automated tests**: [TDD / Tests-after / None]\n- **Framework**: [bun test / vitest / jest / pytest / none]\n\n### If TDD Enabled\n\nEach TODO follows RED-GREEN-REFACTOR:\n\n**Task Structure:**\n1. **RED**: Write failing test first\n - Test file: `[path].test.ts`\n - Test command: `bun test [file]`\n - Expected: FAIL (test exists, implementation doesn't)\n2. **GREEN**: Implement minimum code to pass\n - Command: `bun test [file]`\n - Expected: PASS\n3. **REFACTOR**: Clean up while keeping green\n - Command: `bun test [file]`\n - Expected: PASS (still)\n\n**Test Setup Task (if infrastructure doesn't exist):**\n- [ ] 0. Setup Test Infrastructure\n - Install: `bun add -d [test-framework]`\n - Config: Create `[config-file]`\n - Verify: `bun test --help` \u2192 shows help\n - Example: Create `src/__tests__/example.test.ts`\n - Verify: `bun test` \u2192 1 test passes\n\n### Agent-Executed QA Scenarios (MANDATORY \u2014 ALL tasks)\n\n> Whether TDD is enabled or not, EVERY task MUST include Agent-Executed QA Scenarios.\n> - **With TDD**: QA scenarios complement unit tests at integration/E2E level\n> - **Without TDD**: QA scenarios are the PRIMARY verification method\n>\n> These describe how the executing agent DIRECTLY verifies the deliverable\n> by running it \u2014 opening browsers, executing commands, sending API requests.\n> The agent performs what a human tester would do, but automated via tools.\n\n**Verification Tool by Deliverable Type:**\n\n| Type | Tool | How Agent Verifies |\n|------|------|-------------------|\n| **Frontend/UI** | Playwright (playwright skill) | Navigate, interact, assert DOM, screenshot |\n| **TUI/CLI** | interactive_bash (tmux) | Run command, send keystrokes, validate output 
|\n| **API/Backend** | Bash (curl/httpie) | Send requests, parse responses, assert fields |\n| **Library/Module** | Bash (bun/node REPL) | Import, call functions, compare output |\n| **Config/Infra** | Bash (shell commands) | Apply config, run state checks, validate |\n\n**Each Scenario MUST Follow This Format:**\n\n```\nScenario: [Descriptive name \u2014 what user action/flow is being verified]\n Tool: [Playwright / interactive_bash / Bash]\n Preconditions: [What must be true before this scenario runs]\n Steps:\n 1. [Exact action with specific selector/command/endpoint]\n 2. [Next action with expected intermediate state]\n 3. [Assertion with exact expected value]\n Expected Result: [Concrete, observable outcome]\n Failure Indicators: [What would indicate failure]\n Evidence: [Screenshot path / output capture / response body path]\n```\n\n**Scenario Detail Requirements:**\n- **Selectors**: Specific CSS selectors (`.login-button`, not \"the login button\")\n- **Data**: Concrete test data (`\"test@example.com\"`, not `\"[email]\"`)\n- **Assertions**: Exact values (`text contains \"Welcome back\"`, not \"verify it works\")\n- **Timing**: Include wait conditions where relevant (`Wait for .dashboard (timeout: 10s)`)\n- **Negative Scenarios**: At least ONE failure/error scenario per feature\n- **Evidence Paths**: Specific file paths (`.sisyphus/evidence/task-N-scenario-name.png`)\n\n**Anti-patterns (NEVER write scenarios like this):**\n- \u274C \"Verify the login page works correctly\"\n- \u274C \"Check that the API returns the right data\"\n- \u274C \"Test the form validation\"\n- \u274C \"User opens browser and confirms...\"\n\n**Write scenarios like this instead:**\n- \u2705 `Navigate to /login \u2192 Fill input[name=\"email\"] with \"test@example.com\" \u2192 Fill input[name=\"password\"] with \"Pass123!\" \u2192 Click button[type=\"submit\"] \u2192 Wait for /dashboard \u2192 Assert h1 contains \"Welcome\"`\n- \u2705 `POST /api/users 
{\"name\":\"Test\",\"email\":\"new@test.com\"} \u2192 Assert status 201 \u2192 Assert response.id is UUID \u2192 GET /api/users/{id} \u2192 Assert name equals \"Test\"`\n- \u2705 `Run ./cli --config test.yaml \u2192 Wait for \"Loaded\" in stdout \u2192 Send \"q\" \u2192 Assert exit code 0 \u2192 Assert stdout contains \"Goodbye\"`\n\n**Evidence Requirements:**\n- Screenshots: `.sisyphus/evidence/` for all UI verifications\n- Terminal output: Captured for CLI/TUI verifications\n- Response bodies: Saved for API verifications\n- All evidence referenced by specific file path in acceptance criteria\n\n---\n\n## Execution Strategy\n\n### Parallel Execution Waves\n\n> Maximize throughput by grouping independent tasks into parallel waves.\n> Each wave completes before the next begins.\n\n```\nWave 1 (Start Immediately):\n\u251C\u2500\u2500 Task 1: [no dependencies]\n\u2514\u2500\u2500 Task 5: [no dependencies]\n\nWave 2 (After Wave 1):\n\u251C\u2500\u2500 Task 2: [depends: 1]\n\u251C\u2500\u2500 Task 3: [depends: 1]\n\u2514\u2500\u2500 Task 6: [depends: 5]\n\nWave 3 (After Wave 2):\n\u2514\u2500\u2500 Task 4: [depends: 2, 3]\n\nCritical Path: Task 1 \u2192 Task 2 \u2192 Task 4\nParallel Speedup: ~40% faster than sequential\n```\n\n### Dependency Matrix\n\n| Task | Depends On | Blocks | Can Parallelize With |\n|------|------------|--------|---------------------|\n| 1 | None | 2, 3 | 5 |\n| 2 | 1 | 4 | 3, 6 |\n| 3 | 1 | 4 | 2, 6 |\n| 4 | 2, 3 | None | None (final) |\n| 5 | None | 6 | 1 |\n| 6 | 5 | None | 2, 3 |\n\n### Agent Dispatch Summary\n\n| Wave | Tasks | Recommended Agents |\n|------|-------|-------------------|\n| 1 | 1, 5 | delegate_task(category=\"...\", load_skills=[...], run_in_background=false) |\n| 2 | 2, 3, 6 | dispatch parallel after Wave 1 completes |\n| 3 | 4 | final integration task |\n\n---\n\n## TODOs\n\n> Implementation + Test = ONE Task. Never separate.\n> EVERY task MUST have: Recommended Agent Profile + Parallelization info.\n\n- [ ] 1. 
[Task Title]\n\n **What to do**:\n - [Clear implementation steps]\n - [Test cases to cover]\n\n **Must NOT do**:\n - [Specific exclusions from guardrails]\n\n **Recommended Agent Profile**:\n > Select category + skills based on task domain. Justify each choice.\n - **Category**: `[visual-engineering | ultrabrain | artistry | quick | unspecified-low | unspecified-high | writing]`\n - Reason: [Why this category fits the task domain]\n - **Skills**: [`skill-1`, `skill-2`]\n - `skill-1`: [Why needed - domain overlap explanation]\n - `skill-2`: [Why needed - domain overlap explanation]\n - **Skills Evaluated but Omitted**:\n - `omitted-skill`: [Why domain doesn't overlap]\n\n **Parallelization**:\n - **Can Run In Parallel**: YES | NO\n - **Parallel Group**: Wave N (with Tasks X, Y) | Sequential\n - **Blocks**: [Tasks that depend on this task completing]\n - **Blocked By**: [Tasks this depends on] | None (can start immediately)\n\n **References** (CRITICAL - Be Exhaustive):\n\n > The executor has NO context from your interview. 
References are their ONLY guide.\n > Each reference must answer: \"What should I look at and WHY?\"\n\n **Pattern References** (existing code to follow):\n - `src/services/auth.ts:45-78` - Authentication flow pattern (JWT creation, refresh token handling)\n - `src/hooks/useForm.ts:12-34` - Form validation pattern (Zod schema + react-hook-form integration)\n\n **API/Type References** (contracts to implement against):\n - `src/types/user.ts:UserDTO` - Response shape for user endpoints\n - `src/api/schema.ts:createUserSchema` - Request validation schema\n\n **Test References** (testing patterns to follow):\n - `src/__tests__/auth.test.ts:describe(\"login\")` - Test structure and mocking patterns\n\n **Documentation References** (specs and requirements):\n - `docs/api-spec.md#authentication` - API contract details\n - `ARCHITECTURE.md:Database Layer` - Database access patterns\n\n **External References** (libraries and frameworks):\n - Official docs: `https://zod.dev/?id=basic-usage` - Zod validation syntax\n - Example repo: `github.com/example/project/src/auth` - Reference implementation\n\n **WHY Each Reference Matters** (explain the relevance):\n - Don't just list files - explain what pattern/information the executor should extract\n - Bad: `src/utils.ts` (vague, which utils? 
why?)\n - Good: `src/utils/validation.ts:sanitizeInput()` - Use this sanitization pattern for user input\n\n **Acceptance Criteria**:\n\n > **AGENT-EXECUTABLE VERIFICATION ONLY** \u2014 No human action permitted.\n > Every criterion MUST be verifiable by running a command or using a tool.\n > REPLACE all placeholders with actual values from task context.\n\n **If TDD (tests enabled):**\n - [ ] Test file created: src/auth/login.test.ts\n - [ ] Test covers: successful login returns JWT token\n - [ ] bun test src/auth/login.test.ts \u2192 PASS (3 tests, 0 failures)\n\n **Agent-Executed QA Scenarios (MANDATORY \u2014 per-scenario, ultra-detailed):**\n\n > Write MULTIPLE named scenarios per task: happy path AND failure cases.\n > Each scenario = exact tool + steps with real selectors/data + evidence path.\n\n **Example \u2014 Frontend/UI (Playwright):**\n\n \\`\\`\\`\n Scenario: Successful login redirects to dashboard\n Tool: Playwright (playwright skill)\n Preconditions: Dev server running on localhost:3000, test user exists\n Steps:\n 1. Navigate to: http://localhost:3000/login\n 2. Wait for: input[name=\"email\"] visible (timeout: 5s)\n 3. Fill: input[name=\"email\"] \u2192 \"test@example.com\"\n 4. Fill: input[name=\"password\"] \u2192 \"ValidPass123!\"\n 5. Click: button[type=\"submit\"]\n 6. Wait for: navigation to /dashboard (timeout: 10s)\n 7. Assert: h1 text contains \"Welcome back\"\n 8. Assert: cookie \"session_token\" exists\n 9. Screenshot: .sisyphus/evidence/task-1-login-success.png\n Expected Result: Dashboard loads with welcome message\n Evidence: .sisyphus/evidence/task-1-login-success.png\n\n Scenario: Login fails with invalid credentials\n Tool: Playwright (playwright skill)\n Preconditions: Dev server running, no valid user with these credentials\n Steps:\n 1. Navigate to: http://localhost:3000/login\n 2. Fill: input[name=\"email\"] \u2192 \"wrong@example.com\"\n 3. Fill: input[name=\"password\"] \u2192 \"WrongPass\"\n 4. 
Click: button[type=\"submit\"]\n 5. Wait for: .error-message visible (timeout: 5s)\n 6. Assert: .error-message text contains \"Invalid credentials\"\n 7. Assert: URL is still /login (no redirect)\n 8. Screenshot: .sisyphus/evidence/task-1-login-failure.png\n Expected Result: Error message shown, stays on login page\n Evidence: .sisyphus/evidence/task-1-login-failure.png\n \\`\\`\\`\n\n **Example \u2014 API/Backend (curl):**\n\n \\`\\`\\`\n Scenario: Create user returns 201 with UUID\n Tool: Bash (curl)\n Preconditions: Server running on localhost:8080\n Steps:\n 1. curl -s -w \"\\n%{http_code}\" -X POST http://localhost:8080/api/users \\\n -H \"Content-Type: application/json\" \\\n -d '{\"email\":\"new@test.com\",\"name\":\"Test User\"}'\n 2. Assert: HTTP status is 201\n 3. Assert: response.id matches UUID format\n 4. GET /api/users/{returned-id} \u2192 Assert name equals \"Test User\"\n Expected Result: User created and retrievable\n Evidence: Response bodies captured\n\n Scenario: Duplicate email returns 409\n Tool: Bash (curl)\n Preconditions: User with email \"new@test.com\" already exists\n Steps:\n 1. Repeat POST with same email\n 2. Assert: HTTP status is 409\n 3. Assert: response.error contains \"already exists\"\n Expected Result: Conflict error returned\n Evidence: Response body captured\n \\`\\`\\`\n\n **Example \u2014 TUI/CLI (interactive_bash):**\n\n \\`\\`\\`\n Scenario: CLI loads config and displays menu\n Tool: interactive_bash (tmux)\n Preconditions: Binary built, test config at ./test.yaml\n Steps:\n 1. tmux new-session: ./my-cli --config test.yaml\n 2. Wait for: \"Configuration loaded\" in output (timeout: 5s)\n 3. Assert: Menu items visible (\"1. Create\", \"2. List\", \"3. Exit\")\n 4. Send keys: \"3\" then Enter\n 5. Assert: \"Goodbye\" in output\n 6. 
Assert: Process exited with code 0\n Expected Result: CLI starts, shows menu, exits cleanly\n Evidence: Terminal output captured\n\n Scenario: CLI handles missing config gracefully\n Tool: interactive_bash (tmux)\n Preconditions: No config file at ./nonexistent.yaml\n Steps:\n 1. tmux new-session: ./my-cli --config nonexistent.yaml\n 2. Wait for: output (timeout: 3s)\n 3. Assert: stderr contains \"Config file not found\"\n 4. Assert: Process exited with code 1\n Expected Result: Meaningful error, non-zero exit\n Evidence: Error output captured\n \\`\\`\\`\n\n **Evidence to Capture:**\n - [ ] Screenshots in .sisyphus/evidence/ for UI scenarios\n - [ ] Terminal output for CLI/TUI scenarios\n - [ ] Response bodies for API scenarios\n - [ ] Each evidence file named: task-{N}-{scenario-slug}.{ext}\n\n **Commit**: YES | NO (groups with N)\n - Message: `type(scope): desc`\n - Files: `path/to/file`\n - Pre-commit: `test command`\n\n---\n\n## Commit Strategy\n\n| After Task | Message | Files | Verification |\n|------------|---------|-------|--------------|\n| 1 | `type(scope): desc` | file.ts | npm test |\n\n---\n\n## Success Criteria\n\n### Verification Commands\n```bash\ncommand # Expected: output\n```\n\n### Final Checklist\n- [ ] All \"Must Have\" present\n- [ ] All \"Must NOT Have\" absent\n- [ ] All tests pass\n```\n\n---\n";
+ export declare const PROMETHEUS_PLAN_TEMPLATE = "## Plan Structure\n\nGenerate plan to: `.sisyphus/plans/{name}.md`\n\n```markdown\n# {Plan Title}\n\n## TL;DR\n\n> **Quick Summary**: [1-2 sentences capturing the core objective and approach]\n> \n> **Deliverables**: [Bullet list of concrete outputs]\n> - [Output 1]\n> - [Output 2]\n> \n> **Estimated Effort**: [Quick | Short | Medium | Large | XL]\n> **Parallel Execution**: [YES - N waves | NO - sequential]\n> **Critical Path**: [Task X \u2192 Task Y \u2192 Task Z]\n\n---\n\n## Context\n\n### Original Request\n[User's initial description]\n\n### Interview Summary\n**Key Discussions**:\n- [Point 1]: [User's decision/preference]\n- [Point 2]: [Agreed approach]\n\n**Research Findings**:\n- [Finding 1]: [Implication]\n- [Finding 2]: [Recommendation]\n\n### Metis Review\n**Identified Gaps** (addressed):\n- [Gap 1]: [How resolved]\n- [Gap 2]: [How resolved]\n\n---\n\n## Work Objectives\n\n### Core Objective\n[1-2 sentences: what we're achieving]\n\n### Concrete Deliverables\n- [Exact file/endpoint/feature]\n\n### Definition of Done\n- [ ] [Verifiable condition with command]\n\n### Must Have\n- [Non-negotiable requirement]\n\n### Must NOT Have (Guardrails)\n- [Explicit exclusion from Metis review]\n- [AI slop pattern to avoid]\n- [Scope boundary]\n\n---\n\n## Verification Strategy (MANDATORY)\n\n> **UNIVERSAL RULE: ZERO HUMAN INTERVENTION**\n>\n> ALL tasks in this plan MUST be verifiable WITHOUT any human action.\n> This is NOT conditional \u2014 it applies to EVERY task, regardless of test strategy.\n>\n> **FORBIDDEN** \u2014 acceptance criteria that require:\n> - \"User manually tests...\" / \"\uC0AC\uC6A9\uC790\uAC00 \uC9C1\uC811 \uD14C\uC2A4\uD2B8...\"\n> - \"User visually confirms...\" / \"\uC0AC\uC6A9\uC790\uAC00 \uB208\uC73C\uB85C \uD655\uC778...\"\n> - \"User interacts with...\" / \"\uC0AC\uC6A9\uC790\uAC00 \uC9C1\uC811 \uC870\uC791...\"\n> - \"Ask user to verify...\" / \"\uC0AC\uC6A9\uC790\uC5D0\uAC8C 
\uD655\uC778 \uC694\uCCAD...\"\n> - ANY step where a human must perform an action\n>\n> **ALL verification is executed by the agent** using tools (Playwright, interactive_bash, curl, etc.). No exceptions.\n\n### Test Decision\n- **Infrastructure exists**: [YES/NO]\n- **Automated tests**: [TDD / Tests-after / None]\n- **Framework**: [bun test / vitest / jest / pytest / none]\n\n### If TDD Enabled\n\nEach TODO follows RED-GREEN-REFACTOR:\n\n**Task Structure:**\n1. **RED**: Write failing test first\n - Test file: `[path].test.ts`\n - Test command: `bun test [file]`\n - Expected: FAIL (test exists, implementation doesn't)\n2. **GREEN**: Implement minimum code to pass\n - Command: `bun test [file]`\n - Expected: PASS\n3. **REFACTOR**: Clean up while keeping green\n - Command: `bun test [file]`\n - Expected: PASS (still)\n\n**Test Setup Task (if infrastructure doesn't exist):**\n- [ ] 0. Setup Test Infrastructure\n - Install: `bun add -d [test-framework]`\n - Config: Create `[config-file]`\n - Verify: `bun test --help` \u2192 shows help\n - Example: Create `src/__tests__/example.test.ts`\n - Verify: `bun test` \u2192 1 test passes\n\n### Agent-Executed QA Scenarios (MANDATORY \u2014 ALL tasks)\n\n> Whether TDD is enabled or not, EVERY task MUST include Agent-Executed QA Scenarios.\n> - **With TDD**: QA scenarios complement unit tests at integration/E2E level\n> - **Without TDD**: QA scenarios are the PRIMARY verification method\n>\n> These describe how the executing agent DIRECTLY verifies the deliverable\n> by running it \u2014 opening browsers, executing commands, sending API requests.\n> The agent performs what a human tester would do, but automated via tools.\n\n**Verification Tool by Deliverable Type:**\n\n| Type | Tool | How Agent Verifies |\n|------|------|-------------------|\n| **Frontend/UI** | Playwright (playwright skill) | Navigate, interact, assert DOM, screenshot |\n| **TUI/CLI** | interactive_bash (tmux) | Run command, send keystrokes, validate output 
|\n| **API/Backend** | Bash (curl/httpie) | Send requests, parse responses, assert fields |\n| **Library/Module** | Bash (bun/node REPL) | Import, call functions, compare output |\n| **Config/Infra** | Bash (shell commands) | Apply config, run state checks, validate |\n\n**Each Scenario MUST Follow This Format:**\n\n```\nScenario: [Descriptive name \u2014 what user action/flow is being verified]\n Tool: [Playwright / interactive_bash / Bash]\n Preconditions: [What must be true before this scenario runs]\n Steps:\n 1. [Exact action with specific selector/command/endpoint]\n 2. [Next action with expected intermediate state]\n 3. [Assertion with exact expected value]\n Expected Result: [Concrete, observable outcome]\n Failure Indicators: [What would indicate failure]\n Evidence: [Screenshot path / output capture / response body path]\n```\n\n**Scenario Detail Requirements:**\n- **Selectors**: Specific CSS selectors (`.login-button`, not \"the login button\")\n- **Data**: Concrete test data (`\"test@example.com\"`, not `\"[email]\"`)\n- **Assertions**: Exact values (`text contains \"Welcome back\"`, not \"verify it works\")\n- **Timing**: Include wait conditions where relevant (`Wait for .dashboard (timeout: 10s)`)\n- **Negative Scenarios**: At least ONE failure/error scenario per feature\n- **Evidence Paths**: Specific file paths (`.sisyphus/evidence/task-N-scenario-name.png`)\n\n**Anti-patterns (NEVER write scenarios like this):**\n- \u274C \"Verify the login page works correctly\"\n- \u274C \"Check that the API returns the right data\"\n- \u274C \"Test the form validation\"\n- \u274C \"User opens browser and confirms...\"\n\n**Write scenarios like this instead:**\n- \u2705 `Navigate to /login \u2192 Fill input[name=\"email\"] with \"test@example.com\" \u2192 Fill input[name=\"password\"] with \"Pass123!\" \u2192 Click button[type=\"submit\"] \u2192 Wait for /dashboard \u2192 Assert h1 contains \"Welcome\"`\n- \u2705 `POST /api/users 
{\"name\":\"Test\",\"email\":\"new@test.com\"} \u2192 Assert status 201 \u2192 Assert response.id is UUID \u2192 GET /api/users/{id} \u2192 Assert name equals \"Test\"`\n- \u2705 `Run ./cli --config test.yaml \u2192 Wait for \"Loaded\" in stdout \u2192 Send \"q\" \u2192 Assert exit code 0 \u2192 Assert stdout contains \"Goodbye\"`\n\n**Evidence Requirements:**\n- Screenshots: `.sisyphus/evidence/` for all UI verifications\n- Terminal output: Captured for CLI/TUI verifications\n- Response bodies: Saved for API verifications\n- All evidence referenced by specific file path in acceptance criteria\n\n---\n\n## Execution Strategy\n\n### Parallel Execution Waves\n\n> Maximize throughput by grouping independent tasks into parallel waves.\n> Each wave completes before the next begins.\n\n```\nWave 1 (Start Immediately):\n\u251C\u2500\u2500 Task 1: [no dependencies]\n\u2514\u2500\u2500 Task 5: [no dependencies]\n\nWave 2 (After Wave 1):\n\u251C\u2500\u2500 Task 2: [depends: 1]\n\u251C\u2500\u2500 Task 3: [depends: 1]\n\u2514\u2500\u2500 Task 6: [depends: 5]\n\nWave 3 (After Wave 2):\n\u2514\u2500\u2500 Task 4: [depends: 2, 3]\n\nCritical Path: Task 1 \u2192 Task 2 \u2192 Task 4\nParallel Speedup: ~40% faster than sequential\n```\n\n### Dependency Matrix\n\n| Task | Depends On | Blocks | Can Parallelize With |\n|------|------------|--------|---------------------|\n| 1 | None | 2, 3 | 5 |\n| 2 | 1 | 4 | 3, 6 |\n| 3 | 1 | 4 | 2, 6 |\n| 4 | 2, 3 | None | None (final) |\n| 5 | None | 6 | 1 |\n| 6 | 5 | None | 2, 3 |\n\n### Agent Dispatch Summary\n\n| Wave | Tasks | Recommended Agents |\n|------|-------|-------------------|\n| 1 | 1, 5 | task(category=\"...\", load_skills=[...], run_in_background=false) |\n| 2 | 2, 3, 6 | dispatch parallel after Wave 1 completes |\n| 3 | 4 | final integration task |\n\n---\n\n## TODOs\n\n> Implementation + Test = ONE Task. Never separate.\n> EVERY task MUST have: Recommended Agent Profile + Parallelization info.\n\n- [ ] 1. 
[Task Title]\n\n **What to do**:\n - [Clear implementation steps]\n - [Test cases to cover]\n\n **Must NOT do**:\n - [Specific exclusions from guardrails]\n\n **Recommended Agent Profile**:\n > Select category + skills based on task domain. Justify each choice.\n - **Category**: `[visual-engineering | ultrabrain | artistry | quick | unspecified-low | unspecified-high | writing]`\n - Reason: [Why this category fits the task domain]\n - **Skills**: [`skill-1`, `skill-2`]\n - `skill-1`: [Why needed - domain overlap explanation]\n - `skill-2`: [Why needed - domain overlap explanation]\n - **Skills Evaluated but Omitted**:\n - `omitted-skill`: [Why domain doesn't overlap]\n\n **Parallelization**:\n - **Can Run In Parallel**: YES | NO\n - **Parallel Group**: Wave N (with Tasks X, Y) | Sequential\n - **Blocks**: [Tasks that depend on this task completing]\n - **Blocked By**: [Tasks this depends on] | None (can start immediately)\n\n **References** (CRITICAL - Be Exhaustive):\n\n > The executor has NO context from your interview. 
References are their ONLY guide.\n > Each reference must answer: \"What should I look at and WHY?\"\n\n **Pattern References** (existing code to follow):\n - `src/services/auth.ts:45-78` - Authentication flow pattern (JWT creation, refresh token handling)\n - `src/hooks/useForm.ts:12-34` - Form validation pattern (Zod schema + react-hook-form integration)\n\n **API/Type References** (contracts to implement against):\n - `src/types/user.ts:UserDTO` - Response shape for user endpoints\n - `src/api/schema.ts:createUserSchema` - Request validation schema\n\n **Test References** (testing patterns to follow):\n - `src/__tests__/auth.test.ts:describe(\"login\")` - Test structure and mocking patterns\n\n **Documentation References** (specs and requirements):\n - `docs/api-spec.md#authentication` - API contract details\n - `ARCHITECTURE.md:Database Layer` - Database access patterns\n\n **External References** (libraries and frameworks):\n - Official docs: `https://zod.dev/?id=basic-usage` - Zod validation syntax\n - Example repo: `github.com/example/project/src/auth` - Reference implementation\n\n **WHY Each Reference Matters** (explain the relevance):\n - Don't just list files - explain what pattern/information the executor should extract\n - Bad: `src/utils.ts` (vague, which utils? 
why?)\n - Good: `src/utils/validation.ts:sanitizeInput()` - Use this sanitization pattern for user input\n\n **Acceptance Criteria**:\n\n > **AGENT-EXECUTABLE VERIFICATION ONLY** \u2014 No human action permitted.\n > Every criterion MUST be verifiable by running a command or using a tool.\n > REPLACE all placeholders with actual values from task context.\n\n **If TDD (tests enabled):**\n - [ ] Test file created: src/auth/login.test.ts\n - [ ] Test covers: successful login returns JWT token\n - [ ] bun test src/auth/login.test.ts \u2192 PASS (3 tests, 0 failures)\n\n **Agent-Executed QA Scenarios (MANDATORY \u2014 per-scenario, ultra-detailed):**\n\n > Write MULTIPLE named scenarios per task: happy path AND failure cases.\n > Each scenario = exact tool + steps with real selectors/data + evidence path.\n\n **Example \u2014 Frontend/UI (Playwright):**\n\n \\`\\`\\`\n Scenario: Successful login redirects to dashboard\n Tool: Playwright (playwright skill)\n Preconditions: Dev server running on localhost:3000, test user exists\n Steps:\n 1. Navigate to: http://localhost:3000/login\n 2. Wait for: input[name=\"email\"] visible (timeout: 5s)\n 3. Fill: input[name=\"email\"] \u2192 \"test@example.com\"\n 4. Fill: input[name=\"password\"] \u2192 \"ValidPass123!\"\n 5. Click: button[type=\"submit\"]\n 6. Wait for: navigation to /dashboard (timeout: 10s)\n 7. Assert: h1 text contains \"Welcome back\"\n 8. Assert: cookie \"session_token\" exists\n 9. Screenshot: .sisyphus/evidence/task-1-login-success.png\n Expected Result: Dashboard loads with welcome message\n Evidence: .sisyphus/evidence/task-1-login-success.png\n\n Scenario: Login fails with invalid credentials\n Tool: Playwright (playwright skill)\n Preconditions: Dev server running, no valid user with these credentials\n Steps:\n 1. Navigate to: http://localhost:3000/login\n 2. Fill: input[name=\"email\"] \u2192 \"wrong@example.com\"\n 3. Fill: input[name=\"password\"] \u2192 \"WrongPass\"\n 4. 
Click: button[type=\"submit\"]\n 5. Wait for: .error-message visible (timeout: 5s)\n 6. Assert: .error-message text contains \"Invalid credentials\"\n 7. Assert: URL is still /login (no redirect)\n 8. Screenshot: .sisyphus/evidence/task-1-login-failure.png\n Expected Result: Error message shown, stays on login page\n Evidence: .sisyphus/evidence/task-1-login-failure.png\n \\`\\`\\`\n\n **Example \u2014 API/Backend (curl):**\n\n \\`\\`\\`\n Scenario: Create user returns 201 with UUID\n Tool: Bash (curl)\n Preconditions: Server running on localhost:8080\n Steps:\n 1. curl -s -w \"\\n%{http_code}\" -X POST http://localhost:8080/api/users \\\n -H \"Content-Type: application/json\" \\\n -d '{\"email\":\"new@test.com\",\"name\":\"Test User\"}'\n 2. Assert: HTTP status is 201\n 3. Assert: response.id matches UUID format\n 4. GET /api/users/{returned-id} \u2192 Assert name equals \"Test User\"\n Expected Result: User created and retrievable\n Evidence: Response bodies captured\n\n Scenario: Duplicate email returns 409\n Tool: Bash (curl)\n Preconditions: User with email \"new@test.com\" already exists\n Steps:\n 1. Repeat POST with same email\n 2. Assert: HTTP status is 409\n 3. Assert: response.error contains \"already exists\"\n Expected Result: Conflict error returned\n Evidence: Response body captured\n \\`\\`\\`\n\n **Example \u2014 TUI/CLI (interactive_bash):**\n\n \\`\\`\\`\n Scenario: CLI loads config and displays menu\n Tool: interactive_bash (tmux)\n Preconditions: Binary built, test config at ./test.yaml\n Steps:\n 1. tmux new-session: ./my-cli --config test.yaml\n 2. Wait for: \"Configuration loaded\" in output (timeout: 5s)\n 3. Assert: Menu items visible (\"1. Create\", \"2. List\", \"3. Exit\")\n 4. Send keys: \"3\" then Enter\n 5. Assert: \"Goodbye\" in output\n 6. 
Assert: Process exited with code 0\n Expected Result: CLI starts, shows menu, exits cleanly\n Evidence: Terminal output captured\n\n Scenario: CLI handles missing config gracefully\n Tool: interactive_bash (tmux)\n Preconditions: No config file at ./nonexistent.yaml\n Steps:\n 1. tmux new-session: ./my-cli --config nonexistent.yaml\n 2. Wait for: output (timeout: 3s)\n 3. Assert: stderr contains \"Config file not found\"\n 4. Assert: Process exited with code 1\n Expected Result: Meaningful error, non-zero exit\n Evidence: Error output captured\n \\`\\`\\`\n\n **Evidence to Capture:**\n - [ ] Screenshots in .sisyphus/evidence/ for UI scenarios\n - [ ] Terminal output for CLI/TUI scenarios\n - [ ] Response bodies for API scenarios\n - [ ] Each evidence file named: task-{N}-{scenario-slug}.{ext}\n\n **Commit**: YES | NO (groups with N)\n - Message: `type(scope): desc`\n - Files: `path/to/file`\n - Pre-commit: `test command`\n\n---\n\n## Commit Strategy\n\n| After Task | Message | Files | Verification |\n|------------|---------|-------|--------------|\n| 1 | `type(scope): desc` | file.ts | npm test |\n\n---\n\n## Success Criteria\n\n### Verification Commands\n```bash\ncommand # Expected: output\n```\n\n### Final Checklist\n- [ ] All \"Must Have\" present\n- [ ] All \"Must NOT Have\" absent\n- [ ] All tests pass\n```\n\n---\n";