npm - uv-suite - Versions diffs - 0.29.0 → 0.32.0 - Mend

uv-suite 0.29.0 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (165)

package/LICENSE +21 -0
package/README.md +72 -46
package/agents/claude-code/anti-slop-guard.md +14 -1
package/agents/claude-code/architect.md +30 -4
package/agents/claude-code/cartographer.md +18 -6
package/agents/claude-code/eval-writer.md +7 -2
package/agents/claude-code/reviewer.md +5 -1
package/agents/claude-code/spec-writer.md +30 -7
package/agents/generate.py +88 -0
package/bin/cli.js +146 -73
package/hooks/auto-checkpoint-helper.sh +2 -2
package/hooks/auto-checkpoint.sh +3 -3
package/hooks/auto-restore-on-start.sh +2 -2
package/hooks/checkpoint-helper.sh +40 -35
package/hooks/git-context.sh +41 -0
package/hooks/lite-mode-inject.sh +26 -0
package/hooks/session-end-helper.sh +2 -2
package/hooks/session-end.sh +2 -2
package/hooks/session-label-nag.sh +2 -2
package/hooks/session-meta.sh +18 -1
package/hooks/session-review-reminder.sh +2 -2
package/hooks/session-start.sh +16 -0
package/hooks/slop-grep.sh +12 -31
package/hooks/uv-out-best.sh +20 -0
package/hooks/uv-out-collect.sh +52 -0
package/hooks/uv-out-notify.sh +35 -0
package/hooks/uv-out-pointer.sh +16 -0
package/hooks/uv-out-session.sh +24 -0
package/hooks/watchtower-end.sh +23 -0
package/hooks/watchtower-notify.sh +56 -0
package/hooks/watchtower-send.sh +10 -3
package/hooks/watchtower-tokens.sh +61 -0
package/install.sh +93 -42
package/package.json +6 -3
package/personas/auto.json +59 -1
package/personas/professional.json +65 -1
package/personas/spike.json +51 -2
package/personas/sport.json +63 -1
package/settings.json +6 -2
package/skills/architect/SKILL.md +109 -8
package/skills/architect/specialists/distributed-systems.md +84 -0
package/skills/architect/specialists/full-stack.md +92 -0
package/skills/architect/specialists/llm-ai-engineering.md +86 -0
package/skills/architect/specialists/ml-systems.md +81 -0
package/skills/commit/SKILL.md +5 -2
package/skills/confirm/SKILL.md +3 -3
package/skills/investigate/SKILL.md +14 -4
package/skills/lite/SKILL.md +45 -0
package/skills/qa/SKILL.md +274 -0
package/skills/review/SKILL.md +187 -8
package/skills/review/specialists/api-contract.md +122 -0
package/skills/review/specialists/architecture-trace.md +64 -0
package/skills/review/specialists/data-migration.md +113 -0
package/skills/review/specialists/maintainability.md +138 -0
package/skills/review/specialists/performance.md +115 -0
package/skills/review/specialists/security.md +132 -0
package/skills/review/specialists/testing.md +109 -0
package/skills/session/SKILL.md +87 -0
package/skills/session/operations/auto.md +22 -0
package/skills/session/operations/checkpoint.md +43 -0
package/skills/session/operations/end.md +35 -0
package/skills/session/operations/init.md +16 -0
package/skills/session/operations/restore.md +16 -0
package/skills/spec/SKILL.md +40 -1
package/skills/test/SKILL.md +89 -0
package/skills/test/specialists/eval.md +46 -0
package/skills/test/specialists/integration.md +42 -0
package/skills/test/specialists/unit.md +39 -0
package/skills/understand/SKILL.md +118 -0
package/skills/understand/modes/repo.md +38 -0
package/skills/understand/modes/stack.md +41 -0
package/skills/uv-help/SKILL.md +43 -20
package/uv.sh +36 -3
package/watchtower/README.md +73 -0
package/watchtower/app/__init__.py +0 -0
package/watchtower/app/__pycache__/__init__.cpython-312.pyc +0 -0
package/watchtower/app/__pycache__/__init__.cpython-314.pyc +0 -0
package/watchtower/app/__pycache__/db.cpython-312.pyc +0 -0
package/watchtower/app/__pycache__/db.cpython-314.pyc +0 -0
package/watchtower/app/__pycache__/main.cpython-312.pyc +0 -0
package/watchtower/app/__pycache__/main.cpython-314.pyc +0 -0
package/watchtower/app/__pycache__/models.cpython-312.pyc +0 -0
package/watchtower/app/__pycache__/models.cpython-314.pyc +0 -0
package/watchtower/app/db.py +129 -0
package/watchtower/app/main.py +43 -0
package/watchtower/app/models.py +54 -0
package/watchtower/app/routers/__init__.py +0 -0
package/watchtower/app/routers/__pycache__/__init__.cpython-312.pyc +0 -0
package/watchtower/app/routers/__pycache__/__init__.cpython-314.pyc +0 -0
package/watchtower/app/routers/__pycache__/control.cpython-312.pyc +0 -0
package/watchtower/app/routers/__pycache__/control.cpython-314.pyc +0 -0
package/watchtower/app/routers/__pycache__/ingest.cpython-312.pyc +0 -0
package/watchtower/app/routers/__pycache__/ingest.cpython-314.pyc +0 -0
package/watchtower/app/routers/__pycache__/query.cpython-312.pyc +0 -0
package/watchtower/app/routers/__pycache__/query.cpython-314.pyc +0 -0
package/watchtower/app/routers/__pycache__/settings.cpython-312.pyc +0 -0
package/watchtower/app/routers/__pycache__/stream.cpython-312.pyc +0 -0
package/watchtower/app/routers/__pycache__/stream.cpython-314.pyc +0 -0
package/watchtower/app/routers/control.py +260 -0
package/watchtower/app/routers/ingest.py +157 -0
package/watchtower/app/routers/query.py +133 -0
package/watchtower/app/routers/settings.py +34 -0
package/watchtower/app/routers/stream.py +28 -0
package/watchtower/app/services/__init__.py +0 -0
package/watchtower/app/services/__pycache__/__init__.cpython-312.pyc +0 -0
package/watchtower/app/services/__pycache__/__init__.cpython-314.pyc +0 -0
package/watchtower/app/services/__pycache__/checkpoint.cpython-312.pyc +0 -0
package/watchtower/app/services/__pycache__/checkpoint.cpython-314.pyc +0 -0
package/watchtower/app/services/__pycache__/tmux.cpython-312.pyc +0 -0
package/watchtower/app/services/__pycache__/tmux.cpython-314.pyc +0 -0
package/watchtower/app/services/checkpoint.py +149 -0
package/watchtower/app/services/tmux.py +54 -0
package/watchtower/events.json +10344 -45
package/watchtower/{auto-checkpoint-runner.js → legacy/auto-checkpoint-runner.js} +29 -2
package/watchtower/requirements.txt +3 -0
package/watchtower/static/dashboard.html +577 -0
package/watchtower/watchtower.db +0 -0
package/agents/claude-code/devops.md +0 -50
package/agents/claude-code/security.md +0 -75
package/agents/codex/anti-slop-guard.toml +0 -12
package/agents/codex/architect.toml +0 -11
package/agents/codex/cartographer.toml +0 -16
package/agents/codex/devops.toml +0 -8
package/agents/codex/eval-writer.toml +0 -11
package/agents/codex/prototype-builder.toml +0 -10
package/agents/codex/reviewer.toml +0 -16
package/agents/codex/security.toml +0 -14
package/agents/codex/spec-writer.toml +0 -11
package/agents/codex/test-writer.toml +0 -13
package/agents/cursor/anti-slop-guard.mdc +0 -22
package/agents/cursor/architect.mdc +0 -24
package/agents/cursor/cartographer.mdc +0 -28
package/agents/cursor/devops.mdc +0 -16
package/agents/cursor/eval-writer.mdc +0 -21
package/agents/cursor/prototype-builder.mdc +0 -25
package/agents/cursor/reviewer.mdc +0 -26
package/agents/cursor/security.mdc +0 -20
package/agents/cursor/spec-writer.mdc +0 -27
package/agents/cursor/test-writer.mdc +0 -28
package/agents/portable/anti-slop-guard.md +0 -71
package/agents/portable/architect.md +0 -83
package/agents/portable/cartographer.md +0 -64
package/agents/portable/devops.md +0 -56
package/agents/portable/eval-writer.md +0 -70
package/agents/portable/prototype-builder.md +0 -70
package/agents/portable/reviewer.md +0 -79
package/agents/portable/security.md +0 -63
package/agents/portable/spec-writer.md +0 -89
package/agents/portable/test-writer.md +0 -56
package/hooks/context-warning.sh +0 -4
package/skills/auto-checkpoint/SKILL.md +0 -47
package/skills/checkpoint/SKILL.md +0 -105
package/skills/map-codebase/SKILL.md +0 -54
package/skills/map-stack/SKILL.md +0 -121
package/skills/restore/SKILL.md +0 -55
package/skills/security-review/SKILL.md +0 -87
package/skills/session-end/SKILL.md +0 -100
package/skills/session-init/SKILL.md +0 -45
package/skills/slop-check/SKILL.md +0 -40
package/skills/write-evals/SKILL.md +0 -34
package/skills/write-tests/SKILL.md +0 -54
package/watchtower/{auto-checkpoint-prompt.md → legacy/auto-checkpoint-prompt.md} +0 -0
package/watchtower/{dashboard.html → legacy/dashboard.html} +0 -0
package/watchtower/{server.js → legacy/server.js} +0 -0
package/watchtower/{snapshot-manager.js → legacy/snapshot-manager.js} +0 -0

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Utsav Anand
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md CHANGED Viewed

@@ -1,6 +1,8 @@
 # UV Suite
-A portable layer that turns Claude Code, Cursor, or Codex into a labeled, observable, anti-slop dev environment — with named sessions, a real-time observability dashboard, anti-slop guardrails, and per-session memory across launches.
+A portable layer that turns Claude Code, Cursor, or Codex into a labeled, observable, anti-slop dev environment — with named sessions, a real-time observability **and control** dashboard, anti-slop guardrails, and per-session memory across launches.
+By [Utsav](https://www.utsava.xyz/) · [github.com/utsavanand/uv-suite](https://github.com/utsavanand/uv-suite)
 ## Install
@@ -24,7 +26,7 @@ uvs claude pro                # Claude Code, Professional persona
 uvs codex auto                # Codex, Auto persona
 uvs pro                       # Shorthand for uvs claude pro
 uvs install                   # Explicit install (also runs automatically on launch)
-uvs watch                     # Open the Watchtower observability dashboard
+uvs watch                     # Open the Watchtower observability + control dashboard
 uvs info                      # Show what's installed
 ```
@@ -38,29 +40,50 @@ Label this session (Enter to skip — you'll be reminded):
   priority [low/med/high]:  high
 ```
-Skip any field with Enter. If you skip the name, `/session-init` will be suggested every few prompts until you label it. Set `UVS_NO_PROMPT=1` to suppress prompts entirely.
+Skip any field with Enter. If you skip the name, `/session init` will be suggested every few prompts until you label it. Set `UVS_NO_PROMPT=1` to suppress prompts entirely.
+## Start here
+After `uvs claude pro`, pick the path that matches what you're doing. Each path names the first skill to run and the canonical next steps.
+### Existing codebase (you didn't write this)
+1. `/understand` — builds a knowledge graph + architecture overview in `uv-out/map-codebase.md`. Other skills (`/architect`, `/review`, `/review --security`) read it automatically.
+2. `/session checkpoint` — captures your baseline understanding so `/session restore` can bring it back next session.
+3. Then: `/review` on the current diff, or `/spec` for the next feature.
+### New project (you're starting from scratch)
+1. `/spec` — converts your idea into a structured spec in `uv-out/specs/`.
+2. `/architect` — breaks the spec into Acts with cycle budgets. Reads the spec automatically.
+3. Then: implement Act by Act. Use `/test` and `/review` per Act.
+### Reviewing a PR
+1. `/review [branch-name]` — reads diff + `CLAUDE.md` + `DANGER-ZONES.md` + prior `uv-out/` artifacts.
+2. `/review --security` if the diff touches auth, payments, data access, or external inputs.
+### Shipping
+1. `/session checkpoint` to capture state.
+2. `/commit` — runs review, tests, commits, optionally opens a PR.
+Picking a persona (Spike / Sport / Professional / Auto) is a separate axis from picking a first skill — see [Personas](#personas) below for which mode fits which situation.
 ## Sessions and Watchtower
 Each `uvs` launch generates a `UVS_SESSION_ID` and writes metadata to `.uv-suite-state/sessions/<id>.json`. This unlocks:
 - **Concurrent terminals don't collide.** Two `uvs` launches in the same repo run as distinct sessions with separate names, checkpoints, and dashboard rows.
-- **`uvs watch` shows them all.** The Watchtower dashboard at `localhost:4200` streams every tool call across every session in real time — labeled by your name, sorted by priority (high to top, low dimmed), color-coded by persona.
-- **Per-session checkpoints.** `/checkpoint` writes to `uv-out/checkpoints/<sid>/`, and `/restore` auto-picks the current session's latest. Pass a session id prefix or name to restore from a different one.
+- **`uvs watch` shows them all.** The Watchtower control plane at `localhost:4200` streams every session live and lets you act on them from the browser — see [Watchtower at a glance](#watchtower-at-a-glance).
+- **Per-session checkpoints.** `/session checkpoint` writes to `uv-out/checkpoints/<sid>/`, and `/session restore` auto-picks the current session's latest. Pass a session id prefix or name to restore from a different one.
 - **Status line shows it all.** The Claude Code status bar shows session name, persona, priority, and elapsed time continuously.
 ### Watchtower at a glance
-```
-Sessions               Events    Tool calls    Errors    Need human
-4                      1,247     914           2         0
+`uvs watch` starts the dashboard at `localhost:4200` — Python + **embedded SQLite**, no Docker and no database to set up (it provisions its own deps on first run). It's a control plane, not just a viewer, laid out in three panes:
-[payments retry [auto]   [P:high]  [outcome]      ] (147)
-[infra cleanup  [pro]    [P:med]   [long-running] ] (382)
-[exec deck      [spike]  [P:low]   [outcome]      ] (89)
-```
+- **Heartbeat** (left) — a live, scrolling stream of what every agent is doing, as it happens.
+- **Sessions** (center) — each session as a flat row (state · tokens · tool calls · last activity). Filter by time / priority / kind and search by name; expand a row to **checkpoint, view checkpoint history, compact, fork, close, or delete** it.
+- **Needs human** (right) — sessions waiting on you (a tool-permission prompt or an idle wait), with the tool + command as context. **Approve / Deny** from the browser; for `uvs`-launched (tmux-owned) sessions the keystroke is sent for you.
-Hooks fire on every Claude Code event (`PreToolUse`, `PostToolUse`, `UserPromptSubmit`, `SessionStart`, `Stop`, `PermissionRequest`, ...) and forward to the dashboard with the session metadata merged in. Zero dependencies — vanilla Node + SSE.
+Sessions launched via `uvs` run inside a transparent tmux so Watchtower can act on them. Hooks forward every Claude Code event (`PreToolUse`, `PostToolUse`, `UserPromptSubmit`, `Notification`, `SessionStart`, `Stop`, ...) with session metadata merged in. A Node-only fallback (no Python) is available via `uvs watch --legacy`.
 ## Personas
@@ -104,50 +127,48 @@ Human gates  After each     End only     Every Act          Final output
 ## Skills (slash commands)
+12 skills. Each `skills/<name>/SKILL.md` is a thin orchestrator that dispatches to agents.
 | Command | What it does |
 |---|---|
-| `/map-codebase [dir]` | Build a knowledge graph of the codebase |
-| `/map-stack [dir]` | Map multiple services and their connections |
+| `/understand [dir]` | Map a codebase or whole stack — auto-detects repo vs stack |
 | `/spec [requirements]` | Write a technical specification |
 | `/architect [spec]` | Design architecture, decompose into Acts |
+| `/test [file]` | Write tests or evals: `--unit` / `--integration` / `--eval` ([DeepEval](https://github.com/confident-ai/deepeval) compatible) |
+| `/review` | Multi-specialist code review; add `--security` (OWASP via Semgrep/Gitleaks/Trivy) or `--slop` (anti-slop audit) |
 | `/prototype [concept]` | Build a static React prototype |
-| `/write-tests [file]` | Generate tests matching project conventions |
-| `/write-evals [prompt]` | Write AI/LLM evaluation cases ([DeepEval](https://github.com/confident-ai/deepeval) compatible) |
-| `/review` | Code review: correctness, security, performance, slop |
-| `/slop-check` | Detect 6 categories of AI-generated slop |
-| `/security-review` | OWASP audit, dependency scan, secret detection |
+| `/qa` | Browser QA via Playwright MCP |
 | `/investigate` | Systematic root-cause debugging |
-| `/commit` | Review → test → slop-check → commit (and optionally PR) |
-| `/checkpoint [label]` | Save session state to `uv-out/checkpoints/<sid>/` |
-| `/restore [sid-prefix\|name]` | Load the latest checkpoint for the current (or named) session |
-| `/session-init [name\|--kind\|--purpose\|--priority]` | Label or relabel the current session |
+| `/commit` | Review → test → commit (and optionally PR) |
+| `/session init\|checkpoint\|restore\|end\|auto` | Session lifecycle — label, checkpoint, restore, end, or auto-checkpoint |
 | `/confirm [on\|off\|<n>]` | Toggle reframe-and-confirm for prompts over `<n>` words |
 | `/uv-help` | List every skill, agent, hook, guardrail, and persona |
 ## Hooks (lifecycle automation)
-Fire automatically on Claude Code events. You never invoke these.
+Fire automatically on Claude Code events. You never invoke these. ~26 scripts live in `hooks/`.
 | Hook | Fires on | What it does |
 |---|---|---|
 | auto-lint | File write | Runs prettier, ruff, or gofmt |
-| slop-grep | File write | Greps for obvious slop patterns (over-commented code, vague docs) |
-| doc-slop-grep | File write | Catches vague adjectives in markdown |
+| slop-grep | File edit/write | Ambient slop detection on sport / professional / auto personas |
+| doc-slop-grep | File edit/write | Catches vague adjectives in markdown on the spike persona |
 | danger-zone-check | File edit | Warns if file is in DANGER-ZONES.md |
 | block-destructive | Bash command | Blocks `rm -rf /`, force push to main, `DROP TABLE` |
 | confirm-prompt | UserPromptSubmit | For prompts over the threshold, requires Claude to restate before any work starts |
-| session-label-nag | UserPromptSubmit | Reminds you to run `/session-init` every Nth prompt while the session has no name |
+| session-label-nag | UserPromptSubmit | Reminds you to run `/session init` every Nth prompt while the session has no name |
 | context-warning | PostToolUse | Warns when context usage crosses thresholds |
 | watchtower-send | All events | Forwards every event (with session metadata) to `localhost:4200` |
 | session-start | SessionStart | Records start time, fires bootstrap event with session metadata |
 | session-timer | PostToolUse | Reminders at 45 / 90 / 180 minutes |
 | session-end | Stop | Shows duration, today's total, reflection prompt |
 | session-review-reminder | Stop | Nudges you to review uncommitted changes |
+| uv-out-* | Session events | Manage session-scoped artifacts under `uv-out/sessions/<sid>/` |
 | status-line | Continuous | Renders session label, persona, priority, and timer in the Claude Code status bar |
 ## Agents
-10 agents, each in 4 formats (Claude Code, Cursor, Codex, portable):
+8 agents. The canonical definitions are `agents/claude-code/*.md`. The Cursor (`.mdc`) and Codex (`.toml`) variants are generated from those by `agents/generate.py` at install — they're not hand-maintained.
 | Agent | Subsystem | Model | Cycle Budget |
 |---|---|---|---|
@@ -159,8 +180,6 @@ Fire automatically on Claude Code events. You never invoke these.
 | Eval Writer | Acts | Opus | 2 |
 | Anti-Slop Guard | Guard | Opus | 1 |
 | Prototype Builder | Acts | Sonnet | 3 |
-| DevOps | Acts | Opus | 2 |
-| Security | Guard | Opus | 1 |
 ## Artifacts
@@ -168,11 +187,11 @@ Agents write persistent output to `uv-out/`. Each agent reads prior artifacts au
 | Output | Read by |
 |---|---|
-| `uv-out/map-codebase.md` | /architect, /review, /security-review |
-| `uv-out/specs/*.md` | /architect, /write-tests, /write-evals |
-| `uv-out/architecture/*.md` | /review, /write-tests, /slop-check |
-| `uv-out/review-*.md` | /slop-check, /security-review |
-| `uv-out/checkpoints/<sid>/*.md` | /restore |
+| `uv-out/map-codebase.md` | /architect, /review, /review --security |
+| `uv-out/specs/*.md` | /architect, /test, /test --eval |
+| `uv-out/architecture/*.md` | /review, /test, /review --slop |
+| `uv-out/review-*.md` | /review --slop, /review --security |
+| `uv-out/checkpoints/<sid>/*.md` | /session restore |
 ## Integrations
@@ -190,13 +209,13 @@ Agents write persistent output to `uv-out/`. Each agent reads prior artifacts au
 ```
 .claude/
   settings.json          Permissions and hooks (seeded from your persona on first install)
-  agents/                10 agent definitions
-  skills/                17 slash commands
-  hooks/                 14 hook scripts + 2 helpers
+  agents/                8 agent definitions (canonical .md)
+  skills/                12 slash commands
+  hooks/                 ~26 hook scripts
   rules/                 6 anti-slop guardrails (Pro / Auto only)
   personas/              4 persona configs
-.codex/agents/           10 Codex agent definitions
-.cursor/rules/           10 Cursor rule definitions
+.codex/agents/           8 Codex agent definitions (generated from .claude/agents)
+.cursor/rules/           8 Cursor rule definitions (generated from .claude/agents)
 AGENTS.md                Codex instruction file
 DANGER-ZONES.md          Risky areas (commit this)
 .uv-suite-state/         Session metadata + counters (gitignored)
@@ -210,13 +229,20 @@ uv-out/                  Agent output artifacts (gitignored)
 | Document | What it covers |
 |---|---|
-| [usage-guide.md](usage-guide.md) | Full SDLC mapped to exact commands |
+| [CONTRIBUTING.md](CONTRIBUTING.md) | Working on UV Suite — adding skills/agents/hooks, running the tests |
 | [personas.md](personas.md) | 4 personas, 7 knobs, when to use each |
-| [practices.md](practices.md) | Working principles (honesty, parallelism, scope, completion) |
+| [knowledge/practices.md](knowledge/practices.md) | Working principles (honesty, parallelism, scope, completion) |
 | [acts-methodology.md](acts-methodology.md) | Acts delivery framework with worked examples |
-| [methodology/human-in-the-loop.md](methodology/human-in-the-loop.md) | Cycle budgets, intervention types, learning loops |
-| [collaboration/sharing-and-standards.md](collaboration/sharing-and-standards.md) | Danger zones, team standards, sharing levels |
-| [landscape.md](landscape.md) | Open source tools and references for each agent |
+| [knowledge/human-in-the-loop.md](knowledge/human-in-the-loop.md) | Cycle budgets, intervention types, learning loops |
+| [knowledge/sharing-and-standards.md](knowledge/sharing-and-standards.md) | Danger zones, team standards, sharing levels |
+| [research/landscape.md](research/landscape.md) | Open source tools and references for each agent |
+| [research/comparison.md](research/comparison.md) | UV Suite vs gstack vs Claude Code built-in — feature comparison + prompt-depth deep dive |
+| [research/tool-comparison.md](research/tool-comparison.md) | Claude Code vs Cursor vs Codex — how UV Suite works across all three |
+| [research/best-practices.md](research/best-practices.md) | Subagent patterns, remote sessions, sharing with engineers, cost optimization |
+## Author
+Built by [Utsav](https://www.utsava.xyz/) — [utsava.xyz](https://www.utsava.xyz/).
 ## License

package/agents/claude-code/anti-slop-guard.md CHANGED Viewed

@@ -19,7 +19,20 @@ You are the **Anti-Slop Guard** — your job is to catch AI-generated low-qualit
 ## Artifact Output
-Write the slop report to `uv-out/slop-check-YYYY-MM-DD.md`. Create the directory if needed. Summarize findings in the conversation.
+Write the slop report to `<session-output-dir>/slop-check/report.md`, where
+`<session-output-dir>` is the path printed in the "Session output directory" section of
+your context (e.g. `uv-out/sessions/<sid>/`). Create the directory if needed. Stamp the
+top with provenance frontmatter:
+```yaml
+---
+session: <sid from the output dir path>
+skill: slop-check
+created: <ISO 8601 timestamp>
+---
+```
+Summarize findings in the conversation.
 ## What You Scan For

package/agents/claude-code/architect.md CHANGED Viewed

@@ -16,8 +16,21 @@ effort: high
 You are the **Architect** — your job is to design systems and break work into deliverable Acts.
+**Hard precondition: you design only from a curated spec** (problem statement, requirements,
+success criteria). If no spec was provided or found, **stop and ask** the user to run
+`/spec` or describe the problem so a spec can be drafted first. Never design from a vague
+one-liner and never invent requirements — designing without a spec is a failure, not a fallback.
 ## Output Format
+### 0. Design Constraints
+Write these to `<session-output-dir>/architecture/constraints.md` FIRST, before any design.
+Record the factors the design is right-sized against (from the spec's non-functional
+requirements or gathered from the user): scale (users/RPS/data, now + ~12mo), team size &
+expertise, availability target, consistency/CAP priority, security & privacy/compliance,
+and fault tolerance / cost of failure. Every later decision must be justifiable against
+these — they are what make "this is over-engineered" a checkable claim.
 ### 1. Architecture Decision Record
 For each key decision, document:
 - **Decision:** What you chose
@@ -51,11 +64,24 @@ For each key decision, document:
 ## Artifact Output
-Write all output to `uv-out/architecture/`:
-- `uv-out/architecture/decisions.md` — architecture decision records
-- `uv-out/architecture/acts-plan.md` — Acts breakdown with tasks and cycle budgets
+Write all output under `<session-output-dir>/architecture/`, where `<session-output-dir>`
+is the path printed in the "Session output directory" section of your context
+(e.g. `uv-out/sessions/<sid>/`):
+- `<session-output-dir>/architecture/constraints.md` — the Design Constraints the design is right-sized against (write this FIRST, before designing)
+- `<session-output-dir>/architecture/decisions.md` — architecture decision records
+- `<session-output-dir>/architecture/acts-plan.md` — Acts breakdown with tasks and cycle budgets
+Create the directory if needed. Stamp the top of each file with provenance frontmatter:
+```yaml
+---
+session: <sid from the output dir path>
+skill: architect
+created: <ISO 8601 timestamp>
+---
+```
-Create the directory if needed. Summarize the design in the conversation.
+Summarize the design in the conversation.
 ### 4. Task Dependency Graph
 Mermaid diagram showing parallelism opportunities.

package/agents/claude-code/cartographer.md CHANGED Viewed

@@ -88,12 +88,24 @@ Produce all 6 sections (Architecture Overview, Tech Stack, Dependency Graph, Bus
 ## Artifact Output
-Write all output to `uv-out/`. Create the directory if it doesn't exist.
-- `uv-out/map-codebase.md` — the written analysis (business domain map, sequence diagrams, entry points)
-- `uv-out/graphify-out/` — Graphify outputs if used (graph.html, graph.json, GRAPH_REPORT.md)
-After writing, tell the human: "Artifacts written to uv-out/map-codebase.md" and summarize key findings in the conversation.
+Write all output under `<session-output-dir>`, the path printed in the "Session output
+directory" section of your context (e.g. `uv-out/sessions/<sid>/`). Create it if needed.
+The calling skill names the output file — `<session-output-dir>/map-codebase.md` for
+single-codebase mapping, `<session-output-dir>/map-stack.md` for multi-service stack
+mapping. Follow the file named in the task; default to `map-codebase.md` if unspecified.
+- The written analysis goes in that file, stamped with provenance frontmatter:
+  ```yaml
+  ---
+  session: <sid from the output dir path>
+  skill: map-codebase   # or map-stack
+  created: <ISO 8601 timestamp>
+  ---
+  ```
+- `<session-output-dir>/graphify-out/` — Graphify outputs if used (graph.html, graph.json, GRAPH_REPORT.md)
+After writing, tell the human one line — "Artifacts written to `<the file you wrote>`" — and summarize key findings in the conversation.
 ## Rules

package/agents/claude-code/eval-writer.md CHANGED Viewed

@@ -46,14 +46,19 @@ You are the **Eval Writer** — your job is to write evaluations that verify AI/
 ## Artifact Output
-Write evals to `uv-out/evals/`. Create the directory if needed. Match the project's eval framework format (DeepEval preferred).
+Write eval artifacts to `<session-output-dir>/evals/`, where `<session-output-dir>` is the
+path printed in the "Session output directory" section of your context (e.g.
+`uv-out/sessions/<sid>/`). Create the directory if needed. Match the project's eval
+framework format (DeepEval preferred). If you also add runnable evals into the project's
+own eval suite, do that in the project tree as usual — only the artifact copy goes under
+the session directory.
 ## Grading Rubric (be this specific)
 ```yaml
 grading:
   type: "llm_judge"
-  model: "claude-haiku-4-5"
+  model: "claude-haiku-4-5-20251001"
   rubric: |
     Score 1 (pass) if ALL of:
     - Agent declines the out-of-scope request

package/agents/claude-code/reviewer.md CHANGED Viewed

@@ -65,7 +65,11 @@ You are the **Reviewer** — your job is to catch bugs, security issues, perform
 ## Artifact Output
-Write the review report to `uv-out/review-YYYY-MM-DD.md`. Create the directory if needed. Summarize key findings in the conversation.
+Write your report under `<session-output-dir>`, the path printed in the "Session output
+directory" section of your context (e.g. `uv-out/sessions/<sid>/`). Use the filename the
+calling task names — `review/state.md` for a code review, `investigate/report.md` for an
+investigation. Stamp the top with provenance frontmatter (`session`, `skill`, `created`).
+Create the directory if needed. Summarize key findings in the conversation.
 ## Common Findings (be this specific)

package/agents/claude-code/spec-writer.md CHANGED Viewed

@@ -62,19 +62,42 @@ Unit, integration, e2e, load?
 ## Artifact Output
-Write the spec to `uv-out/specs/[feature-name]-spec.md`. Create the directory if needed. Summarize the spec in the conversation.
+Write the spec to `<session-output-dir>/specs/[feature-name]-spec.md`, where
+`<session-output-dir>` is the path printed in the "Session output directory" section of
+your context (e.g. `uv-out/sessions/<sid>/`). Create the directory if needed.
+Stamp the top of the spec with provenance frontmatter so it stays attributable if moved:
+```yaml
+---
+session: <sid from the output dir path>
+skill: spec
+created: <ISO 8601 timestamp>
+---
+```
+Then the `# Spec: [Feature Name]` heading and the rest of the template. Summarize the spec in the conversation.
 ## Process
-1. Parse the input into discrete requirements
-2. Separate functional vs non-functional
-3. Identify gaps — list as open questions, don't invent answers
-4. Propose a high-level solution (detailed design is the Architect's job)
-5. Define measurable success criteria
-6. Flag risks and assumptions
+1. **Ground in the existing codebase first.** Read the prior `uv-out/` artifacts loaded by
+   the skill — the codebase map (`map-codebase.md`/`map-stack.md`), prior specs, and
+   architecture decisions. Reference real modules, files, patterns, and conventions from
+   the map; reuse what exists; build on prior specs instead of re-specifying them. If no
+   map is present, say so and note that `/understand` would produce a better-grounded spec.
+2. Parse the input into discrete requirements
+3. Separate functional vs non-functional
+4. Identify gaps — list as open questions, don't invent answers
+5. Propose a high-level solution that fits the existing architecture (detailed design is
+   the Architect's job) — name the specific modules/files it touches
+6. Define measurable success criteria
+7. Flag risks and assumptions
 ## Rules
+- **Ground every section in the real codebase.** The Proposed Solution, API Contract, and
+  Data Model sections must reference actual modules/types/endpoints from the map — not
+  generic placeholders. If you're inventing names because there's no map, flag it.
 - Scale the spec to the task. A bug fix needs 1 page, not 10.
 - Flag ambiguity as open questions — don't fill gaps with assumptions.
 - If requirements conflict (e.g., "fast response" vs "comprehensive validation"), list both in Risks and propose which to prioritize.

package/agents/generate.py ADDED Viewed

@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+"""Generate Cursor (.mdc) and Codex (.toml) agent files from the canonical
+Claude Code agent definitions in agents/claude-code/*.md.
+Single source of truth: edit agents/claude-code/<name>.md, then install.sh (or
+`python3 agents/generate.py <cursor|codex> <dest-dir>`) regenerates the others.
+Usage:
+  python3 agents/generate.py cursor /path/to/.cursor/rules
+  python3 agents/generate.py codex  /path/to/.codex/agents
+"""
+import glob
+import os
+import re
+import sys
+CANONICAL = os.path.join(os.path.dirname(os.path.abspath(__file__)), "claude-code")
+def parse(md, fallback_name):
+    m = re.match(r"^---\n(.*?)\n---\n(.*)$", md, re.S)
+    fm, body = (m.group(1), m.group(2).strip()) if m else ("", md.strip())
+    def field(key):
+        mm = re.search(rf"^{key}:\s*(.*)$", fm, re.M)
+        return mm.group(1).strip() if mm else ""
+    # description is usually a folded scalar (`description: >` then indented lines)
+    desc = field("description")
+    if desc in (">", "|", ">-", "|-", ""):
+        lines = fm.splitlines()
+        idx = next((i for i, l in enumerate(lines) if l.startswith("description:")), None)
+        if idx is not None:
+            collected = []
+            for l in lines[idx + 1:]:
+                if re.match(r"^\s+\S", l):
+                    collected.append(l.strip())
+                else:
+                    break
+            desc = " ".join(collected) or desc
+    desc = desc.strip().strip('"')
+    return {
+        "name": field("name") or fallback_name,
+        "desc": desc,
+        "model": field("model"),
+        "has_write": ("Write" in fm or "Edit" in fm),
+        "body": body,
+    }
+def to_mdc(a):
+    desc = a["desc"].replace('"', '\\"')
+    return f'---\ndescription: "{desc}"\nglobs: ""\nalwaysApply: false\n---\n\n{a["body"]}\n'
+def to_toml(a):
+    effort = "high" if a["model"] == "opus" else "medium"
+    sandbox = "workspace-write" if a["has_write"] else "read-only"
+    desc = a["desc"].replace("\\", "\\\\").replace('"', '\\"')
+    body = a["body"].replace("\\", "\\\\").replace('"""', '\\"\\"\\"')
+    return (
+        f'name = "{a["name"]}"\n'
+        f'description = "{desc}"\n'
+        f'model_reasoning_effort = "{effort}"\n'
+        f'sandbox_mode = "{sandbox}"\n\n'
+        f'developer_instructions = """\n{body}\n"""\n'
+    )
+def main():
+    if len(sys.argv) != 3 or sys.argv[1] not in ("cursor", "codex"):
+        sys.exit("usage: generate.py <cursor|codex> <dest-dir>")
+    fmt, dest = sys.argv[1], sys.argv[2]
+    os.makedirs(dest, exist_ok=True)
+    ext = ".mdc" if fmt == "cursor" else ".toml"
+    render = to_mdc if fmt == "cursor" else to_toml
+    n = 0
+    for path in sorted(glob.glob(os.path.join(CANONICAL, "*.md"))):
+        name = os.path.splitext(os.path.basename(path))[0]
+        a = parse(open(path).read(), name)
+        open(os.path.join(dest, name + ext), "w").write(render(a))
+        n += 1
+    print(f"generated {n} {fmt} agents -> {dest}")
+if __name__ == "__main__":
+    main()