claude-dev-env 1.36.2 → 1.37.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/_shared/pr-loop/scripts/config/preflight_constants.py +29 -8
  2. package/_shared/pr-loop/scripts/preflight.py +242 -20
  3. package/_shared/pr-loop/scripts/tests/test_preflight.py +362 -25
  4. package/_shared/pr-loop/scripts/tests/test_preflight_constants.py +9 -14
  5. package/hooks/blocking/code_rules_enforcer.py +269 -23
  6. package/hooks/blocking/test_code_rules_enforcer_unused_imports.py +157 -1
  7. package/hooks/config/test_unused_module_import_constants.py +48 -0
  8. package/hooks/config/unused_module_import_constants.py +41 -0
  9. package/package.json +1 -1
  10. package/rules/gh-paginate.md +4 -50
  11. package/rules/no-historical-clutter.md +36 -0
  12. package/skills/bg-agent/SKILL.md +69 -0
  13. package/skills/bugteam/CONSTRAINTS.md +10 -19
  14. package/skills/bugteam/PROMPTS.md +21 -14
  15. package/skills/bugteam/SKILL.md +122 -208
  16. package/skills/bugteam/SKILL_EVALS.md +75 -114
  17. package/skills/bugteam/reference/README.md +2 -4
  18. package/skills/bugteam/reference/audit-and-teammates.md +21 -48
  19. package/skills/bugteam/reference/audit-contract.md +7 -7
  20. package/skills/bugteam/reference/design-rationale.md +3 -8
  21. package/skills/bugteam/reference/team-setup.md +11 -19
  22. package/skills/bugteam/reference/teardown-publish-permissions.md +2 -14
  23. package/skills/bugteam/scripts/config/__init__.py +0 -0
  24. package/skills/bugteam/scripts/config/reflow_skill_md_constants.py +12 -0
  25. package/skills/bugteam/scripts/reflow_skill_md.py +51 -47
  26. package/skills/bugteam/sources.md +1 -25
  27. package/skills/bugteam/test_skill_additions.py +4 -13
  28. package/skills/fresh-branch/SKILL.md +71 -0
  29. package/skills/gotcha/SKILL.md +73 -0
  30. package/skills/monitor-open-prs/SKILL.md +4 -37
  31. package/skills/monitor-open-prs/test_skill_contract.py +0 -5
  32. package/skills/pr-converge/SKILL.md +60 -1298
  33. package/skills/pr-converge/reference/convergence-gates.md +122 -0
  34. package/skills/pr-converge/reference/examples.md +76 -0
  35. package/skills/pr-converge/reference/fix-protocol.md +56 -0
  36. package/skills/pr-converge/reference/ground-rules.md +13 -0
  37. package/skills/pr-converge/reference/multi-pr-orchestration.md +204 -0
  38. package/skills/pr-converge/reference/per-tick.md +204 -0
  39. package/skills/pr-converge/reference/state-schema.md +19 -0
  40. package/skills/pr-converge/reference/stop-conditions.md +26 -0
  41. package/skills/pr-converge/scripts/README.md +36 -9
  42. package/skills/pr-converge/scripts/check_pr_mergeability.py +1 -2
  43. package/skills/pr-converge/scripts/config/pr_converge_constants.py +74 -5
  44. package/skills/pr-converge/scripts/config/reflow_skill_md_constants.py +13 -0
  45. package/skills/pr-converge/scripts/config/test_pr_converge_constants.py +0 -24
  46. package/skills/pr-converge/scripts/cursor-agents-continue.ahk +22 -2
  47. package/skills/pr-converge/scripts/fetch_bugbot_inline_comments.py +19 -59
  48. package/skills/pr-converge/scripts/fetch_bugbot_reviews.py +15 -61
  49. package/skills/pr-converge/scripts/fetch_claude_inline_comments.py +70 -0
  50. package/skills/pr-converge/scripts/fetch_claude_reviews.py +61 -0
  51. package/skills/pr-converge/scripts/fetch_copilot_inline_comments.py +19 -61
  52. package/skills/pr-converge/scripts/fetch_copilot_reviews.py +14 -74
  53. package/skills/pr-converge/scripts/reflow_skill_md.py +71 -50
  54. package/skills/pr-converge/scripts/reviewer_fetch_core.py +153 -0
  55. package/skills/pr-converge/scripts/reviewer_specs.py +98 -0
  56. package/skills/pr-converge/scripts/test_cursor_agents_continue.py +65 -0
  57. package/skills/pr-converge/scripts/test_fetch_bugbot_inline_comments.py +107 -6
  58. package/skills/pr-converge/scripts/test_fetch_bugbot_reviews.py +85 -6
  59. package/skills/pr-converge/scripts/test_fetch_claude_inline_comments.py +485 -0
  60. package/skills/pr-converge/scripts/test_fetch_claude_reviews.py +368 -0
  61. package/skills/pr-converge/scripts/test_fetch_copilot_inline_comments.py +74 -6
  62. package/skills/pr-converge/scripts/test_fetch_copilot_reviews.py +94 -8
  63. package/skills/pr-converge/scripts/test_reflow_skill_md.py +162 -0
  64. package/skills/pr-converge/scripts/test_reviewer_fetch_core.py +448 -0
  65. package/skills/pr-converge/scripts/test_reviewer_specs.py +107 -0
  66. package/skills/pr-converge/scripts/test_view_pr_context.py +44 -0
  67. package/skills/pr-converge/scripts/view_pr_context.py +35 -4
  68. package/skills/pr-converge/workflows/schedule-wakeup-loop.md +24 -22
  69. package/skills/bugteam/reference/workflow-path-a-orchestrated-teams.md +0 -113
  70. package/skills/bugteam/reference/workflow-path-b-task-harness.md +0 -48
  71. package/skills/bugteam/test_team_lifecycle.py +0 -103
  72. package/skills/monitor-open-prs/test_team_lifecycle.py +0 -46
  73. package/skills/pr-converge/scripts/open_followup_copilot_pr.py +0 -136
  74. package/skills/pr-converge/scripts/test_open_followup_copilot_pr.py +0 -236
  75. package/skills/pr-converge/test_team_lifecycle.py +0 -56
  76. package/skills/pr-converge/workflows/ahk-auto-continue-loop.md +0 -108
@@ -1,6 +1,6 @@
1
1
  # gh API Pagination Rule
2
2
 
3
- **Root cause:** The GitHub REST API returns 30 items per page by default. `gh api repos/<owner>/<repo>/pulls/<number>/reviews` and `gh api repos/<owner>/<repo>/pulls/<number>/comments` silently truncate at 30 results without warning. PRs that have accumulated more than 30 reviews or inline comments — common on long PR-loop cycles where bugbot, copilot, or the in-house bugteam each post repeatedly — return only the **oldest** 30, hiding the most recent reviews and findings entirely. A `sort_by(.submitted_at) | last` (or `| reverse`) on a truncated array picks the latest entry **within the first 30**, not the actual latest, which produces a stale-but-confident report that then drives wrong decisions (e.g., re-triggering bugbot when it has already posted a CLEAN review on a later page).
3
+ **Root cause:** GitHub REST API list endpoints paginate by default. Without `--paginate --slurp`, callers see only the first (oldest) page, and cross-page jq operations (e.g., `sort_by | last`) operate within a single page, producing wrong-but-confident results.
4
4
 
5
5
  **Rule:** All `gh api` calls that read `pulls/<number>/reviews`, `pulls/<number>/comments`, `issues/<number>/comments`, or any other paginated GitHub list endpoint **must** request the full set of pages AND apply any cross-page jq operation through external `jq`, not through `gh`'s built-in `--jq`. Use `--paginate --slurp | jq` (preferred — see [Safe patterns](#safe-patterns)). Never call these endpoints with their default pagination, and never use `gh`'s `--jq` for cross-page operations like `sort_by | last` or `| reverse | .[0]`.
6
6
 
@@ -8,8 +8,8 @@
8
8
 
9
9
  This rule guards against two distinct silent-truncation defects that compound:
10
10
 
11
- 1. **Default 30-item page.** Without `--paginate`, only the first page is fetched. On long PRs this hides the most recent reviews entirely.
12
- 2. **`--jq` runs per-page, not on the concatenated result.** Per [GitHub CLI #10459](https://github.com/cli/cli/issues/10459), `gh api --paginate --jq '<filter>'` applies `<filter>` to each page **separately** and emits one output per page. Cross-page operations like `sort_by(.submitted_at) | last` therefore operate within each page independently, not across the merged result set. On PRs with more than 100 reviews this still produces a wrong-but-confident "latest" review even when `--paginate` is set.
11
+ 1. **Default page truncation.** Without `--paginate`, only the first page is fetched.
12
+ 2. **`--jq` runs per-page, not on the concatenated result.** Per [GitHub CLI #10459](https://github.com/cli/cli/issues/10459), `gh api --paginate --jq '<filter>'` applies `<filter>` to each page **separately** and emits one output per page. Cross-page operations like `sort_by(.submitted_at) | last` therefore operate within each page independently, not across the merged result set.
13
13
 
14
14
  The safe patterns below fix both defects together: `--paginate --slurp` walks every page AND emits a single merged structure, and an **external** `jq` then runs cross-page operations on that merged structure.
15
15
 
@@ -39,7 +39,7 @@ gh api 'repos/<owner>/<repo>/pulls/<number>/reviews?per_page=100' --paginate --s
39
39
  | jq '[.[][] | select(.user.login=="cursor[bot]")] | sort_by(.submitted_at) | last'
40
40
  ```
41
41
 
42
- The `.[][]` flattens the array-of-pages into one stream of items before the cross-page operators (`sort_by`, `last`, `reverse`) run. Combine with `?per_page=100` so each page fetches 100 items instead of 30, reducing round-trips on long PRs without changing correctness.
42
+ The `.[][]` flattens the array-of-pages into one stream of items before the cross-page operators (`sort_by`, `last`, `reverse`) run. Combine with `?per_page=100` to reduce round-trips on long PRs.
43
43
 
44
44
  `gh`'s `--jq` flag and `--slurp` flag are mutually exclusive (gh CLI rejects `--paginate --slurp --jq` with `the --slurp option is not supported with --jq or --template`), which is why the filter must run in an external `jq` invocation.
45
45
 
@@ -74,52 +74,6 @@ gh api 'repos/<owner>/<repo>/pulls/<number>/reviews?per_page=100' --paginate --s
74
74
 
75
75
  This is the canonical pattern for the bugbot ↔ bugteam convergence loop: walk newest-first, stop at the first clean review.
76
76
 
77
- ## What NOT to do
78
-
79
- ```bash
80
- # BAD — default 30-item page silently truncates on long PRs
81
- gh api repos/<owner>/<repo>/pulls/<number>/reviews \
82
- --jq '[.[] | select(.user.login=="cursor[bot]")] | sort_by(.submitted_at) | last'
83
-
84
- # BAD — `?per_page=100` alone caps at 100 items; PRs with 100+ reviews still truncate
85
- gh api 'repos/<owner>/<repo>/pulls/<number>/reviews?per_page=100' \
86
- --jq '[.[] | select(.user.login=="cursor[bot]")] | sort_by(.submitted_at) | last'
87
-
88
- # BAD — --paginate fetches every page, but `--jq` runs PER-PAGE (gh CLI #10459).
89
- # `sort_by(.submitted_at) | last` operates within each page independently and
90
- # emits one "latest" per page, not the actual latest across the full result set.
91
- gh api 'repos/<owner>/<repo>/pulls/<number>/reviews?per_page=100' --paginate \
92
- --jq '[.[] | select(.user.login=="cursor[bot]")] | sort_by(.submitted_at) | last'
93
-
94
- # BAD — taking `| last` on an unpaginated read returns the latest of the first 30,
95
- # not the actual latest. Same defect for `| reverse | .[0]`.
96
- ```
97
-
98
- ## Why both defects matter
99
-
100
- `gh api`'s default page is the FIRST page of results, ordered oldest-to-newest by the GitHub API. When the result set exceeds 30 items, page 1 contains the OLDEST 30 — not the newest. A jq `| last` after `sort_by(.submitted_at)` picks the latest entry within those 30 oldest items, producing output that looks correct but reports a state from days or weeks ago.
101
-
102
- `--paginate` alone does NOT fix this when paired with `--jq`: gh applies the jq filter to each page separately and emits one result per page. A consumer reading "the last line of output" still gets the latest within a single page, not the latest across all pages. The skill that consumes this output then makes decisions (re-trigger bugbot, mark a finding stale, report convergence) against an obsolete view of the PR.
103
-
104
- `--paginate --slurp | jq` fixes both defects: every page is fetched, every page is merged into one structure before any jq operator runs, and cross-page operations see the full result set.
105
-
106
- ## Consumers
107
-
108
- Skills and scripts in this repo that read paginated endpoints and must therefore use `--paginate --slurp` plus external `jq`:
109
-
110
- - `pr-converge` — bugbot review walk (BUGBOT phase, Step 2.a) and inline-comments fetch (Step 2.b).
111
- - `bugteam` — review threads, inline comments, audit-loop history.
112
- - `qbug` — same as bugteam, scoped to a single subagent loop.
113
- - `pr-review-responder` — review comments fetch (already enforced; this rule extends the same constraint to reviews and other endpoints).
114
- - `monitor-many` — open-PR enumeration and per-PR review/comment scans.
115
- - `babysit-pr` — review-comment polling.
116
-
117
- Updating any of these to read paginated endpoints requires `--paginate --slurp` plus external `jq` (or a documented single-page bound on a small list).
118
-
119
77
  ## Enforcement
120
78
 
121
79
  This rule is documentation-only at present. A future PreToolUse hook may pattern-match `Bash` invocations of `gh api repos/.../pulls/<n>/(reviews|comments)` without `--paginate --slurp` (or with `--paginate --jq` doing cross-page operations) and return a corrective message. Until that hook lands, treat this rule as binding by review and rely on it during skill authoring.
122
-
123
- ## Precedent
124
-
125
- The `pr-review-responder` skill predated this rule and forbids default pagination on `pulls/<n>/comments` reads (`packages/claude-dev-env/skills/pr-review-responder/SKILL.md` Rule 1). This file generalizes that constraint to every paginated GitHub endpoint, adds the `--jq` per-page defect (gh CLI #10459) discovered while reviewing this rule, and centralizes the safe patterns so additional skills inherit the rule by reference instead of restating it.
@@ -0,0 +1,36 @@
1
+ ---
2
+ paths: "**/*.md"
3
+ ---
4
+
5
+ # No Historical Clutter in Documentation
6
+
7
+ **When this applies:** Any Write or Edit to `.md` files.
8
+
9
+ ## Rule
10
+
11
+ Never reference removed implementations, old defaults, prior behaviors, or how something "used to be" when updating documentation. The current state is all that matters.
12
+
13
+ ## Examples of prohibited patterns
14
+
15
+ | Pattern | Why it's clutter |
16
+ |---------|-----------------|
17
+ | "instead of 30" in a pagination rule | The old default no longer exists in code; the rule reader doesn't need to know what it was |
18
+ | "previously this used X" | If X is gone, it's noise |
19
+ | "before this rule, we did Y" | The rule exists now; the before-state is irrelevant |
20
+ | "migrated from Z to W" | If Z is fully removed, the migration story is git history, not documentation |
21
+ | "the old implementation did A" | If A is gone, the reader gains nothing from knowing it existed |
22
+ | "originally" / "used to be" | Same — dead context |
23
+
24
+ ## What IS allowed
25
+
26
+ - Comparisons to *currently existing* alternatives (e.g., "use `--paginate --slurp | jq`, not `--jq` alone")
27
+ - Rationale that explains *why* a pattern is wrong in terms of present behavior (e.g., "`--jq` runs per-page, so cross-page operations produce wrong results")
28
+ - References to external sources for defects that still exist (e.g., gh CLI #10459)
29
+
30
+ ## The test
31
+
32
+ After writing documentation, ask: **"If someone reads this a year from now, with no knowledge of what came before, does every sentence still make sense and add value?"** If a sentence only adds value to someone who knew the old state, delete it.
33
+
34
+ ## Why
35
+
36
+ Historical references clog context windows and force readers to mentally filter "what was" from "what is." The git log is the authoritative record of what changed and why. Documentation describes the current contract.
@@ -0,0 +1,69 @@
1
+ ---
2
+ name: bg-agent
3
+ description: 'Delegates a task to a background agent. Invoked as "bg-agent [task to do]". Claude picks a suitable agent type from the available agents list and spawns it via Agent with run_in_background: true. Triggers on "/bg-agent", "bg-agent", "background agent for this".'
4
+ ---
5
+
6
+ # bg-agent
7
+
8
+ ## Overview
9
+
10
+ Delegates a task to a background agent so the main session can continue without waiting. This is the programmatic invocation path for background work — other skills (e.g. gotcha) and the user can both invoke it.
11
+
12
+ **Announce at start:** "Delegating to a background agent: `<one-line summary of task>`."
13
+
14
+ ## Instructions
15
+
16
+ ### Step 1 — Parse the task
17
+
18
+ The user (or calling skill) provides a task description after `bg-agent`. Example:
19
+
20
+ ```
21
+ bg-agent add a gotcha to the rebase skill about force-push lease format
22
+ ```
23
+
24
+ Extract the full task description from the arguments.
25
+
26
+ ### Step 2 — Select the right agent
27
+
28
+ Review the available agent types (listed in the system prompt's Agent tool description) and pick the most suitable one for the task:
29
+
30
+ - **Read-only tasks** (research, search, exploring code) → Explore agent or general-purpose agent.
31
+ - **Code authoring tasks** (writing/editing skill files, creating PRs) → general-purpose agent with `run_in_background: true`.
32
+ - **Specialized tasks** → pick the agent whose description best matches the task. For example, use `pr-description-writer` for PR descriptions, `git-commit-crafter` for commits.
33
+
34
+ If no specialized agent fits, use the general-purpose agent.
35
+
36
+ ### Step 3 — Spawn the background agent
37
+
38
+ Use the `Agent` tool with `run_in_background: true`. Write a self-contained prompt that:
39
+
40
+ - States the exact goal and expected output.
41
+ - Lists the files or directories involved (from the caller's context).
42
+ - Includes any constraints (do not create a PR, do not push, etc.).
43
+ - Specifies what success looks like.
44
+
45
+ Example for a gotcha-adding task:
46
+
47
+ ```
48
+ Agent({
49
+ description: "Add gotcha to skill file",
50
+ prompt: "Add a gotcha entry to packages/claude-dev-env/skills/rebase/SKILL.md. The gotcha is: 'force-push --force-with-lease requires the full <branch>:<sha> format, not just the branch name.' Add it under the ## Gotchas section. If no ## Gotchas section exists, create one at the bottom of the file.",
51
+ subagent_type: "general-purpose",
52
+ run_in_background: true
53
+ })
54
+ ```
55
+
56
+ ### Step 4 — Report spawn
57
+
58
+ Confirm the agent was spawned and state its task in one sentence. The caller does not need to wait for completion — background agents notify on completion automatically.
59
+
60
+ ## Constraints
61
+
62
+ - Always use `run_in_background: true`. This skill is specifically for background delegation.
63
+ - Never run the task inline in the main session. The point is to offload it.
64
+ - If the task requires a PR, the spawned agent handles the full flow (branch → commit → push → PR).
65
+ - Return control to the caller immediately after spawning. Do not poll for completion.
66
+
67
+ ## Gotchas
68
+
69
+ No gotchas are recorded yet. When a new gotcha is discovered during use, invoke `/gotcha` to add it to this section.
@@ -1,35 +1,26 @@
1
1
  # Bugteam — invariants and design rationale
2
2
 
3
- ## Path A vs Path B
4
-
5
- **Path A** (`CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1`): the constraints below apply as written — `TeamCreate`, isolated teammate sessions, lead-only `TeamDelete`. **Path B** (Task harness): read [`reference/workflow-path-b-task-harness.md`](reference/workflow-path-b-task-harness.md) for harness-only steps; **agent types** (`code-quality-agent`, `clean-coder`), **models**, **one commit per fix**, **gate-before-AUDIT**, **10-loop cap**, and **outcome XML** remain identical to `SKILL.md`. Path B intentionally uses **`Task`** from the lead instead of teammate isolation — see that file **Clean-room note**.
6
-
7
3
  ## Constraints
8
4
 
9
- - **Path A agent teams required, not parallel subagents from the lead without `TeamCreate`.** On Path A, the skill MUST use Claude Code's agent teams feature (`CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1`). Spawning `code-quality-agent` and `clean-coder` as parallel **Agents** with `team_name` from the lead is the supported pattern. Spawning ad-hoc `generalPurpose` workers in place of those roles = fail. **Path B** does not use `TeamCreate`; it uses **`Task`** carrying the same Path A spawn contracts per [`reference/workflow-path-b-task-harness.md`](reference/workflow-path-b-task-harness.md) (AUDIT/FIX spawn; when the host rejects `subagent_type="clean-coder"`, apply **Path B Cursor `Task` registry**). Not a substitute for skipping `code-quality-agent` / `clean-coder` work.
10
- - **Path B — Cursor `Task` registry.** When the host `Task` tool rejects `subagent_type="clean-coder"`, Path B FIX MUST use `subagent_type: "generalPurpose"` plus the mandatory **Read** of `clean-coder.md` in the FIX prompt per [`reference/workflow-path-b-task-harness.md`](reference/workflow-path-b-task-harness.md) (FIX spawn, Cursor host split). This is the documented shim, not an ad-hoc `generalPurpose` bypass of the clean-coder contract.
11
- - **Path A — orchestrator-only `TeamCreate`.** Only the lead session (this session, when `/bugteam` is invoked) calls `TeamCreate`. Teammates never call `TeamCreate` — if a teammate's spawn prompt instructs it to, that is a skill defect. When additional parallel work is needed (e.g., parallel auditors from loop 4 onward, supplementary audit of adjacent files), the lead spawns additional teammates into the EXISTING team by passing the current `team_name` to every `Agent(...)` call. Multiple teammate "sets" live inside one team under one orchestrator. The runtime enforces this: `TeamCreate` called while the session already leads a team returns the error `Already leading team "<name>". A leader can only manage one team at a time. Use TeamDelete to end the current team before creating a new one.` — direct quote from the runtime's response when this invariant is violated. The Step 2 lifecycle resolution in [Team lifecycle](SKILL.md#team-lifecycle-path-a-only) parses this exact error in `auto` mode to attach to the existing team rather than fail. **Path B:** no `TeamCreate`; parallel work uses parallel **`Task`** calls per [`reference/workflow-path-b-task-harness.md`](reference/workflow-path-b-task-harness.md).
12
- - **One team per invocation, multi-PR supported.** All PRs in a single /bugteam invocation share one team created by the orchestrator. Per-PR identity lives in the teammate name prefix (`bugfind-pr<N>-loop<L>` / `bugfix-pr<N>-loop<L>`) and the `<team_temp_dir>/pr-<N>/` subfolder containing that PR's git worktree, diff patches, and outcome XML files.
5
+ - **One run per invocation, multi-PR supported.** All PRs in a single /bugteam invocation share one `run_temp_dir`. Per-PR identity lives in the subagent name prefix (`bugfind-pr<N>-loop<L>` / `bugfix-pr<N>-loop<L>`) and the `<run_temp_dir>/pr-<N>/` subfolder containing that PR's git worktree, diff patches, and outcome XML files.
13
6
  - **Grant before any spawn, revoke before any return.** Step 0 grants project `.claude/**` permissions; Step 5 revokes. Both are mandatory. Revoke runs on every exit path including error, cap-reached, and stuck.
14
- - **Fresh teammate per loop.** Both bugfind and bugfix are spawned new each loop and shut down after their action. Reusing a teammate across loops accumulates context inside that teammate's window — defeats clean-room.
7
+ - **Fresh subagent per loop.** Both bugfind and bugfix are spawned new each loop. Reusing a subagent across loops accumulates context inside that subagent's window — defeats clean-room.
15
8
  - **One up-front confirmation = whole cycle.** The `/bugteam` invocation authorizes the entire cycle; every subsequent decision runs on that single authorization.
16
9
  - **10-loop hard cap.** Counted as **AUDIT** completions (increment in Step 3). Standards-fix passes before an audit do not advance `loop_count`. Worst case includes extra clean-coder spawns for the code-rules gate.
17
10
  - **Code rules gate before every AUDIT.** Run `_shared/pr-loop/scripts/code_rules_gate.py` (resolved via `${CLAUDE_SKILL_DIR}/../../_shared/pr-loop/scripts/code_rules_gate.py`) until exit **0** before spawning **bugfind**. Same `validate_content` logic as `hooks/blocking/code_rules_enforcer.py`.
18
- - **Clean-room audits, every loop.** Each bugfind teammate's spawn prompt contains only the PR scope, audit rubric, and the current loop number. Prior loop history stays in the lead.
19
- - **Targeted fixes.** Each fix teammate sees ONLY the most recent audit's findings. Prior loops are invisible to the fix teammate.
20
- - **Opus 4.7 at xhigh effort for both teammates.** Both `Agent(...)` spawns pass `model="opus"`, which resolves to Opus 4.7 on the Anthropic API. Opus 4.7's default effort level in Claude Code is `xhigh` (https://code.claude.com/docs/en/model-config — *"On Opus 4.7, the default effort is `xhigh` for all plans and providers."*), so no `effort` override is needed at spawn time. Effort is set per-subagent in YAML frontmatter, not via the `Agent` tool's parameters; `code-quality-agent` and `clean-coder` rely on the model default. The trade vs Sonnet is higher per-loop cost in exchange for deeper audit recall and stronger fix correctness on bug-hunting work, which the per-PR loop economics tolerate (10-loop hard cap bounds total spend).
21
- - **Fix teammate receives the latest audit as its input contract.** Passing the audit's findings to the fix teammate is the input contract — each loop's fix run operates on the current audit's output and only that.
11
+ - **Clean-room audits, every loop.** Each bugfind subagent's spawn prompt contains only the PR scope, audit rubric, and the current loop number. Prior loop history stays in the lead.
12
+ - **Targeted fixes.** Each fix subagent sees ONLY the most recent audit's findings. Prior loops are invisible to the fix subagent.
13
+ - **Opus 4.7 at xhigh effort for single-auditor, validator, and fix subagents.** Single-auditor mode, validator, and fix spawns pass `model="opus"`; parallel-auditor siblings (`-b` through `-k`) pass `model="haiku"`. Opus 4.7's default effort level in Claude Code is `xhigh` (https://code.claude.com/docs/en/model-config — *"On Opus 4.7, the default effort is `xhigh` for all plans and providers."*), so no `effort` override is needed at spawn time. Effort is set per-subagent in YAML frontmatter, not via the `Agent` tool's parameters; `code-quality-agent` and `clean-coder` rely on the model default. The trade-off versus Sonnet is higher per-loop cost in exchange for deeper audit recall and stronger fix correctness on bug-hunting work, which the per-PR loop economics tolerate (10-loop hard cap bounds total spend).
14
+ - **Fix subagent receives the latest audit as its input contract.** Passing the audit's findings to the fix subagent is the input contract — each loop's fix run operates on the current audit's output and only that.
22
15
  - **One commit per fix action.** Loops produce one commit per loop, not one per bug.
23
16
  - **Linear branch, fixed PR base.** Every loop appends one forward-only commit; existing commits and the PR base stay intact throughout the cycle.
24
- - **Lead-only cleanup, gated by `team_owned`.** Per the docs: *"Always use the lead to clean up. Teammates should not run cleanup because their team context may not resolve correctly, potentially leaving resources in an inconsistent state."* This session is the lead, and cleanup runs here only. Step 4 calls `TeamDelete` **only when `team_owned == true`** (this invocation called `TeamCreate` itself). When `team_owned == false` (lifecycle `attach`, or `auto` after the runtime's `Already leading team` fallback), the orchestrator that originally created the team owns teardown — see [Team lifecycle](SKILL.md#team-lifecycle-path-a-only).
25
- - **Orchestrators must use `attach` mode, not `owned`.** When `/bugteam` runs inside an orchestrator that is itself managing a long-lived team across PRs (`pr-converge` multi-PR mode, `monitor-open-prs`), the orchestrator passes `BUGTEAM_TEAM_LIFECYCLE=attach` and `BUGTEAM_TEAM_NAME=<existing>`. `owned` mode under such an orchestrator would either error out (the session already leads a team) or, worse, tear down the orchestrator's team mid-sweep on the first invocation's Step 4. `auto` is the safe default for ambiguous callers; `attach` is the explicit-orchestrator contract.
26
- - **Cleanup the per-team scoped temp directory on exit, gated by `team_owned`.** When `team_owned == true`, the resolved `<team_temp_dir>` is removed entirely so no loop patches leak between runs. When `team_owned == false`, only this invocation's per-PR subfolders (`<team_temp_dir>/pr-<N>/`) are removed; the orchestrator-owned parent stays so the next attached invocation can write its own per-PR subfolders without colliding.
27
- - **Cleanup all `.bugteam-*` files on exit.** `.bugteam-loop-*.patch`, `.bugteam-loop-*.outcomes.xml`, `.bugteam-final.diff`, `.bugteam-original-body.md`, `.bugteam-final-body.md`. Working directory ends clean.
28
- - **Audit/fix comment posting.** **Path A:** Bugfind posts ONE per-loop review (parent body + child finding comments in a single batched POST, with review-fallback to a top-level issue comment). Bugfix posts the fix replies after committing. All comment, review, and reply POSTs belong to the teammates; the lead's single PR-write action is the final description rewrite at Step 4.5. **Path B:** the **lead** performs the same POSTs after Task handoffs (`SKILL.md` Step 2.5 + [`reference/workflow-path-b-task-harness.md`](reference/workflow-path-b-task-harness.md) § Step 2.5).
17
+ - **Lead-only cleanup.** Cleanup runs in the lead (this session) only. Step 4 removes the full `<run_temp_dir>` so no loop patches leak between runs.
18
+ - **Clean up all `.bugteam-*` files on exit.** The per-run `<run_temp_dir>` is removed entirely by Step 4, which covers `<run_temp_dir>/pr-<N>/loop-<L>.patch` and `<run_temp_dir>/pr-<N>/loop-<L>-<letter>.outcomes.xml`. The per-loop outcomes XML at `<worktree_path>/.bugteam-pr<N>-loop<L>.outcomes.xml` is removed with the worktree. Step 4.5 deletes `.bugteam-final.diff`, `.bugteam-original-body.md`, and `.bugteam-final-body.md`. The working directory ends clean.
19
+ - **Audit/fix comment posting.** The bugfind subagent posts ONE per-loop review (parent body + child finding comments in a single batched POST, with review-fallback to a top-level issue comment). The bugfix subagent posts the fix replies after committing. All comment, review, and reply POSTs belong to the subagents; the lead's single PR-write action is the final description rewrite at Step 4.5.
29
20
  - **Lead owns the final PR description rewrite only** (Step 4.5), and only via the `pr-description-writer` agent. The lead does not compose the description inline.
30
21
  - **One review per loop, findings as child comments of that review.** Each loop posts a single pull-request review whose body is the loop header and whose `comments[]` are the anchored findings. Each loop's review stands alone — one review created per loop, fully self-contained on the PR conversation.
31
22
  - **PR description rewrite on every exit.** Step 4.5 runs on `converged`, `cap reached`, and `stuck`. On `error`, the rewrite is best-effort; if it fails, surface the error in the final report and continue to revoke.
32
- - **Outcome XML, not JSON.** Both teammates write structured outcome data (findings or fix outcomes) to `.bugteam-loop-<N>.outcomes.xml`. The lead reads these files between actions. XML chosen for parser robustness against multi-line, special-character, and quoted reason fields.
23
+ - **Outcome XML, not JSON.** Both subagents write structured outcome data (findings or fix outcomes) to `.bugteam-pr<N>-loop<L>.outcomes.xml`. The lead reads these files between actions. XML chosen for parser robustness against multi-line, special-character, and quoted reason fields.
33
24
 
34
25
  ## Why this design
35
26
 
@@ -10,7 +10,7 @@ Keep the spawn prompt self-contained: reference only the PR scope, audit rubric,
10
10
  <branch>head ref</branch>
11
11
  <base_branch>base ref</base_branch>
12
12
  <pr_url>full URL</pr_url>
13
- <loop>N</loop>
13
+ <loop>L</loop>
14
14
  <pr_number>N</pr_number>
15
15
  <worktree_path>absolute path from Step 1 per-PR workspace</worktree_path>
16
16
  </context>
@@ -18,7 +18,7 @@ Keep the spawn prompt self-contained: reference only the PR scope, audit rubric,
18
18
  cd into `<worktree_path>` before any git, gh, or file operation.
19
19
 
20
20
  <scope>
21
- <diff_path>Absolute path to the per-PR patch file: <team_temp_dir>/pr-<N>/loop-<L>.patch (same path as gh pr diff redirect in AUDIT)</diff_path>
21
+ <diff_path>Absolute path to the per-PR patch file: <run_temp_dir>/pr-<N>/loop-<L>.patch (same path as gh pr diff redirect in AUDIT)</diff_path>
22
22
  <scope_rule>Audit only lines added or modified in the diff. Pre-existing code on untouched lines is out of scope.</scope_rule>
23
23
  </scope>
24
24
 
@@ -45,11 +45,17 @@ cd into `<worktree_path>` before any git, gh, or file operation.
45
45
  </constraints>
46
46
 
47
47
  <comment_posting>
48
+ Sibling auditors (-b through -k): run only steps 1–3 (audit, assign IDs,
49
+ capture excerpt, validate anchors), then write outcome XML per <output_format> and return.
50
+ Skip steps 4–8 — sibling auditors do not post PR reviews.
51
+
52
+ Validator (-a) and single-opus auditors: run all steps below.
53
+
48
54
  1. Audit the diff against the 10 categories above. Buffer the findings
49
55
  in memory; all posting happens at step 6 once anchors are validated.
50
- 2. Assign each finding a stable finding_id of exactly the form `loopN-K`
51
- where K is 1-based within this loop.
52
- 3. Validate every finding's (file, line) against the captured diff. Split
56
+ 2. Assign each finding a stable finding_id of exactly the form `loop<L>-<K>`
57
+ where <K> is 1-based within this loop.
58
+ 3. For each finding, capture a verbatim excerpt from the target file at the cited line. Populate the `<excerpt>` element in the outcome XML with it. Validate every finding's (file, line) against the captured diff. Split
53
59
  findings into two buckets: anchored (line is in the diff) and
54
60
  unanchored (line is not in the diff — goes into the review body's
55
61
  "Findings without a diff anchor" section per Step 2.5).
@@ -61,7 +67,7 @@ cd into `<worktree_path>` before any git, gh, or file operation.
61
67
  Category: <letter> (<category name>)
62
68
  <2-3 sentence description with concrete trace>
63
69
 
64
- _From /bugteam audit loop N._
70
+ _From /bugteam audit loop <L>._
65
71
 
66
72
  6. Post ONE review via Step 2.5's per-loop review CLI shape. Harvest the
67
73
  parent review `html_url` from the response JSON and the `comments[]`
@@ -76,17 +82,17 @@ cd into `<worktree_path>` before any git, gh, or file operation.
76
82
  </comment_posting>
77
83
 
78
84
  <output_format>
79
- Write the outcome XML below to .bugteam-pr<N>-loop<L>.outcomes.xml inside
80
- the PR's worktree directory (<worktree_path>). Return only that path on stdout. The schema:
85
+ For the (-a) validator and single-opus auditors: write the outcome XML below to .bugteam-pr<N>-loop<L>.outcomes.xml inside
86
+ the PR's worktree directory (<worktree_path>). For sibling auditors (-b through -k): write to <run_temp_dir>/pr-<N>/loop-<L>-<letter>.outcomes.xml (absolute path passed in the prompt). Sibling auditors do not post PR reviews; set review_url, finding_comment_id, and finding_comment_url to empty strings, and used_fallback to "false". Omit unanchored findings from sibling output — only the validator handles those. Return only the path of the outcome file you wrote on stdout. The schema:
81
87
  </output_format>
82
88
  ```
83
89
 
84
90
  ## AUDIT outcome XML schema (bugfind writes this)
85
91
 
86
92
  ```xml
87
- <bugteam_audit loop="<N>" review_url="<url>">
93
+ <bugteam_audit loop="<L>" review_url="<url>">
88
94
  <finding
89
- finding_id="loop<N>-<index>"
95
+ finding_id="loop<L>-<K>"
90
96
  severity="P0|P1|P2"
91
97
  category="<letter>"
92
98
  file="<path>"
@@ -96,6 +102,7 @@ cd into `<worktree_path>` before any git, gh, or file operation.
96
102
  used_fallback="true|false"
97
103
  >
98
104
  <title>one-line title</title>
105
+ <excerpt>verbatim source line or snippet from the file at the cited line</excerpt>
99
106
  <description>2-3 sentence description with concrete trace</description>
100
107
  </finding>
101
108
  <verified_clean>
@@ -114,7 +121,7 @@ After the teammate writes the XML and returns, the lead reads `.bugteam-pr<N>-lo
114
121
  <branch>head</branch>
115
122
  <base_branch>base</base_branch>
116
123
  <pr_url>url</pr_url>
117
- <loop>N</loop>
124
+ <loop>L</loop>
118
125
  <pr_number>N</pr_number>
119
126
  <worktree_path>absolute path from Step 1 per-PR workspace</worktree_path>
120
127
  </context>
@@ -124,7 +131,7 @@ cd into `<worktree_path>` before any git, gh, or file operation.
124
131
  <bugs_to_fix>
125
132
  [for each P0/P1/P2 finding from last_findings:]
126
133
  <bug
127
- finding_id="loop<N>-<index>"
134
+ finding_id="loop<L>-<K>"
128
135
  severity="P0|P1|P2"
129
136
  file="<path>"
130
137
  line="<int>"
@@ -156,9 +163,9 @@ cd into `<worktree_path>` before any git, gh, or file operation.
156
163
  </execution>
157
164
 
158
165
  <outcome_xml_schema>
159
- <bugteam_fix loop="<N>" commit_sha="<sha or empty if no commit>">
166
+ <bugteam_fix loop="<L>" commit_sha="<sha or empty if no commit>">
160
167
  <outcome
161
- finding_id="loop<N>-<index>"
168
+ finding_id="loop<L>-<K>"
162
169
  status="fixed|could_not_address|hook_blocked"
163
170
  commit_sha="<sha if fixed, empty otherwise>"
164
171
  reply_comment_id="<id of the reply posted>"