claude-dev-env 1.36.2 → 1.37.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/_shared/pr-loop/scripts/config/preflight_constants.py +29 -8
- package/_shared/pr-loop/scripts/preflight.py +242 -20
- package/_shared/pr-loop/scripts/tests/test_preflight.py +362 -25
- package/_shared/pr-loop/scripts/tests/test_preflight_constants.py +9 -14
- package/hooks/blocking/code_rules_enforcer.py +269 -23
- package/hooks/blocking/test_code_rules_enforcer_unused_imports.py +157 -1
- package/hooks/config/test_unused_module_import_constants.py +48 -0
- package/hooks/config/unused_module_import_constants.py +41 -0
- package/package.json +1 -1
- package/rules/gh-paginate.md +4 -50
- package/rules/no-historical-clutter.md +36 -0
- package/skills/bg-agent/SKILL.md +69 -0
- package/skills/bugteam/CONSTRAINTS.md +10 -19
- package/skills/bugteam/PROMPTS.md +21 -14
- package/skills/bugteam/SKILL.md +122 -208
- package/skills/bugteam/SKILL_EVALS.md +75 -114
- package/skills/bugteam/reference/README.md +2 -4
- package/skills/bugteam/reference/audit-and-teammates.md +21 -48
- package/skills/bugteam/reference/audit-contract.md +7 -7
- package/skills/bugteam/reference/design-rationale.md +3 -8
- package/skills/bugteam/reference/team-setup.md +11 -19
- package/skills/bugteam/reference/teardown-publish-permissions.md +2 -14
- package/skills/bugteam/scripts/config/__init__.py +0 -0
- package/skills/bugteam/scripts/config/reflow_skill_md_constants.py +12 -0
- package/skills/bugteam/scripts/reflow_skill_md.py +51 -47
- package/skills/bugteam/sources.md +1 -25
- package/skills/bugteam/test_skill_additions.py +4 -13
- package/skills/fresh-branch/SKILL.md +71 -0
- package/skills/gotcha/SKILL.md +73 -0
- package/skills/monitor-open-prs/SKILL.md +4 -37
- package/skills/monitor-open-prs/test_skill_contract.py +0 -5
- package/skills/pr-converge/SKILL.md +60 -1298
- package/skills/pr-converge/reference/convergence-gates.md +122 -0
- package/skills/pr-converge/reference/examples.md +76 -0
- package/skills/pr-converge/reference/fix-protocol.md +56 -0
- package/skills/pr-converge/reference/ground-rules.md +13 -0
- package/skills/pr-converge/reference/multi-pr-orchestration.md +204 -0
- package/skills/pr-converge/reference/per-tick.md +204 -0
- package/skills/pr-converge/reference/state-schema.md +19 -0
- package/skills/pr-converge/reference/stop-conditions.md +26 -0
- package/skills/pr-converge/scripts/README.md +36 -9
- package/skills/pr-converge/scripts/check_pr_mergeability.py +1 -2
- package/skills/pr-converge/scripts/config/pr_converge_constants.py +74 -5
- package/skills/pr-converge/scripts/config/reflow_skill_md_constants.py +13 -0
- package/skills/pr-converge/scripts/config/test_pr_converge_constants.py +0 -24
- package/skills/pr-converge/scripts/cursor-agents-continue.ahk +22 -2
- package/skills/pr-converge/scripts/fetch_bugbot_inline_comments.py +19 -59
- package/skills/pr-converge/scripts/fetch_bugbot_reviews.py +15 -61
- package/skills/pr-converge/scripts/fetch_claude_inline_comments.py +70 -0
- package/skills/pr-converge/scripts/fetch_claude_reviews.py +61 -0
- package/skills/pr-converge/scripts/fetch_copilot_inline_comments.py +19 -61
- package/skills/pr-converge/scripts/fetch_copilot_reviews.py +14 -74
- package/skills/pr-converge/scripts/reflow_skill_md.py +71 -50
- package/skills/pr-converge/scripts/reviewer_fetch_core.py +153 -0
- package/skills/pr-converge/scripts/reviewer_specs.py +98 -0
- package/skills/pr-converge/scripts/test_cursor_agents_continue.py +65 -0
- package/skills/pr-converge/scripts/test_fetch_bugbot_inline_comments.py +107 -6
- package/skills/pr-converge/scripts/test_fetch_bugbot_reviews.py +85 -6
- package/skills/pr-converge/scripts/test_fetch_claude_inline_comments.py +485 -0
- package/skills/pr-converge/scripts/test_fetch_claude_reviews.py +368 -0
- package/skills/pr-converge/scripts/test_fetch_copilot_inline_comments.py +74 -6
- package/skills/pr-converge/scripts/test_fetch_copilot_reviews.py +94 -8
- package/skills/pr-converge/scripts/test_reflow_skill_md.py +162 -0
- package/skills/pr-converge/scripts/test_reviewer_fetch_core.py +448 -0
- package/skills/pr-converge/scripts/test_reviewer_specs.py +107 -0
- package/skills/pr-converge/scripts/test_view_pr_context.py +44 -0
- package/skills/pr-converge/scripts/view_pr_context.py +35 -4
- package/skills/pr-converge/workflows/schedule-wakeup-loop.md +24 -22
- package/skills/bugteam/reference/workflow-path-a-orchestrated-teams.md +0 -113
- package/skills/bugteam/reference/workflow-path-b-task-harness.md +0 -48
- package/skills/bugteam/test_team_lifecycle.py +0 -103
- package/skills/monitor-open-prs/test_team_lifecycle.py +0 -46
- package/skills/pr-converge/scripts/open_followup_copilot_pr.py +0 -136
- package/skills/pr-converge/scripts/test_open_followup_copilot_pr.py +0 -236
- package/skills/pr-converge/test_team_lifecycle.py +0 -56
- package/skills/pr-converge/workflows/ahk-auto-continue-loop.md +0 -108
package/rules/gh-paginate.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# gh API Pagination Rule
|
|
2
2
|
|
|
3
|
-
**Root cause:**
|
|
3
|
+
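**Root cause:** GitHub REST API list endpoints paginate by default. Without `--paginate`, callers see only the first page (the oldest items). With `--paginate --jq`, the filter runs on each page separately, so cross-page jq operations (e.g., `sort_by | last`) operate within a single page — producing wrong-but-confident results. `--paginate --slurp` piped to an external `jq` avoids both.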
|
|
4
4
|
|
|
5
5
|
**Rule:** All `gh api` calls that read `pulls/<number>/reviews`, `pulls/<number>/comments`, `issues/<number>/comments`, or any other paginated GitHub list endpoint **must** request the full set of pages AND apply any cross-page jq operation through external `jq`, not through `gh`'s built-in `--jq`. Use `--paginate --slurp | jq` (preferred — see [Safe patterns](#safe-patterns)). Never call these endpoints with their default pagination, and never use `gh`'s `--jq` for cross-page operations like `sort_by | last` or `| reverse | .[0]`.
|
|
6
6
|
|
|
@@ -8,8 +8,8 @@
|
|
|
8
8
|
|
|
9
9
|
This rule guards against two distinct silent-truncation defects that compound:
|
|
10
10
|
|
|
11
|
-
1. **Default
|
|
12
|
-
2. **`--jq` runs per-page, not on the concatenated result.** Per [GitHub CLI #10459](https://github.com/cli/cli/issues/10459), `gh api --paginate --jq '<filter>'` applies `<filter>` to each page **separately** and emits one output per page. Cross-page operations like `sort_by(.submitted_at) | last` therefore operate within each page independently, not across the merged result set.
|
|
11
|
+
1. **Default page truncation.** Without `--paginate`, only the first page is fetched.
|
|
12
|
+
2. **`--jq` runs per-page, not on the concatenated result.** Per [GitHub CLI #10459](https://github.com/cli/cli/issues/10459), `gh api --paginate --jq '<filter>'` applies `<filter>` to each page **separately** and emits one output per page. Cross-page operations like `sort_by(.submitted_at) | last` therefore operate within each page independently, not across the merged result set.
|
|
13
13
|
|
|
14
14
|
The safe patterns below fix both defects together: `--paginate --slurp` walks every page AND emits a single merged structure, and an **external** `jq` then runs cross-page operations on that merged structure.
|
|
15
15
|
|
|
@@ -39,7 +39,7 @@ gh api 'repos/<owner>/<repo>/pulls/<number>/reviews?per_page=100' --paginate --s
|
|
|
39
39
|
| jq '[.[][] | select(.user.login=="cursor[bot]")] | sort_by(.submitted_at) | last'
|
|
40
40
|
```
|
|
41
41
|
|
|
42
|
-
The `.[][]` flattens the array-of-pages into one stream of items before the cross-page operators (`sort_by`, `last`, `reverse`) run. Combine with `?per_page=100`
|
|
42
|
+
The `.[][]` flattens the array-of-pages into one stream of items before the cross-page operators (`sort_by`, `last`, `reverse`) run. Combine with `?per_page=100` to reduce round-trips on long PRs.
|
|
43
43
|
|
|
44
44
|
`gh`'s `--jq` flag and `--slurp` flag are mutually exclusive (gh CLI rejects `--paginate --slurp --jq` with `the --slurp option is not supported with --jq or --template`), which is why the filter must run in an external `jq` invocation.
|
|
45
45
|
|
|
@@ -74,52 +74,6 @@ gh api 'repos/<owner>/<repo>/pulls/<number>/reviews?per_page=100' --paginate --s
|
|
|
74
74
|
|
|
75
75
|
This is the canonical pattern for the bugbot ↔ bugteam convergence loop: walk newest-first, stop at the first clean review.
|
|
76
76
|
|
|
77
|
-
## What NOT to do
|
|
78
|
-
|
|
79
|
-
```bash
|
|
80
|
-
# BAD — default 30-item page silently truncates on long PRs
|
|
81
|
-
gh api repos/<owner>/<repo>/pulls/<number>/reviews \
|
|
82
|
-
--jq '[.[] | select(.user.login=="cursor[bot]")] | sort_by(.submitted_at) | last'
|
|
83
|
-
|
|
84
|
-
# BAD — `?per_page=100` alone caps at 100 items; PRs with 100+ reviews still truncate
|
|
85
|
-
gh api 'repos/<owner>/<repo>/pulls/<number>/reviews?per_page=100' \
|
|
86
|
-
--jq '[.[] | select(.user.login=="cursor[bot]")] | sort_by(.submitted_at) | last'
|
|
87
|
-
|
|
88
|
-
# BAD — --paginate fetches every page, but `--jq` runs PER-PAGE (gh CLI #10459).
|
|
89
|
-
# `sort_by(.submitted_at) | last` operates within each page independently and
|
|
90
|
-
# emits one "latest" per page, not the actual latest across the full result set.
|
|
91
|
-
gh api 'repos/<owner>/<repo>/pulls/<number>/reviews?per_page=100' --paginate \
|
|
92
|
-
--jq '[.[] | select(.user.login=="cursor[bot]")] | sort_by(.submitted_at) | last'
|
|
93
|
-
|
|
94
|
-
# BAD — taking `| last` on an unpaginated read returns the latest of the first 30,
|
|
95
|
-
# not the actual latest. Same defect for `| reverse | .[0]`.
|
|
96
|
-
```
|
|
97
|
-
|
|
98
|
-
## Why both defects matter
|
|
99
|
-
|
|
100
|
-
`gh api`'s default page is the FIRST page of results, ordered oldest-to-newest by the GitHub API. When the result set exceeds 30 items, page 1 contains the OLDEST 30 — not the newest. A jq `| last` after `sort_by(.submitted_at)` picks the latest entry within those 30 oldest items, producing output that looks correct but reports a state from days or weeks ago.
|
|
101
|
-
|
|
102
|
-
`--paginate` alone does NOT fix this when paired with `--jq`: gh applies the jq filter to each page separately and emits one result per page. A consumer reading "the last line of output" still gets the latest within a single page, not the latest across all pages. The skill that consumes this output then makes decisions (re-trigger bugbot, mark a finding stale, report convergence) against an obsolete view of the PR.
|
|
103
|
-
|
|
104
|
-
`--paginate --slurp | jq` fixes both defects: every page is fetched, every page is merged into one structure before any jq operator runs, and cross-page operations see the full result set.
|
|
105
|
-
|
|
106
|
-
## Consumers
|
|
107
|
-
|
|
108
|
-
Skills and scripts in this repo that read paginated endpoints and must therefore use `--paginate --slurp` plus external `jq`:
|
|
109
|
-
|
|
110
|
-
- `pr-converge` — bugbot review walk (BUGBOT phase, Step 2.a) and inline-comments fetch (Step 2.b).
|
|
111
|
-
- `bugteam` — review threads, inline comments, audit-loop history.
|
|
112
|
-
- `qbug` — same as bugteam, scoped to a single subagent loop.
|
|
113
|
-
- `pr-review-responder` — review comments fetch (already enforced; this rule extends the same constraint to reviews and other endpoints).
|
|
114
|
-
- `monitor-many` — open-PR enumeration and per-PR review/comment scans.
|
|
115
|
-
- `babysit-pr` — review-comment polling.
|
|
116
|
-
|
|
117
|
-
Updating any of these to read paginated endpoints requires `--paginate --slurp` plus external `jq` (or a documented single-page bound on a small list).
|
|
118
|
-
|
|
119
77
|
## Enforcement
|
|
120
78
|
|
|
121
79
|
This rule is documentation-only at present. A future PreToolUse hook may pattern-match `Bash` invocations of `gh api repos/.../pulls/<n>/(reviews|comments)` without `--paginate --slurp` (or with `--paginate --jq` doing cross-page operations) and return a corrective message. Until that hook lands, treat this rule as binding by review and rely on it during skill authoring.
|
|
122
|
-
|
|
123
|
-
## Precedent
|
|
124
|
-
|
|
125
|
-
The `pr-review-responder` skill predated this rule and forbids default pagination on `pulls/<n>/comments` reads (`packages/claude-dev-env/skills/pr-review-responder/SKILL.md` Rule 1). This file generalizes that constraint to every paginated GitHub endpoint, adds the `--jq` per-page defect (gh CLI #10459) discovered while reviewing this rule, and centralizes the safe patterns so additional skills inherit the rule by reference instead of restating it.
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
---
|
|
2
|
+
paths: **/*.md
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# No Historical Clutter in Documentation
|
|
6
|
+
|
|
7
|
+
**When this applies:** Any Write or Edit to `.md` files.
|
|
8
|
+
|
|
9
|
+
## Rule
|
|
10
|
+
|
|
11
|
+
Never reference removed implementations, old defaults, prior behaviors, or how something "used to be" when updating documentation. The current state is all that matters.
|
|
12
|
+
|
|
13
|
+
## Examples of prohibited patterns
|
|
14
|
+
|
|
15
|
+
| Pattern | Why it's clutter |
|
|
16
|
+
|---------|-----------------|
|
|
17
|
+
| "instead of 30" in a pagination rule | The old default no longer exists in code; the rule reader doesn't need to know what it was |
|
|
18
|
+
| "previously this used X" | If X is gone, it's noise |
|
|
19
|
+
| "before this rule, we did Y" | The rule exists now; the before-state is irrelevant |
|
|
20
|
+
| "migrated from Z to W" | If Z is fully removed, the migration story is git history, not documentation |
|
|
21
|
+
| "the old implementation did A" | If A is gone, the reader gains nothing from knowing it existed |
|
|
22
|
+
| "originally" / "used to be" | Same — dead context |
|
|
23
|
+
|
|
24
|
+
## What IS allowed
|
|
25
|
+
|
|
26
|
+
- Comparisons to *currently existing* alternatives (e.g., "use `--paginate --slurp | jq`, not `--jq` alone")
|
|
27
|
+
- Rationale that explains *why* a pattern is wrong in terms of present behavior (e.g., "`--jq` runs per-page, so cross-page operations produce wrong results")
|
|
28
|
+
- References to external sources for defects that still exist (e.g., gh CLI #10459)
|
|
29
|
+
|
|
30
|
+
## The test
|
|
31
|
+
|
|
32
|
+
After writing documentation, ask: **"If someone reads this a year from now, with no knowledge of what came before, does every sentence still make sense and add value?"** If a sentence only adds value to someone who knew the old state, delete it.
|
|
33
|
+
|
|
34
|
+
## Why
|
|
35
|
+
|
|
36
|
+
Historical references clog context windows and force readers to mentally filter "what was" from "what is." The git log is the authoritative record of what changed and why. Documentation describes the current contract.
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: bg-agent
|
|
3
|
+
description: Delegates a task to a background agent. Invoked as "bg-agent [task to do]". Claude picks a suitable agent type from the available agents list and spawns it via Agent with run_in_background: true. Triggers on "/bg-agent", "bg-agent", "background agent for this".
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# bg-agent
|
|
7
|
+
|
|
8
|
+
## Overview
|
|
9
|
+
|
|
10
|
+
Delegates a task to a background agent so the main session can continue without waiting. This is the programmatic invocation path for background work — other skills (e.g. gotcha) and the user can both invoke it.
|
|
11
|
+
|
|
12
|
+
**Announce at start:** "Delegating to a background agent: `<one-line summary of task>`."
|
|
13
|
+
|
|
14
|
+
## Instructions
|
|
15
|
+
|
|
16
|
+
### Step 1 — Parse the task
|
|
17
|
+
|
|
18
|
+
The user (or calling skill) provides a task description after `bg-agent`. Example:
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
bg-agent add a gotcha to the rebase skill about force-push lease format
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
Extract the full task description from the arguments.
|
|
25
|
+
|
|
26
|
+
### Step 2 — Select the right agent
|
|
27
|
+
|
|
28
|
+
Review the available agent types (listed in the system prompt's Agent tool description) and pick the most suitable one for the task:
|
|
29
|
+
|
|
30
|
+
- **Read-only tasks** (research, search, exploring code) → Explore agent or general-purpose agent.
|
|
31
|
+
- **Code authoring tasks** (writing/editing skill files, creating PRs) → general-purpose agent with `run_in_background: true`.
|
|
32
|
+
- **Specialized tasks** → pick the agent whose description best matches the task. For example, use `pr-description-writer` for PR descriptions, `git-commit-crafter` for commits.
|
|
33
|
+
|
|
34
|
+
If no specialized agent fits, use the general-purpose agent.
|
|
35
|
+
|
|
36
|
+
### Step 3 — Spawn the background agent
|
|
37
|
+
|
|
38
|
+
Use the `Agent` tool with `run_in_background: true`. Write a self-contained prompt that:
|
|
39
|
+
|
|
40
|
+
- States the exact goal and expected output.
|
|
41
|
+
- Lists the files or directories involved (from the caller's context).
|
|
42
|
+
- Includes any constraints (do not create a PR, do not push, etc.).
|
|
43
|
+
- Specifies what success looks like.
|
|
44
|
+
|
|
45
|
+
Example for a gotcha-adding task:
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
Agent({
|
|
49
|
+
description: "Add gotcha to skill file",
|
|
50
|
+
prompt: "Add a gotcha entry to packages/claude-dev-env/skills/rebase/SKILL.md. The gotcha is: 'force-push --force-with-lease requires the full <branch>:<sha> format, not just the branch name.' Add it under the ## Gotchas section. If no ## Gotchas section exists, create one at the bottom of the file.",
|
|
51
|
+
subagent_type: "general-purpose",
|
|
52
|
+
run_in_background: true
|
|
53
|
+
})
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Step 4 — Report spawn
|
|
57
|
+
|
|
58
|
+
Confirm the agent was spawned and state its task in one sentence. The caller does not need to wait for completion — background agents notify on completion automatically.
|
|
59
|
+
|
|
60
|
+
## Constraints
|
|
61
|
+
|
|
62
|
+
- Always use `run_in_background: true`. This skill is specifically for background delegation.
|
|
63
|
+
- Never run the task inline in the main session. The point is to offload it.
|
|
64
|
+
- If the task requires a PR, the spawned agent handles the full flow (branch → commit → push → PR).
|
|
65
|
+
- Return control to the caller immediately after spawning. Do not poll for completion.
|
|
66
|
+
|
|
67
|
+
## Gotchas
|
|
68
|
+
|
|
69
|
+
This section is the gotcha reference for this skill. When a new gotcha is discovered during use, invoke `/gotcha` to add it here.
|
|
@@ -1,35 +1,26 @@
|
|
|
1
1
|
# Bugteam — invariants and design rationale
|
|
2
2
|
|
|
3
|
-
## Path A vs Path B
|
|
4
|
-
|
|
5
|
-
**Path A** (`CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1`): the constraints below apply as written — `TeamCreate`, isolated teammate sessions, lead-only `TeamDelete`. **Path B** (Task harness): read [`reference/workflow-path-b-task-harness.md`](reference/workflow-path-b-task-harness.md) for harness-only steps; **agent types** (`code-quality-agent`, `clean-coder`), **models**, **one commit per fix**, **gate-before-AUDIT**, **10-loop cap**, and **outcome XML** remain identical to `SKILL.md`. Path B intentionally uses **`Task`** from the lead instead of teammate isolation — see that file **Clean-room note**.
|
|
6
|
-
|
|
7
3
|
## Constraints
|
|
8
4
|
|
|
9
|
-
- **
|
|
10
|
-
- **Path B — Cursor `Task` registry.** When the host `Task` tool rejects `subagent_type="clean-coder"`, Path B FIX MUST use `subagent_type: "generalPurpose"` plus the mandatory **Read** of `clean-coder.md` in the FIX prompt per [`reference/workflow-path-b-task-harness.md`](reference/workflow-path-b-task-harness.md) (FIX spawn, Cursor host split). This is the documented shim, not an ad-hoc `generalPurpose` bypass of the clean-coder contract.
|
|
11
|
-
- **Path A — orchestrator-only `TeamCreate`.** Only the lead session (this session, when `/bugteam` is invoked) calls `TeamCreate`. Teammates never call `TeamCreate` — if a teammate's spawn prompt instructs it to, that is a skill defect. When additional parallel work is needed (e.g., parallel auditors from loop 4 onward, supplementary audit of adjacent files), the lead spawns additional teammates into the EXISTING team by passing the current `team_name` to every `Agent(...)` call. Multiple teammate "sets" live inside one team under one orchestrator. The runtime enforces this: `TeamCreate` called while the session already leads a team returns the error `Already leading team "<name>". A leader can only manage one team at a time. Use TeamDelete to end the current team before creating a new one.` — direct quote from the runtime's response when this invariant is violated. The Step 2 lifecycle resolution in [Team lifecycle](SKILL.md#team-lifecycle-path-a-only) parses this exact error in `auto` mode to attach to the existing team rather than fail. **Path B:** no `TeamCreate`; parallel work uses parallel **`Task`** calls per [`reference/workflow-path-b-task-harness.md`](reference/workflow-path-b-task-harness.md).
|
|
12
|
-
- **One team per invocation, multi-PR supported.** All PRs in a single /bugteam invocation share one team created by the orchestrator. Per-PR identity lives in the teammate name prefix (`bugfind-pr<N>-loop<L>` / `bugfix-pr<N>-loop<L>`) and the `<team_temp_dir>/pr-<N>/` subfolder containing that PR's git worktree, diff patches, and outcome XML files.
|
|
5
|
+
- **One run per invocation, multi-PR supported.** All PRs in a single /bugteam invocation share one `run_temp_dir`. Per-PR identity lives in the subagent name prefix (`bugfind-pr<N>-loop<L>` / `bugfix-pr<N>-loop<L>`) and the `<run_temp_dir>/pr-<N>/` subfolder containing that PR's git worktree, diff patches, and outcome XML files.
|
|
13
6
|
- **Grant before any spawn, revoke before any return.** Step 0 grants project `.claude/**` permissions; Step 5 revokes. Both are mandatory. Revoke runs on every exit path including error, cap-reached, and stuck.
|
|
14
|
-
- **Fresh
|
|
7
|
+
- **Fresh subagent per loop.** Both bugfind and bugfix are spawned new each loop. Reusing a subagent across loops accumulates context inside that subagent's window — defeats clean-room.
|
|
15
8
|
- **One up-front confirmation = whole cycle.** The `/bugteam` invocation authorizes the entire cycle; every subsequent decision runs on that single authorization.
|
|
16
9
|
- **10-loop hard cap.** Counted as **AUDIT** completions (increment in Step 3). Standards-fix passes before an audit do not advance `loop_count`. Worst case includes extra clean-coder spawns for the code-rules gate.
|
|
17
10
|
- **Code rules gate before every AUDIT.** Run `_shared/pr-loop/scripts/code_rules_gate.py` (resolved via `${CLAUDE_SKILL_DIR}/../../_shared/pr-loop/scripts/code_rules_gate.py`) until exit **0** before spawning **bugfind**. Same `validate_content` logic as `hooks/blocking/code_rules_enforcer.py`.
|
|
18
|
-
- **Clean-room audits, every loop.** Each bugfind
|
|
19
|
-
- **Targeted fixes.** Each fix
|
|
20
|
-
- **Opus 4.7 at xhigh effort for
|
|
21
|
-
- **Fix
|
|
11
|
+
- **Clean-room audits, every loop.** Each bugfind subagent's spawn prompt contains only the PR scope, audit rubric, and the current loop number. Prior loop history stays in the lead.
|
|
12
|
+
- **Targeted fixes.** Each fix subagent sees ONLY the most recent audit's findings. Prior loops are invisible to the fix subagent.
|
|
13
|
+
- **Opus 4.7 at xhigh effort for validator and fix subagents.** Single-auditor mode, validator, and fix spawns pass `model="opus"`; parallel-auditor siblings (`-b` through `-k`) pass `model="haiku"`. Opus 4.7's default effort level in Claude Code is `xhigh` (https://code.claude.com/docs/en/model-config — *"On Opus 4.7, the default effort is `xhigh` for all plans and providers."*), so no `effort` override is needed at spawn time. Effort is set per-subagent in YAML frontmatter, not via the `Agent` tool's parameters; `code-quality-agent` and `clean-coder` rely on the model default. The trade vs Sonnet is higher per-loop cost in exchange for deeper audit recall and stronger fix correctness on bug-hunting work, which the per-PR loop economics tolerate (10-loop hard cap bounds total spend).
|
|
14
|
+
- **Fix subagent receives the latest audit as its input contract.** Passing the audit's findings to the fix subagent is the input contract — each loop's fix run operates on the current audit's output and only that.
|
|
22
15
|
- **One commit per fix action.** Loops produce one commit per loop, not one per bug.
|
|
23
16
|
- **Linear branch, fixed PR base.** Every loop appends one forward-only commit; existing commits and the PR base stay intact throughout the cycle.
|
|
24
|
-
- **Lead-only cleanup
|
|
25
|
-
- **
|
|
26
|
-
- **
|
|
27
|
-
- **Cleanup all `.bugteam-*` files on exit.** `.bugteam-loop-*.patch`, `.bugteam-loop-*.outcomes.xml`, `.bugteam-final.diff`, `.bugteam-original-body.md`, `.bugteam-final-body.md`. Working directory ends clean.
|
|
28
|
-
- **Audit/fix comment posting.** **Path A:** Bugfind posts ONE per-loop review (parent body + child finding comments in a single batched POST, with review-fallback to a top-level issue comment). Bugfix posts the fix replies after committing. All comment, review, and reply POSTs belong to the teammates; the lead's single PR-write action is the final description rewrite at Step 4.5. **Path B:** the **lead** performs the same POSTs after Task handoffs (`SKILL.md` Step 2.5 + [`reference/workflow-path-b-task-harness.md`](reference/workflow-path-b-task-harness.md) § Step 2.5).
|
|
17
|
+
- **Lead-only cleanup.** Cleanup runs in the lead (this session) only. Step 4 removes the full `<run_temp_dir>` so no loop patches leak between runs.
|
|
18
|
+
- **Cleanup all `.bugteam-*` files on exit.** The per-run `<run_temp_dir>` is removed entirely by Step 4, which covers `<run_temp_dir>/pr-<N>/loop-<L>.patch` and `<run_temp_dir>/pr-<N>/loop-<L>-<letter>.outcomes.xml`. The per-loop outcomes XML at `<worktree_path>/.bugteam-pr<N>-loop<L>.outcomes.xml` is removed with the worktree. Step 4.5 deletes `.bugteam-final.diff`, `.bugteam-original-body.md`, and `.bugteam-final-body.md`. Working directory ends clean.
|
|
19
|
+
- **Audit/fix comment posting.** The bugfind subagent posts ONE per-loop review (parent body + child finding comments in a single batched POST, with review-fallback to a top-level issue comment). The bugfix subagent posts the fix replies after committing. All comment, review, and reply POSTs belong to the subagents; the lead's single PR-write action is the final description rewrite at Step 4.5.
|
|
29
20
|
- **Lead owns the final PR description rewrite only** (Step 4.5), and only via the `pr-description-writer` agent. The lead does not compose the description inline.
|
|
30
21
|
- **One review per loop, findings as child comments of that review.** Each loop posts a single pull-request review whose body is the loop header and whose `comments[]` are the anchored findings. Each loop's review stands alone — one review created per loop, fully self-contained on the PR conversation.
|
|
31
22
|
- **PR description rewrite on every exit.** Step 4.5 runs on `converged`, `cap reached`, and `stuck`. On `error`, the rewrite is best-effort; if it fails, surface the error in the final report and continue to revoke.
|
|
32
|
-
- **Outcome XML, not JSON.** Both
|
|
23
|
+
- **Outcome XML, not JSON.** Both subagents write structured outcome data (findings or fix outcomes) to `.bugteam-pr<N>-loop<L>.outcomes.xml`. The lead reads these files between actions. XML chosen for parser robustness against multi-line, special-character, and quoted reason fields.
|
|
33
24
|
|
|
34
25
|
## Why this design
|
|
35
26
|
|
|
@@ -10,7 +10,7 @@ Keep the spawn prompt self-contained: reference only the PR scope, audit rubric,
|
|
|
10
10
|
<branch>head ref</branch>
|
|
11
11
|
<base_branch>base ref</base_branch>
|
|
12
12
|
<pr_url>full URL</pr_url>
|
|
13
|
-
<loop>
|
|
13
|
+
<loop>L</loop>
|
|
14
14
|
<pr_number>N</pr_number>
|
|
15
15
|
<worktree_path>absolute path from Step 1 per-PR workspace</worktree_path>
|
|
16
16
|
</context>
|
|
@@ -18,7 +18,7 @@ Keep the spawn prompt self-contained: reference only the PR scope, audit rubric,
|
|
|
18
18
|
cd into `<worktree_path>` before any git, gh, or file operation.
|
|
19
19
|
|
|
20
20
|
<scope>
|
|
21
|
-
<diff_path>Absolute path to the per-PR patch file: <
|
|
21
|
+
<diff_path>Absolute path to the per-PR patch file: <run_temp_dir>/pr-<N>/loop-<L>.patch (same path as gh pr diff redirect in AUDIT)</diff_path>
|
|
22
22
|
<scope_rule>Audit only lines added or modified in the diff. Pre-existing code on untouched lines is out of scope.</scope_rule>
|
|
23
23
|
</scope>
|
|
24
24
|
|
|
@@ -45,11 +45,17 @@ cd into `<worktree_path>` before any git, gh, or file operation.
|
|
|
45
45
|
</constraints>
|
|
46
46
|
|
|
47
47
|
<comment_posting>
|
|
48
|
+
Sibling auditors (-b through -k): run only steps 1–3 (audit, assign IDs,
|
|
49
|
+
capture excerpt, validate anchors), then write outcome XML per <output_format> and return.
|
|
50
|
+
Skip steps 4–8 — sibling auditors do not post PR reviews.
|
|
51
|
+
|
|
52
|
+
Validator (-a) and single-opus auditors: run all steps below.
|
|
53
|
+
|
|
48
54
|
1. Audit the diff against the 10 categories above. Buffer the findings
|
|
49
55
|
in memory; all posting happens at step 6 once anchors are validated.
|
|
50
|
-
2. Assign each finding a stable finding_id of exactly the form `
|
|
51
|
-
where K is 1-based within this loop.
|
|
52
|
-
3. Validate every finding's (file, line) against the captured diff. Split
|
|
56
|
+
2. Assign each finding a stable finding_id of exactly the form `loop<L>-<K>`
|
|
57
|
+
where <K> is 1-based within this loop.
|
|
58
|
+
3. For each finding, capture a verbatim excerpt from the target file at the cited line. Populate the `<excerpt>` element in the outcome XML with it. Validate every finding's (file, line) against the captured diff. Split
|
|
53
59
|
findings into two buckets: anchored (line is in the diff) and
|
|
54
60
|
unanchored (line is not in the diff — goes into the review body's
|
|
55
61
|
"Findings without a diff anchor" section per Step 2.5).
|
|
@@ -61,7 +67,7 @@ cd into `<worktree_path>` before any git, gh, or file operation.
|
|
|
61
67
|
Category: <letter> (<category name>)
|
|
62
68
|
<2-3 sentence description with concrete trace>
|
|
63
69
|
|
|
64
|
-
_From /bugteam audit loop
|
|
70
|
+
_From /bugteam audit loop <L>._
|
|
65
71
|
|
|
66
72
|
6. Post ONE review via Step 2.5's per-loop review CLI shape. Harvest the
|
|
67
73
|
parent review `html_url` from the response JSON and the `comments[]`
|
|
@@ -76,17 +82,17 @@ cd into `<worktree_path>` before any git, gh, or file operation.
|
|
|
76
82
|
</comment_posting>
|
|
77
83
|
|
|
78
84
|
<output_format>
|
|
79
|
-
|
|
80
|
-
the PR's worktree directory (<worktree_path>). Return only that path on stdout. The schema:
|
|
85
|
+
For the (-a) validator and single-opus auditors: write the outcome XML below to .bugteam-pr<N>-loop<L>.outcomes.xml inside
|
|
86
|
+
the PR's worktree directory (<worktree_path>). For sibling auditors (-b through -k): write to <run_temp_dir>/pr-<N>/loop-<L>-<letter>.outcomes.xml (absolute path passed in prompt). Sibling auditors do not post PR reviews; set review_url, finding_comment_id, and finding_comment_url to empty strings, and used_fallback to "false". Omit unanchored findings from sibling output — only the validator handles those. Return only that path on stdout. The schema:
|
|
81
87
|
</output_format>
|
|
82
88
|
```
|
|
83
89
|
|
|
84
90
|
## AUDIT outcome XML schema (bugfind writes this)
|
|
85
91
|
|
|
86
92
|
```xml
|
|
87
|
-
<bugteam_audit loop="<
|
|
93
|
+
<bugteam_audit loop="<L>" review_url="<url>">
|
|
88
94
|
<finding
|
|
89
|
-
finding_id="loop<
|
|
95
|
+
finding_id="loop<L>-<K>"
|
|
90
96
|
severity="P0|P1|P2"
|
|
91
97
|
category="<letter>"
|
|
92
98
|
file="<path>"
|
|
@@ -96,6 +102,7 @@ cd into `<worktree_path>` before any git, gh, or file operation.
|
|
|
96
102
|
used_fallback="true|false"
|
|
97
103
|
>
|
|
98
104
|
<title>one-line title</title>
|
|
105
|
+
<excerpt>verbatim source line or snippet from the file at the cited line</excerpt>
|
|
99
106
|
<description>2-3 sentence description with concrete trace</description>
|
|
100
107
|
</finding>
|
|
101
108
|
<verified_clean>
|
|
@@ -114,7 +121,7 @@ After the teammate writes the XML and returns, the lead reads `.bugteam-pr<N>-lo
|
|
|
114
121
|
<branch>head</branch>
|
|
115
122
|
<base_branch>base</base_branch>
|
|
116
123
|
<pr_url>url</pr_url>
|
|
117
|
-
<loop>
|
|
124
|
+
<loop>L</loop>
|
|
118
125
|
<pr_number>N</pr_number>
|
|
119
126
|
<worktree_path>absolute path from Step 1 per-PR workspace</worktree_path>
|
|
120
127
|
</context>
|
|
@@ -124,7 +131,7 @@ cd into `<worktree_path>` before any git, gh, or file operation.
|
|
|
124
131
|
<bugs_to_fix>
|
|
125
132
|
[for each P0/P1/P2 finding from last_findings:]
|
|
126
133
|
<bug
|
|
127
|
-
finding_id="loop<
|
|
134
|
+
finding_id="loop<L>-<K>"
|
|
128
135
|
severity="P0|P1|P2"
|
|
129
136
|
file="<path>"
|
|
130
137
|
line="<int>"
|
|
@@ -156,9 +163,9 @@ cd into `<worktree_path>` before any git, gh, or file operation.
|
|
|
156
163
|
</execution>
|
|
157
164
|
|
|
158
165
|
<outcome_xml_schema>
|
|
159
|
-
<bugteam_fix loop="<
|
|
166
|
+
<bugteam_fix loop="<L>" commit_sha="<sha or empty if no commit>">
|
|
160
167
|
<outcome
|
|
161
|
-
finding_id="loop<
|
|
168
|
+
finding_id="loop<L>-<K>"
|
|
162
169
|
status="fixed|could_not_address|hook_blocked"
|
|
163
170
|
commit_sha="<sha if fixed, empty otherwise>"
|
|
164
171
|
reply_comment_id="<id of the reply posted>"
|