claude-dev-env 1.37.0 → 1.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +3 -0
- package/_shared/pr-loop/audit-contract.md +4 -3
- package/_shared/pr-loop/fix-protocol.md +2 -0
- package/_shared/pr-loop/gh-payloads.md +38 -37
- package/_shared/pr-loop/scripts/README.md +0 -1
- package/_shared/pr-loop/scripts/preflight.py +2 -1
- package/_shared/pr-loop/scripts/tests/test_code_rules_gate.py +2 -2
- package/_shared/pr-loop/scripts/tests/test_preflight.py +22 -0
- package/_shared/pr-loop/state-schema.md +10 -10
- package/agents/clean-coder.md +4 -0
- package/agents/code-quality-agent.md +23 -85
- package/agents/groq-coder.md +8 -6
- package/hooks/blocking/__init__.py +0 -0
- package/hooks/blocking/hedging_language_blocker.py +2 -2
- package/hooks/blocking/state_description_blocker.py +243 -0
- package/hooks/blocking/tdd_enforcer.py +94 -0
- package/hooks/blocking/test_hedging_language_blocker.py +1 -1
- package/hooks/blocking/test_state_description_blocker.py +618 -0
- package/hooks/blocking/test_tdd_enforcer.py +152 -0
- package/hooks/config/state_description_blocker_constants.py +130 -0
- package/hooks/hooks.json +10 -0
- package/package.json +1 -1
- package/rules/gh-paginate.md +4 -50
- package/rules/no-historical-clutter.md +57 -0
- package/scripts/config/groq_bugteam_config.py +13 -5
- package/skills/bugteam/CONSTRAINTS.md +20 -27
- package/skills/bugteam/EXAMPLES.md +1 -1
- package/skills/bugteam/PROMPTS.md +78 -42
- package/skills/bugteam/SKILL.md +76 -63
- package/skills/bugteam/SKILL_EVALS.md +12 -12
- package/skills/bugteam/reference/audit-and-teammates.md +21 -48
- package/skills/bugteam/reference/audit-contract.md +7 -7
- package/skills/bugteam/reference/github-pr-reviews.md +31 -31
- package/skills/bugteam/reference/team-setup.md +1 -1
- package/skills/bugteam/reference/teardown-publish-permissions.md +4 -4
- package/skills/copilot-review/SKILL.md +7 -14
- package/skills/findbugs/SKILL.md +2 -2
- package/skills/fixbugs/SKILL.md +1 -1
- package/skills/monitor-open-prs/SKILL.md +6 -6
- package/skills/pr-converge/SKILL.md +7 -6
- package/skills/pr-converge/reference/convergence-gates.md +46 -44
- package/skills/pr-converge/reference/examples.md +4 -4
- package/skills/pr-converge/reference/fix-protocol.md +8 -8
- package/skills/pr-converge/reference/multi-pr-orchestration.md +10 -10
- package/skills/pr-converge/reference/per-tick.md +24 -36
- package/skills/pr-converge/reference/stop-conditions.md +7 -7
- package/skills/pr-converge/scripts/README.md +65 -117
- package/skills/pr-review-responder/EXAMPLES.md +2 -2
- package/skills/pr-review-responder/PRINCIPLES.md +2 -8
- package/skills/pr-review-responder/README.md +7 -48
- package/skills/pr-review-responder/SKILL.md +2 -3
- package/skills/pr-review-responder/TESTING.md +8 -65
- package/skills/qbug/SKILL.md +10 -16
- package/_shared/pr-loop/scripts/config/gh_util_constants.py +0 -31
- package/_shared/pr-loop/scripts/gh_util.py +0 -193
- package/_shared/pr-loop/scripts/tests/test_gh_util.py +0 -257
- package/_shared/pr-loop/scripts/tests/test_gh_util_constants.py +0 -61
- package/skills/pr-converge/scripts/check_pr_mergeability.py +0 -78
- package/skills/pr-converge/scripts/config/pr_converge_constants.py +0 -118
- package/skills/pr-converge/scripts/config/test_pr_converge_constants.py +0 -152
- package/skills/pr-converge/scripts/fetch_bugbot_inline_comments.py +0 -70
- package/skills/pr-converge/scripts/fetch_bugbot_reviews.py +0 -57
- package/skills/pr-converge/scripts/fetch_claude_inline_comments.py +0 -70
- package/skills/pr-converge/scripts/fetch_claude_reviews.py +0 -61
- package/skills/pr-converge/scripts/fetch_copilot_inline_comments.py +0 -70
- package/skills/pr-converge/scripts/fetch_copilot_reviews.py +0 -61
- package/skills/pr-converge/scripts/mark_pr_ready.py +0 -54
- package/skills/pr-converge/scripts/post-bugbot-run.helpers.ps1 +0 -49
- package/skills/pr-converge/scripts/post-bugbot-run.ps1 +0 -33
- package/skills/pr-converge/scripts/reply_to_inline_comment.py +0 -84
- package/skills/pr-converge/scripts/request_copilot_review.py +0 -71
- package/skills/pr-converge/scripts/resolve_pr_head.py +0 -58
- package/skills/pr-converge/scripts/review_field_helpers.py +0 -43
- package/skills/pr-converge/scripts/reviewer_fetch_core.py +0 -153
- package/skills/pr-converge/scripts/reviewer_specs.py +0 -98
- package/skills/pr-converge/scripts/test_check_pr_mergeability.py +0 -126
- package/skills/pr-converge/scripts/test_fetch_bugbot_inline_comments.py +0 -443
- package/skills/pr-converge/scripts/test_fetch_bugbot_reviews.py +0 -299
- package/skills/pr-converge/scripts/test_fetch_claude_inline_comments.py +0 -485
- package/skills/pr-converge/scripts/test_fetch_claude_reviews.py +0 -368
- package/skills/pr-converge/scripts/test_fetch_copilot_inline_comments.py +0 -440
- package/skills/pr-converge/scripts/test_fetch_copilot_reviews.py +0 -366
- package/skills/pr-converge/scripts/test_mark_pr_ready.py +0 -69
- package/skills/pr-converge/scripts/test_post_bugbot_run.py +0 -195
- package/skills/pr-converge/scripts/test_reply_to_inline_comment.py +0 -159
- package/skills/pr-converge/scripts/test_request_copilot_review.py +0 -101
- package/skills/pr-converge/scripts/test_resolve_pr_head.py +0 -79
- package/skills/pr-converge/scripts/test_review_field_helpers.py +0 -80
- package/skills/pr-converge/scripts/test_reviewer_fetch_core.py +0 -448
- package/skills/pr-converge/scripts/test_reviewer_specs.py +0 -107
- package/skills/pr-converge/scripts/test_trigger_bugbot.py +0 -139
- package/skills/pr-converge/scripts/test_view_pr_context.py +0 -111
- package/skills/pr-converge/scripts/trigger_bugbot.py +0 -77
- package/skills/pr-converge/scripts/view_pr_context.py +0 -47
- package/skills/pr-review-responder/scripts/respond_to_reviews.py +0 -376
package/CLAUDE.md
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# Claude Development Assistant
|
|
2
2
|
|
|
3
|
+
The user delegates execution to you and expects zero manual steps unless strictly necessary. Execute every command you can directly. Only instruct the user to do something manually when you are technically unable to do it yourself. When a task involves credentials or other sensitive input, display a minimal secure UI (e.g., a password dialog) to collect it rather than asking the user to paste it into chat or run the command themselves. When direction is ambiguous, use AskUserQuestion to clarify before acting.
|
|
4
|
+
|
|
3
5
|
## Code Rules
|
|
4
6
|
@~/.claude/docs/CODE_RULES.md
|
|
5
7
|
|
|
@@ -42,6 +44,7 @@ Full rule including the reconciliation with Right-Sized Engineering, misapplicat
|
|
|
42
44
|
|
|
43
45
|
## Tool Policies
|
|
44
46
|
- **context7:** Before writing code using any library/framework/SDK/API, call `resolve-library-id` then `query-docs` via Context7 MCP. Use the fetched docs to write code. Applies to all libs including React, Next.js, Django, Express, Prisma.
|
|
47
|
+
- **gh MCP:** Always use `mcp__plugin_github_github__*` tools for any GitHub operations (branches, PRs, file operations). Do not use the `Bash` tool to invoke `gh` or `git` CLI for GitHub operations.
|
|
45
48
|
|
|
46
49
|
## Compaction
|
|
47
50
|
When compacting, always preserve:
|
|
@@ -121,9 +121,10 @@ Sequence:
|
|
|
121
121
|
4. Compute `fix_diff` against pre-fix contents for the modified set.
|
|
122
122
|
5. Run `bugteam_code_rules_gate.py` with explicit paths for every modified file.
|
|
123
123
|
6. Spawn a scoped audit of `fix_diff` with full A–J rigor, Shape A/B contract, adversarial pass, AND Haiku secondary in parallel (paranoid mode on post-fix).
|
|
124
|
-
7.
|
|
125
|
-
8.
|
|
126
|
-
9.
|
|
124
|
+
7. Read the previous loop's outcome XML (`<worktree_path>/.bugteam-pr<N>-loop<L-1>.outcomes.xml`) and obtain its total finding count. If this is the first loop (L <= 1) or the file does not exist, skip this comparison. Compute the post-fix total: previous total minus bugs fixed in this round plus new violations found in the post-fix audit (step 6). If the post-fix total exceeds the previous total, flag all new findings as same-loop fix-targets and revise. An increase in total findings across loop transitions is a regression.
|
|
125
|
+
8. Any new findings become same-loop fix-targets. Internal iteration count increments by one.
|
|
126
|
+
9. After 3 internal iterations with fresh findings each time, exit `stuck: post-fix audit not converging`.
|
|
127
|
+
10. Only when `gate_findings` empty AND `post_fix_findings` empty: `git add`, commit, push.
|
|
127
128
|
|
|
128
129
|
`converged` exit condition: `primary_audit_clean AND post_fix_audit_clean` for the committing loop.
|
|
129
130
|
|
|
@@ -35,3 +35,5 @@ After step 11, when `git rev-parse HEAD` is unchanged from `pre_fix_sha`, the fi
|
|
|
35
35
|
- Append commits; the branch stays linear (one commit per fix loop, fast-forward push only).
|
|
36
36
|
- No comment deletion on lines left unchanged.
|
|
37
37
|
- No `--no-verify`. Hook rejections flag real underlying issues worth investigating.
|
|
38
|
+
- **Narrow scope.** Fix only the exact defect at the specified file:line. No structural refactoring, no inlining helpers.
|
|
39
|
+
- **Preserve helpers.** Do not remove or inline existing helper functions unless the finding explicitly names them.
|
|
@@ -1,33 +1,28 @@
|
|
|
1
|
-
#
|
|
1
|
+
# MCP-based payloads
|
|
2
2
|
|
|
3
3
|
Shared payload shapes for posting PR reviews and replies. Used by `bugteam`, `qbug`, `pr-converge`, `monitor-many`.
|
|
4
4
|
|
|
5
|
-
## Build payloads with
|
|
5
|
+
## Build payloads with MCP tools
|
|
6
6
|
|
|
7
|
-
Build
|
|
7
|
+
Build payloads as structured arguments to MCP tools. Body content passes as a string parameter directly.
|
|
8
8
|
|
|
9
9
|
## One review per loop
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
{path: $path_1, line: $line_1, side: "RIGHT", body: $finding_body_1}
|
|
27
|
-
[, ... ]
|
|
28
|
-
]
|
|
29
|
-
}' \
|
|
30
|
-
| gh api repos/<owner>/<repo>/pulls/<number>/reviews -X POST --input -
|
|
11
|
+
Call `pull_request_review_write` once per audit loop. Payload: `event: "COMMENT"`, the review body, and one `comments[]` object per anchored finding.
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
pull_request_review_write(
|
|
15
|
+
method="create",
|
|
16
|
+
event="COMMENT",
|
|
17
|
+
body=review_body,
|
|
18
|
+
commitID=head_sha,
|
|
19
|
+
owner=owner,
|
|
20
|
+
repo=repo,
|
|
21
|
+
pullNumber=pull_number,
|
|
22
|
+
comments=[
|
|
23
|
+
{path: file_path, line: line_number, side: "RIGHT", body: finding_body}
|
|
24
|
+
]
|
|
25
|
+
)
|
|
31
26
|
```
|
|
32
27
|
|
|
33
28
|
Single-line anchors: `{path, line, side: "RIGHT", body}`. Multi-line anchors add `start_line` and `start_side: "RIGHT"`.
|
|
@@ -48,11 +43,16 @@ Zero findings still post one review. Body line: `## /<workflow> loop <N> audit:
|
|
|
48
43
|
|
|
49
44
|
## Reply to a finding
|
|
50
45
|
|
|
51
|
-
|
|
46
|
+
Call `add_reply_to_pull_request_comment` with the finding comment ID and reply body:
|
|
52
47
|
|
|
53
|
-
```
|
|
54
|
-
|
|
55
|
-
|
|
48
|
+
```
|
|
49
|
+
add_reply_to_pull_request_comment(
|
|
50
|
+
commentId=finding_comment_id,
|
|
51
|
+
body=reply_body,
|
|
52
|
+
owner=owner,
|
|
53
|
+
repo=repo,
|
|
54
|
+
pullNumber=pull_number
|
|
55
|
+
)
|
|
56
56
|
```
|
|
57
57
|
|
|
58
58
|
## Anchor fallback (line not in diff)
|
|
@@ -61,25 +61,26 @@ Lines not in the PR diff cannot anchor an inline comment. Omit them from `commen
|
|
|
61
61
|
|
|
62
62
|
## Review POST failure fallback (issue comment)
|
|
63
63
|
|
|
64
|
-
When the review POST fails,
|
|
64
|
+
When the review POST fails, call `add_issue_comment` with the full review body:
|
|
65
65
|
|
|
66
|
-
```
|
|
67
|
-
|
|
68
|
-
|
|
66
|
+
```
|
|
67
|
+
add_issue_comment(
|
|
68
|
+
owner=owner,
|
|
69
|
+
repo=repo,
|
|
70
|
+
issueNumber=pull_number,
|
|
71
|
+
body=fallback_body
|
|
72
|
+
)
|
|
69
73
|
```
|
|
70
74
|
|
|
71
75
|
All findings in the loop record `used_fallback="true"`; `finding_comment_url` = issue comment URL.
|
|
72
76
|
|
|
73
77
|
## Endpoints
|
|
74
78
|
|
|
75
|
-
- Review
|
|
76
|
-
- Reply
|
|
77
|
-
- Fallback issue comment: `
|
|
79
|
+
- Review: `pull_request_review_write(method="create", ...)`
|
|
80
|
+
- Reply: `add_reply_to_pull_request_comment(...)`
|
|
81
|
+
- Fallback issue comment: `add_issue_comment(...)`
|
|
78
82
|
|
|
79
83
|
## SHA capture timing
|
|
80
84
|
|
|
81
85
|
`commitID` (`commit_id` in the REST payload) and any `<head_sha_at_post_time>` reference: capture the value with `git rev-parse HEAD` immediately before the POST, in the cwd of whichever subagent or process is posting.
|
|
82
86
|
|
|
83
|
-
## Body file UTF-8 encoding
|
|
84
|
-
|
|
85
|
-
Write each markdown body to a temp file via the BOM-free PowerShell pattern (`[IO.File]::WriteAllText($path, $content, [Text.UTF8Encoding]::new($false))`) before `gh api` consumes it. See `~/.claude/rules/gh-body-file.md`.
|
|
@@ -9,7 +9,6 @@ Runnable helpers used by **bugteam**, **qbug**, **pr-converge**, and related ski
|
|
|
9
9
|
| `preflight.py` | Local checks before a PR-loop run (pytest discovery, optional pre-commit, hooksPath sanity). |
|
|
10
10
|
| `code_rules_gate.py` | CODE_RULES gate over PR-scoped diffs (`--base`, staged-only, path filters). |
|
|
11
11
|
| `fix_hookspath.py` | Repair `core.hooksPath` when it does not point at the packaged git-hooks tree. |
|
|
12
|
-
| `gh_util.py` | GitHub CLI helpers (pagination-safe JSON parsing, review fetches). |
|
|
13
12
|
| `grant_project_claude_permissions.py` / `revoke_project_claude_permissions.py` | Claude Code permission JSON helpers used during publish-style flows. |
|
|
14
13
|
| `_claude_permissions_common.py` | Shared implementation for the permission scripts. |
|
|
15
14
|
|
|
@@ -378,6 +378,7 @@ def main(all_arguments: list[str]) -> int:
|
|
|
378
378
|
hooks_path_exit_code = verify_git_hooks_path(repository_root)
|
|
379
379
|
if hooks_path_exit_code != 0:
|
|
380
380
|
return hooks_path_exit_code
|
|
381
|
+
discovery_result: bool | None = True
|
|
381
382
|
if not arguments.no_pytest and has_pytest_configuration(repository_root):
|
|
382
383
|
discovery_result = has_discoverable_tests(repository_root)
|
|
383
384
|
if discovery_result is None:
|
|
@@ -433,7 +434,7 @@ def main(all_arguments: list[str]) -> int:
|
|
|
433
434
|
exit_code = run_pytest(repository_root, arguments.verbose)
|
|
434
435
|
if exit_code != 0:
|
|
435
436
|
return exit_code
|
|
436
|
-
elif not arguments.no_pytest:
|
|
437
|
+
elif not arguments.no_pytest and discovery_result is not False:
|
|
437
438
|
print(
|
|
438
439
|
"bugteam_preflight: no pytest configuration found; skipping pytest.",
|
|
439
440
|
file=sys.stderr,
|
|
@@ -537,8 +537,8 @@ def test_check_wrapper_plumb_through_accepts_positional_or_keyword_forwarder() -
|
|
|
537
537
|
|
|
538
538
|
When a wrapper exposes the delegate's optional kwarg as a positional-or-keyword
|
|
539
539
|
parameter with a default value and forwards it correctly, the check must produce
|
|
540
|
-
zero findings. This mirrors
|
|
541
|
-
|
|
540
|
+
zero findings. This mirrors a wrapper/delegate signature pairing
|
|
541
|
+
where the wrapper exposes the delegate's optional kwarg.
|
|
542
542
|
"""
|
|
543
543
|
source = (
|
|
544
544
|
"def run_gh(all_command, *, timeout_seconds=30):\n"
|
|
@@ -651,6 +651,28 @@ def test_main_does_not_print_no_related_tests_when_get_changed_files_returns_non
|
|
|
651
651
|
assert "no related tests found" not in captured.err
|
|
652
652
|
|
|
653
653
|
|
|
654
|
+
def test_main_should_not_print_no_pytest_config_when_pytest_configured_but_no_tests(
|
|
655
|
+
capsys: pytest.CaptureFixture[str],
|
|
656
|
+
) -> None:
|
|
657
|
+
"""When pytest is configured but no tests are found, main must not print
|
|
658
|
+
the misleading 'no pytest configuration found' message."""
|
|
659
|
+
with (
|
|
660
|
+
patch.object(preflight, "verify_git_hooks_path", return_value=0),
|
|
661
|
+
patch.object(preflight, "has_pytest_configuration", return_value=True),
|
|
662
|
+
patch.object(preflight, "has_discoverable_tests", return_value=False),
|
|
663
|
+
):
|
|
664
|
+
exit_code = preflight.main([])
|
|
665
|
+
assert exit_code == 0
|
|
666
|
+
captured = capsys.readouterr()
|
|
667
|
+
assert "bugteam_preflight: pytest configured but no tests found" in captured.err, (
|
|
668
|
+
"Must print the correct message about configured pytest with no tests"
|
|
669
|
+
)
|
|
670
|
+
assert "bugteam_preflight: no pytest configuration found" not in captured.err, (
|
|
671
|
+
"Must not print the misleading 'no pytest configuration found' message "
|
|
672
|
+
"when pytest IS configured"
|
|
673
|
+
)
|
|
674
|
+
|
|
675
|
+
|
|
654
676
|
def test_main_prints_no_related_tests_when_get_changed_files_returns_empty(
|
|
655
677
|
capsys: pytest.CaptureFixture[str],
|
|
656
678
|
) -> None:
|
|
@@ -7,7 +7,7 @@ State each PR-loop workflow tracks across iterations. Workflows differ on persis
|
|
|
7
7
|
| Field | Type | Purpose |
|
|
8
8
|
|---|---|---|
|
|
9
9
|
| `loop_count` | int | Iterations completed; bumps on each AUDIT or tick |
|
|
10
|
-
| `last_action` | enum | `fresh
|
|
10
|
+
| `last_action` | enum | `fresh`, `audited`, `fixed` — drives next-step dispatch |
|
|
11
11
|
| `last_findings` | object | `{p0, p1, p2, total}` count of findings from most recent AUDIT |
|
|
12
12
|
| `audit_log` | list[str] | Per-iteration one-line summaries for the final report |
|
|
13
13
|
| `starting_sha` | str | `git rev-parse HEAD` at workflow start |
|
|
@@ -35,11 +35,11 @@ Adds the same **traffic** fields whether they live in **`state.json`** or in the
|
|
|
35
35
|
|
|
36
36
|
| Field | Type | Purpose |
|
|
37
37
|
|---|---|---|
|
|
38
|
-
| `phase` | enum | `BUGBOT
|
|
39
|
-
| `current_head` | str | PR `
|
|
38
|
+
| `phase` | enum | `BUGBOT`, `BUGTEAM` — which reviewer the current tick drives |
|
|
39
|
+
| `current_head` | str | PR `.head.sha` / `git rev-parse` for the PR under work (each tick; from `pull_request_read(method="get")` when no file store) |
|
|
40
40
|
| `bugbot_clean_at` | str \| null | HEAD SHA at which Cursor Bugbot last reported clean, or `null` (reset on every push) |
|
|
41
41
|
| `copilot_clean_at` | str \| null | HEAD SHA at which the GitHub Copilot reviewer (`copilot-pull-request-reviewer[bot]`) last reported clean (review `state == "APPROVED"`), or `null`. Reset on every push. Convergence gates require this equals `current_head` after bugbot+bugteam are clean (see `skills/pr-converge/SKILL.md` § Convergence gates). |
|
|
42
|
-
| `merge_state_status` | str \| null | Last-observed `
|
|
42
|
+
| `merge_state_status` | str \| null | Last-observed `mergeable_state` from `pull_request_read(method="get")` (e.g., `clean`, `dirty`, `blocked`, `behind`, `unknown`), or `null` before the first check. Reset on every push. `dirty` triggers the rebase invocation; non-`clean` non-`dirty` is a hard blocker per pr-converge `Stop conditions`. |
|
|
43
43
|
| `inline_lag_streak` | int | Consecutive ticks where bugbot's review body claims findings but inline-comments API returns zero rows for `current_head` |
|
|
44
44
|
| `tick_count` | int | Observability only — **no ceiling**; loop ends on convergence or **Stop conditions** in `pr-converge` |
|
|
45
45
|
|
|
@@ -48,9 +48,9 @@ Adds the same **traffic** fields whether they live in **`state.json`** or in the
|
|
|
48
48
|
| Mode | When it applies | Source of truth | `tick_count` bump |
|
|
49
49
|
|---|---|---|---|
|
|
50
50
|
| **`state.json`** | File exists at `<TMPDIR>/pr-converge-<session_id>/state.json` (multi-PR orchestration or other file-backed session) | JSON: top-level `session_id`; per-PR objects under `prs[<number>]` with `owner`, `repo`, `branch`, `phase`, `current_head`, `bugbot_clean_at`, `inline_lag_streak`, `tick_count`, `last_action`, `status`, `last_updated`. Optional sibling `converged.log` (append-only; multi-PR only). Writes use lock + atomic replace per skill **Concurrency** | **Orchestrator only** at tick start (locked merge for every non-terminal PR); **never** bump `tick_count` in Step 1 when this file is in use |
|
|
51
|
-
| **Conversation state line** | **No** `state.json` (typical single-PR `/pr-converge` in Cursor) | Persist **`phase`**, **`bugbot_clean_at`**, **`inline_lag_streak`**, **`tick_count`** as **plain text** in each assistant turn; next tick reads them from the **most recent assistant message**. **`current_head` is not serialized in that line** — re-resolve each tick via `
|
|
51
|
+
| **Conversation state line** | **No** `state.json` (typical single-PR `/pr-converge` in Cursor) | Persist **`phase`**, **`bugbot_clean_at`**, **`inline_lag_streak`**, **`tick_count`** as **plain text** in each assistant turn; next tick reads them from the **most recent assistant message**. **`current_head` is not serialized in that line** — re-resolve each tick via `pull_request_read(method="get")` (same contract as `skills/pr-converge/SKILL.md` § State across ticks). | **Step 1** increments `tick_count` in that line **only** when no `state.json` — must not double-count with any file-backed path |
|
|
52
52
|
|
|
53
|
-
**`status` (file-backed `prs[...]` only):** `fresh
|
|
53
|
+
**`status` (file-backed `prs[...]` only):** `fresh | in_progress | awaiting_bugbot | awaiting_bugteam | converged | blocked`
|
|
54
54
|
|
|
55
55
|
### monitor-many
|
|
56
56
|
|
|
@@ -60,9 +60,9 @@ Adds per-PR JSON state file at `~/.claude/skills/monitor-many/state/<owner>-<rep
|
|
|
60
60
|
|---|---|---|
|
|
61
61
|
| `repo_name` | str | Full `owner/repo` |
|
|
62
62
|
| `pr_number` | int | PR number |
|
|
63
|
-
| `status` | enum | `open
|
|
64
|
-
| `copilot_review` | enum | `none
|
|
65
|
-
| `bugbot_review` | enum | Same vocabulary as `copilot_review` |
|
|
63
|
+
| `status` | enum | `open`, `blocked_escalation`, `fixing`, `ready_candidate`, `closed` |
|
|
64
|
+
| `copilot_review` | enum | `none`, `requested`, `pending`, `commented`, `approved` |
|
|
65
|
+
| `bugbot_review` | enum | Same vocabulary as `copilot_review`; one of `none`, `requested`, `pending`, `commented`, `approved` |
|
|
66
66
|
| `last_seen_comment_id` | int \| null | Highest processed review-comment id (incremental polling watermark) |
|
|
67
67
|
| `review_comments` | list[object] | Optional cache; `{id, author, path, line}` per entry |
|
|
68
68
|
| `escalation_queue` | list[object] | Pending human-judgment items: `{comment_id, summary, created_at}` |
|
|
@@ -77,5 +77,5 @@ Adds per-PR JSON state file at `~/.claude/skills/monitor-many/state/<owner>-<rep
|
|
|
77
77
|
## Convergence checks
|
|
78
78
|
|
|
79
79
|
- bugteam, qbug: `last_action == "audited"` AND `last_findings.total == 0` → `converged`
|
|
80
|
-
- pr-converge: `bugbot_clean_at == current_head` AND most-recent bugteam exit is `converged` AND no push during the bugteam tick AND no outstanding Copilot findings on `current_head` AND `merge_state_status == "
|
|
80
|
+
- pr-converge: `bugbot_clean_at == current_head` AND most-recent bugteam exit is `converged` AND no push during the bugteam tick AND no outstanding Copilot findings on `current_head` AND `merge_state_status == "clean"` (per `skills/pr-converge/SKILL.md` § Convergence gates) → back-to-back clean → `update_pull_request(draft=false)` (read `current_head` / `bugbot_clean_at` / `copilot_clean_at` / `merge_state_status` from `state.json` when file-backed, else from the conversation state line and Step 1 `pull_request_read(method="get")` output)
|
|
81
81
|
- monitor-many: no unresolved comments requiring code changes AND required checks green AND review policy satisfied → `gh pr ready`
|
package/agents/clean-coder.md
CHANGED
|
@@ -436,6 +436,10 @@ This default is overridden by explicit user instruction such as "refactor this e
|
|
|
436
436
|
|
|
437
437
|
Docstrings on functions, methods, classes, and modules are encouraged for public APIs. The self-documenting-names gate inspects inline `#` and block `#` comments only; docstrings are exempt from that gate.
|
|
438
438
|
|
|
439
|
+
## Audit Awareness
|
|
440
|
+
|
|
441
|
+
Code that clean-coder writes is audited later against the A–K bug categories from `code-quality-agent`. The hooks listed in this file enforce the Category J slice at write time, but categories A–I and K (codebase conflicts / incomplete propagation) surface only in the audit. For each category's full rubric, sub-bucket decomposition, and concrete checks, see `../audit-rubrics/category_rubrics/` (relative to this agent file). While generating code, anticipate the full A–K surface so that the first write clears every audit category.
|
|
442
|
+
|
|
439
443
|
## What You Produce
|
|
440
444
|
|
|
441
445
|
Every line you write or modify will:
|
|
@@ -9,7 +9,7 @@ color: red
|
|
|
9
9
|
|
|
10
10
|
You audit a pull request diff for bugs and CODE_RULES.md compliance issues. You return findings; the orchestrator handles fixes.
|
|
11
11
|
|
|
12
|
-
**Announce at start:** "Using code-quality-agent — auditing diff against A–
|
|
12
|
+
**Announce at start:** "Using code-quality-agent — auditing diff against A–K categories with CODE_RULES.md awareness."
|
|
13
13
|
|
|
14
14
|
## Scope
|
|
15
15
|
|
|
@@ -19,8 +19,8 @@ Audit only added or modified lines in the diff. Pre-existing code on untouched l
|
|
|
19
19
|
|
|
20
20
|
This agent runs in one of two modes depending on the calling prompt:
|
|
21
21
|
|
|
22
|
-
- **Unscoped (default):** the prompt names no categories. Walk all of A through
|
|
23
|
-
- **Category-restricted:** the prompt names a subset of categories ("audit only category F" or "investigate only H, I, and
|
|
22
|
+
- **Unscoped (default):** the prompt names no categories. Walk all of A through K and produce Shape A/B for every category.
|
|
23
|
+
- **Category-restricted:** the prompt names a subset of categories ("audit only category F" or "investigate only H, I, and K"). Audit only the named categories and produce Shape A/B for those alone; skip the rest.
|
|
24
24
|
|
|
25
25
|
Tradeoff for callers picking the category-restricted mode: parallel category invocation loses cross-category reasoning. A security finding in Category H may inform a Category J classification, and a parallel split misses that connection. When categories need to inform each other, prefer the unscoped mode.
|
|
26
26
|
|
|
@@ -32,93 +32,31 @@ Preserve every existing comment. Findings on production code report only on new
|
|
|
32
32
|
|
|
33
33
|
Report findings only. Author zero edits. Author zero diffs. Run zero commits or pushes. The orchestrator (and the calling skill) handles fix application, commit creation, and PR posting based on your finding list.
|
|
34
34
|
|
|
35
|
-
## Bug Categories A–
|
|
35
|
+
## Bug Categories A–K
|
|
36
36
|
|
|
37
|
-
Every audit pass walks all
|
|
37
|
+
Every audit pass walks all eleven categories. Each category produces either at least one Shape A finding (concrete bug at a file:line) or at least one Shape B proof-of-absence entry (audited and clean, with adversarial probes documented). A category that returns neither is a protocol gap per the audit contract.
|
|
38
38
|
|
|
39
|
-
|
|
39
|
+
For each category's full description, examples, sub-bucket decomposition, and concrete checks, read the matching rubric in `../audit-rubrics/category_rubrics/`:
|
|
40
40
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
41
|
+
| Letter | Category | Reference file |
|
|
42
|
+
|---|---|---|
|
|
43
|
+
| A | API contract verification | `../audit-rubrics/category_rubrics/category-a-api-contracts.md` |
|
|
44
|
+
| B | Selector / query / engine compatibility | `../audit-rubrics/category_rubrics/category-b-selector-engine-compat.md` |
|
|
45
|
+
| C | Resource cleanup and lifecycle | `../audit-rubrics/category_rubrics/category-c-resource-cleanup.md` |
|
|
46
|
+
| D | Variable scoping, ordering, and unbound references | `../audit-rubrics/category_rubrics/category-d-scoping-and-ordering.md` |
|
|
47
|
+
| E | Dead code and unused imports | `../audit-rubrics/category_rubrics/category-e-dead-code.md` |
|
|
48
|
+
| F | Silent failures | `../audit-rubrics/category_rubrics/category-f-silent-failures.md` |
|
|
49
|
+
| G | Off-by-one, bounds, integer overflow | `../audit-rubrics/category_rubrics/category-g-bounds-and-overflow.md` |
|
|
50
|
+
| H | Security boundaries | `../audit-rubrics/category_rubrics/category-h-security-boundaries.md` |
|
|
51
|
+
| I | Concurrency hazards | `../audit-rubrics/category_rubrics/category-i-concurrency.md` |
|
|
52
|
+
| J | CODE_RULES.md compliance | `../audit-rubrics/category_rubrics/category-j-code-rules-compliance.md` |
|
|
53
|
+
| K | Codebase conflicts (incomplete propagation) | `../audit-rubrics/category_rubrics/category-k-codebase-conflicts.md` |
|
|
45
54
|
|
|
46
|
-
|
|
55
|
+
Test files (`test_*.py`, `*_test.py`, `*.test.*`, `*.spec.*`, `conftest.py`, and any path under `/tests/`) are exempt from category J. The exempt path families documented in the J reference also opt out of the constants-location sub-item.
|
|
47
56
|
|
|
48
|
-
|
|
49
|
-
- CSS selector uses a pseudo-class the target browser engine lacks.
|
|
50
|
-
- SQL uses a window function on a database version that lacks it.
|
|
51
|
-
- A regex flag is set in syntax that the engine treats as a literal character.
|
|
57
|
+
Category K Shape A findings always cite TWO line locations: the changed line and the unchanged-but-should-have-changed parallel line. The `failure_mode` field describes the contradiction between the two states. K is narrow but recurrent — linters and unit tests rarely catch these findings.
|
|
52
58
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
File handles, network connections, processes, locks, subscriptions.
|
|
56
|
-
- File opened in a function that returns before reaching `close()` or a `with` block.
|
|
57
|
-
- Database connection acquired without a release path on every error branch.
|
|
58
|
-
- Background task started without a cancellation hook.
|
|
59
|
-
|
|
60
|
-
### D. Variable scoping, ordering, and unbound references
|
|
61
|
-
|
|
62
|
-
Closures, variable hoisting, ordering of declarations, late binding in loops.
|
|
63
|
-
- Variable referenced before assignment on one branch.
|
|
64
|
-
- Loop closure captures the loop variable by reference where by-value capture is required.
|
|
65
|
-
- A name shadows an outer-scope variable the function still relies on.
|
|
66
|
-
|
|
67
|
-
### E. Dead code and unused imports
|
|
68
|
-
|
|
69
|
-
Imports the diff adds but leaves unreferenced; functions defined but uncalled; branches unreachable due to a prior return.
|
|
70
|
-
- New `import` line with zero corresponding references.
|
|
71
|
-
- A defined helper function whose call sites the diff also removed.
|
|
72
|
-
- Code after an unconditional `return` or `raise`.
|
|
73
|
-
|
|
74
|
-
### F. Silent failures
|
|
75
|
-
|
|
76
|
-
Catch-all excepts, unconditional success returns, missing error propagation.
|
|
77
|
-
- `except Exception: pass` swallows every error including programming bugs.
|
|
78
|
-
- A function returns `True` on the success path and `True` on every error path too.
|
|
79
|
-
- An async task error is logged while the caller continues as if it succeeded.
|
|
80
|
-
|
|
81
|
-
### G. Off-by-one, bounds, integer overflow
|
|
82
|
-
|
|
83
|
-
Loop bounds, slice indices, signed/unsigned overflow, floating-point comparison.
|
|
84
|
-
- `range(len(items) + 1)` walks one element past the end of the array.
|
|
85
|
-
- Timestamp arithmetic uses 32-bit integer math on a 64-bit value.
|
|
86
|
-
- `==` between floats where epsilon comparison is required.
|
|
87
|
-
|
|
88
|
-
### H. Security boundaries
|
|
89
|
-
|
|
90
|
-
Injection, path traversal, auth bypass, secret leakage.
|
|
91
|
-
- User input concatenated into SQL rather than parameterized.
|
|
92
|
-
- File path joined from untrusted input without normalization or root containment.
|
|
93
|
-
- Token, password, or API key written to a log line.
|
|
94
|
-
|
|
95
|
-
### I. Concurrency hazards
|
|
96
|
-
|
|
97
|
-
Race conditions, missing awaits, shared mutable state, lock ordering.
|
|
98
|
-
- Two coroutines append to the same list without synchronization.
|
|
99
|
-
- An `await` is missing on a critical-section operation.
|
|
100
|
-
- A lock is acquired in different orders on two code paths.
|
|
101
|
-
|
|
102
|
-
### J. CODE_RULES.md compliance
|
|
103
|
-
|
|
104
|
-
Hook-enforced and rubric-enforced rules from CODE_RULES.md. Every PR passes through `code_rules_enforcer.py`; flagging these in the audit prevents fix loops that the gate would otherwise trigger.
|
|
105
|
-
|
|
106
|
-
Sub-items the audit walks:
|
|
107
|
-
|
|
108
|
-
| Sub-item | What this rule looks for |
|
|
109
|
-
|---|---|
|
|
110
|
-
| Magic values | Literals other than `0`, `1`, `-1` inside production function bodies |
|
|
111
|
-
| String-template magic | f-strings whose structural literal text (paths, URLs, patterns) belongs in `config/` |
|
|
112
|
-
| Constants location | Module-level `UPPER_SNAKE = ...` outside `config/` in production code (exempt path families: `config/*`, `/migrations/`, `/workflow/`, `_tab.py`, `/states.py`, `/modules.py`, test files) |
|
|
113
|
-
| File-global use-count | A file-global constant referenced by fewer than two methods, functions, or classes in the same file |
|
|
114
|
-
| Abbreviations | `ctx`, `cfg`, `msg`, `btn`, `idx`, `cnt`, `elem`, `val`, `tmp`, `str`, `num`, `arr`, `obj`, `fn`, `cb`, `req`, `res` (single-letter loop counters and `e` for exceptions are exempt) |
|
|
115
|
-
| Vague-name list | `result`, `data`, `output`, `response`, `value`, `item`, `temp`, `info`, `stuff`, `thing`; vague prefixes: `handle`, `process`, `manage`, `do` |
|
|
116
|
-
| Type hints | Missing type annotation on a parameter or return; presence of `Any` or `# type: ignore` |
|
|
117
|
-
| New inline comments | New `#` or `//` comments in production code that the diff adds (existing comments are preserved untouched and stay outside scope) |
|
|
118
|
-
| Logging format | `log_*(f"...")` rather than `log_*("...", arg)` |
|
|
119
|
-
| Imports inside functions | `import` statements placed inside function bodies |
|
|
120
|
-
|
|
121
|
-
Test files (`test_*.py`, `*_test.py`, `*.test.*`, `*.spec.*`, `conftest.py`, and any path under `/tests/`) are exempt from category J. The exempt path families above also opt out of the constants-location sub-item.
|
|
59
|
+
For reusable Variant C audit prompts scoped to a single category, see `../audit-rubrics/prompts/`. **Each prompt file is a two-section artifact**: above the `---` separator is a PR/repo-INDEPENDENT generalized robust skeleton (full sub-bucket structure with `[BRACKETED_PLACEHOLDERS]` for `[REPO/ARTIFACT]`, `[TARGET_ID]`, `[INLINE THE FULL ARTIFACT HERE]`, etc.) — copy this and fill in for a new audit on any artifact. Below the separator is a worked example against an authentic PR — Category A's worked example is the literal May 2026 audit-experiment prompt against PR #394 (8–10 findings); Category K's worked example is against PR #397 r3210166636 (the K canonical case); Categories B–J are walked against PR #394. Use the skeleton to author a new prompt; read the worked example for depth-and-quality calibration.
|
|
122
60
|
|
|
123
61
|
## Output Schema
|
|
124
62
|
|
|
@@ -172,7 +110,7 @@ A bare verified-clean label is inadequate: every Shape B entry lists the files o
|
|
|
172
110
|
|
|
173
111
|
## Per-Category Expectation
|
|
174
112
|
|
|
175
|
-
Every category A through
|
|
113
|
+
Every category A through K is investigated. The output for each category is one of:
|
|
176
114
|
- one or more Shape A findings, or
|
|
177
115
|
- one Shape B proof-of-absence entry with concrete files, quoted lines, and adversarial probes.
|
|
178
116
|
|
package/agents/groq-coder.md
CHANGED
|
@@ -14,7 +14,7 @@ You are the FIX teammate for bugteam when `BUGTEAM_FIX_IMPLEMENTER=groq-coder`.
|
|
|
14
14
|
|
|
15
15
|
## Contract
|
|
16
16
|
|
|
17
|
-
You receive the standard bugteam FIX spawn XML documented in `skills/bugteam/PROMPTS.md`, including a `bugs_to_fix` block and a `<worktree_path>` to operate in. Outputs conform to the FIX outcome XML schema in the same file: `.bugteam-pr<N>-loop<L>.outcomes.xml` inside the worktree.
|
|
17
|
+
You receive the standard bugteam FIX spawn XML documented in `skills/bugteam/PROMPTS.md`, including a `bugs_to_fix` block and a `<worktree_path>` to operate in. Outputs conform to the FIX outcome XML schema in the same file: `.bugteam-pr<N>-loop<L>.fix-outcomes.xml` inside the worktree.
|
|
18
18
|
|
|
19
19
|
## Validation Gate (before any patch)
|
|
20
20
|
|
|
@@ -82,20 +82,22 @@ After Groq returns:
|
|
|
82
82
|
|
|
83
83
|
After all files have been patched (or skipped):
|
|
84
84
|
|
|
85
|
-
1.
|
|
86
|
-
2.
|
|
85
|
+
1. Run the project's test suite and confirm all existing tests pass. If a test fails, diagnose the regression and fix it before committing.
|
|
86
|
+
2. If this is the first loop (L <= 1) or the previous loop's outcome XML (`<worktree_path>/.bugteam-pr<N>-loop<L-1>.outcomes.xml`) does not exist, skip this comparison. Otherwise, read that file and obtain its total finding count, then re-read each changed file and count any new violations. Compute the post-fix total: the previous total, minus the bugs fixed in this round, plus the new violations. If the post-fix total exceeds the previous total, flag all new findings as same-loop fix-targets and revise before committing.
|
|
87
|
+
3. `git add` every patched file by explicit path — never `git add -A`.
|
|
88
|
+
4. `git commit` with a message summarizing the addressed findings. Example:
|
|
87
89
|
```
|
|
88
90
|
fix(groq-coder): address N findings from bugteam loop <L>
|
|
89
91
|
|
|
90
92
|
Findings: <comma-separated finding_ids>
|
|
91
93
|
```
|
|
92
94
|
Let every git hook run. Never pass `--no-verify`. Never pass `--no-gpg-sign`. If the commit is hook-blocked: capture stderr, write `status=hook_blocked` for every finding in this loop, populate `hook_output`, and return without retrying — the lead treats this loop as no-progress.
|
|
93
|
-
|
|
94
|
-
|
|
95
|
+
5. Run `git push` as a plain fast-forward push. If signing issues surface, stop and report them to the user rather than bypassing signing.
|
|
96
|
+
6. For each finding, post a reply to its `finding_comment_id` via the Step 2.5 reply CLI shape from `skills/bugteam/SKILL.md`:
|
|
95
97
|
- `Fixed in <commit_sha>` when `status=fixed`.
|
|
96
98
|
- `Could not address this loop: <reason>` when `status=could_not_address`.
|
|
97
99
|
- `Hook blocked the fix commit: <one-line summary>` when `status=hook_blocked`.
|
|
98
|
-
|
|
100
|
+
7. Write `.bugteam-pr<N>-loop<L>.fix-outcomes.xml` inside `<worktree_path>` per the FIX outcome schema.
|
|
99
101
|
|
|
100
102
|
## Non-Negotiable Guardrails
|
|
101
103
|
|
|
File without changes
|
|
@@ -123,7 +123,7 @@ def main() -> None:
|
|
|
123
123
|
else:
|
|
124
124
|
skill_reference = (
|
|
125
125
|
"under research-mode constraints "
|
|
126
|
-
"(no research-mode skill installed; verify with sources or
|
|
126
|
+
"(no research-mode skill installed; verify with sources or prompt the user via AskUserQuestion with potential options + context)"
|
|
127
127
|
)
|
|
128
128
|
|
|
129
129
|
block_response = {
|
|
@@ -134,7 +134,7 @@ def main() -> None:
|
|
|
134
134
|
f"These words signal unverified claims. You MUST rewrite your response "
|
|
135
135
|
f"{skill_reference}\n\n"
|
|
136
136
|
f"Do NOT simply remove the hedging word and keep the unverified claim. "
|
|
137
|
-
f"
|
|
137
|
+
f"Do more research to VERIFY it with a source, or prompt the user via AskUserQuestion with some potential options + context if you are unable to find anything online.\n\n"
|
|
138
138
|
f"You MUST re-output the complete, revised response with the corrections applied."
|
|
139
139
|
),
|
|
140
140
|
"systemMessage": USER_FACING_NOTICE,
|