claude-dev-env 1.37.1 → 1.38.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/CLAUDE.md +3 -0
  2. package/_shared/pr-loop/audit-contract.md +4 -3
  3. package/_shared/pr-loop/fix-protocol.md +2 -0
  4. package/_shared/pr-loop/gh-payloads.md +38 -37
  5. package/_shared/pr-loop/scripts/README.md +0 -1
  6. package/_shared/pr-loop/scripts/preflight.py +2 -1
  7. package/_shared/pr-loop/scripts/tests/test_code_rules_gate.py +2 -2
  8. package/_shared/pr-loop/scripts/tests/test_preflight.py +22 -0
  9. package/_shared/pr-loop/state-schema.md +10 -10
  10. package/agents/clean-coder.md +4 -0
  11. package/agents/code-quality-agent.md +23 -85
  12. package/agents/groq-coder.md +8 -6
  13. package/hooks/blocking/__init__.py +0 -0
  14. package/hooks/blocking/hedging_language_blocker.py +2 -2
  15. package/hooks/blocking/state_description_blocker.py +243 -0
  16. package/hooks/blocking/tdd_enforcer.py +94 -0
  17. package/hooks/blocking/test_hedging_language_blocker.py +1 -1
  18. package/hooks/blocking/test_state_description_blocker.py +618 -0
  19. package/hooks/blocking/test_tdd_enforcer.py +152 -0
  20. package/hooks/config/state_description_blocker_constants.py +130 -0
  21. package/hooks/hooks.json +10 -0
  22. package/package.json +1 -1
  23. package/rules/no-historical-clutter.md +31 -10
  24. package/scripts/config/groq_bugteam_config.py +13 -5
  25. package/skills/bugteam/CONSTRAINTS.md +20 -27
  26. package/skills/bugteam/EXAMPLES.md +1 -1
  27. package/skills/bugteam/PROMPTS.md +60 -31
  28. package/skills/bugteam/SKILL.md +47 -47
  29. package/skills/bugteam/SKILL_EVALS.md +8 -8
  30. package/skills/bugteam/reference/github-pr-reviews.md +31 -31
  31. package/skills/bugteam/reference/team-setup.md +1 -1
  32. package/skills/bugteam/reference/teardown-publish-permissions.md +4 -4
  33. package/skills/copilot-review/SKILL.md +7 -14
  34. package/skills/findbugs/SKILL.md +2 -2
  35. package/skills/fixbugs/SKILL.md +1 -1
  36. package/skills/monitor-open-prs/SKILL.md +6 -6
  37. package/skills/pr-converge/SKILL.md +7 -6
  38. package/skills/pr-converge/reference/convergence-gates.md +28 -30
  39. package/skills/pr-converge/reference/examples.md +4 -4
  40. package/skills/pr-converge/reference/fix-protocol.md +6 -8
  41. package/skills/pr-converge/reference/multi-pr-orchestration.md +10 -10
  42. package/skills/pr-converge/reference/per-tick.md +18 -33
  43. package/skills/pr-converge/reference/stop-conditions.md +7 -7
  44. package/skills/pr-converge/scripts/README.md +65 -117
  45. package/skills/pr-review-responder/EXAMPLES.md +2 -2
  46. package/skills/pr-review-responder/PRINCIPLES.md +2 -8
  47. package/skills/pr-review-responder/README.md +7 -48
  48. package/skills/pr-review-responder/SKILL.md +2 -3
  49. package/skills/pr-review-responder/TESTING.md +8 -65
  50. package/skills/qbug/SKILL.md +10 -16
  51. package/_shared/pr-loop/scripts/config/gh_util_constants.py +0 -31
  52. package/_shared/pr-loop/scripts/gh_util.py +0 -193
  53. package/_shared/pr-loop/scripts/tests/test_gh_util.py +0 -257
  54. package/_shared/pr-loop/scripts/tests/test_gh_util_constants.py +0 -61
  55. package/skills/pr-converge/scripts/check_pr_mergeability.py +0 -78
  56. package/skills/pr-converge/scripts/config/pr_converge_constants.py +0 -134
  57. package/skills/pr-converge/scripts/config/test_pr_converge_constants.py +0 -152
  58. package/skills/pr-converge/scripts/fetch_bugbot_inline_comments.py +0 -70
  59. package/skills/pr-converge/scripts/fetch_bugbot_reviews.py +0 -57
  60. package/skills/pr-converge/scripts/fetch_claude_inline_comments.py +0 -70
  61. package/skills/pr-converge/scripts/fetch_claude_reviews.py +0 -61
  62. package/skills/pr-converge/scripts/fetch_copilot_inline_comments.py +0 -70
  63. package/skills/pr-converge/scripts/fetch_copilot_reviews.py +0 -61
  64. package/skills/pr-converge/scripts/mark_pr_ready.py +0 -54
  65. package/skills/pr-converge/scripts/post-bugbot-run.helpers.ps1 +0 -49
  66. package/skills/pr-converge/scripts/post-bugbot-run.ps1 +0 -33
  67. package/skills/pr-converge/scripts/reply_to_inline_comment.py +0 -84
  68. package/skills/pr-converge/scripts/request_copilot_review.py +0 -71
  69. package/skills/pr-converge/scripts/resolve_pr_head.py +0 -58
  70. package/skills/pr-converge/scripts/review_field_helpers.py +0 -43
  71. package/skills/pr-converge/scripts/reviewer_fetch_core.py +0 -153
  72. package/skills/pr-converge/scripts/reviewer_specs.py +0 -98
  73. package/skills/pr-converge/scripts/test_check_pr_mergeability.py +0 -126
  74. package/skills/pr-converge/scripts/test_fetch_bugbot_inline_comments.py +0 -443
  75. package/skills/pr-converge/scripts/test_fetch_bugbot_reviews.py +0 -299
  76. package/skills/pr-converge/scripts/test_fetch_claude_inline_comments.py +0 -485
  77. package/skills/pr-converge/scripts/test_fetch_claude_reviews.py +0 -368
  78. package/skills/pr-converge/scripts/test_fetch_copilot_inline_comments.py +0 -440
  79. package/skills/pr-converge/scripts/test_fetch_copilot_reviews.py +0 -366
  80. package/skills/pr-converge/scripts/test_mark_pr_ready.py +0 -69
  81. package/skills/pr-converge/scripts/test_post_bugbot_run.py +0 -195
  82. package/skills/pr-converge/scripts/test_reply_to_inline_comment.py +0 -159
  83. package/skills/pr-converge/scripts/test_request_copilot_review.py +0 -101
  84. package/skills/pr-converge/scripts/test_resolve_pr_head.py +0 -79
  85. package/skills/pr-converge/scripts/test_review_field_helpers.py +0 -80
  86. package/skills/pr-converge/scripts/test_reviewer_fetch_core.py +0 -448
  87. package/skills/pr-converge/scripts/test_reviewer_specs.py +0 -107
  88. package/skills/pr-converge/scripts/test_trigger_bugbot.py +0 -139
  89. package/skills/pr-converge/scripts/test_view_pr_context.py +0 -155
  90. package/skills/pr-converge/scripts/trigger_bugbot.py +0 -77
  91. package/skills/pr-converge/scripts/view_pr_context.py +0 -78
  92. package/skills/pr-review-responder/scripts/respond_to_reviews.py +0 -376
package/CLAUDE.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # Claude Development Assistant
2
2
 
3
+ The user delegates execution to you and expects zero manual steps unless strictly necessary. Execute every command you can directly. Only instruct the user to do something manually when you are technically unable to do it yourself. When a task involves credentials or other sensitive input, display a minimal secure UI (e.g., a password dialog) to collect it rather than asking the user to paste it into chat or run the command themselves. When direction is ambiguous, use AskUserQuestion to clarify before acting.
4
+
3
5
  ## Code Rules
4
6
  @~/.claude/docs/CODE_RULES.md
5
7
 
@@ -42,6 +44,7 @@ Full rule including the reconciliation with Right-Sized Engineering, misapplicat
42
44
 
43
45
  ## Tool Policies
44
46
  - **context7:** Before writing code using any library/framework/SDK/API, call `resolve-library-id` then `query-docs` via Context7 MCP. Use the fetched docs to write code. Applies to all libs including React, Next.js, Django, Express, Prisma.
47
+ - **gh MCP:** Always use `mcp__plugin_github_github__*` tools for all GitHub operations (branches, PRs, file operations). Do not use the `Bash` tool to invoke the `gh` or `git` CLI for GitHub operations.
45
48
 
46
49
  ## Compaction
47
50
  When compacting, always preserve:
@@ -121,9 +121,10 @@ Sequence:
121
121
  4. Compute `fix_diff` against pre-fix contents for the modified set.
122
122
  5. Run `bugteam_code_rules_gate.py` with explicit paths for every modified file.
123
123
  6. Spawn a scoped audit of `fix_diff` with full A–J rigor, Shape A/B contract, adversarial pass, AND Haiku secondary in parallel (paranoid mode on post-fix).
124
- 7. Any new findings become same-loop fix-targets. Internal iteration count increments by one.
125
- 8. After 3 internal iterations with fresh findings each time, exit `stuck: post-fix audit not converging`.
126
- 9. Only when `gate_findings` empty AND `post_fix_findings` empty: `git add`, commit, push.
124
+ 7. Read the previous loop's outcome XML (`<worktree_path>/.bugteam-pr<N>-loop<L-1>.outcomes.xml`) and obtain its total finding count. If this is the first loop (L <= 1) or the file does not exist, skip this comparison. Compute the post-fix total: previous total minus bugs fixed in this round plus new violations found in the post-fix audit (step 6). If the post-fix total exceeds the previous total, flag all new findings as same-loop fix-targets and revise. An increase in total findings across loop transitions is a regression.
125
+ 8. Any new findings become same-loop fix-targets. Internal iteration count increments by one.
126
+ 9. After 3 internal iterations with fresh findings each time, exit `stuck: post-fix audit not converging`.
127
+ 10. Only when `gate_findings` empty AND `post_fix_findings` empty: `git add`, commit, push.
127
128
 
128
129
  `converged` exit condition: `primary_audit_clean AND post_fix_audit_clean` for the committing loop.
129
130
 
@@ -35,3 +35,5 @@ After step 11, when `git rev-parse HEAD` is unchanged from `pre_fix_sha`, the fi
35
35
  - Append commits; the branch stays linear (one commit per fix loop, fast-forward push only).
36
36
  - No comment deletion on lines left unchanged.
37
37
  - No `--no-verify`. Hook rejections flag real underlying issues worth investigating.
38
+ - **Narrow scope.** Fix only the exact defect at the specified file:line. No structural refactoring, no inlining helpers.
39
+ - **Preserve helpers.** Do not remove or inline existing helper functions unless the finding explicitly names them.
@@ -1,33 +1,28 @@
1
- # gh API payloads
1
+ # MCP-based payloads
2
2
 
3
3
  Shared payload shapes for posting PR reviews and replies. Used by `bugteam`, `qbug`, `pr-converge`, `monitor-many`.
4
4
 
5
- ## Build payloads with jq + gh api --input
5
+ ## Build payloads with MCP tools
6
6
 
7
- Build JSON with `jq --rawfile` / `-Rs` reading per-finding markdown bodies from temp files; pipe to `gh api ... --input -`. Avoids shell-quoting hazards and satisfies the `gh-body-backtick-guard` hook.
7
+ Build payloads as structured arguments to MCP tools. Body content passes as a string parameter directly.
8
8
 
9
9
  ## One review per loop
10
10
 
11
- POST to `repos/<owner>/<repo>/pulls/<number>/reviews` once per audit loop. Payload: `event: "COMMENT"`, the review body, and one `comments[]` object per anchored finding.
12
-
13
- ```bash
14
- jq -n \
15
- --rawfile review_body <tmp_review_body.md> \
16
- --arg commit_id "$(git rev-parse HEAD)" \
17
- --rawfile finding_body_1 <tmp_finding_1.md> \
18
- --arg path_1 "<file_1>" \
19
- --argjson line_1 <line_1> \
20
- [... one finding_body_K / path_K / line_K triple per finding ...] \
21
- '{
22
- commit_id: $commit_id,
23
- event: "COMMENT",
24
- body: $review_body,
25
- comments: [
26
- {path: $path_1, line: $line_1, side: "RIGHT", body: $finding_body_1}
27
- [, ... ]
28
- ]
29
- }' \
30
- | gh api repos/<owner>/<repo>/pulls/<number>/reviews -X POST --input -
11
+ Call `pull_request_review_write` once per audit loop. Payload: `event: "COMMENT"`, the review body, and one `comments[]` object per anchored finding.
12
+
13
+ ```
14
+ pull_request_review_write(
15
+ method="create",
16
+ event="COMMENT",
17
+ body=review_body,
18
+ commitID=head_sha,
19
+ owner=owner,
20
+ repo=repo,
21
+ pullNumber=pull_number,
22
+ comments=[
23
+ {path: file_path, line: line_number, side: "RIGHT", body: finding_body}
24
+ ]
25
+ )
31
26
  ```
32
27
 
33
28
  Single-line anchors: `{path, line, side: "RIGHT", body}`. Multi-line anchors add `start_line` and `start_side: "RIGHT"`.
@@ -48,11 +43,16 @@ Zero findings still post one review. Body line: `## /<workflow> loop <N> audit:
48
43
 
49
44
  ## Reply to a finding
50
45
 
51
- POST to `repos/<owner>/<repo>/pulls/<number>/comments/<finding_comment_id>/replies`:
46
+ Call `add_reply_to_pull_request_comment` with the finding comment ID and reply body:
52
47
 
53
- ```bash
54
- jq -Rs '{body: .}' <tmp_reply.md \
55
- | gh api repos/<owner>/<repo>/pulls/<number>/comments/<finding_comment_id>/replies -X POST --input -
48
+ ```
49
+ add_reply_to_pull_request_comment(
50
+ commentId=finding_comment_id,
51
+ body=reply_body,
52
+ owner=owner,
53
+ repo=repo,
54
+ pullNumber=pull_number
55
+ )
56
56
  ```
57
57
 
58
58
  ## Anchor fallback (line not in diff)
@@ -61,25 +61,26 @@ Lines not in the PR diff cannot anchor an inline comment. Omit them from `commen
61
61
 
62
62
  ## Review POST failure fallback (issue comment)
63
63
 
64
- When the review POST fails, post one issue comment carrying the full review body to `repos/<owner>/<repo>/issues/<number>/comments`:
64
+ When the `pull_request_review_write` call fails, call `add_issue_comment` with the full review body:
65
65
 
66
- ```bash
67
- jq -Rs '{body: .}' <tmp_fallback.md \
68
- | gh api repos/<owner>/<repo>/issues/<number>/comments -X POST --input -
66
+ ```
67
+ add_issue_comment(
68
+ owner=owner,
69
+ repo=repo,
70
+ issueNumber=pull_number,
71
+ body=fallback_body
72
+ )
69
73
  ```
70
74
 
71
75
  All findings in the loop record `used_fallback="true"`; `finding_comment_url` = issue comment URL.
72
76
 
73
77
  ## Endpoints
74
78
 
75
- - Review POST: `repos/{owner}/{repo}/pulls/{pull}/reviews`
76
- - Reply POST: `repos/{owner}/{repo}/pulls/{pull}/comments/{id}/replies`
77
- - Fallback issue comment: `repos/{owner}/{repo}/issues/{issue}/comments` (`issue` = PR number)
79
+ - Review: `pull_request_review_write(method="create", ...)`
80
+ - Reply: `add_reply_to_pull_request_comment(...)`
81
+ - Fallback issue comment: `add_issue_comment(...)`
78
82
 
79
83
  ## SHA capture timing
80
84
 
81
85
  `commitID` and any `<head_sha_at_post_time>` reference: run `git rev-parse HEAD` immediately before the review is posted, in the cwd of whichever subagent or process is posting.
82
86
 
83
- ## Body file UTF-8 encoding
84
-
85
- Write each markdown body to a temp file via the BOM-free PowerShell pattern (`[IO.File]::WriteAllText($path, $content, [Text.UTF8Encoding]::new($false))`) before `gh api` consumes it. See `~/.claude/rules/gh-body-file.md`.
@@ -9,7 +9,6 @@ Runnable helpers used by **bugteam**, **qbug**, **pr-converge**, and related ski
9
9
  | `preflight.py` | Local checks before a PR-loop run (pytest discovery, optional pre-commit, hooksPath sanity). |
10
10
  | `code_rules_gate.py` | CODE_RULES gate over PR-scoped diffs (`--base`, staged-only, path filters). |
11
11
  | `fix_hookspath.py` | Repair `core.hooksPath` when it does not point at the packaged git-hooks tree. |
12
- | `gh_util.py` | GitHub CLI helpers (pagination-safe JSON parsing, review fetches). |
13
12
  | `grant_project_claude_permissions.py` / `revoke_project_claude_permissions.py` | Claude Code permission JSON helpers used during publish-style flows. |
14
13
  | `_claude_permissions_common.py` | Shared implementation for the permission scripts. |
15
14
 
@@ -378,6 +378,7 @@ def main(all_arguments: list[str]) -> int:
378
378
  hooks_path_exit_code = verify_git_hooks_path(repository_root)
379
379
  if hooks_path_exit_code != 0:
380
380
  return hooks_path_exit_code
381
+ discovery_result: bool | None = True
381
382
  if not arguments.no_pytest and has_pytest_configuration(repository_root):
382
383
  discovery_result = has_discoverable_tests(repository_root)
383
384
  if discovery_result is None:
@@ -433,7 +434,7 @@ def main(all_arguments: list[str]) -> int:
433
434
  exit_code = run_pytest(repository_root, arguments.verbose)
434
435
  if exit_code != 0:
435
436
  return exit_code
436
- elif not arguments.no_pytest:
437
+ elif not arguments.no_pytest and discovery_result is not False:
437
438
  print(
438
439
  "bugteam_preflight: no pytest configuration found; skipping pytest.",
439
440
  file=sys.stderr,
@@ -537,8 +537,8 @@ def test_check_wrapper_plumb_through_accepts_positional_or_keyword_forwarder() -
537
537
 
538
538
  When a wrapper exposes the delegate's optional kwarg as a positional-or-keyword
539
539
  parameter with a default value and forwards it correctly, the check must produce
540
- zero findings. This mirrors the live gh_util.fetch_inline_review_comments
541
- run_gh signature pairing on this PR.
540
+ zero findings. This mirrors a wrapper/delegate signature pairing
541
+ where the wrapper exposes the delegate's optional kwarg.
542
542
  """
543
543
  source = (
544
544
  "def run_gh(all_command, *, timeout_seconds=30):\n"
@@ -651,6 +651,28 @@ def test_main_does_not_print_no_related_tests_when_get_changed_files_returns_non
651
651
  assert "no related tests found" not in captured.err
652
652
 
653
653
 
654
+ def test_main_should_not_print_no_pytest_config_when_pytest_configured_but_no_tests(
655
+ capsys: pytest.CaptureFixture[str],
656
+ ) -> None:
657
+ """When pytest is configured but no tests are found, main must not print
658
+ the misleading 'no pytest configuration found' message."""
659
+ with (
660
+ patch.object(preflight, "verify_git_hooks_path", return_value=0),
661
+ patch.object(preflight, "has_pytest_configuration", return_value=True),
662
+ patch.object(preflight, "has_discoverable_tests", return_value=False),
663
+ ):
664
+ exit_code = preflight.main([])
665
+ assert exit_code == 0
666
+ captured = capsys.readouterr()
667
+ assert "bugteam_preflight: pytest configured but no tests found" in captured.err, (
668
+ "Must print the correct message about configured pytest with no tests"
669
+ )
670
+ assert "bugteam_preflight: no pytest configuration found" not in captured.err, (
671
+ "Must not print the misleading 'no pytest configuration found' message "
672
+ "when pytest IS configured"
673
+ )
674
+
675
+
654
676
  def test_main_prints_no_related_tests_when_get_changed_files_returns_empty(
655
677
  capsys: pytest.CaptureFixture[str],
656
678
  ) -> None:
@@ -7,7 +7,7 @@ State each PR-loop workflow tracks across iterations. Workflows differ on persis
7
7
  | Field | Type | Purpose |
8
8
  |---|---|---|
9
9
  | `loop_count` | int | Iterations completed; bumps on each AUDIT or tick |
10
- | `last_action` | enum | `fresh` \| `audited` \| `fixed` — drives next-step dispatch |
10
+ | `last_action` | enum | `fresh`, `audited`, `fixed` — drives next-step dispatch |
11
11
  | `last_findings` | object | `{p0, p1, p2, total}` count of findings from most recent AUDIT |
12
12
  | `audit_log` | list[str] | Per-iteration one-line summaries for the final report |
13
13
  | `starting_sha` | str | `git rev-parse HEAD` at workflow start |
@@ -35,11 +35,11 @@ Adds the same **traffic** fields whether they live in **`state.json`** or in the
35
35
 
36
36
  | Field | Type | Purpose |
37
37
  |---|---|---|
38
- | `phase` | enum | `BUGBOT` \| `BUGTEAM` — which reviewer the current tick drives |
39
- | `current_head` | str | PR `headRefOid` / `git rev-parse` for the PR under work (each tick; from `view_pr_context.py` when no file store) |
38
+ | `phase` | enum | `BUGBOT`, `BUGTEAM` — which reviewer the current tick drives |
39
+ | `current_head` | str | PR `.head.sha` / `git rev-parse` for the PR under work (each tick; from `pull_request_read(method="get")` when no file store) |
40
40
  | `bugbot_clean_at` | str \| null | HEAD SHA at which Cursor Bugbot last reported clean, or `null` (reset on every push) |
41
41
  | `copilot_clean_at` | str \| null | HEAD SHA at which the GitHub Copilot reviewer (`copilot-pull-request-reviewer[bot]`) last reported clean (review `state == "APPROVED"`), or `null`. Reset on every push. Convergence gates require this equals `current_head` after bugbot+bugteam are clean (see `skills/pr-converge/SKILL.md` § Convergence gates). |
42
- | `merge_state_status` | str \| null | Last-observed `mergeStateStatus` from `gh pr view --json mergeable,mergeStateStatus,headRefOid` (e.g., `CLEAN`, `DIRTY`, `BLOCKED`, `BEHIND`, `UNKNOWN`), or `null` before the first check. Reset on every push. `DIRTY` triggers the rebase invocation; non-`CLEAN` non-`DIRTY` is a hard blocker per pr-converge `Stop conditions`. |
42
+ | `merge_state_status` | str \| null | Last-observed `mergeable_state` from `pull_request_read(method="get")` (e.g., `clean`, `dirty`, `blocked`, `behind`, `unknown`), or `null` before the first check. Reset on every push. `dirty` triggers the rebase invocation; non-`clean` non-`dirty` is a hard blocker per pr-converge `Stop conditions`. |
43
43
  | `inline_lag_streak` | int | Consecutive ticks where bugbot's review body claims findings but inline-comments API returns zero rows for `current_head` |
44
44
  | `tick_count` | int | Observability only — **no ceiling**; loop ends on convergence or **Stop conditions** in `pr-converge` |
45
45
 
@@ -48,9 +48,9 @@ Adds the same **traffic** fields whether they live in **`state.json`** or in the
48
48
  | Mode | When it applies | Source of truth | `tick_count` bump |
49
49
  |---|---|---|---|
50
50
  | **`state.json`** | File exists at `<TMPDIR>/pr-converge-<session_id>/state.json` (multi-PR orchestration or other file-backed session) | JSON: top-level `session_id`; per-PR objects under `prs[<number>]` with `owner`, `repo`, `branch`, `phase`, `current_head`, `bugbot_clean_at`, `inline_lag_streak`, `tick_count`, `last_action`, `status`, `last_updated`. Optional sibling `converged.log` (append-only; multi-PR only). Writes use lock + atomic replace per skill **Concurrency** | **Orchestrator only** at tick start (locked merge for every non-terminal PR); **never** bump `tick_count` in Step 1 when this file is in use |
51
- | **Conversation state line** | **No** `state.json` (typical single-PR `/pr-converge` in Cursor) | Persist **`phase`**, **`bugbot_clean_at`**, **`inline_lag_streak`**, **`tick_count`** as **plain text** in each assistant turn; next tick reads them from the **most recent assistant message**. **`current_head` is not serialized in that line** — re-resolve each tick via `view_pr_context.py` (same contract as `skills/pr-converge/SKILL.md` § State across ticks). | **Step 1** increments `tick_count` in that line **only** when no `state.json` — must not double-count with any file-backed path |
51
+ | **Conversation state line** | **No** `state.json` (typical single-PR `/pr-converge` in Cursor) | Persist **`phase`**, **`bugbot_clean_at`**, **`inline_lag_streak`**, **`tick_count`** as **plain text** in each assistant turn; next tick reads them from the **most recent assistant message**. **`current_head` is not serialized in that line** — re-resolve each tick via `pull_request_read(method="get")` (same contract as `skills/pr-converge/SKILL.md` § State across ticks). | **Step 1** increments `tick_count` in that line **only** when no `state.json` — must not double-count with any file-backed path |
52
52
 
53
- **`status` (file-backed `prs[...]` only):** `fresh` \| `in_progress` \| `awaiting_bugbot` \| `awaiting_bugteam` \| `converged` \| `blocked`
53
+ **`status` (file-backed `prs[...]` only):** `fresh`, `in_progress`, `awaiting_bugbot`, `awaiting_bugteam`, `converged`, `blocked`
54
54
 
55
55
  ### monitor-many
56
56
 
@@ -60,9 +60,9 @@ Adds per-PR JSON state file at `~/.claude/skills/monitor-many/state/<owner>-<rep
60
60
  |---|---|---|
61
61
  | `repo_name` | str | Full `owner/repo` |
62
62
  | `pr_number` | int | PR number |
63
- | `status` | enum | `open` \| `blocked_escalation` \| `fixing` \| `ready_candidate` \| `closed` |
64
- | `copilot_review` | enum | `none` \| `requested` \| `pending` \| `commented` \| `approved` |
65
- | `bugbot_review` | enum | Same vocabulary as `copilot_review` |
63
+ | `status` | enum | `open`, `blocked_escalation`, `fixing`, `ready_candidate`, `closed` |
64
+ | `copilot_review` | enum | `none`, `requested`, `pending`, `commented`, `approved` |
65
+ | `bugbot_review` | enum | Same vocabulary as `copilot_review`; one of `none`, `requested`, `pending`, `commented`, `approved` |
66
66
  | `last_seen_comment_id` | int \| null | Highest processed review-comment id (incremental polling watermark) |
67
67
  | `review_comments` | list[object] | Optional cache; `{id, author, path, line}` per entry |
68
68
  | `escalation_queue` | list[object] | Pending human-judgment items: `{comment_id, summary, created_at}` |
@@ -77,5 +77,5 @@ Adds per-PR JSON state file at `~/.claude/skills/monitor-many/state/<owner>-<rep
77
77
  ## Convergence checks
78
78
 
79
79
  - bugteam, qbug: `last_action == "audited"` AND `last_findings.total == 0` → `converged`
80
- - pr-converge: `bugbot_clean_at == current_head` AND most-recent bugteam exit is `converged` AND no push during the bugteam tick AND no outstanding Copilot findings on `current_head` AND `merge_state_status == "CLEAN"` (per `skills/pr-converge/SKILL.md` § Convergence gates) → back-to-back clean → `gh pr ready` (read `current_head` / `bugbot_clean_at` / `copilot_clean_at` / `merge_state_status` from `state.json` when file-backed, else from the conversation state line and Step 1 `view_pr_context.py` output)
80
+ - pr-converge: `bugbot_clean_at == current_head` AND most-recent bugteam exit is `converged` AND no push during the bugteam tick AND no outstanding Copilot findings on `current_head` AND `merge_state_status == "clean"` (per `skills/pr-converge/SKILL.md` § Convergence gates) → back-to-back clean → `update_pull_request(draft=false)` (read `current_head` / `bugbot_clean_at` / `copilot_clean_at` / `merge_state_status` from `state.json` when file-backed, else from the conversation state line and Step 1 `pull_request_read(method="get")` output)
81
81
  - monitor-many: no unresolved comments requiring code changes AND required checks green AND review policy satisfied → `gh pr ready`
@@ -436,6 +436,10 @@ This default is overridden by explicit user instruction such as "refactor this e
436
436
 
437
437
  Docstrings on functions, methods, classes, and modules are encouraged for public APIs. The self-documenting-names gate inspects inline `#` and block `#` comments only; docstrings are exempt from that gate.
438
438
 
439
+ ## Audit Awareness
440
+
441
+ Code that clean-coder writes is audited later against the A–K bug categories from `code-quality-agent`. The hooks listed in this file enforce the Category J slice at write time, but A–I and K (codebase conflicts / incomplete propagation) surface only in audit. For each category's full rubric, sub-bucket decomposition, and concrete checks, see `../audit-rubrics/category_rubrics/` (relative to this agent file). While generating code, anticipate the full A–K surface so the first write clears every audit category.
442
+
439
443
  ## What You Produce
440
444
 
441
445
  Every line you write or modify will:
@@ -9,7 +9,7 @@ color: red
9
9
 
10
10
  You audit a pull request diff for bugs and CODE_RULES.md compliance issues. You return findings; the orchestrator handles fixes.
11
11
 
12
- **Announce at start:** "Using code-quality-agent — auditing diff against A–J categories with CODE_RULES.md awareness."
12
+ **Announce at start:** "Using code-quality-agent — auditing diff against A–K categories with CODE_RULES.md awareness."
13
13
 
14
14
  ## Scope
15
15
 
@@ -19,8 +19,8 @@ Audit only added or modified lines in the diff. Pre-existing code on untouched l
19
19
 
20
20
  This agent runs in one of two modes depending on the calling prompt:
21
21
 
22
- - **Unscoped (default):** the prompt names no categories. Walk all of A through J and produce Shape A/B for every category.
23
- - **Category-restricted:** the prompt names a subset of categories ("audit only category F" or "investigate only H, I, and J"). Audit only the named categories and produce Shape A/B for those alone; skip the rest.
22
+ - **Unscoped (default):** the prompt names no categories. Walk all of A through K and produce Shape A/B for every category.
23
+ - **Category-restricted:** the prompt names a subset of categories ("audit only category F" or "investigate only H, I, and K"). Audit only the named categories and produce Shape A/B for those alone; skip the rest.
24
24
 
25
25
  Tradeoff for callers picking the category-restricted mode: parallel category invocation loses cross-category reasoning. A security finding in Category H may inform a Category J classification, and a parallel split misses that connection. When categories need to inform each other, prefer the unscoped mode.
26
26
 
@@ -32,93 +32,31 @@ Preserve every existing comment. Findings on production code report only on new
32
32
 
33
33
  Report findings only. Author zero edits. Author zero diffs. Run zero commits or pushes. The orchestrator (and the calling skill) handles fix application, commit creation, and PR posting based on your finding list.
34
34
 
35
- ## Bug Categories A–J
35
+ ## Bug Categories A–K
36
36
 
37
- Every audit pass walks all ten categories. Each category produces either at least one Shape A finding (concrete bug at a file:line) or at least one Shape B proof-of-absence entry (audited and clean, with adversarial probes documented). A category that returns neither is a protocol gap per the audit contract.
37
+ Every audit pass walks all eleven categories. Each category produces either at least one Shape A finding (concrete bug at a file:line) or at least one Shape B proof-of-absence entry (audited and clean, with adversarial probes documented). A category that returns neither is a protocol gap per the audit contract.
38
38
 
39
- ### A. API contract verification
39
+ For each category's full description, examples, sub-bucket decomposition, and concrete checks, read the matching rubric in `../audit-rubrics/category_rubrics/`:
40
40
 
41
- Function signatures, return types, async/await correctness, callback shape compatibility.
42
- - A call site passes positional arguments that the callee expects as keyword arguments.
43
- - `await` is missing on a function that returns a coroutine.
44
- - Return type annotated as `bool` while a code path returns `None`.
41
+ | Letter | Category | Reference file |
42
+ |---|---|---|
43
+ | A | API contract verification | `../audit-rubrics/category_rubrics/category-a-api-contracts.md` |
44
+ | B | Selector / query / engine compatibility | `../audit-rubrics/category_rubrics/category-b-selector-engine-compat.md` |
45
+ | C | Resource cleanup and lifecycle | `../audit-rubrics/category_rubrics/category-c-resource-cleanup.md` |
46
+ | D | Variable scoping, ordering, and unbound references | `../audit-rubrics/category_rubrics/category-d-scoping-and-ordering.md` |
47
+ | E | Dead code and unused imports | `../audit-rubrics/category_rubrics/category-e-dead-code.md` |
48
+ | F | Silent failures | `../audit-rubrics/category_rubrics/category-f-silent-failures.md` |
49
+ | G | Off-by-one, bounds, integer overflow | `../audit-rubrics/category_rubrics/category-g-bounds-and-overflow.md` |
50
+ | H | Security boundaries | `../audit-rubrics/category_rubrics/category-h-security-boundaries.md` |
51
+ | I | Concurrency hazards | `../audit-rubrics/category_rubrics/category-i-concurrency.md` |
52
+ | J | CODE_RULES.md compliance | `../audit-rubrics/category_rubrics/category-j-code-rules-compliance.md` |
53
+ | K | Codebase conflicts (incomplete propagation) | `../audit-rubrics/category_rubrics/category-k-codebase-conflicts.md` |
45
54
 
46
- ### B. Selector / query / engine compatibility
55
+ Test files (`test_*.py`, `*_test.py`, `*.test.*`, `*.spec.*`, `conftest.py`, and any path under `/tests/`) are exempt from category J. The exempt path families documented in the J reference also opt out of the constants-location sub-item.
47
56
 
48
- CSS selectors, SQL queries, DOM queries, search-engine syntax incompatibility with the runtime in use.
49
- - CSS selector uses a pseudo-class the target browser engine lacks.
50
- - SQL uses a window function on a database version that lacks it.
51
- - A regex flag is set in syntax that the engine treats as a literal character.
57
+ Category K Shape A findings always cite TWO line locations: the changed line and the unchanged-but-should-have-changed parallel line. The `failure_mode` field describes the contradiction between the two states. K is narrow but recurrent — linters and unit tests rarely catch these findings.
52
58
 
53
- ### C. Resource cleanup and lifecycle
54
-
55
- File handles, network connections, processes, locks, subscriptions.
56
- - File opened in a function that returns before reaching `close()` or a `with` block.
57
- - Database connection acquired without a release path on every error branch.
58
- - Background task started without a cancellation hook.
59
-
60
- ### D. Variable scoping, ordering, and unbound references
61
-
62
- Closures, variable hoisting, ordering of declarations, late binding in loops.
63
- - Variable referenced before assignment on one branch.
64
- - Loop closure captures the loop variable by reference where by-value capture is required.
65
- - A name shadows an outer-scope variable the function still relies on.
66
-
67
- ### E. Dead code and unused imports
68
-
69
- Imports the diff adds but leaves unreferenced; functions defined but uncalled; branches unreachable due to a prior return.
70
- - New `import` line with zero corresponding references.
71
- - A defined helper function whose call sites the diff also removed.
72
- - Code after an unconditional `return` or `raise`.
73
-
74
- ### F. Silent failures
75
-
76
- Catch-all excepts, unconditional success returns, missing error propagation.
77
- - `except Exception: pass` swallows every error including programming bugs.
78
- - A function returns `True` on the success path and `True` on every error path too.
79
- - An async task error is logged while the caller continues as if it succeeded.
80
-
81
- ### G. Off-by-one, bounds, integer overflow
82
-
83
- Loop bounds, slice indices, signed/unsigned overflow, floating-point comparison.
84
- - `range(len(items) + 1)` walks one element past the end of the array.
85
- - Timestamp arithmetic uses 32-bit integer math on a 64-bit value.
86
- - `==` between floats where epsilon comparison is required.
87
-
88
- ### H. Security boundaries
89
-
90
- Injection, path traversal, auth bypass, secret leakage.
91
- - User input concatenated into SQL rather than parameterized.
92
- - File path joined from untrusted input without normalization or root containment.
93
- - Token, password, or API key written to a log line.
94
-
95
- ### I. Concurrency hazards
96
-
97
- Race conditions, missing awaits, shared mutable state, lock ordering.
98
- - Two coroutines append to the same list without synchronization.
99
- - An `await` is missing on a critical-section operation.
100
- - A lock is acquired in different orders on two code paths.
101
-
102
- ### J. CODE_RULES.md compliance
103
-
104
- Hook-enforced and rubric-enforced rules from CODE_RULES.md. Every PR passes through `code_rules_enforcer.py`; flagging these in the audit prevents fix loops that the gate would otherwise trigger.
105
-
106
- Sub-items the audit walks:
107
-
108
- | Sub-item | What this rule looks for |
109
- |---|---|
110
- | Magic values | Literals other than `0`, `1`, `-1` inside production function bodies |
111
- | String-template magic | f-strings whose structural literal text (paths, URLs, patterns) belongs in `config/` |
112
- | Constants location | Module-level `UPPER_SNAKE = ...` outside `config/` in production code (exempt path families: `config/*`, `/migrations/`, `/workflow/`, `_tab.py`, `/states.py`, `/modules.py`, test files) |
113
- | File-global use-count | A file-global constant referenced by fewer than two methods, functions, or classes in the same file |
114
- | Abbreviations | `ctx`, `cfg`, `msg`, `btn`, `idx`, `cnt`, `elem`, `val`, `tmp`, `str`, `num`, `arr`, `obj`, `fn`, `cb`, `req`, `res` (single-letter loop counters and `e` for exceptions are exempt) |
115
- | Vague-name list | `result`, `data`, `output`, `response`, `value`, `item`, `temp`, `info`, `stuff`, `thing`; vague prefixes: `handle`, `process`, `manage`, `do` |
116
- | Type hints | Missing type annotation on a parameter or return; presence of `Any` or `# type: ignore` |
117
- | New inline comments | New `#` or `//` comments in production code that the diff adds (existing comments are preserved untouched and stay outside scope) |
118
- | Logging format | `log_*(f"...")` rather than `log_*("...", arg)` |
119
- | Imports inside functions | `import` statements placed inside function bodies |
120
-
121
- Test files (`test_*.py`, `*_test.py`, `*.test.*`, `*.spec.*`, `conftest.py`, and any path under `/tests/`) are exempt from category J. The exempt path families above also opt out of the constants-location sub-item.
59
+ For reusable Variant C audit prompts scoped to a single category, see `../audit-rubrics/prompts/`. **Each prompt file is a two-section artifact**: above the `---` separator is a PR/repo-INDEPENDENT generalized robust skeleton (full sub-bucket structure with `[BRACKETED_PLACEHOLDERS]` such as `[REPO/ARTIFACT]`, `[TARGET_ID]`, `[INLINE THE FULL ARTIFACT HERE]`, etc.) — copy this and fill in the placeholders for a new audit on any artifact. Below the separator is a worked example against an authentic PR — Category A's worked example is the literal May 2026 audit-experiment prompt against PR #394 (8–10 findings); Category K's worked example is against PR #397 r3210166636 (the K canonical case); Categories B–J are walked against PR #394. Use the skeleton to author a new prompt; read the worked example for depth-and-quality calibration.
122
60
 
123
61
  ## Output Schema
124
62
 
@@ -172,7 +110,7 @@ A bare verified-clean label is inadequate: every Shape B entry lists the files o
172
110
 
173
111
  ## Per-Category Expectation
174
112
 
175
- Every category A through J is investigated. The output for each category is one of:
113
+ Every category A through K is investigated. The output for each category is one of:
176
114
  - one or more Shape A findings, or
177
115
  - one Shape B proof-of-absence entry with concrete files, quoted lines, and adversarial probes.
178
116
 
@@ -14,7 +14,7 @@ You are the FIX teammate for bugteam when `BUGTEAM_FIX_IMPLEMENTER=groq-coder`.
14
14
 
15
15
  ## Contract
16
16
 
17
- You receive the standard bugteam FIX spawn XML documented in `skills/bugteam/PROMPTS.md`, including a `bugs_to_fix` block and a `<worktree_path>` to operate in. Outputs conform to the FIX outcome XML schema in the same file: `.bugteam-pr<N>-loop<L>.outcomes.xml` inside the worktree.
17
+ You receive the standard bugteam FIX spawn XML documented in `skills/bugteam/PROMPTS.md`, including a `bugs_to_fix` block and a `<worktree_path>` to operate in. Outputs conform to the FIX outcome XML schema in the same file: `.bugteam-pr<N>-loop<L>.fix-outcomes.xml` inside the worktree.
18
18
 
19
19
  ## Validation Gate (before any patch)
20
20
 
@@ -82,20 +82,22 @@ After Groq returns:
82
82
 
83
83
  After all files have been patched (or skipped):
84
84
 
85
- 1. `git add` every patched file by explicit path never `git add -A`.
86
- 2. `git commit` with a message summarizing the addressed findings. Example:
85
+ 1. Run the project's test suite and confirm all existing tests pass. If a test fails, diagnose the regression and fix it before committing.
86
+ 2. Read the previous loop's outcome XML (`<worktree_path>/.bugteam-pr<N>-loop<L-1>.outcomes.xml`) and obtain its total finding count. If this is the first loop (L <= 1) or the file does not exist, skip this comparison. Re-read each changed file and count any new violations. Compute the post-fix total: previous total minus bugs fixed in this round plus new violations. If the post-fix total exceeds the previous total, flag all new findings as same-loop fix-targets and revise before committing.
87
+ 3. `git add` every patched file by explicit path — never `git add -A`.
88
+ 4. `git commit` with a message summarizing the addressed findings. Example:
87
89
  ```
88
90
  fix(groq-coder): address N findings from bugteam loop <L>
89
91
 
90
92
  Findings: <comma-separated finding_ids>
91
93
  ```
92
94
  Let every git hook run. Never pass `--no-verify`. Never pass `--no-gpg-sign`. If the commit is hook-blocked: capture stderr, write `status=hook_blocked` for every finding in this loop, populate `hook_output`, and return without retrying — the lead treats this loop as no-progress.
93
- 3. `git push` with a plain fast-forward push. If signing issues surface, stop and report to the user rather than bypassing.
94
- 4. For each finding, post a reply to its `finding_comment_id` via the Step 2.5 reply CLI shape from `skills/bugteam/SKILL.md`:
95
+ 5. `git push` with a plain fast-forward push. If signing issues surface, stop and report to the user rather than bypassing.
96
+ 6. For each finding, post a reply to its `finding_comment_id` via the Step 2.5 reply CLI shape from `skills/bugteam/SKILL.md`:
95
97
  - `Fixed in <commit_sha>` when `status=fixed`.
96
98
  - `Could not address this loop: <reason>` when `status=could_not_address`.
97
99
  - `Hook blocked the fix commit: <one-line summary>` when `status=hook_blocked`.
98
- 5. Write `.bugteam-pr<N>-loop<L>.outcomes.xml` inside `<worktree_path>` per the FIX outcome schema.
100
+ 7. Write `.bugteam-pr<N>-loop<L>.fix-outcomes.xml` inside `<worktree_path>` per the FIX outcome schema.
99
101
 
100
102
  ## Non-Negotiable Guardrails
101
103
 
File without changes
@@ -123,7 +123,7 @@ def main() -> None:
123
123
  else:
124
124
  skill_reference = (
125
125
  "under research-mode constraints "
126
- "(no research-mode skill installed; verify with sources or reply 'I don't know')"
126
+ "(no research-mode skill installed; verify with sources or prompt the user via AskUserQuestion with potential options + context)"
127
127
  )
128
128
 
129
129
  block_response = {
@@ -134,7 +134,7 @@ def main() -> None:
134
134
  f"These words signal unverified claims. You MUST rewrite your response "
135
135
  f"{skill_reference}\n\n"
136
136
  f"Do NOT simply remove the hedging word and keep the unverified claim. "
137
- f"Either VERIFY it with a source or replace it with 'I don't know'.\n\n"
137
+ f"Do more research to VERIFY it with a source, or prompt the user via AskUserQuestion with some potential options + context if you are unable to find anything online.\n\n"
138
138
  f"You MUST re-output the complete, revised response with the corrections applied."
139
139
  ),
140
140
  "systemMessage": USER_FACING_NOTICE,