npm - claude-dev-env - Versions diffs - 1.37.0 → 1.38.0 - Mend

claude-dev-env 1.37.0 → 1.38.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95)

package/CLAUDE.md +3 -0
package/_shared/pr-loop/audit-contract.md +4 -3
package/_shared/pr-loop/fix-protocol.md +2 -0
package/_shared/pr-loop/gh-payloads.md +38 -37
package/_shared/pr-loop/scripts/README.md +0 -1
package/_shared/pr-loop/scripts/preflight.py +2 -1
package/_shared/pr-loop/scripts/tests/test_code_rules_gate.py +2 -2
package/_shared/pr-loop/scripts/tests/test_preflight.py +22 -0
package/_shared/pr-loop/state-schema.md +10 -10
package/agents/clean-coder.md +4 -0
package/agents/code-quality-agent.md +23 -85
package/agents/groq-coder.md +8 -6
package/hooks/blocking/__init__.py +0 -0
package/hooks/blocking/hedging_language_blocker.py +2 -2
package/hooks/blocking/state_description_blocker.py +243 -0
package/hooks/blocking/tdd_enforcer.py +94 -0
package/hooks/blocking/test_hedging_language_blocker.py +1 -1
package/hooks/blocking/test_state_description_blocker.py +618 -0
package/hooks/blocking/test_tdd_enforcer.py +152 -0
package/hooks/config/state_description_blocker_constants.py +130 -0
package/hooks/hooks.json +10 -0
package/package.json +1 -1
package/rules/gh-paginate.md +4 -50
package/rules/no-historical-clutter.md +57 -0
package/scripts/config/groq_bugteam_config.py +13 -5
package/skills/bugteam/CONSTRAINTS.md +20 -27
package/skills/bugteam/EXAMPLES.md +1 -1
package/skills/bugteam/PROMPTS.md +78 -42
package/skills/bugteam/SKILL.md +76 -63
package/skills/bugteam/SKILL_EVALS.md +12 -12
package/skills/bugteam/reference/audit-and-teammates.md +21 -48
package/skills/bugteam/reference/audit-contract.md +7 -7
package/skills/bugteam/reference/github-pr-reviews.md +31 -31
package/skills/bugteam/reference/team-setup.md +1 -1
package/skills/bugteam/reference/teardown-publish-permissions.md +4 -4
package/skills/copilot-review/SKILL.md +7 -14
package/skills/findbugs/SKILL.md +2 -2
package/skills/fixbugs/SKILL.md +1 -1
package/skills/monitor-open-prs/SKILL.md +6 -6
package/skills/pr-converge/SKILL.md +7 -6
package/skills/pr-converge/reference/convergence-gates.md +46 -44
package/skills/pr-converge/reference/examples.md +4 -4
package/skills/pr-converge/reference/fix-protocol.md +8 -8
package/skills/pr-converge/reference/multi-pr-orchestration.md +10 -10
package/skills/pr-converge/reference/per-tick.md +24 -36
package/skills/pr-converge/reference/stop-conditions.md +7 -7
package/skills/pr-converge/scripts/README.md +65 -117
package/skills/pr-review-responder/EXAMPLES.md +2 -2
package/skills/pr-review-responder/PRINCIPLES.md +2 -8
package/skills/pr-review-responder/README.md +7 -48
package/skills/pr-review-responder/SKILL.md +2 -3
package/skills/pr-review-responder/TESTING.md +8 -65
package/skills/qbug/SKILL.md +10 -16
package/_shared/pr-loop/scripts/config/gh_util_constants.py +0 -31
package/_shared/pr-loop/scripts/gh_util.py +0 -193
package/_shared/pr-loop/scripts/tests/test_gh_util.py +0 -257
package/_shared/pr-loop/scripts/tests/test_gh_util_constants.py +0 -61
package/skills/pr-converge/scripts/check_pr_mergeability.py +0 -78
package/skills/pr-converge/scripts/config/pr_converge_constants.py +0 -118
package/skills/pr-converge/scripts/config/test_pr_converge_constants.py +0 -152
package/skills/pr-converge/scripts/fetch_bugbot_inline_comments.py +0 -70
package/skills/pr-converge/scripts/fetch_bugbot_reviews.py +0 -57
package/skills/pr-converge/scripts/fetch_claude_inline_comments.py +0 -70
package/skills/pr-converge/scripts/fetch_claude_reviews.py +0 -61
package/skills/pr-converge/scripts/fetch_copilot_inline_comments.py +0 -70
package/skills/pr-converge/scripts/fetch_copilot_reviews.py +0 -61
package/skills/pr-converge/scripts/mark_pr_ready.py +0 -54
package/skills/pr-converge/scripts/post-bugbot-run.helpers.ps1 +0 -49
package/skills/pr-converge/scripts/post-bugbot-run.ps1 +0 -33
package/skills/pr-converge/scripts/reply_to_inline_comment.py +0 -84
package/skills/pr-converge/scripts/request_copilot_review.py +0 -71
package/skills/pr-converge/scripts/resolve_pr_head.py +0 -58
package/skills/pr-converge/scripts/review_field_helpers.py +0 -43
package/skills/pr-converge/scripts/reviewer_fetch_core.py +0 -153
package/skills/pr-converge/scripts/reviewer_specs.py +0 -98
package/skills/pr-converge/scripts/test_check_pr_mergeability.py +0 -126
package/skills/pr-converge/scripts/test_fetch_bugbot_inline_comments.py +0 -443
package/skills/pr-converge/scripts/test_fetch_bugbot_reviews.py +0 -299
package/skills/pr-converge/scripts/test_fetch_claude_inline_comments.py +0 -485
package/skills/pr-converge/scripts/test_fetch_claude_reviews.py +0 -368
package/skills/pr-converge/scripts/test_fetch_copilot_inline_comments.py +0 -440
package/skills/pr-converge/scripts/test_fetch_copilot_reviews.py +0 -366
package/skills/pr-converge/scripts/test_mark_pr_ready.py +0 -69
package/skills/pr-converge/scripts/test_post_bugbot_run.py +0 -195
package/skills/pr-converge/scripts/test_reply_to_inline_comment.py +0 -159
package/skills/pr-converge/scripts/test_request_copilot_review.py +0 -101
package/skills/pr-converge/scripts/test_resolve_pr_head.py +0 -79
package/skills/pr-converge/scripts/test_review_field_helpers.py +0 -80
package/skills/pr-converge/scripts/test_reviewer_fetch_core.py +0 -448
package/skills/pr-converge/scripts/test_reviewer_specs.py +0 -107
package/skills/pr-converge/scripts/test_trigger_bugbot.py +0 -139
package/skills/pr-converge/scripts/test_view_pr_context.py +0 -111
package/skills/pr-converge/scripts/trigger_bugbot.py +0 -77
package/skills/pr-converge/scripts/view_pr_context.py +0 -47
package/skills/pr-review-responder/scripts/respond_to_reviews.py +0 -376

package/CLAUDE.md CHANGED Viewed

@@ -1,5 +1,7 @@
 # Claude Development Assistant
+ The user delegates execution to you and expects zero manual steps unless strictly necessary. Execute every command you can directly. Only instruct the user to do something manually when you are technically unable to do it yourself. When a task involves credentials or other sensitive input, display a minimal secure UI (e.g., a password dialog) to collect it rather than asking the user to paste it into chat or run the command themselves. When direction is ambiguous, use AskUserQuestion to clarify before acting.
 ## Code Rules
 @~/.claude/docs/CODE_RULES.md
@@ -42,6 +44,7 @@ Full rule including the reconciliation with Right-Sized Engineering, misapplicat
 ## Tool Policies
 - **context7:** Before writing code using any library/framework/SDK/API, call `resolve-library-id` then `query-docs` via Context7 MCP. Use the fetched docs to write code. Applies to all libs including React, Next.js, Django, Express, Prisma.
+- **gh MCP:** Always use `mcp__plugin_github_github__*` tools for any GitHub operations (branches, PRs, file operations). Do not use the `Bash` tool to invoke `gh` or `git` CLI for GitHub operations.
 ## Compaction
 When compacting, always preserve:

package/_shared/pr-loop/audit-contract.md CHANGED Viewed

@@ -121,9 +121,10 @@ Sequence:
 4. Compute `fix_diff` against pre-fix contents for the modified set.
 5. Run `bugteam_code_rules_gate.py` with explicit paths for every modified file.
 6. Spawn a scoped audit of `fix_diff` with full A–J rigor, Shape A/B contract, adversarial pass, AND Haiku secondary in parallel (paranoid mode on post-fix).
-7. Any new findings become same-loop fix-targets. Internal iteration count increments by one.
-8. After 3 internal iterations with fresh findings each time, exit `stuck: post-fix audit not converging`.
-9. Only when `gate_findings` empty AND `post_fix_findings` empty: `git add`, commit, push.
+7. If this is the first loop (L <= 1) or the previous loop's outcome XML (`<worktree_path>/.bugteam-pr<N>-loop<L-1>.outcomes.xml`) does not exist, skip this comparison. Otherwise, read that file and obtain its total finding count, then compute the post-fix total: previous total minus bugs fixed in this round plus new violations found in the post-fix audit (step 6). An increase in total findings across loop transitions is a regression: if the post-fix total exceeds the previous total, flag all new findings as same-loop fix-targets and revise.
+8. Any new findings become same-loop fix-targets. Internal iteration count increments by one.
+9. After 3 internal iterations with fresh findings each time, exit `stuck: post-fix audit not converging`.
+10. Only when `gate_findings` empty AND `post_fix_findings` empty: `git add`, commit, push.
 `converged` exit condition: `primary_audit_clean AND post_fix_audit_clean` for the committing loop.

package/_shared/pr-loop/fix-protocol.md CHANGED Viewed

@@ -35,3 +35,5 @@ After step 11, when `git rev-parse HEAD` is unchanged from `pre_fix_sha`, the fi
 - Append commits; the branch stays linear (one commit per fix loop, fast-forward push only).
 - No comment deletion on lines left unchanged.
 - No `--no-verify`. Hook rejections flag real underlying issues worth investigating.
+- **Narrow scope.** Fix only the exact defect at the specified file:line. No structural refactoring, no inlining helpers.
+- **Preserve helpers.** Do not remove or inline existing helper functions unless the finding explicitly names them.

package/_shared/pr-loop/gh-payloads.md CHANGED Viewed

@@ -1,33 +1,28 @@
-# gh API payloads
+# MCP-based payloads
 Shared payload shapes for posting PR reviews and replies. Used by `bugteam`, `qbug`, `pr-converge`, `monitor-many`.
-## Build payloads with jq + gh api --input
+## Build payloads with MCP tools
-Build JSON with `jq --rawfile` / `-Rs` reading per-finding markdown bodies from temp files; pipe to `gh api ... --input -`. Avoids shell-quoting hazards and satisfies the `gh-body-backtick-guard` hook.
+Build payloads as structured arguments to MCP tools. Body content passes as a string parameter directly.
 ## One review per loop
-POST to `repos/<owner>/<repo>/pulls/<number>/reviews` once per audit loop. Payload: `event: "COMMENT"`, the review body, and one `comments[]` object per anchored finding.
-```bash
-jq -n \
-  --rawfile review_body <tmp_review_body.md> \
-  --arg commit_id "$(git rev-parse HEAD)" \
-  --rawfile finding_body_1 <tmp_finding_1.md> \
-  --arg path_1 "<file_1>" \
-  --argjson line_1 <line_1> \
-  [... one finding_body_K / path_K / line_K triple per finding ...] \
-  '{
-     commit_id: $commit_id,
-     event: "COMMENT",
-     body: $review_body,
-     comments: [
-       {path: $path_1, line: $line_1, side: "RIGHT", body: $finding_body_1}
-       [, ... ]
-     ]
-   }' \
-| gh api repos/<owner>/<repo>/pulls/<number>/reviews -X POST --input -
+Call `pull_request_review_write` once per audit loop. Payload: `event: "COMMENT"`, the review body, and one `comments[]` object per anchored finding.
+```
+pull_request_review_write(
+    method="create",
+    event="COMMENT",
+    body=review_body,
+    commitID=head_sha,
+    owner=owner,
+    repo=repo,
+    pullNumber=pull_number,
+    comments=[
+        {path: file_path, line: line_number, side: "RIGHT", body: finding_body}
+    ]
+)
 ```
 Single-line anchors: `{path, line, side: "RIGHT", body}`. Multi-line anchors add `start_line` and `start_side: "RIGHT"`.
@@ -48,11 +43,16 @@ Zero findings still post one review. Body line: `## /<workflow> loop <N> audit:
 ## Reply to a finding
-POST to `repos/<owner>/<repo>/pulls/<number>/comments/<finding_comment_id>/replies`:
+Call `add_reply_to_pull_request_comment` with the finding comment ID and reply body:
-```bash
-jq -Rs '{body: .}' <tmp_reply.md \
-| gh api repos/<owner>/<repo>/pulls/<number>/comments/<finding_comment_id>/replies -X POST --input -
+```
+add_reply_to_pull_request_comment(
+    commentId=finding_comment_id,
+    body=reply_body,
+    owner=owner,
+    repo=repo,
+    pullNumber=pull_number
+)
 ```
 ## Anchor fallback (line not in diff)
@@ -61,25 +61,26 @@ Lines not in the PR diff cannot anchor an inline comment. Omit them from `commen
 ## Review POST failure fallback (issue comment)
-When the review POST fails, post one issue comment carrying the full review body to `repos/<owner>/<repo>/issues/<number>/comments`:
+When the review POST fails, call `add_issue_comment` with the full review body:
-```bash
-jq -Rs '{body: .}' <tmp_fallback.md \
-| gh api repos/<owner>/<repo>/issues/<number>/comments -X POST --input -
+```
+add_issue_comment(
+    owner=owner,
+    repo=repo,
+    issueNumber=pull_number,
+    body=fallback_body
+)
 ```
 All findings in the loop record `used_fallback="true"`; `finding_comment_url` = issue comment URL.
 ## Endpoints
-- Review POST: `repos/{owner}/{repo}/pulls/{pull}/reviews`
-- Reply POST: `repos/{owner}/{repo}/pulls/{pull}/comments/{id}/replies`
-- Fallback issue comment: `repos/{owner}/{repo}/issues/{issue}/comments` (`issue` = PR number)
+- Review: `pull_request_review_write(method="create", ...)`
+- Reply: `add_reply_to_pull_request_comment(...)`
+- Fallback issue comment: `add_issue_comment(...)`
 ## SHA capture timing
 `commit_id` and any `<head_sha_at_post_time>` reference: `git rev-parse HEAD` immediately before the POST, in the cwd of whichever subagent or process is posting.
-## Body file UTF-8 encoding
-Write each markdown body to a temp file via the BOM-free PowerShell pattern (`[IO.File]::WriteAllText($path, $content, [Text.UTF8Encoding]::new($false))`) before `gh api` consumes it. See `~/.claude/rules/gh-body-file.md`.

package/_shared/pr-loop/scripts/README.md CHANGED Viewed

@@ -9,7 +9,6 @@ Runnable helpers used by **bugteam**, **qbug**, **pr-converge**, and related ski
 | `preflight.py` | Local checks before a PR-loop run (pytest discovery, optional pre-commit, hooksPath sanity). |
 | `code_rules_gate.py` | CODE_RULES gate over PR-scoped diffs (`--base`, staged-only, path filters). |
 | `fix_hookspath.py` | Repair `core.hooksPath` when it does not point at the packaged git-hooks tree. |
-| `gh_util.py` | GitHub CLI helpers (pagination-safe JSON parsing, review fetches). |
 | `grant_project_claude_permissions.py` / `revoke_project_claude_permissions.py` | Claude Code permission JSON helpers used during publish-style flows. |
 | `_claude_permissions_common.py` | Shared implementation for the permission scripts. |

package/_shared/pr-loop/scripts/preflight.py CHANGED Viewed

@@ -378,6 +378,7 @@ def main(all_arguments: list[str]) -> int:
     hooks_path_exit_code = verify_git_hooks_path(repository_root)
     if hooks_path_exit_code != 0:
         return hooks_path_exit_code
+    discovery_result: bool | None = True
     if not arguments.no_pytest and has_pytest_configuration(repository_root):
         discovery_result = has_discoverable_tests(repository_root)
         if discovery_result is None:
@@ -433,7 +434,7 @@ def main(all_arguments: list[str]) -> int:
                 exit_code = run_pytest(repository_root, arguments.verbose)
             if exit_code != 0:
                 return exit_code
-    elif not arguments.no_pytest:
+    elif not arguments.no_pytest and discovery_result is not False:
         print(
             "bugteam_preflight: no pytest configuration found; skipping pytest.",
             file=sys.stderr,

package/_shared/pr-loop/scripts/tests/test_code_rules_gate.py CHANGED Viewed

@@ -537,8 +537,8 @@ def test_check_wrapper_plumb_through_accepts_positional_or_keyword_forwarder() -
     When a wrapper exposes the delegate's optional kwarg as a positional-or-keyword
     parameter with a default value and forwards it correctly, the check must produce
-    zero findings. This mirrors the live gh_util.fetch_inline_review_comments →
-    run_gh signature pairing on this PR.
+    zero findings. This mirrors a wrapper/delegate signature pairing
+    where the wrapper exposes the delegate's optional kwarg.
     """
     source = (
         "def run_gh(all_command, *, timeout_seconds=30):\n"

package/_shared/pr-loop/scripts/tests/test_preflight.py CHANGED Viewed

@@ -651,6 +651,28 @@ def test_main_does_not_print_no_related_tests_when_get_changed_files_returns_non
     assert "no related tests found" not in captured.err
+def test_main_should_not_print_no_pytest_config_when_pytest_configured_but_no_tests(
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """When pytest is configured but no tests are found, main must not print
+    the misleading 'no pytest configuration found' message."""
+    with (
+        patch.object(preflight, "verify_git_hooks_path", return_value=0),
+        patch.object(preflight, "has_pytest_configuration", return_value=True),
+        patch.object(preflight, "has_discoverable_tests", return_value=False),
+    ):
+        exit_code = preflight.main([])
+    assert exit_code == 0
+    captured = capsys.readouterr()
+    assert "bugteam_preflight: pytest configured but no tests found" in captured.err, (
+        "Must print the correct message about configured pytest with no tests"
+    )
+    assert "bugteam_preflight: no pytest configuration found" not in captured.err, (
+        "Must not print the misleading 'no pytest configuration found' message "
+        "when pytest IS configured"
+    )
 def test_main_prints_no_related_tests_when_get_changed_files_returns_empty(
     capsys: pytest.CaptureFixture[str],
 ) -> None:

package/_shared/pr-loop/state-schema.md CHANGED Viewed

@@ -7,7 +7,7 @@ State each PR-loop workflow tracks across iterations. Workflows differ on persis
 | Field | Type | Purpose |
 |---|---|---|
 | `loop_count` | int | Iterations completed; bumps on each AUDIT or tick |
-| `last_action` | enum | `fresh` \| `audited` \| `fixed` — drives next-step dispatch |
+| `last_action` | enum | `fresh`, `audited`, `fixed` — drives next-step dispatch |
 | `last_findings` | object | `{p0, p1, p2, total}` count of findings from most recent AUDIT |
 | `audit_log` | list[str] | Per-iteration one-line summaries for the final report |
 | `starting_sha` | str | `git rev-parse HEAD` at workflow start |
@@ -35,11 +35,11 @@ Adds the same **traffic** fields whether they live in **`state.json`** or in the
 | Field | Type | Purpose |
 |---|---|---|
-| `phase` | enum | `BUGBOT` \| `BUGTEAM` — which reviewer the current tick drives |
-| `current_head` | str | PR `headRefOid` / `git rev-parse` for the PR under work (each tick; from `view_pr_context.py` when no file store) |
+| `phase` | enum | `BUGBOT`, `BUGTEAM` — which reviewer the current tick drives |
+| `current_head` | str | PR `.head.sha` / `git rev-parse` for the PR under work (each tick; from `pull_request_read(method="get")` when no file store) |
 | `bugbot_clean_at` | str \| null | HEAD SHA at which Cursor Bugbot last reported clean, or `null` (reset on every push) |
 | `copilot_clean_at` | str \| null | HEAD SHA at which the GitHub Copilot reviewer (`copilot-pull-request-reviewer[bot]`) last reported clean (review `state == "APPROVED"`), or `null`. Reset on every push. Convergence gates require this equals `current_head` after bugbot+bugteam are clean (see `skills/pr-converge/SKILL.md` § Convergence gates). |
-| `merge_state_status` | str \| null | Last-observed `mergeStateStatus` from `gh pr view --json mergeable,mergeStateStatus,headRefOid` (e.g., `CLEAN`, `DIRTY`, `BLOCKED`, `BEHIND`, `UNKNOWN`), or `null` before the first check. Reset on every push. `DIRTY` triggers the rebase invocation; non-`CLEAN` non-`DIRTY` is a hard blocker per pr-converge `Stop conditions`. |
+| `merge_state_status` | str \| null | Last-observed `mergeable_state` from `pull_request_read(method="get")` (e.g., `clean`, `dirty`, `blocked`, `behind`, `unknown`), or `null` before the first check. Reset on every push. `dirty` triggers the rebase invocation; non-`clean` non-`dirty` is a hard blocker per pr-converge `Stop conditions`. |
 | `inline_lag_streak` | int | Consecutive ticks where bugbot's review body claims findings but inline-comments API returns zero rows for `current_head` |
 | `tick_count` | int | Observability only — **no ceiling**; loop ends on convergence or **Stop conditions** in `pr-converge` |
@@ -48,9 +48,9 @@ Adds the same **traffic** fields whether they live in **`state.json`** or in the
 | Mode | When it applies | Source of truth | `tick_count` bump |
 |---|---|---|---|
 | **`state.json`** | File exists at `<TMPDIR>/pr-converge-<session_id>/state.json` (multi-PR orchestration or other file-backed session) | JSON: top-level `session_id`; per-PR objects under `prs[<number>]` with `owner`, `repo`, `branch`, `phase`, `current_head`, `bugbot_clean_at`, `inline_lag_streak`, `tick_count`, `last_action`, `status`, `last_updated`. Optional sibling `converged.log` (append-only; multi-PR only). Writes use lock + atomic replace per skill **Concurrency** | **Orchestrator only** at tick start (locked merge for every non-terminal PR); **never** bump `tick_count` in Step 1 when this file is in use |
-| **Conversation state line** | **No** `state.json` (typical single-PR `/pr-converge` in Cursor) | Persist **`phase`**, **`bugbot_clean_at`**, **`inline_lag_streak`**, **`tick_count`** as **plain text** in each assistant turn; next tick reads them from the **most recent assistant message**. **`current_head` is not serialized in that line** — re-resolve each tick via `view_pr_context.py` (same contract as `skills/pr-converge/SKILL.md` § State across ticks). | **Step 1** increments `tick_count` in that line **only** when no `state.json` — must not double-count with any file-backed path |
+| **Conversation state line** | **No** `state.json` (typical single-PR `/pr-converge` in Cursor) | Persist **`phase`**, **`bugbot_clean_at`**, **`inline_lag_streak`**, **`tick_count`** as **plain text** in each assistant turn; next tick reads them from the **most recent assistant message**. **`current_head` is not serialized in that line** — re-resolve each tick via `pull_request_read(method="get")` (same contract as `skills/pr-converge/SKILL.md` § State across ticks). | **Step 1** increments `tick_count` in that line **only** when no `state.json` — must not double-count with any file-backed path |
-**`status` (file-backed `prs[...]` only):** `fresh` \| `in_progress` \| `awaiting_bugbot` \| `awaiting_bugteam` \| `converged` \| `blocked`
+**`status` (file-backed `prs[...]` only):** `fresh`, `in_progress`, `awaiting_bugbot`, `awaiting_bugteam`, `converged`, `blocked`
 ### monitor-many
@@ -60,9 +60,9 @@ Adds per-PR JSON state file at `~/.claude/skills/monitor-many/state/<owner>-<rep
 |---|---|---|
 | `repo_name` | str | Full `owner/repo` |
 | `pr_number` | int | PR number |
-| `status` | enum | `open` \| `blocked_escalation` \| `fixing` \| `ready_candidate` \| `closed` |
-| `copilot_review` | enum | `none` \| `requested` \| `pending` \| `commented` \| `approved` |
-| `bugbot_review` | enum | Same vocabulary as `copilot_review` |
+| `status` | enum | `open`, `blocked_escalation`, `fixing`, `ready_candidate`, `closed` |
+| `copilot_review` | enum | `none`, `requested`, `pending`, `commented`, `approved` |
+| `bugbot_review` | enum | Same vocabulary as `copilot_review`; one of `none`, `requested`, `pending`, `commented`, `approved` |
 | `last_seen_comment_id` | int \| null | Highest processed review-comment id (incremental polling watermark) |
 | `review_comments` | list[object] | Optional cache; `{id, author, path, line}` per entry |
 | `escalation_queue` | list[object] | Pending human-judgment items: `{comment_id, summary, created_at}` |
@@ -77,5 +77,5 @@ Adds per-PR JSON state file at `~/.claude/skills/monitor-many/state/<owner>-<rep
 ## Convergence checks
 - bugteam, qbug: `last_action == "audited"` AND `last_findings.total == 0` → `converged`
-- pr-converge: `bugbot_clean_at == current_head` AND most-recent bugteam exit is `converged` AND no push during the bugteam tick AND no outstanding Copilot findings on `current_head` AND `merge_state_status == "CLEAN"` (per `skills/pr-converge/SKILL.md` § Convergence gates) → back-to-back clean → `gh pr ready` (read `current_head` / `bugbot_clean_at` / `copilot_clean_at` / `merge_state_status` from `state.json` when file-backed, else from the conversation state line and Step 1 `view_pr_context.py` output)
+- pr-converge: `bugbot_clean_at == current_head` AND most-recent bugteam exit is `converged` AND no push during the bugteam tick AND no outstanding Copilot findings on `current_head` AND `merge_state_status == "clean"` (per `skills/pr-converge/SKILL.md` § Convergence gates) → back-to-back clean → `update_pull_request(draft=false)` (read `current_head` / `bugbot_clean_at` / `copilot_clean_at` / `merge_state_status` from `state.json` when file-backed, else from the conversation state line and Step 1 `pull_request_read(method="get")` output)
 - monitor-many: no unresolved comments requiring code changes AND required checks green AND review policy satisfied → `gh pr ready`

package/agents/clean-coder.md CHANGED Viewed

@@ -436,6 +436,10 @@ This default is overridden by explicit user instruction such as "refactor this e
 Docstrings on functions, methods, classes, and modules are encouraged for public APIs. The self-documenting-names gate inspects inline `#` and block `#` comments only; docstrings are exempt from that gate.
+## Audit Awareness
+Code that clean-coder writes is audited later against the A–K bug categories from `code-quality-agent`. The hooks listed in this file enforce the Category J slice at write time, but categories A–I and K (codebase conflicts / incomplete propagation) surface only in the audit. For each category's full rubric, sub-bucket decomposition, and concrete checks, see `../audit-rubrics/category_rubrics/` (relative to this agent file). While generating code, anticipate the full A–K surface so that the first write clears every audit category.
 ## What You Produce
 Every line you write or modify will:

package/agents/code-quality-agent.md CHANGED Viewed

@@ -9,7 +9,7 @@ color: red
 You audit a pull request diff for bugs and CODE_RULES.md compliance issues. You return findings; the orchestrator handles fixes.
-**Announce at start:** "Using code-quality-agent — auditing diff against A–J categories with CODE_RULES.md awareness."
+**Announce at start:** "Using code-quality-agent — auditing diff against A–K categories with CODE_RULES.md awareness."
 ## Scope
@@ -19,8 +19,8 @@ Audit only added or modified lines in the diff. Pre-existing code on untouched l
 This agent runs in one of two modes depending on the calling prompt:
-- **Unscoped (default):** the prompt names no categories. Walk all of A through J and produce Shape A/B for every category.
-- **Category-restricted:** the prompt names a subset of categories ("audit only category F" or "investigate only H, I, and J"). Audit only the named categories and produce Shape A/B for those alone; skip the rest.
+- **Unscoped (default):** the prompt names no categories. Walk all of A through K and produce Shape A/B for every category.
+- **Category-restricted:** the prompt names a subset of categories ("audit only category F" or "investigate only H, I, and K"). Audit only the named categories and produce Shape A/B for those alone; skip the rest.
 Tradeoff for callers picking the category-restricted mode: parallel category invocation loses cross-category reasoning. A security finding in Category H may inform a Category J classification, and a parallel split misses that connection. When categories need to inform each other, prefer the unscoped mode.
@@ -32,93 +32,31 @@ Preserve every existing comment. Findings on production code report only on new
 Report findings only. Author zero edits. Author zero diffs. Run zero commits or pushes. The orchestrator (and the calling skill) handles fix application, commit creation, and PR posting based on your finding list.
-## Bug Categories A–J
+## Bug Categories A–K
-Every audit pass walks all ten categories. Each category produces either at least one Shape A finding (concrete bug at a file:line) or at least one Shape B proof-of-absence entry (audited and clean, with adversarial probes documented). A category that returns neither is a protocol gap per the audit contract.
+Every audit pass walks all eleven categories. Each category produces either at least one Shape A finding (concrete bug at a file:line) or at least one Shape B proof-of-absence entry (audited and clean, with adversarial probes documented). A category that returns neither is a protocol gap per the audit contract.
-### A. API contract verification
+For each category's full description, examples, sub-bucket decomposition, and concrete checks, read the matching rubric in `../audit-rubrics/category_rubrics/`:
-Function signatures, return types, async/await correctness, callback shape compatibility.
-- A call site passes positional arguments that the callee expects as keyword arguments.
-- `await` is missing on a function that returns a coroutine.
-- Return type annotated as `bool` while a code path returns `None`.
+| Letter | Category | Reference file |
+|---|---|---|
+| A | API contract verification | `../audit-rubrics/category_rubrics/category-a-api-contracts.md` |
+| B | Selector / query / engine compatibility | `../audit-rubrics/category_rubrics/category-b-selector-engine-compat.md` |
+| C | Resource cleanup and lifecycle | `../audit-rubrics/category_rubrics/category-c-resource-cleanup.md` |
+| D | Variable scoping, ordering, and unbound references | `../audit-rubrics/category_rubrics/category-d-scoping-and-ordering.md` |
+| E | Dead code and unused imports | `../audit-rubrics/category_rubrics/category-e-dead-code.md` |
+| F | Silent failures | `../audit-rubrics/category_rubrics/category-f-silent-failures.md` |
+| G | Off-by-one, bounds, integer overflow | `../audit-rubrics/category_rubrics/category-g-bounds-and-overflow.md` |
+| H | Security boundaries | `../audit-rubrics/category_rubrics/category-h-security-boundaries.md` |
+| I | Concurrency hazards | `../audit-rubrics/category_rubrics/category-i-concurrency.md` |
+| J | CODE_RULES.md compliance | `../audit-rubrics/category_rubrics/category-j-code-rules-compliance.md` |
+| K | Codebase conflicts (incomplete propagation) | `../audit-rubrics/category_rubrics/category-k-codebase-conflicts.md` |
-### B. Selector / query / engine compatibility
+Test files (`test_*.py`, `*_test.py`, `*.test.*`, `*.spec.*`, `conftest.py`, and any path under `/tests/`) are exempt from category J. The exempt path families documented in the J reference also opt out of the constants-location sub-item.
-CSS selectors, SQL queries, DOM queries, search-engine syntax — incompatibility with the runtime in use.
-- CSS selector uses a pseudo-class the target browser engine lacks.
-- SQL uses a window function on a database version that lacks it.
-- A regex flag is set in syntax that the engine treats as a literal character.
+Category K Shape A findings always cite TWO line locations: the changed line and the parallel line that should have changed but did not. The `failure_mode` field describes the contradiction between the two states. K is narrow but recurrent — linters and unit tests rarely catch these defects.
-### C. Resource cleanup and lifecycle
-File handles, network connections, processes, locks, subscriptions.
-- File opened in a function that returns before reaching `close()` or a `with` block.
-- Database connection acquired without a release path on every error branch.
-- Background task started without a cancellation hook.
-### D. Variable scoping, ordering, and unbound references
-Closures, variable hoisting, ordering of declarations, late binding in loops.
-- Variable referenced before assignment on one branch.
-- Loop closure captures the loop variable by reference where by-value capture is required.
-- A name shadows an outer-scope variable the function still relies on.
-### E. Dead code and unused imports
-Imports the diff adds but leaves unreferenced; functions defined but uncalled; branches unreachable due to a prior return.
-- New `import` line with zero corresponding references.
-- A defined helper function whose call sites the diff also removed.
-- Code after an unconditional `return` or `raise`.
-### F. Silent failures
-Catch-all excepts, unconditional success returns, missing error propagation.
-- `except Exception: pass` swallows every error including programming bugs.
-- A function returns `True` on the success path and `True` on every error path too.
-- An async task error is logged while the caller continues as if it succeeded.
-### G. Off-by-one, bounds, integer overflow
-Loop bounds, slice indices, signed/unsigned overflow, floating-point comparison.
-- `range(len(items) + 1)` walks one element past the end of the array.
-- Timestamp arithmetic uses 32-bit integer math on a 64-bit value.
-- `==` between floats where epsilon comparison is required.
-### H. Security boundaries
-Injection, path traversal, auth bypass, secret leakage.
-- User input concatenated into SQL rather than parameterized.
-- File path joined from untrusted input without normalization or root containment.
-- Token, password, or API key written to a log line.
-### I. Concurrency hazards
-Race conditions, missing awaits, shared mutable state, lock ordering.
-- Two coroutines append to the same list without synchronization.
-- An `await` is missing on a critical-section operation.
-- A lock is acquired in different orders on two code paths.
-### J. CODE_RULES.md compliance
-Hook-enforced and rubric-enforced rules from CODE_RULES.md. Every PR passes through `code_rules_enforcer.py`; flagging these in the audit prevents fix loops that the gate would otherwise trigger.
-Sub-items the audit walks:
-| Sub-item | What this rule looks for |
-|---|---|
-| Magic values | Literals other than `0`, `1`, `-1` inside production function bodies |
-| String-template magic | f-strings whose structural literal text (paths, URLs, patterns) belongs in `config/` |
-| Constants location | Module-level `UPPER_SNAKE = ...` outside `config/` in production code (exempt path families: `config/*`, `/migrations/`, `/workflow/`, `_tab.py`, `/states.py`, `/modules.py`, test files) |
-| File-global use-count | A file-global constant referenced by fewer than two methods, functions, or classes in the same file |
-| Abbreviations | `ctx`, `cfg`, `msg`, `btn`, `idx`, `cnt`, `elem`, `val`, `tmp`, `str`, `num`, `arr`, `obj`, `fn`, `cb`, `req`, `res` (single-letter loop counters and `e` for exceptions are exempt) |
-| Vague-name list | `result`, `data`, `output`, `response`, `value`, `item`, `temp`, `info`, `stuff`, `thing`; vague prefixes: `handle`, `process`, `manage`, `do` |
-| Type hints | Missing type annotation on a parameter or return; presence of `Any` or `# type: ignore` |
-| New inline comments | New `#` or `//` comments in production code that the diff adds (existing comments are preserved untouched and stay outside scope) |
-| Logging format | `log_*(f"...")` rather than `log_*("...", arg)` |
-| Imports inside functions | `import` statements placed inside function bodies |
-Test files (`test_*.py`, `*_test.py`, `*.test.*`, `*.spec.*`, `conftest.py`, and any path under `/tests/`) are exempt from category J. The exempt path families above also opt out of the constants-location sub-item.
+For reusable Variant C audit prompts scoped to a single category, see `../audit-rubrics/prompts/`. **Each prompt file is a two-section artifact.** Above the `---` separator is a generalized skeleton that is independent of any PR or repo: it carries the full sub-bucket structure with bracketed placeholders such as `[REPO/ARTIFACT]`, `[TARGET_ID]`, and `[INLINE THE FULL ARTIFACT HERE]`. Copy the skeleton and fill in the placeholders to start a new audit on any artifact. Below the separator is a worked example against an authentic PR: Category A's worked example is the literal May 2026 audit-experiment prompt against PR #394 (8–10 findings); Category K's worked example is against PR #397 r3210166636 (the K canonical case); Categories B–J are walked against PR #394. Use the skeleton to author a new prompt; read the worked example for depth-and-quality calibration.
 ## Output Schema
@@ -172,7 +110,7 @@ A bare verified-clean label is inadequate: every Shape B entry lists the files o
 ## Per-Category Expectation
-Every category A through J is investigated. The output for each category is one of:
+Every category A through K is investigated. The output for each category is one of:
 - one or more Shape A findings, or
 - one Shape B proof-of-absence entry with concrete files, quoted lines, and adversarial probes.

package/agents/groq-coder.md CHANGED Viewed

@@ -14,7 +14,7 @@ You are the FIX teammate for bugteam when `BUGTEAM_FIX_IMPLEMENTER=groq-coder`.
 ## Contract
-You receive the standard bugteam FIX spawn XML documented in `skills/bugteam/PROMPTS.md`, including a `bugs_to_fix` block and a `<worktree_path>` to operate in. Outputs conform to the FIX outcome XML schema in the same file: `.bugteam-pr<N>-loop<L>.outcomes.xml` inside the worktree.
+You receive the standard bugteam FIX spawn XML documented in `skills/bugteam/PROMPTS.md`, including a `bugs_to_fix` block and a `<worktree_path>` to operate in. Outputs conform to the FIX outcome XML schema in the same file: `.bugteam-pr<N>-loop<L>.fix-outcomes.xml` inside the worktree.
 ## Validation Gate (before any patch)
@@ -82,20 +82,22 @@ After Groq returns:
 After all files have been patched (or skipped):
-1. `git add` every patched file by explicit path — never `git add -A`.
-2. `git commit` with a message summarizing the addressed findings. Example:
+1. Run the project's test suite and confirm all existing tests pass. If a test fails, diagnose the regression and fix it before committing.
+2. Read the previous loop's outcome XML (`<worktree_path>/.bugteam-pr<N>-loop<L-1>.outcomes.xml`) and obtain its total finding count. If this is the first loop (L <= 1) or the file does not exist, skip this comparison. Re-read each changed file and count any new violations. Compute the post-fix total: previous total minus bugs fixed in this round plus new violations. If the post-fix total exceeds the previous total, flag all new findings as same-loop fix-targets and revise before committing.
+3. `git add` every patched file by explicit path — never `git add -A`.
+4. `git commit` with a message summarizing the addressed findings. Example:
    ```
    fix(groq-coder): address N findings from bugteam loop <L>
    Findings: <comma-separated finding_ids>
    ```
    Let every git hook run. Never pass `--no-verify`. Never pass `--no-gpg-sign`. If the commit is hook-blocked: capture stderr, write `status=hook_blocked` for every finding in this loop, populate `hook_output`, and return without retrying — the lead treats this loop as no-progress.
-3. `git push` with a plain fast-forward push. If signing issues surface, stop and report to the user rather than bypassing.
-4. For each finding, post a reply to its `finding_comment_id` via the Step 2.5 reply CLI shape from `skills/bugteam/SKILL.md`:
+5. `git push` with a plain fast-forward push. If signing issues surface, stop and report to the user rather than bypassing.
+6. For each finding, post a reply to its `finding_comment_id` via the Step 2.5 reply CLI shape from `skills/bugteam/SKILL.md`:
    - `Fixed in <commit_sha>` when `status=fixed`.
    - `Could not address this loop: <reason>` when `status=could_not_address`.
    - `Hook blocked the fix commit: <one-line summary>` when `status=hook_blocked`.
-5. Write `.bugteam-pr<N>-loop<L>.outcomes.xml` inside `<worktree_path>` per the FIX outcome schema.
+7. Write `.bugteam-pr<N>-loop<L>.fix-outcomes.xml` inside `<worktree_path>` per the FIX outcome schema.
 ## Non-Negotiable Guardrails

package/hooks/blocking/__init__.py ADDED Viewed

File without changes

package/hooks/blocking/hedging_language_blocker.py CHANGED Viewed

@@ -123,7 +123,7 @@ def main() -> None:
     else:
         skill_reference = (
             "under research-mode constraints "
-            "(no research-mode skill installed; verify with sources or reply 'I don't know')"
+            "(no research-mode skill installed; verify with sources or prompt the user via AskUserQuestion with potential options + context)"
         )
     block_response = {
@@ -134,7 +134,7 @@ def main() -> None:
             f"These words signal unverified claims. You MUST rewrite your response "
             f"{skill_reference}\n\n"
             f"Do NOT simply remove the hedging word and keep the unverified claim. "
-            f"Either VERIFY it with a source or replace it with 'I don't know'.\n\n"
+            f"Do more research to VERIFY it with a source, or prompt the user via AskUserQuestion with some potential options + context if you are unable to find anything online.\n\n"
             f"You MUST re-output the complete, revised response with the corrections applied."
         ),
         "systemMessage": USER_FACING_NOTICE,