npm - claude-dev-env - Versions diffs - 1.26.3 → 1.26.5 - Mend

claude-dev-env 1.26.3 → 1.26.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/hooks/blocking/hedging_language_blocker.py +18 -9
package/hooks/blocking/test_hedging_language_blocker.py +135 -0
package/hooks/blocking/test_pr_description_enforcer.py +4 -1
package/hooks/config/__init__.py +1 -0
package/hooks/config/messages.py +4 -0
package/package.json +1 -1
package/skills/bugteam/CONSTRAINTS.md +1 -0
package/skills/bugteam/PROMPTS.md +13 -5
package/skills/bugteam/SKILL.md +28 -19

package/hooks/blocking/hedging_language_blocker.py CHANGED Viewed

@@ -11,6 +11,9 @@ import os
 import re
 import sys
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "config"))
+from messages import USER_FACING_NOTICE
 PLUGIN_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 RESEARCH_MODE_SKILL_SEARCH_PATHS = [
@@ -99,14 +102,19 @@ def main() -> None:
     formatted_term_list = ", ".join(f'"{term}"' for term in found_hedging_terms)
-    research_mode_content = "(Could not load research-mode skill file)"
+    resolved_skill_path: str | None = None
     for each_skill_path in RESEARCH_MODE_SKILL_SEARCH_PATHS:
-        try:
-            with open(each_skill_path, encoding="utf-8") as skill_file:
-                research_mode_content = skill_file.read()
-                break
-        except OSError:
-            continue
+        if os.path.exists(each_skill_path):
+            resolved_skill_path = each_skill_path
+            break
+    if resolved_skill_path is not None:
+        skill_reference = f"under the research-mode constraints defined in:\n\n{resolved_skill_path}"
+    else:
+        skill_reference = (
+            "under research-mode constraints "
+            "(no research-mode skill installed; verify with sources or reply 'I don't know')"
+        )
     block_response = {
         "decision": "block",
@@ -114,12 +122,13 @@ def main() -> None:
             f"ANTI-HALLUCINATION GUARDRAIL: Your response contains hedging language: "
             f"{formatted_term_list}. "
             f"These words signal unverified claims. You MUST rewrite your response "
-            f"with these constraints active:\n\n"
-            f"{research_mode_content}\n\n"
+            f"{skill_reference}\n\n"
             f"Do NOT simply remove the hedging word and keep the unverified claim. "
             f"Either VERIFY it with a source or replace it with 'I don't know'.\n\n"
             f"You MUST re-output the complete, revised response with the corrections applied."
         ),
+        "systemMessage": USER_FACING_NOTICE,
+        "suppressOutput": True,
     }
     print(json.dumps(block_response))

package/hooks/blocking/test_hedging_language_blocker.py ADDED Viewed

@@ -0,0 +1,135 @@
+"""Tests for hedging_language_blocker hook response shape."""
+import importlib.util
+import json
+import os
+import subprocess
+import sys
+import tempfile
+HOOK_SCRIPT_PATH = os.path.join(os.path.dirname(__file__), "hedging_language_blocker.py")
+_HOOKS_DIR = os.path.dirname(HOOK_SCRIPT_PATH)
+_CONFIG_DIR = os.path.join(_HOOKS_DIR, "..", "config")
+if _HOOKS_DIR not in sys.path:
+    sys.path.insert(0, _HOOKS_DIR)
+if _CONFIG_DIR not in sys.path:
+    sys.path.insert(0, _CONFIG_DIR)
+import hedging_language_blocker
+from messages import USER_FACING_NOTICE
+RESEARCH_MODE_SKILL_BODY_MARKER = "Three anti-hallucination constraints are ALWAYS active."
+HEDGING_MESSAGE = "This is likely correct."
+CLEAN_MESSAGE = "This is verified by the source document."
+EMPTY_MESSAGE = ""
+def run_hook_with_message(assistant_message: str) -> subprocess.CompletedProcess:
+    hook_input_payload = json.dumps({"last_assistant_message": assistant_message})
+    return subprocess.run(
+        [sys.executable, HOOK_SCRIPT_PATH],
+        input=hook_input_payload,
+        capture_output=True,
+        text=True,
+        check=False,
+    )
+def run_hook_with_patched_search_paths(
+    assistant_message: str,
+    search_paths: list[str],
+) -> subprocess.CompletedProcess:
+    """Run the hook with RESEARCH_MODE_SKILL_SEARCH_PATHS overridden via a wrapper script."""
+    wrapper_script = (
+        "import sys, json, os\n"
+        f"sys.path.insert(0, {repr(os.path.dirname(HOOK_SCRIPT_PATH))})\n"
+        "import hedging_language_blocker as blocker\n"
+        f"blocker.RESEARCH_MODE_SKILL_SEARCH_PATHS = {repr(search_paths)}\n"
+        "blocker.main()\n"
+    )
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as wrapper_file:
+        wrapper_file.write(wrapper_script)
+        wrapper_file_path = wrapper_file.name
+    hook_input_payload = json.dumps({"last_assistant_message": assistant_message})
+    try:
+        completed_process = subprocess.run(
+            [sys.executable, wrapper_file_path],
+            input=hook_input_payload,
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+    finally:
+        os.unlink(wrapper_file_path)
+    return completed_process
+def test_user_facing_notice_importable_from_config_messages():
+    config_messages_path = os.path.join(_CONFIG_DIR, "messages.py")
+    specification = importlib.util.spec_from_file_location("messages", config_messages_path)
+    module = importlib.util.module_from_spec(specification)
+    specification.loader.exec_module(module)
+    assert module.USER_FACING_NOTICE == USER_FACING_NOTICE
+def test_hedging_message_emits_block_with_short_user_notice():
+    completed_process = run_hook_with_message(HEDGING_MESSAGE)
+    assert completed_process.returncode == 0
+    parsed_response = json.loads(completed_process.stdout)
+    assert parsed_response["decision"] == "block"
+    assert parsed_response["systemMessage"] == USER_FACING_NOTICE
+    assert parsed_response["suppressOutput"] is True
+    assert "likely" in parsed_response["reason"]
+def test_hedging_reason_contains_not_installed_notice_when_skill_absent():
+    completed_process = run_hook_with_patched_search_paths(
+        HEDGING_MESSAGE,
+        ["/nonexistent/path/one/SKILL.md", "/nonexistent/path/two/SKILL.md"],
+    )
+    assert completed_process.returncode == 0
+    parsed_response = json.loads(completed_process.stdout)
+    assert parsed_response["decision"] == "block"
+    assert "no research-mode skill installed" in parsed_response["reason"]
+    assert "verify with sources or reply" in parsed_response["reason"]
+    assert "SKILL.md" not in parsed_response["reason"]
+    assert RESEARCH_MODE_SKILL_BODY_MARKER not in parsed_response["reason"]
+def test_hedging_reason_contains_skill_path_when_skill_present():
+    with tempfile.TemporaryDirectory() as skill_dir:
+        skill_file_path = os.path.join(skill_dir, "SKILL.md")
+        with open(skill_file_path, "w") as skill_file:
+            skill_file.write("# Research Mode Skill\n")
+        completed_process = run_hook_with_patched_search_paths(
+            HEDGING_MESSAGE,
+            ["/nonexistent/path/SKILL.md", skill_file_path],
+        )
+    assert completed_process.returncode == 0
+    parsed_response = json.loads(completed_process.stdout)
+    assert parsed_response["decision"] == "block"
+    assert "SKILL.md" in parsed_response["reason"]
+    assert "no research-mode skill installed" not in parsed_response["reason"]
+    assert RESEARCH_MODE_SKILL_BODY_MARKER not in parsed_response["reason"]
+def test_clean_message_passes_through_with_no_output():
+    completed_process = run_hook_with_message(CLEAN_MESSAGE)
+    assert completed_process.returncode == 0
+    assert completed_process.stdout == ""
+def test_empty_message_passes_through_with_no_output():
+    completed_process = run_hook_with_message(EMPTY_MESSAGE)
+    assert completed_process.returncode == 0
+    assert completed_process.stdout == ""

package/hooks/blocking/test_pr_description_enforcer.py CHANGED Viewed

@@ -219,7 +219,10 @@ def test_read_body_file_rejects_relative_path_traversal(tmp_path) -> None:
     import os, pytest
     sentinel_file = tmp_path / 'secret.txt'
     sentinel_file.write_text('secret')
-    rel_path = os.path.relpath(str(sentinel_file))
+    try:
+        rel_path = os.path.relpath(str(sentinel_file))
+    except ValueError:
+        pytest.skip('tmp_path on different drive than cwd; relpath undefined on Windows')
     if '..' not in rel_path:
         pytest.skip('file is under cwd, not a traversal case')
     with pytest.raises(m.PathTraversalError):

package/hooks/config/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+# pragma: no-tdd-gate

package/hooks/config/messages.py ADDED Viewed

@@ -0,0 +1,4 @@
+# pragma: no-tdd-gate
+"""User-facing notice messages for blocking hooks."""
+USER_FACING_NOTICE = "Agent was found guessing - sourcing opinions..."

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "claude-dev-env",
-    "version": "1.26.3",
+    "version": "1.26.5",
     "description": "Claude Code development standards — rules, hooks, agents, commands, and skills",
     "type": "module",
     "bin": {

package/skills/bugteam/CONSTRAINTS.md CHANGED Viewed

@@ -4,6 +4,7 @@
 - **Agent teams required, not parallel subagents.** The skill MUST use Claude Code's agent teams feature (`CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1`). Spawning `code-quality-agent` and `clean-coder` as parallel subagents from the lead's context = fail; the clean-room property requires independent teammate sessions.
 - **Orchestrator-only `TeamCreate`.** Only the lead session (this session, when `/bugteam` is invoked) calls `TeamCreate`. Teammates never call `TeamCreate` — if a teammate's spawn prompt instructs it to, that is a skill defect. When additional parallel work is needed (e.g., parallel auditors from loop 4 onward, supplementary audit of adjacent files), the lead spawns additional teammates into the EXISTING team by passing the current `team_name` to every `Agent(...)` call. Multiple teammate "sets" live inside one team under one orchestrator. The runtime enforces this: `TeamCreate` called while the session already leads a team returns the error `Already leading team "<name>". A leader can only manage one team at a time. Use TeamDelete to end the current team before creating a new one.` — direct quote from the runtime's response when this invariant is violated.
+- **One team per invocation, multi-PR supported.** All PRs in a single /bugteam invocation share one team created by the orchestrator. Per-PR identity lives in the teammate name prefix (`bugfind-pr<N>-loop<L>` / `bugfix-pr<N>-loop<L>`) and the `<team_temp_dir>/pr-<N>/` subfolder containing that PR's git worktree, diff patches, and outcome XML files.
 - **Grant before any spawn, revoke before any return.** Step 0 grants project `.claude/**` permissions; Step 5 revokes. Both are mandatory. Revoke runs on every exit path including error, cap-reached, and stuck.
 - **Fresh teammate per loop.** Both bugfind and bugfix are spawned new each loop and shut down after their action. Reusing a teammate across loops accumulates context inside that teammate's window — defeats clean-room.
 - **One up-front confirmation = whole cycle.** The `/bugteam` invocation authorizes the entire cycle; every subsequent decision runs on that single authorization.

package/skills/bugteam/PROMPTS.md CHANGED Viewed

@@ -11,10 +11,14 @@ Keep the spawn prompt self-contained: reference only the PR scope, audit rubric,
   <base_branch>base ref</base_branch>
   <pr_url>full URL</pr_url>
   <loop>N</loop>
+  <pr_number>N</pr_number>
+  <worktree_path>absolute path from Step 1 per-PR workspace</worktree_path>
 </context>
+cd into `<worktree_path>` before any git, gh, or file operation.
 <scope>
-  <diff_path>Absolute path to the loop-N patch file under team_temp_dir from Step 2 (same path as gh pr diff redirect in AUDIT)</diff_path>
+  <diff_path>Absolute path to the per-PR patch file: <team_temp_dir>/pr-<N>/loop-<L>.patch (same path as gh pr diff redirect in AUDIT)</diff_path>
   <scope_rule>Audit only lines added or modified in the diff. Pre-existing code on untouched lines is out of scope.</scope_rule>
 </scope>
@@ -72,8 +76,8 @@ Keep the spawn prompt self-contained: reference only the PR scope, audit rubric,
 </comment_posting>
 <output_format>
-  Write the outcome XML below to .bugteam-loop-N.outcomes.xml in the
-  working directory. Return only that path on stdout. The schema:
+  Write the outcome XML below to .bugteam-pr<N>-loop<L>.outcomes.xml inside
+  the PR's worktree directory (<worktree_path>). Return only that path on stdout. The schema:
 </output_format>
 ```
@@ -100,7 +104,7 @@ Keep the spawn prompt self-contained: reference only the PR scope, audit rubric,
 </bugteam_audit>
 ```
-After the teammate writes the XML and returns, the lead reads `.bugteam-loop-<N>.outcomes.xml` with the `Read` tool, parses it, and populates `loop_comment_index` from `<finding>` elements.
+After the teammate writes the XML and returns, the lead reads `.bugteam-pr<N>-loop<L>.outcomes.xml` from the PR's worktree directory with the `Read` tool, parses it, and populates `loop_comment_index` from `<finding>` elements.
 ## FIX spawn-prompt XML (bugfix teammate)
@@ -111,8 +115,12 @@ After the teammate writes the XML and returns, the lead reads `.bugteam-loop-<N>
   <base_branch>base</base_branch>
   <pr_url>url</pr_url>
   <loop>N</loop>
+  <pr_number>N</pr_number>
+  <worktree_path>absolute path from Step 1 per-PR workspace</worktree_path>
 </context>
+cd into `<worktree_path>` before any git, gh, or file operation.
 <bugs_to_fix>
   [for each P0/P1/P2 finding from last_findings:]
   <bug
@@ -144,7 +152,7 @@ After the teammate writes the XML and returns, the lead reads `.bugteam-loop-<N>
      - "Could not address this loop: <one-line reason>" if you skipped or failed it
      - "Hook blocked the fix commit: <one-line summary>" if the commit was hook-blocked
      Use the Fix reply CLI shape from Step 2.5 (`jq -Rs | gh api .../comments/<id>/replies --input -`). Write every reply body to a temp file first.
-  7. Write `.bugteam-loop-<N>.outcomes.xml` (schema below) and return its path.
+  7. Write `.bugteam-pr<N>-loop<L>.outcomes.xml` inside `<worktree_path>` (schema below) and return its path.
 </execution>
 <outcome_xml_schema>

package/skills/bugteam/SKILL.md CHANGED Viewed

@@ -51,6 +51,7 @@ Refusals — first match wins; respond with the quoted line exactly and stop:
 - **No PR or upstream diff.** `No PR or upstream diff. /bugteam needs a target.`
 - **Dirty tree.** `Uncommitted changes detected. Stash, commit, or revert before /bugteam.`
 - **Missing subagents.** Before Step 0, confirm `code-quality-agent` and `clean-coder` exist. Else: `Required subagent type <name> not installed. /bugteam needs both code-quality-agent and clean-coder available.`
+- **Lead role must be held by the orchestrator.** Run /bugteam in the session that received the user's command. The orchestrator session calls TeamCreate directly. Runtime confirms a single lead per team: `Already leading team "<name>". A leader can only manage one team at a time.`
 ## Utility scripts
@@ -89,17 +90,23 @@ python "${CLAUDE_SKILL_DIR}/scripts/grant_project_claude_permissions.py"
 ### Step 1: Resolve PR scope (once)
-Same as `/findbugs`:
-1. `gh pr view --json number,baseRefName,headRefName,url`
-2. Else `git merge-base HEAD origin/<default>` then `git diff <merge-base>...HEAD`
-3. Else refuse above.
+Accept one or more PR numbers from the invocation. For each PR, run `gh pr view --json number,baseRefName,headRefName,url` (falling back to the merge-base diff path when no PR exists). Capture `all_prs = [{number, owner, repo, baseRef, headRef, url}, ...]`. A single-PR invocation produces a one-element list and follows the same downstream rules.
 Keep: owner/repo, branches, PR number, URL — for all loops.
+#### Per-PR workspace
+For each PR in all_prs:
+1. Create `<team_temp_dir>/pr-<N>/`.
+2. Run `git worktree add "<team_temp_dir>/pr-<N>/worktree" origin/<headRef>`.
+3. Record the absolute worktree path alongside the PR's other fields.
+Teammates spawned for a PR operate inside that PR's worktree. Step 4 teardown runs `git worktree remove "<team_temp_dir>/pr-<N>/worktree"` for each PR before `TeamDelete`.
 ### Step 2: Create the agent team
-Lead calls `TeamCreate`:
+**This session is the lead.** The orchestrator calls `TeamCreate` directly:
 ```
 TeamCreate(
@@ -109,7 +116,7 @@ TeamCreate(
 )
 ```
-**Team name:** `bugteam-pr-<number>-<YYYYMMDDHHMMSS>` or `bugteam-<sanitized-head>-<YYYYMMDDHHMMSS>` if no PR. Timestamp avoids collisions. `TeamCreate` implements natural-language team creation ([`sources.md`](sources.md) § Team creation in natural language).
+**Team name:** For a single-PR invocation use `bugteam-pr-<number>-<YYYYMMDDHHMMSS>`. For a multi-PR invocation use `bugteam-<YYYYMMDDHHMMSS>`. The timestamp is captured once at team-creation time. Apply the no-PR fallback (`bugteam-<sanitized-head>-<YYYYMMDDHHMMSS>`) only when no PR resolves at all. `TeamCreate` implements natural-language team creation ([`sources.md`](sources.md) § Team creation in natural language).
 **Sanitize head branch (no-PR only):** replace characters outside `[A-Za-z0-9._-]` with `-` (e.g. `feat/foo*bar` → `feat-foo-bar`). Apply once; reuse everywhere below.
@@ -186,7 +193,9 @@ jq -n \
 ### Step 3: The cycle
-Repeat until exit. **Gate:** `validate_content` / `hooks/blocking/code_rules_enforcer.py` on PR-scoped files before every AUDIT (`bugteam_code_rules_gate.py`). Lead runs gate; clean-coder clears failures; then bugfind audits.
+Run the AUDIT-FIX cycle for each PR in all_prs, reusing the same team across PRs. The 10-loop cap applies per PR. Exit reasons (converged, cap reached, stuck, error) are tracked per PR; the final report lists one outcome line per PR.
+**Gate:** `validate_content` / `hooks/blocking/code_rules_enforcer.py` on PR-scoped files before every AUDIT (`bugteam_code_rules_gate.py`). Lead runs gate; clean-coder clears failures; then bugfind audits.
 1. From `last_action` / `last_findings`:
    - `last_action == "audited"` and `last_findings.total == 0` → exit `converged`
@@ -213,29 +222,29 @@ First pass: pre-audit → AUDIT. After a FIX, the next pass runs pre-audit again
 ### AUDIT action
 ```bash
-mkdir -p "<team_temp_dir>"
-gh pr diff <number> -R <owner>/<repo> > "<team_temp_dir>/loop-<N>.patch"
+mkdir -p "<team_temp_dir>/pr-<N>"
+gh pr diff <N> -R <owner>/<repo> > "<team_temp_dir>/pr-<N>/loop-<L>.patch"
 ```
 ```
 Agent(
   subagent_type="code-quality-agent",
-  name="bugfind",
+  name="bugfind-pr<N>-loop<L>",
   team_name="<team_name>",
   model="sonnet",
-  description="Bugfind audit loop <N>",
+  description="Bugfind audit PR <N> loop <L>",
   prompt="<audit XML; see PROMPTS.md>"
 )
 ```
-Fresh `Agent` each loop; teammate context excludes lead history ([`sources.md`](sources.md) § Teammate context isolation). [`PROMPTS.md`](PROMPTS.md): XML + outcome schema. Lead reads `.bugteam-loop-<N>.outcomes.xml`, fills `loop_comment_index`.
+Fresh `Agent` each loop; teammate context excludes lead history ([`sources.md`](sources.md) § Teammate context isolation). [`PROMPTS.md`](PROMPTS.md): XML + outcome schema. Lead reads `.bugteam-pr<N>-loop<L>.outcomes.xml`, fills `loop_comment_index`.
 **Shutdown:** If `Agent` returned and the teammate already ended, skip. Otherwise:
 ```
 SendMessage(
-  to="bugfind",
-  message={"type": "shutdown_request", "reason": "audit loop <N> complete; outcome XML captured"}
+  to="bugfind-pr<N>-loop<L>",
+  message={"type": "shutdown_request", "reason": "audit PR <N> loop <L> complete; outcome XML captured"}
 )
 ```
@@ -243,24 +252,24 @@ SendMessage(
 `last_action = "audited"`; append audit line to `audit_log`.
-**Parallel auditors (`loop_count >= 4`):** gate passes immediately before; after three full audit/fix rounds without convergence, issue three `Agent` calls in one assistant message (parallel). `-a` posts the review and merges outcomes from `-b`/`-c` (read `.bugteam-loop-<N>.outcomes.xml` plus `<team_temp_dir>/loop-<N>-b.outcomes.xml` and `...-c...`); merge key `(file, line, category_letter)`; re-id `loopN-K`. `-b`/`-c` write sibling XML only; prompts must pass literal absolute sibling paths. Shutdown: parallel `SendMessage` to `b` and `c`, then `a`.
+**Parallel auditors (`loop_count >= 4`):** gate passes immediately before; after three full audit/fix rounds without convergence, issue three `Agent` calls in one assistant message (parallel). `-a` posts the review and merges outcomes from `-b`/`-c` (read `.bugteam-pr<N>-loop<L>.outcomes.xml` plus `<team_temp_dir>/pr-<N>/loop-<L>-b.outcomes.xml` and `...-c...`); merge key `(file, line, category_letter)`; re-id `loopN-K`. `-b`/`-c` write sibling XML only; prompts must pass literal absolute sibling paths. Shutdown: parallel `SendMessage` to `b` and `c`, then `a`.
 ### FIX action
 ```
 Agent(
   subagent_type="clean-coder",
-  name="bugfix",
+  name="bugfix-pr<N>-loop<L>",
   team_name="<team_name>",
   model="sonnet",
-  description="Bugfix loop <N>",
+  description="Bugfix PR <N> loop <L>",
   prompt="<fix XML; see PROMPTS.md>"
 )
 ```
 Pass finding comment URLs/ids from `loop_comment_index` in XML. Replies: `Fixed in <sha>` or `Could not address this loop: <reason>`.
-**Shutdown:** same as bugfind; else `SendMessage(to="bugfix", message={"type": "shutdown_request", "reason": "fix loop <N> complete; commit <sha7> pushed"})`. `approve: false` → `error: bugfix teammate refused shutdown` → Step 4 then 5.
+**Shutdown:** same as bugfind; else `SendMessage(to="bugfix-pr<N>-loop<L>", message={"type": "shutdown_request", "reason": "fix PR <N> loop <L> complete; commit <sha7> pushed"})`. `approve: false` → `error: bugfix teammate refused shutdown` → Step 4 then 5.
 [`PROMPTS.md`](PROMPTS.md): fix XML + schema. Verify: `git rev-parse HEAD` advanced; `git fetch origin <branch> && git rev-parse origin/<branch>` matches `HEAD`. Unchanged HEAD → `stuck — bugfix teammate could not address findings`.