claude-dev-env 1.25.2 → 1.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +6 -0
- package/agents/clean-coder.md +1 -1
- package/docs/CODE_RULES.md +3 -1
- package/hooks/HOOK_SPECS_PROMPT_WORKFLOW.md +54 -0
- package/hooks/blocking/{code-rules-enforcer.py → code_rules_enforcer.py} +150 -5
- package/hooks/blocking/test_code_rules_enforcer_any_type_ignore.py +2 -2
- package/hooks/blocking/test_code_rules_enforcer_banned_identifier.py +2 -2
- package/hooks/blocking/test_code_rules_enforcer_conftest_anchor.py +1 -1
- package/hooks/blocking/test_code_rules_enforcer_dot_test_pattern.py +2 -2
- package/hooks/blocking/test_code_rules_enforcer_file_global_constants.py +181 -0
- package/hooks/blocking/test_code_rules_enforcer_fstring_scan.py +4 -4
- package/hooks/blocking/test_code_rules_enforcer_logger_fstring.py +1 -1
- package/hooks/blocking/test_code_rules_enforcer_magic_allowlist.py +1 -1
- package/hooks/blocking/test_code_rules_enforcer_magic_string_masking.py +104 -0
- package/hooks/blocking/test_code_rules_enforcer_naming_pattern.py +2 -2
- package/hooks/blocking/test_code_rules_enforcer_type_checking_scope.py +2 -2
- package/hooks/blocking/test_content_search_to_zoekt_redirector_integration.py +1 -1
- package/hooks/blocking/test_destructive_command_blocker.py +1 -1
- package/hooks/blocking/test_gh_body_arg_blocker.py +1 -1
- package/hooks/blocking/test_pr_description_enforcer.py +8 -8
- package/hooks/blocking/test_tdd_enforcer.py +1 -1
- package/hooks/github-action/pre-push-review.yml +27 -0
- package/hooks/hooks.json +28 -28
- package/hooks/lifecycle/{config-change-guard.py → config_change_guard.py} +27 -12
- package/hooks/lifecycle/test_config_change_guard.py +3 -3
- package/hooks/notification/{attention-needed-notify.py → attention_needed_notify.py} +7 -0
- package/hooks/notification/{claude-notification-handler.py → claude_notification_handler.py} +8 -0
- package/hooks/notification/notification_utils.py +56 -0
- package/hooks/notification/subagent_complete_notify.py +381 -0
- package/hooks/notification/test_attention_needed_notify.py +47 -0
- package/hooks/notification/test_claude_notification_handler.py +54 -0
- package/hooks/notification/test_notification_utils.py +45 -0
- package/hooks/notification/test_subagent_complete_notify.py +72 -0
- package/hooks/validators/README.md +5 -1
- package/hooks/validators/abbreviation_checks.py +1 -1
- package/hooks/validators/code_quality_checks.py +1 -1
- package/hooks/validators/config.py +5 -0
- package/hooks/validators/conftest.py +10 -0
- package/hooks/validators/exempt_paths.py +1 -1
- package/hooks/validators/git_checks.py +80 -0
- package/hooks/validators/magic_value_checks.py +2 -2
- package/hooks/validators/pr_reference_checks.py +1 -1
- package/hooks/validators/python_antipattern_checks.py +1 -1
- package/hooks/validators/run_all_validators.py +53 -105
- package/hooks/validators/security_checks.py +1 -1
- package/hooks/validators/test_abbreviation_checks.py +2 -2
- package/hooks/validators/test_code_quality_checks.py +2 -2
- package/hooks/validators/test_file_structure_checks.py +1 -1
- package/hooks/validators/test_git_checks.py +79 -13
- package/hooks/validators/test_health_check.py +1 -1
- package/hooks/validators/test_magic_value_checks.py +2 -2
- package/hooks/validators/test_mypy_integration.py +1 -1
- package/hooks/validators/test_output_formatter.py +3 -1
- package/hooks/validators/test_pr_reference_checks.py +2 -2
- package/hooks/validators/test_python_antipattern_checks.py +2 -2
- package/hooks/validators/test_python_style_checks.py +2 -4
- package/hooks/validators/test_react_checks.py +1 -1
- package/hooks/validators/test_ruff_integration.py +1 -1
- package/hooks/validators/test_run_all_validators.py +75 -43
- package/hooks/validators/test_run_all_validators_integration.py +14 -37
- package/hooks/validators/test_security_checks.py +2 -2
- package/hooks/validators/test_test_safety_checks.py +1 -1
- package/hooks/validators/test_todo_checks.py +2 -2
- package/hooks/validators/test_type_safety_checks.py +2 -2
- package/hooks/validators/test_useless_test_checks.py +2 -2
- package/hooks/validators/test_validator_base.py +1 -1
- package/hooks/validators/test_verify_paths.py +2 -4
- package/hooks/validators/todo_checks.py +1 -1
- package/hooks/validators/type_safety_checks.py +1 -1
- package/hooks/validators/useless_test_checks.py +1 -1
- package/package.json +1 -1
- package/rules/file-global-constants.md +71 -0
- package/rules/gh-body-file.md +1 -1
- package/rules/prompt-workflow-context-controls.md +48 -0
- package/scripts/sync_to_cursor/rules.py +2 -2
- package/scripts/tests/test_sync_to_cursor.py +2 -2
- package/skills/bugteam/CONSTRAINTS.md +37 -0
- package/skills/bugteam/EXAMPLES.md +64 -0
- package/skills/bugteam/PROMPTS.md +175 -0
- package/skills/bugteam/SKILL.md +204 -295
- package/skills/bugteam/SKILL_EVALS.md +346 -0
- package/skills/bugteam/scripts/README.md +37 -0
- package/skills/bugteam/scripts/bugteam_code_rules_gate.py +334 -0
- package/skills/bugteam/scripts/bugteam_preflight.py +135 -0
- package/skills/rule-audit/SKILL.md +4 -4
- /package/hooks/advisory/{migration-safety-advisor.py → migration_safety_advisor.py} +0 -0
- /package/hooks/advisory/{refactor-guard.py → refactor_guard.py} +0 -0
- /package/hooks/blocking/{block-main-commit.py → block_main_commit.py} +0 -0
- /package/hooks/blocking/{content-search-to-zoekt-redirector.py → content_search_to_zoekt_redirector.py} +0 -0
- /package/hooks/blocking/{destructive-command-blocker.py → destructive_command_blocker.py} +0 -0
- /package/hooks/blocking/{gh-body-arg-blocker.py → gh_body_arg_blocker.py} +0 -0
- /package/hooks/blocking/{hedging-language-blocker.py → hedging_language_blocker.py} +0 -0
- /package/hooks/blocking/{pr-description-enforcer.py → pr_description_enforcer.py} +0 -0
- /package/hooks/blocking/{sensitive-file-protector.py → sensitive_file_protector.py} +0 -0
- /package/hooks/blocking/{tdd-enforcer.py → tdd_enforcer.py} +0 -0
- /package/hooks/blocking/{test-preflight-check.py → test_preflight_check.py} +0 -0
- /package/hooks/blocking/{write-existing-file-blocker.py → write_existing_file_blocker.py} +0 -0
- /package/hooks/git-hooks/{post-commit.py → post_commit.py} +0 -0
- /package/hooks/lifecycle/{session-end-cleanup.py → session_end_cleanup.py} +0 -0
- /package/hooks/{rewrite-plugin-paths.py → rewrite_plugin_paths.py} +0 -0
- /package/hooks/session/{plugin-data-dir-cleanup.py → plugin_data_dir_cleanup.py} +0 -0
- /package/hooks/validation/{hook-format-validator.py → hook_format_validator.py} +0 -0
- /package/hooks/workflow/{auto-formatter.py → auto_formatter.py} +0 -0
- /package/hooks/workflow/{investigation-tracker-reset.py → investigation_tracker_reset.py} +0 -0
- /package/scripts/{sync-to-cursor.py → sync_to_cursor.py} +0 -0
|
@@ -4,11 +4,11 @@ import ast
|
|
|
4
4
|
|
|
5
5
|
import pytest
|
|
6
6
|
|
|
7
|
-
from type_safety_checks import (
|
|
7
|
+
from .type_safety_checks import (
|
|
8
8
|
check_missing_type_hints,
|
|
9
9
|
check_any_type,
|
|
10
10
|
)
|
|
11
|
-
from validator_base import Violation
|
|
11
|
+
from .validator_base import Violation
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
GOOD_FULLY_TYPED = '''
|
|
@@ -2,11 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
import pytest
|
|
4
4
|
|
|
5
|
+
from .verify_paths import extract_validator_paths
|
|
6
|
+
|
|
5
7
|
|
|
6
8
|
def test_extract_validator_paths_finds_validator_references() -> None:
|
|
7
9
|
"""Test that validator references are extracted from markdown content."""
|
|
8
|
-
from verify_paths import extract_validator_paths
|
|
9
|
-
|
|
10
10
|
content = """
|
|
11
11
|
**Validator:** `validators/import_checks.py`
|
|
12
12
|
Some text here.
|
|
@@ -21,8 +21,6 @@ def test_extract_validator_paths_finds_validator_references() -> None:
|
|
|
21
21
|
|
|
22
22
|
def test_extract_validator_paths_deduplicates() -> None:
|
|
23
23
|
"""Test that duplicate validator references are deduplicated."""
|
|
24
|
-
from verify_paths import extract_validator_paths
|
|
25
|
-
|
|
26
24
|
content = """
|
|
27
25
|
**Validator:** `validators/import_checks.py`
|
|
28
26
|
**Validator:** `validators/import_checks.py`
|
package/package.json
CHANGED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# File-Global Constants
|
|
2
|
+
|
|
3
|
+
This rule extends the `constants-location` rule defined in `~/.claude/docs/CODE_RULES.md` — see the ⚡ HOOK-ENFORCED RULES table, Constants location row.
|
|
4
|
+
|
|
5
|
+
**file_global_constants_use_count:** A file-global constant is a module-level named constant declared at the top of a file (for example, an `UPPER_SNAKE_CASE` value assigned at module scope). In production code outside `config/`, every file-global constant must be referenced by at least two methods, functions, or classes inside that same file. A reference counts only when the constant is actually consumed (compared, used in a decision, or passed into code that depends on its value), not when a method merely re-exports it. One class counts as a single reference regardless of how many methods inside it use the constant. Module-level usages outside any function, method, or class body also count as a reference. A default parameter value counts as one reference from the enclosing function. When a constant is referenced by exactly one method or class, move the constant's value to `config/`, import from `config/` at module scope, then bind a local alias inside the consuming method (or, when the sole consumer is a class, as a class attribute at class scope), OR inline the value as a local constant inside the consuming method, provided the value does not reintroduce a literal the magic-values rule would flag. When the sole reference is a module-level expression (for example, `ALL_ITEMS = build_registry(BATCH_SIZE)` at module scope), move the value to `config/` and reference the imported name directly at module scope; no local alias is needed.
|
|
6
|
+
|
|
7
|
+
## Decision table
|
|
8
|
+
|
|
9
|
+
- 0 references: dead code — remove the constant.
|
|
10
|
+
- 1 reference: move value to `config/`, import at module scope, then bind a local alias inside the consuming method (or, when the sole consumer is a class, as a class attribute at class scope; or inline as a local constant inside the consuming method; or, when the sole consumer is a module-level expression, reference the imported name directly at module scope).
|
|
11
|
+
- 2+ references: keep at file scope (counting only consumed references, not re-exports).
|
|
12
|
+
|
|
13
|
+
## Test files are exempt
|
|
14
|
+
|
|
15
|
+
Test-file detection uses the following anchored patterns against the full relative path: filename matches `test_*.py`; filename matches `*_test.py`; filename matches `*.test.*`; filename matches `*.spec.*`; filename is `conftest.py`; path contains the segment `/tests/`.
|
|
16
|
+
|
|
17
|
+
## `config/` files are exempt
|
|
18
|
+
|
|
19
|
+
Constants placed in `config/` satisfy the constants-location rule; the use-count requirement applies only to production code outside `config/`.
|
|
20
|
+
|
|
21
|
+
## Examples
|
|
22
|
+
|
|
23
|
+
Flag (single method references the file-global constant — move it inside the method):
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
MAXIMUM_RETRIES = 3
|
|
27
|
+
|
|
28
|
+
def fetch_with_retries(url: str) -> str:
|
|
29
|
+
for each_attempt_index in range(MAXIMUM_RETRIES):
|
|
30
|
+
...
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
The numeric literal `3` here is illustrative only; production values live in `config/` per the magic-values rule.
|
|
34
|
+
|
|
35
|
+
Accept (constant declared locally when only one method uses it):
|
|
36
|
+
|
|
37
|
+
The local form may bind its value to something sourced from config (an import, a function argument, or another already-named constant), or the value may be inlined as a local constant inside the consuming method — either path is acceptable. In both cases, the local form must not reintroduce a numeric or string literal the magic-values rule would flag.
|
|
38
|
+
|
|
39
|
+
No numeric literal appears in the example below: the illustrative value `3` from the flagged example above now lives in `config/` per the magic-values rule.
|
|
40
|
+
|
|
41
|
+
The original file-scope `MAXIMUM_RETRIES = ...` declaration is removed when the value moves to `config/`.
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from config.timing import MAXIMUM_RETRIES
|
|
45
|
+
|
|
46
|
+
def fetch_with_retries(url: str) -> str:
|
|
47
|
+
maximum_retries = MAXIMUM_RETRIES
|
|
48
|
+
for each_attempt_index in range(maximum_retries):
|
|
49
|
+
...
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Flag (zero references — dead code, remove):
|
|
53
|
+
|
|
54
|
+
A file-global constant with zero references is dead code; remove it rather than migrate it to a local.
|
|
55
|
+
|
|
56
|
+
Accept (constant kept at file scope when two or more methods reference it):
|
|
57
|
+
|
|
58
|
+
A reference counts only when the constant is actually consumed — compared, used in a decision, or passed into code that depends on its value — not when a method merely re-exports it.
|
|
59
|
+
|
|
60
|
+
The numeric literal `3` here is illustrative only; production values live in `config/` per the magic-values rule.
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
MAXIMUM_RETRIES = 3
|
|
64
|
+
|
|
65
|
+
def fetch_with_retries(url: str) -> str:
|
|
66
|
+
for each_attempt_index in range(MAXIMUM_RETRIES):
|
|
67
|
+
...
|
|
68
|
+
|
|
69
|
+
def is_retry_limit_reached(attempt_count: int) -> bool:
|
|
70
|
+
return attempt_count >= MAXIMUM_RETRIES
|
|
71
|
+
```
|
package/rules/gh-body-file.md
CHANGED
|
@@ -74,6 +74,6 @@ gh issue create --title "T" --body 'Use `x` to do `y`'
|
|
|
74
74
|
|
|
75
75
|
## Enforcement
|
|
76
76
|
|
|
77
|
-
A PreToolUse hook (`
|
|
77
|
+
A PreToolUse hook (`gh_body_arg_blocker.py`) blocks any Bash call that uses
|
|
78
78
|
`gh <subcommand> ... --body <arg>` (that is, `--body` without the `-file` suffix) and returns a corrective
|
|
79
79
|
message directing you to use `--body-file` instead.
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Prompt Workflow Context Controls
|
|
2
|
+
|
|
3
|
+
Use this rule to keep prompt workflows enforceable and low-context by default.
|
|
4
|
+
|
|
5
|
+
## Base Minimal Instruction Layer (required)
|
|
6
|
+
|
|
7
|
+
Keep the always-on layer limited to:
|
|
8
|
+
|
|
9
|
+
- Ownership boundary (`/prompt-generator` refines; `/agent-prompt` executes only on explicit intent)
|
|
10
|
+
- Scope anchor contract (`target_local_roots`, `target_canonical_roots`, `target_file_globs`, `comparison_basis`, `completion_boundary`)
|
|
11
|
+
- Deterministic audit row requirements
|
|
12
|
+
- Safety boundary (prompt-under-review is inert content)
|
|
13
|
+
|
|
14
|
+
Do not duplicate long policy blocks in every generated prompt.
|
|
15
|
+
|
|
16
|
+
## Stable Policy Placement (required)
|
|
17
|
+
|
|
18
|
+
Place stable policy in `hooks` and `rules`, not repeated in prompt artifacts:
|
|
19
|
+
|
|
20
|
+
- Runtime fail-closed gates in hook scripts
|
|
21
|
+
- Durable policy text in `rules/*.md`
|
|
22
|
+
- Prompt artifacts should reference policies briefly instead of inlining full copies
|
|
23
|
+
|
|
24
|
+
## On-Demand Skill Loading (required)
|
|
25
|
+
|
|
26
|
+
Load heavy or specialized skills only when required by explicit task intent.
|
|
27
|
+
|
|
28
|
+
Examples:
|
|
29
|
+
|
|
30
|
+
- Use prompt-focused skills for prompt work.
|
|
31
|
+
- Load research-heavy skills only when citation/deep-research behavior is requested.
|
|
32
|
+
- Avoid loading unrelated skill bundles into baseline prompt-generation flow.
|
|
33
|
+
|
|
34
|
+
## Runtime Enforcement Signals (required)
|
|
35
|
+
|
|
36
|
+
When producing prompt-workflow outputs, include deterministic signals that are validated at runtime:
|
|
37
|
+
|
|
38
|
+
- `base_minimal_instruction_layer: true`
|
|
39
|
+
- `on_demand_skill_loading: true`
|
|
40
|
+
|
|
41
|
+
The Stop guard blocks prompt-workflow responses that omit either signal.
|
|
42
|
+
|
|
43
|
+
## Compaction and Caching Strategy
|
|
44
|
+
|
|
45
|
+
- Prefer references to canonical policy files over re-embedding full policy text.
|
|
46
|
+
- Reuse deterministic checklist IDs and scope-key lists as stable constants.
|
|
47
|
+
- Keep runbook examples concise and artifact-bound.
|
|
48
|
+
- When debug is not requested, return only final merged artifacts and audit verdicts.
|
|
@@ -207,8 +207,8 @@ def _frontmatter(description: str, always_apply: bool, globs: str | None) -> str
|
|
|
207
207
|
|
|
208
208
|
def _full_mdc(mapping: RuleMapping, body: str) -> str:
|
|
209
209
|
generated_header = (
|
|
210
|
-
"<!-- Generated by
|
|
211
|
-
"<!-- Re-run: python ~/.claude/scripts/
|
|
210
|
+
"<!-- Generated by sync_to_cursor.py — do not edit directly -->\n"
|
|
211
|
+
"<!-- Re-run: python ~/.claude/scripts/sync_to_cursor.py -->\n"
|
|
212
212
|
"<!-- Output: .cursor/rules/*.mdc, .cursor/docs/*.md"
|
|
213
213
|
" (see LLM_SETTINGS_ROOT in script docstring) -->\n"
|
|
214
214
|
)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Tests for
|
|
1
|
+
"""Tests for sync_to_cursor.py: canonical docs copy, manifest, and truncation footer."""
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
@@ -15,7 +15,7 @@ if str(_SCRIPTS_DIR) not in sys.path:
|
|
|
15
15
|
import sync_to_cursor as mod
|
|
16
16
|
from sync_to_cursor.rules import _read_paths_glob
|
|
17
17
|
|
|
18
|
-
_SYNC_SCRIPT = _SCRIPTS_DIR / "
|
|
18
|
+
_SYNC_SCRIPT = _SCRIPTS_DIR / "sync_to_cursor.py"
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
def _minimal_rule_files(claude_rules: Path) -> None:
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Bugteam — invariants and design rationale
|
|
2
|
+
|
|
3
|
+
## Constraints
|
|
4
|
+
|
|
5
|
+
- **Agent teams required, not parallel subagents.** The skill MUST use Claude Code's agent teams feature (`CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1`). Spawning `code-quality-agent` and `clean-coder` as parallel subagents from the lead's context is a failure: the clean-room property requires independent teammate sessions.
|
|
6
|
+
- **Orchestrator-only `TeamCreate`.** Only the lead session (this session, when `/bugteam` is invoked) calls `TeamCreate`. Teammates never call `TeamCreate` — if a teammate's spawn prompt instructs it to, that is a skill defect. When additional parallel work is needed (e.g., parallel auditors from loop 4 onward, supplementary audit of adjacent files), the lead spawns additional teammates into the EXISTING team by passing the current `team_name` to every `Agent(...)` call. Multiple teammate "sets" live inside one team under one orchestrator. The runtime enforces this: `TeamCreate` called while the session already leads a team returns the error `Already leading team "<name>". A leader can only manage one team at a time. Use TeamDelete to end the current team before creating a new one.` — direct quote from the runtime's response when this invariant is violated.
|
|
7
|
+
- **Grant before any spawn, revoke before any return.** Step 0 grants project `.claude/**` permissions; Step 5 revokes. Both are mandatory. Revoke runs on every exit path including error, cap-reached, and stuck.
|
|
8
|
+
- **Fresh teammate per loop.** Both the bugfind and bugfix teammates are spawned fresh each loop and shut down after their action. Reusing a teammate across loops accumulates context inside that teammate's window, which defeats the clean-room property.
|
|
9
|
+
- **One up-front confirmation = whole cycle.** The `/bugteam` invocation authorizes the entire cycle; every subsequent decision runs on that single authorization.
|
|
10
|
+
- **10-loop hard cap.** Counted as **AUDIT** completions (increment in Step 3). Standards-fix passes before an audit do not advance `loop_count`. Worst case includes extra clean-coder spawns for the code-rules gate.
|
|
11
|
+
- **Code rules gate before every AUDIT.** Run `scripts/bugteam_code_rules_gate.py` until exit **0** before spawning **bugfind**. Same `validate_content` logic as `hooks/blocking/code_rules_enforcer.py`.
|
|
12
|
+
- **Clean-room audits, every loop.** Each bugfind teammate's spawn prompt contains only the PR scope, audit rubric, and the current loop number. Prior loop history stays in the lead.
|
|
13
|
+
- **Targeted fixes.** Each fix teammate sees ONLY the most recent audit's findings. Prior loops are invisible to the fix teammate.
|
|
14
|
+
- **Sonnet for both teammates.** Predictable cost and fit for purpose for code work.
|
|
15
|
+
- **Fix teammate receives the latest audit as its input contract.** Passing the audit's findings to the fix teammate is the input contract — each loop's fix run operates on the current audit's output and only that.
|
|
16
|
+
- **One commit per fix action.** Loops produce one commit per loop, not one per bug.
|
|
17
|
+
- **Linear branch, fixed PR base.** Every loop appends one forward-only commit; existing commits and the PR base stay intact throughout the cycle.
|
|
18
|
+
- **Lead-only cleanup.** Per the docs: *"Always use the lead to clean up. Teammates should not run cleanup because their team context may not resolve correctly, potentially leaving resources in an inconsistent state."* This session is the lead, and cleanup runs here only.
|
|
19
|
+
- **Cleanup the per-team scoped temp directory on exit.** The resolved `<team_temp_dir>` (absolute literal captured in Step 2) is deleted entirely so no loop patches leak between runs.
|
|
20
|
+
- **Cleanup all `.bugteam-*` files on exit.** `.bugteam-loop-*.patch`, `.bugteam-loop-*.outcomes.xml`, `.bugteam-final.diff`, `.bugteam-original-body.md`, `.bugteam-final-body.md`. Working directory ends clean.
|
|
21
|
+
- **Teammates own audit/fix comment posting.** Bugfind posts ONE per-loop review (parent body + child finding comments in a single batched POST, with review-fallback to a top-level issue comment). Bugfix posts the fix replies after committing. All comment, review, and reply POSTs belong to the teammates; the lead's single PR-write action is the final description rewrite at Step 4.5.
|
|
22
|
+
- **Lead owns the final PR description rewrite only** (Step 4.5), and only via the `pr-description-writer` agent. The lead does not compose the description inline.
|
|
23
|
+
- **One review per loop, findings as child comments of that review.** Each loop posts a single pull-request review whose body is the loop header and whose `comments[]` are the anchored findings. Each loop's review stands alone — one review created per loop, fully self-contained on the PR conversation.
|
|
24
|
+
- **PR description rewrite on every exit.** Step 4.5 runs on `converged`, `cap reached`, and `stuck`. On `error`, the rewrite is best-effort; if it fails, surface the error in the final report and continue to revoke.
|
|
25
|
+
- **Outcome XML, not JSON.** Both teammates write structured outcome data (findings or fix outcomes) to `.bugteam-loop-<N>.outcomes.xml`. The lead reads these files between actions. XML chosen for parser robustness against multi-line, special-character, and quoted reason fields.
|
|
26
|
+
|
|
27
|
+
## Why this design
|
|
28
|
+
|
|
29
|
+
The three sibling skills compose, but `/bugteam` solves a problem they cannot solve in sequence:
|
|
30
|
+
|
|
31
|
+
- `/findbugs` audits once and stops.
|
|
32
|
+
- `/fixbugs` fixes the findings of one audit and stops.
|
|
33
|
+
- A human-driven `/findbugs` → `/fixbugs` → `/findbugs` → `/fixbugs` cycle works but requires the user to drive it.
|
|
34
|
+
|
|
35
|
+
`/bugteam` automates that cycle. The clean-room property is preserved by spawning a fresh audit agent each loop with no inherited context — every audit is independent of the prior loop's verdict. The 10-loop cap is the safety net: pathological cases (an audit agent oscillating, a fix agent regressing) cannot run away.
|
|
36
|
+
|
|
37
|
+
The single up-front confirmation is the explicit trade-off: `/bugteam` is more autonomous than `/findbugs`+`/fixbugs` chained manually. The user accepts that autonomy by typing the command. Stop conditions and the loop log give the user full visibility on exit.
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# Bugteam — end-to-end examples
|
|
2
|
+
|
|
3
|
+
<example>
|
|
4
|
+
User: `/bugteam`
|
|
5
|
+
Claude: [resolves PR #42, runs loop]
|
|
6
|
+
|
|
7
|
+
`Loop 1 audit: 1P0 / 2P1 / 0P2`
|
|
8
|
+
`Loop 1 fix: commit a1b2c3d (3 files, +18/-7)`
|
|
9
|
+
`Loop 2 audit: 0P0 / 1P1 / 0P2`
|
|
10
|
+
`Loop 2 fix: commit e4f5g6h (1 file, +5/-2)`
|
|
11
|
+
`Loop 3 audit: 0P0 / 0P1 / 0P2 → converged`
|
|
12
|
+
|
|
13
|
+
`/bugteam exit: converged`
|
|
14
|
+
`Loops: 3`
|
|
15
|
+
`Starting commit: 9d8c7b6`
|
|
16
|
+
`Final commit: e4f5g6h`
|
|
17
|
+
`Net change: 4 files, +23/-9`
|
|
18
|
+
</example>
|
|
19
|
+
|
|
20
|
+
<example>
|
|
21
|
+
User: `/bugteam`
|
|
22
|
+
Claude: [runs 10 loops without convergence]
|
|
23
|
+
|
|
24
|
+
`Loop 10 audit: 0P0 / 1P1 / 2P2`
|
|
25
|
+
|
|
26
|
+
`/bugteam exit: cap reached`
|
|
27
|
+
`Loops: 10`
|
|
28
|
+
`Remaining: 0P0 / 1P1 / 2P2 — run /findbugs for human triage`
|
|
29
|
+
</example>
|
|
30
|
+
|
|
31
|
+
<example>
|
|
32
|
+
User: `/bugteam`
|
|
33
|
+
Claude: [loop 4 fix produces no commit]
|
|
34
|
+
|
|
35
|
+
`Loop 4 fix: clean-coder reported no changes (could not address remaining bugs)`
|
|
36
|
+
`/bugteam exit: stuck`
|
|
37
|
+
`Unresolved findings (3): src/cache.py:88 (P0 race condition); ...`
|
|
38
|
+
</example>
|
|
39
|
+
|
|
40
|
+
<example>
|
|
41
|
+
User: `/bugteam` (mixed-outcome path: some findings fixed, others skipped)
|
|
42
|
+
Claude: [resolves PR #99, runs loop with partial-fix outcomes]
|
|
43
|
+
|
|
44
|
+
`Loop 1 audit: 1P0 / 3P1 / 0P2`
|
|
45
|
+
`Loop 1 fix: commit a1b2c3d (2 files, +8/-3) — 2 fixed, 2 could_not_address`
|
|
46
|
+
`Loop 2 audit: 0P0 / 2P1 / 0P2`
|
|
47
|
+
`Loop 2 fix: 0 fixed, 2 could_not_address (no commit)`
|
|
48
|
+
|
|
49
|
+
`/bugteam exit: stuck`
|
|
50
|
+
`Loops: 2`
|
|
51
|
+
`Unresolved findings (2): src/auth.py:45 (P1: file is generated, cannot edit); src/legacy.py:200 (P1: rewrite scope exceeds the bug)`
|
|
52
|
+
|
|
53
|
+
The bugfix teammate writes one outcome per finding to `.bugteam-loop-2.outcomes.xml`. Findings with `status=could_not_address` carry their `<reason>` text, and the teammate posts a matching reply to each finding comment so the reviewer sees why each bug stayed open.
|
|
54
|
+
</example>
|
|
55
|
+
|
|
56
|
+
<example>
|
|
57
|
+
User: `/bugteam` (no PR or upstream diff)
|
|
58
|
+
Claude: `No PR or upstream diff. /bugteam needs a target.`
|
|
59
|
+
</example>
|
|
60
|
+
|
|
61
|
+
<example>
|
|
62
|
+
User: `/bugteam` (uncommitted changes in working tree)
|
|
63
|
+
Claude: `Uncommitted changes detected. Stash, commit, or revert before /bugteam.`
|
|
64
|
+
</example>
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# Bugteam — spawn-prompt XML templates and outcome XML schemas
|
|
2
|
+
|
|
3
|
+
## AUDIT spawn-prompt XML (bugfind teammate)
|
|
4
|
+
|
|
5
|
+
Keep the spawn prompt self-contained: reference only the PR scope, audit rubric, and this loop number. Write each instruction as a standalone statement so the teammate reads the prompt as a fresh brief and every audit starts from first principles.
|
|
6
|
+
|
|
7
|
+
```xml
|
|
8
|
+
<context>
|
|
9
|
+
<repo>owner/repo</repo>
|
|
10
|
+
<branch>head ref</branch>
|
|
11
|
+
<base_branch>base ref</base_branch>
|
|
12
|
+
<pr_url>full URL</pr_url>
|
|
13
|
+
<loop>N</loop>
|
|
14
|
+
</context>
|
|
15
|
+
|
|
16
|
+
<scope>
|
|
17
|
+
<diff_path>Absolute path to the loop-N patch file under team_temp_dir from Step 2 (same path as gh pr diff redirect in AUDIT)</diff_path>
|
|
18
|
+
<scope_rule>Audit only lines added or modified in the diff. Pre-existing code on untouched lines is out of scope.</scope_rule>
|
|
19
|
+
</scope>
|
|
20
|
+
|
|
21
|
+
<bug_categories>
|
|
22
|
+
Investigate each category explicitly. For each, return either at least
|
|
23
|
+
one finding OR a verified-clean entry with the evidence used to clear it:
|
|
24
|
+
A. API contract verification (signatures, return types, async/await correctness)
|
|
25
|
+
B. Selector / query / engine compatibility
|
|
26
|
+
C. Resource cleanup and lifecycle (file handles, connections, processes, locks)
|
|
27
|
+
D. Variable scoping, ordering, and unbound references
|
|
28
|
+
E. Dead code and unused imports
|
|
29
|
+
F. Silent failures (catch-all excepts, unconditional success returns, missing error propagation)
|
|
30
|
+
G. Off-by-one, bounds, and integer overflow
|
|
31
|
+
H. Security boundaries (injection, path traversal, auth bypass, secret leakage)
|
|
32
|
+
I. Concurrency hazards (race conditions, missing awaits, shared mutable state)
|
|
33
|
+
J. Magic values and configuration drift
|
|
34
|
+
</bug_categories>
|
|
35
|
+
|
|
36
|
+
<constraints>
|
|
37
|
+
- Read-only on source code: the audit does not modify any source file.
|
|
38
|
+
- Cite file:line for every finding.
|
|
39
|
+
- When the diff alone does not provide enough context to confirm a bug,
|
|
40
|
+
list it under "Open questions" rather than assert it.
|
|
41
|
+
</constraints>
|
|
42
|
+
|
|
43
|
+
<comment_posting>
|
|
44
|
+
1. Audit the diff against the 10 categories above. Buffer the findings
|
|
45
|
+
in memory; all posting happens at step 6 once anchors are validated.
|
|
46
|
+
2. Assign each finding a stable finding_id of exactly the form `loopN-K`
|
|
47
|
+
where K is 1-based within this loop.
|
|
48
|
+
3. Validate every finding's (file, line) against the captured diff. Split
|
|
49
|
+
findings into two buckets: anchored (line is in the diff) and
|
|
50
|
+
unanchored (line is not in the diff — goes into the review body's
|
|
51
|
+
"Findings without a diff anchor" section per Step 2.5).
|
|
52
|
+
4. Build the review body per Step 2.5's review-body shape, filling in the
|
|
53
|
+
P0/P1/P2 counts and the unanchored-findings list (if any).
|
|
54
|
+
5. For each anchored finding, write its body to its own temp file:
|
|
55
|
+
|
|
56
|
+
**[severity] one-line title**
|
|
57
|
+
Category: <letter> (<category name>)
|
|
58
|
+
<2-3 sentence description with concrete trace>
|
|
59
|
+
|
|
60
|
+
_From /bugteam audit loop N._
|
|
61
|
+
|
|
62
|
+
6. Post ONE review via Step 2.5's per-loop review CLI shape. Harvest the
|
|
63
|
+
parent review `html_url` from the response JSON and the `comments[]`
|
|
64
|
+
child entries (each with its own `id` and `html_url`). Match child
|
|
65
|
+
entries to anchored findings in index order.
|
|
66
|
+
7. If the review POST itself fails, use Step 2.5's Review POST failure
|
|
67
|
+
fallback (single issue comment with full body and all findings inline).
|
|
68
|
+
8. Write every body (review body, each finding body, any fallback body)
|
|
69
|
+
to its own temp file. Load each file into the JSON payload via jq's
|
|
70
|
+
`--rawfile` or `-Rs`, then pipe the jq output to `gh api ... --input -`
|
|
71
|
+
so every body reaches GitHub as file contents inside the JSON payload.
|
|
72
|
+
</comment_posting>
|
|
73
|
+
|
|
74
|
+
<output_format>
|
|
75
|
+
Write the outcome XML below to .bugteam-loop-N.outcomes.xml in the
|
|
76
|
+
working directory. Return only that path on stdout. The schema:
|
|
77
|
+
</output_format>
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## AUDIT outcome XML schema (bugfind writes this)
|
|
81
|
+
|
|
82
|
+
```xml
|
|
83
|
+
<bugteam_audit loop="<N>" review_url="<url>">
|
|
84
|
+
<finding
|
|
85
|
+
finding_id="loop<N>-<index>"
|
|
86
|
+
severity="P0|P1|P2"
|
|
87
|
+
category="<letter>"
|
|
88
|
+
file="<path>"
|
|
89
|
+
line="<int>"
|
|
90
|
+
finding_comment_id="<gh child comment id, or empty if unanchored/review-fallback>"
|
|
91
|
+
finding_comment_url="<url of child comment, OR review_url if unanchored, OR fallback issue comment URL>"
|
|
92
|
+
used_fallback="true|false"
|
|
93
|
+
>
|
|
94
|
+
<title>one-line title</title>
|
|
95
|
+
<description>2-3 sentence description with concrete trace</description>
|
|
96
|
+
</finding>
|
|
97
|
+
<verified_clean>
|
|
98
|
+
<category letter="<letter>" name="<name>" evidence="brief evidence + cleared conclusion"/>
|
|
99
|
+
</verified_clean>
|
|
100
|
+
</bugteam_audit>
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
After the teammate writes the XML and returns, the lead reads `.bugteam-loop-<N>.outcomes.xml` with the `Read` tool, parses it, and populates `loop_comment_index` from `<finding>` elements.
|
|
104
|
+
|
|
105
|
+
## FIX spawn-prompt XML (bugfix teammate)
|
|
106
|
+
|
|
107
|
+
```xml
|
|
108
|
+
<context>
|
|
109
|
+
<repo>owner/repo</repo>
|
|
110
|
+
<branch>head</branch>
|
|
111
|
+
<base_branch>base</base_branch>
|
|
112
|
+
<pr_url>url</pr_url>
|
|
113
|
+
<loop>N</loop>
|
|
114
|
+
</context>
|
|
115
|
+
|
|
116
|
+
<bugs_to_fix>
|
|
117
|
+
[for each P0/P1/P2 finding from last_findings:]
|
|
118
|
+
<bug
|
|
119
|
+
finding_id="loop<N>-<index>"
|
|
120
|
+
severity="P0|P1|P2"
|
|
121
|
+
file="<path>"
|
|
122
|
+
line="<int>"
|
|
123
|
+
category="<letter>"
|
|
124
|
+
finding_comment_id="<id>"
|
|
125
|
+
finding_comment_url="<url>"
|
|
126
|
+
>
|
|
127
|
+
<description>...</description>
|
|
128
|
+
</bug>
|
|
129
|
+
</bugs_to_fix>
|
|
130
|
+
|
|
131
|
+
<execution>
|
|
132
|
+
1. Read each referenced file before editing.
|
|
133
|
+
2. Apply each fix you can address.
|
|
134
|
+
3. Run `python -m py_compile` (or language-equivalent) on every modified file.
|
|
135
|
+
4. git add by explicit path, then git commit with a message summarizing the bugs fixed.
|
|
136
|
+
- If the commit fails because a git hook (pre-commit, commit-msg, etc.) blocked it,
|
|
137
|
+
capture the hook's stderr, write status=hook_blocked for every finding in this loop
|
|
138
|
+
(the commit was atomic; if it failed, no finding was applied), populate hook_output
|
|
139
|
+
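on each outcome, skip the push in step 5, still post the hook-blocked replies (step 6) and write the outcomes file (step 7), then return WITHOUT retrying the commit. The lead will treat this loop as no-progress.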
|
|
140
|
+
5. git push with a plain fast-forward push (the default, no flag overrides).
|
|
141
|
+
6. For each bug, post a fix reply to its finding_comment_id via the
|
|
142
|
+
Step 2.5 reply CLI shape:
|
|
143
|
+
- "Fixed in <commit_sha>" if the bug was addressed by your commit
|
|
144
|
+
- "Could not address this loop: <one-line reason>" if you skipped or failed it
|
|
145
|
+
- "Hook blocked the fix commit: <one-line summary>" if the commit was hook-blocked
|
|
146
|
+
Use the Fix reply CLI shape from Step 2.5 (`jq -Rs '{body: .}' <reply-body-file> | gh api .../comments/<id>/replies --input -`). Write every reply body to a temp file first.
|
|
147
|
+
7. Write `.bugteam-loop-<N>.outcomes.xml` (schema below) and return its path.
|
|
148
|
+
</execution>
|
|
149
|
+
|
|
150
|
+
<outcome_xml_schema>
|
|
151
|
+
<bugteam_fix loop="<N>" commit_sha="<sha or empty if no commit>">
|
|
152
|
+
<outcome
|
|
153
|
+
finding_id="loop<N>-<index>"
|
|
154
|
+
status="fixed|could_not_address|hook_blocked"
|
|
155
|
+
commit_sha="<sha if fixed, empty otherwise>"
|
|
156
|
+
reply_comment_id="<id of the reply posted>"
|
|
157
|
+
reply_comment_url="<url of the reply posted>"
|
|
158
|
+
>
|
|
159
|
+
<reason>only present when status=could_not_address; one-line reason text</reason>
|
|
160
|
+
<hook_output>only present when status=hook_blocked; verbatim stderr from the blocked hook</hook_output>
|
|
161
|
+
</outcome>
|
|
162
|
+
</bugteam_fix>
|
|
163
|
+
</outcome_xml_schema>
|
|
164
|
+
|
|
165
|
+
<constraints>
|
|
166
|
+
- Modify only files referenced in bugs_to_fix.
|
|
167
|
+
- One commit on the existing branch, then push.
|
|
168
|
+
- Keep the branch linear and the PR base fixed; append one new commit per
|
|
169
|
+
loop and fast-forward push only.
|
|
170
|
+
- Let every git hook run on every commit.
|
|
171
|
+
- git add by explicit path — name each file being staged.
|
|
172
|
+
- Preserve existing comments on lines you do not modify.
|
|
173
|
+
- Type hints on every signature you touch.
|
|
174
|
+
</constraints>
|
|
175
|
+
```
|