claude-dev-env 1.34.1 → 1.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/docs-agent.md +1 -1
- package/agents/project-docs-analyzer.md +0 -1
- package/agents/skill-to-agent-converter.md +0 -1
- package/commands/initialize.md +0 -1
- package/commands/readability-review.md +4 -4
- package/commands/review-plan.md +2 -4
- package/commands/stubcheck.md +1 -2
- package/hooks/blocking/code_rules_enforcer.py +250 -36
- package/hooks/blocking/test_code_rules_enforcer.py +91 -39
- package/hooks/blocking/test_code_rules_enforcer_annotations.py +97 -0
- package/hooks/blocking/test_code_rules_enforcer_collection_prefix.py +137 -0
- package/hooks/blocking/test_code_rules_enforcer_config_path.py +0 -20
- package/hooks/blocking/test_code_rules_enforcer_constant_equality.py +0 -18
- package/hooks/blocking/test_code_rules_enforcer_existence_checks.py +0 -18
- package/hooks/blocking/test_code_rules_enforcer_inline_literal_collections.py +155 -0
- package/hooks/blocking/test_code_rules_enforcer_loop_variable_naming.py +110 -0
- package/hooks/blocking/test_code_rules_enforcer_naming_pattern.py +0 -13
- package/hooks/blocking/test_code_rules_enforcer_skip_decorators.py +0 -26
- package/hooks/blocking/test_code_rules_enforcer_string_magic.py +234 -0
- package/package.json +1 -1
- package/skills/bugteam/PROMPTS.md +0 -39
- package/skills/bugteam/SKILL.md +17 -35
- package/skills/bugteam/reference/copilot-gap-analysis.md +12 -0
- package/skills/pr-converge/SKILL.md +19 -3
- package/agents/agent-writer.md +0 -157
- package/agents/config-centralizer.md +0 -686
- package/agents/config-extraction-agent.md +0 -225
- package/agents/doc-orchestrator.md +0 -47
- package/agents/docx-agent.md +0 -211
- package/agents/magic-value-eliminator-agent.md +0 -72
- package/agents/mandatory-agent-workflow-agent.md +0 -88
- package/agents/parallel-workflow-coordinator.md +0 -779
- package/agents/pdf-agent.md +0 -302
- package/agents/project-context-loader.md +0 -238
- package/agents/readability-review-agent.md +0 -76
- package/agents/refactoring-specialist.md +0 -69
- package/agents/right-sized-engineer.md +0 -129
- package/agents/session-continuity-manager.md +0 -53
- package/agents/stub-detector-agent.md +0 -140
- package/agents/tdd-test-writer.md +0 -62
- package/agents/test-data-builder.md +0 -68
- package/agents/tooling-builder.md +0 -78
- package/agents/validation-expert.md +0 -71
- package/agents/xlsx-agent.md +0 -169
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import importlib.util
|
|
5
|
+
|
|
6
|
+
# Load the enforcer module from the file that sits next to this test module.
ENFORCER_PATH = Path(__file__).resolve().parent / "code_rules_enforcer.py"
specification = importlib.util.spec_from_file_location(
    "code_rules_enforcer", ENFORCER_PATH
)
# spec_from_file_location may return None (and a spec may carry no loader);
# fail with a clear message instead of an AttributeError on None.
if specification is None or specification.loader is None:
    raise ImportError(f"Cannot load code_rules_enforcer from {ENFORCER_PATH}")
code_rules_enforcer = importlib.util.module_from_spec(specification)
specification.loader.exec_module(code_rules_enforcer)

# File paths handed to check_string_literal_magic as "the file being checked".
# The tests below expect the test path and the config path to be exempt.
PRODUCTION_FILE_PATH = "packages/app/services/foo.py"
TEST_FILE_PATH = "packages/app/tests/test_foo.py"
CONFIG_FILE_PATH = "packages/app/config/constants.py"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_should_flag_env_var_name_string_in_function_body() -> None:
    """An env-var name used as a subscript inside a function body is reported."""
    snippet = "".join(
        [
            "import os\n",
            "\n",
            "def fetch_secret() -> str:\n",
            " return os.environ['STRIPE_SECRET']\n",
        ]
    )
    findings = code_rules_enforcer.check_string_literal_magic(
        snippet, PRODUCTION_FILE_PATH
    )
    secret_reported = any("STRIPE_SECRET" in finding for finding in findings)
    assert secret_reported, f"Expected env-var name flagged, got: {findings}"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_should_flag_settings_key_all_caps_with_underscore() -> None:
    """An ALL_CAPS key containing an underscore, used to index a dict, is reported."""
    snippet = (
        "def lookup(settings: dict) -> str:\n"
        " return settings['HOOKS_PATH']\n"
    )
    findings = code_rules_enforcer.check_string_literal_magic(
        snippet, PRODUCTION_FILE_PATH
    )
    key_reported = any("HOOKS_PATH" in finding for finding in findings)
    assert key_reported, f"Expected settings key flagged, got: {findings}"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_should_flag_dotted_segment_string() -> None:
    """A dotted segment literal such as '.git' inside a function body is reported."""
    snippet = (
        "def is_git_dir(path: str) -> bool:\n"
        " return path.endswith('.git')\n"
    )
    findings = code_rules_enforcer.check_string_literal_magic(
        snippet, PRODUCTION_FILE_PATH
    )
    dotted_reported = any(".git" in finding for finding in findings)
    assert dotted_reported, f"Expected '.git' flagged, got: {findings}"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_should_not_flag_single_letter_uppercase() -> None:
    """A one-character uppercase literal ('A') produces no findings."""
    snippet = (
        "def is_added(line: str) -> bool:\n"
        " return line.startswith('A')\n"
    )
    findings = code_rules_enforcer.check_string_literal_magic(
        snippet, PRODUCTION_FILE_PATH
    )
    assert findings == [], (
        f"Single capital letter must not be flagged, got: {findings}"
    )
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_should_not_flag_short_uppercase_acronym() -> None:
    """A short uppercase acronym without an underscore ('GET') produces no findings."""
    snippet = (
        "def is_get(method: str) -> bool:\n"
        " return method == 'GET'\n"
    )
    findings = code_rules_enforcer.check_string_literal_magic(
        snippet, PRODUCTION_FILE_PATH
    )
    assert findings == [], (
        f"Short acronym 'GET' must not be flagged, got: {findings}"
    )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def test_should_not_flag_human_readable_message() -> None:
    """A plain-English error message inside a function body produces no findings."""
    snippet = (
        "def fail() -> None:\n"
        " raise RuntimeError('Could not connect to host')\n"
    )
    findings = code_rules_enforcer.check_string_literal_magic(
        snippet, PRODUCTION_FILE_PATH
    )
    assert findings == [], (
        f"Human-readable message must not be flagged, got: {findings}"
    )
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_should_not_flag_lowercase_string() -> None:
    """An all-lowercase literal ('hello') produces no findings."""
    snippet = (
        "def get_label() -> str:\n"
        " return 'hello'\n"
    )
    findings = code_rules_enforcer.check_string_literal_magic(
        snippet, PRODUCTION_FILE_PATH
    )
    assert findings == [], f"Lowercase string must not be flagged, got: {findings}"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def test_should_not_flag_module_level_string() -> None:
    """A literal assigned at module level (outside any function) produces no findings."""
    module_level_assignment = "DEFAULT_KEY = 'STRIPE_SECRET'\n"
    findings = code_rules_enforcer.check_string_literal_magic(
        module_level_assignment, PRODUCTION_FILE_PATH
    )
    failure_note = (
        f"Module-level string must not be flagged (it IS the constant), got: {findings}"
    )
    assert findings == [], failure_note
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def test_should_not_flag_docstring() -> None:
    """An ALL_CAPS name that only appears in a function docstring produces no findings."""
    snippet = (
        'def consume() -> None:\n'
        ' """STRIPE_SECRET is documented here for reference."""\n'
        ' return None\n'
    )
    findings = code_rules_enforcer.check_string_literal_magic(
        snippet, PRODUCTION_FILE_PATH
    )
    assert findings == [], f"Docstring must not be flagged, got: {findings}"
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def test_should_skip_in_test_files() -> None:
    """The same env-var literal produces no findings when the path is a test file."""
    snippet = "".join(
        [
            "import os\n",
            "\n",
            "def test_env() -> None:\n",
            " assert os.environ['STRIPE_SECRET'] == 'x'\n",
        ]
    )
    findings = code_rules_enforcer.check_string_literal_magic(
        snippet, TEST_FILE_PATH
    )
    assert findings == [], f"Test files exempt, got: {findings}"
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def test_should_skip_in_config_files() -> None:
    """ALL_CAPS literals produce no findings when the path is a config file."""
    snippet = (
        "def env_keys() -> list[str]:\n"
        " return ['STRIPE_SECRET', 'DB_HOST']\n"
    )
    findings = code_rules_enforcer.check_string_literal_magic(
        snippet, CONFIG_FILE_PATH
    )
    assert findings == [], f"Config files exempt, got: {findings}"
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def test_should_not_flag_default_argument_string_literal() -> None:
    """A literal used as a parameter default (signature, not body) produces no findings."""
    snippet = "def consume(key: str = 'STRIPE_SECRET') -> str:\n return key\n"
    findings = code_rules_enforcer.check_string_literal_magic(
        snippet, PRODUCTION_FILE_PATH
    )
    failure_note = (
        f"Default argument value (signature, not body) must not be flagged, got: {findings}"
    )
    assert findings == [], failure_note
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def test_should_not_flag_decorator_string_literal() -> None:
    """A literal passed to a decorator call (outside the body) produces no findings."""
    snippet_lines = [
        "from functools import lru_cache\n",
        "\n",
        "def cache_with_tag(tag: str):\n",
        " return lru_cache\n",
        "\n",
        "@cache_with_tag('STRIPE_SECRET')\n",
        "def consume() -> str:\n",
        " return 'hello'\n",
    ]
    findings = code_rules_enforcer.check_string_literal_magic(
        "".join(snippet_lines), PRODUCTION_FILE_PATH
    )
    failure_note = f"Decorator argument (not body) must not be flagged, got: {findings}"
    assert findings == [], failure_note
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def test_should_not_flag_annotation_literal_type_argument() -> None:
    """A string inside a ``Literal[...]`` parameter annotation produces no findings."""
    snippet_lines = [
        "from typing import Literal\n",
        "\n",
        "def consume(method: Literal['STRIPE_SECRET']) -> str:\n",
        " return method\n",
    ]
    findings = code_rules_enforcer.check_string_literal_magic(
        "".join(snippet_lines), PRODUCTION_FILE_PATH
    )
    failure_note = (
        f"Literal type annotation (signature, not body) must not be flagged, got: {findings}"
    )
    assert findings == [], failure_note
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def test_should_not_flag_default_arg_of_nested_function_when_scanning_outer() -> None:
    """A nested function's parameter default produces no findings.

    The embedded snippet uses real nested indentation (4 spaces for ``inner``,
    8 for its body) so that it is parseable Python; with flattened indentation
    ``inner`` would have no indented body and the snippet would not parse.
    """
    source = (
        "def outer() -> None:\n"
        "    def inner(key: str = 'STRIPE_SECRET') -> str:\n"
        "        return key\n"
        "    return None\n"
    )
    issues = code_rules_enforcer.check_string_literal_magic(
        source, PRODUCTION_FILE_PATH
    )
    assert issues == [], (
        f"Nested function's default arg (signature) must not be flagged from outer scan, got: {issues}"
    )
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def test_should_flag_class_attribute_in_nested_class_body() -> None:
    """A class attribute literal in a class nested inside a function is reported.

    The embedded snippet uses real nested indentation (4 spaces for ``Inner``,
    8 for its attribute) so that it is parseable Python; with flattened
    indentation the class would have no indented body.
    """
    source = (
        "def outer() -> str:\n"
        "    class Inner:\n"
        "        attribute: str = 'STRIPE_SECRET'\n"
        "    return 'no_magic_here'\n"
    )
    issues = code_rules_enforcer.check_string_literal_magic(
        source, PRODUCTION_FILE_PATH
    )
    assert any("STRIPE_SECRET" in each_issue for each_issue in issues), (
        f"Nested ClassDef body executes when outer() runs; class attribute must be flagged, got: {issues}"
    )
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def test_should_flag_class_attribute_in_nested_class_inside_function() -> None:
    """An ALL_CAPS-named class attribute in a function-local class is reported.

    The embedded snippet uses real nested indentation (4 spaces for ``Inner``,
    8 for ``KEY``) so that it is parseable Python; with flattened indentation
    the class would have no indented body.
    """
    source = (
        "def outer() -> None:\n"
        "    class Inner:\n"
        "        KEY: str = 'STRIPE_SECRET'\n"
        "    return None\n"
    )
    issues = code_rules_enforcer.check_string_literal_magic(
        source, PRODUCTION_FILE_PATH
    )
    # Only the second fragment interpolates a value, so only it is an f-string.
    assert any("STRIPE_SECRET" in each_issue for each_issue in issues), (
        "Class-level attribute inside a nested ClassDef inside outer fn body must be flagged "
        f"(it executes when outer() runs), got: {issues}"
    )
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def test_should_still_flag_literal_in_nested_function_body() -> None:
    """A literal in a nested function's body is reported, and reported only once.

    The embedded snippet uses real nested indentation (4 spaces for ``inner``,
    8 for its body) so that it is parseable Python; with flattened indentation
    ``inner`` would have no indented body.
    """
    source = (
        "def outer() -> str:\n"
        "    def inner() -> str:\n"
        "        return 'STRIPE_SECRET'\n"
        "    return inner()\n"
    )
    issues = code_rules_enforcer.check_string_literal_magic(
        source, PRODUCTION_FILE_PATH
    )
    assert any("STRIPE_SECRET" in each_issue for each_issue in issues), (
        f"Inner function's body magic literal must still be flagged via inner scan, got: {issues}"
    )
    # Guards against the same literal being reported from both the outer and
    # the inner function.
    assert len(issues) == 1, (
        f"Inner literal must be flagged exactly once (no duplicate from outer walk), got: {issues}"
    )
|
package/package.json
CHANGED
|
@@ -35,47 +35,8 @@ cd into `<worktree_path>` before any git, gh, or file operation.
|
|
|
35
35
|
H. Security boundaries (injection, path traversal, auth bypass, secret leakage)
|
|
36
36
|
I. Concurrency hazards (race conditions, missing awaits, shared mutable state)
|
|
37
37
|
J. Magic values and configuration drift
|
|
38
|
-
Copilot-derived addendum (K–N) — verify each one explicitly. Return at
|
|
39
|
-
least one finding per category OR a verified-clean entry that names the
|
|
40
|
-
exact files and lines you walked.
|
|
41
|
-
K. Collection naming. Every tuple, list, set, dict, mapping, or sequence
|
|
42
|
-
parameter must follow the CODE_RULES.md §5 "Extended naming rules"
|
|
43
|
-
prefix discipline:
|
|
44
|
-
- module-level constant whose value is a tuple/list/set/dict/frozenset
|
|
45
|
-
literal MUST start with `ALL_` (e.g. `ALL_THEMES_INSERT_REQUIRED_COLUMN_NAMES`)
|
|
46
|
-
- function/method parameter whose annotation is `list[...]`, `tuple[...]`,
|
|
47
|
-
`set[...]`, `dict[...]`, `Iterable[...]`, `Sequence[...]`, `Mapping[...]`,
|
|
48
|
-
or `frozenset[...]` MUST start with `all_` (e.g. `all_column_value_pairs`)
|
|
49
|
-
- exempt: dict/map names that follow the `X_by_Y` pattern (e.g.
|
|
50
|
-
`price_by_product`)
|
|
51
|
-
L. Library print / direct stdout. In any module that is not a CLI entry
|
|
52
|
-
point (`__main__`, `*_cli.py`, `scripts/*.py`), every `print(...)`,
|
|
53
|
-
`sys.stdout.write(...)`, `sys.stderr.write(...)` call is a finding.
|
|
54
|
-
The fix is to route through a `logger` call OR to make the output
|
|
55
|
-
stream an explicit parameter so callers can redirect it.
|
|
56
|
-
M. String-literal magic values. Treat domain-identifier string literals
|
|
57
|
-
(database column names, table names, HTTP header names, status enums,
|
|
58
|
-
environment-variable names) inside a function body as magic values
|
|
59
|
-
even when the existing number-only check would let them pass. The
|
|
60
|
-
fix is to extract them into `config/` and reference the imported
|
|
61
|
-
name. Do not flag plain log messages, error messages, or one-off
|
|
62
|
-
human-readable strings.
|
|
63
|
-
N. Wrapper plumb-through. When a public function delegates to an
|
|
64
|
-
inner function defined in the same package, every optional kwarg
|
|
65
|
-
accepted by the inner function MUST appear in the public wrapper
|
|
66
|
-
unless the wrapper docstring explicitly states the kwarg is fixed
|
|
67
|
-
to a sentinel default. Silently dropping `loud_banner_stream`,
|
|
68
|
-
`timeout`, `dry_run`, or any similar optional kwarg is a finding.
|
|
69
38
|
</bug_categories>
|
|
70
39
|
|
|
71
|
-
<copilot_derived_addendum_source>
|
|
72
|
-
The K–N categories were added after Copilot raised real findings on
|
|
73
|
-
PR #70 (writer.py / summary.py) and PR #73 (constants.py / writer.py /
|
|
74
|
-
tracker.py) that converged "0 P0 / 0 P1 / 0 P2" under the original
|
|
75
|
-
A–J rubric. See ~/.claude/skills/bugteam/reference/copilot-gap-analysis.md
|
|
76
|
-
for the inventory and the validators that now back categories K and L.
|
|
77
|
-
</copilot_derived_addendum_source>
|
|
78
|
-
|
|
79
40
|
<constraints>
|
|
80
41
|
- Read-only on source code: the audit does not modify any source file.
|
|
81
42
|
- Cite file:line for every finding.
|
package/skills/bugteam/SKILL.md
CHANGED
|
@@ -81,9 +81,7 @@ The fix script removes any non-canonical local-scope override on the active repo
|
|
|
81
81
|
[ ] Step 0: project permissions granted
|
|
82
82
|
[ ] Step 1: PR scope resolved
|
|
83
83
|
[ ] Step 2: agent team created + loop state set
|
|
84
|
-
[ ] Step 2.6: INITIAL standards review against cumulative PR diff
|
|
85
84
|
[ ] Step 3: cycle complete (converged | cap reached | stuck | error)
|
|
86
|
-
[ ] Step 3.5: FINAL standards review against cumulative PR diff
|
|
87
85
|
[ ] Step 4: team torn down + working tree clean
|
|
88
86
|
[ ] Step 4.5: PR description rewritten (or skip warning logged)
|
|
89
87
|
[ ] Step 5: project permissions revoked
|
|
@@ -143,7 +141,7 @@ TeamCreate(
|
|
|
143
141
|
```bash
|
|
144
142
|
loop_count=0
|
|
145
143
|
last_action="fresh"
|
|
146
|
-
last_findings=""
|
|
144
|
+
last_findings='{"total": 0}'
|
|
147
145
|
audit_log=""
|
|
148
146
|
starting_sha="$(git rev-parse HEAD)"
|
|
149
147
|
team_name="bugteam-pr-<number>-<YYYYMMDDHHMMSS>"
|
|
@@ -205,29 +203,28 @@ jq -n \
|
|
|
205
203
|
|
|
206
204
|
**Endpoints:** `POST .../pulls/{pull}/reviews`; `POST .../pulls/{pull}/comments/{id}/replies`; fallback `POST .../issues/{issue}/comments` (`issue` = PR number).
|
|
207
205
|
|
|
208
|
-
### Step 2.6: INITIAL standards review (once, before Loop 1 audit)
|
|
209
|
-
|
|
210
|
-
Run BEFORE the first pre-audit gate fires. Spawn a fresh `code-quality-agent`
|
|
211
|
-
teammate inside the same team and drive it through the K–N addendum (see
|
|
212
|
-
PROMPTS.md `<copilot_derived_addendum_source>`). The teammate audits the
|
|
213
|
-
cumulative PR diff (`gh pr diff <N>`) instead of a single loop's incremental
|
|
214
|
-
patch; clean-room context is preserved by the same agent-team isolation as
|
|
215
|
-
the per-loop bugfind teammate. Findings are posted using the same Step 2.5
|
|
216
|
-
review-shape with body `## /bugteam INITIAL standards review against PR #<N>
|
|
217
|
-
cumulative diff: <P0>P0 / <P1>P1 / <P2>P2`. Findings advance the audit/fix
|
|
218
|
-
cycle exactly as if they had been raised in Loop 1: the lead increments
|
|
219
|
-
`loop_count` to 1, sets `last_action = "audited"` with the merged
|
|
220
|
-
`last_findings`, and Step 3 begins on the FIX branch. When the INITIAL
|
|
221
|
-
review returns zero findings, `loop_count` stays at 0 and Step 3 begins on
|
|
222
|
-
the AUDIT branch as before. Failure on this phase logs the error and
|
|
223
|
-
proceeds to Step 3 unchanged so the legacy A–J cycle still runs.
|
|
224
|
-
|
|
225
206
|
### Step 3: The cycle
|
|
226
207
|
|
|
227
208
|
Run the AUDIT-FIX cycle for each PR in all_prs, reusing the same team across PRs. The 10-loop cap applies per PR. Exit reasons (converged, cap reached, stuck, error) are tracked per PR; the final report lists one outcome line per PR.
|
|
228
209
|
|
|
229
210
|
**Gate:** `validate_content` / `hooks/blocking/code_rules_enforcer.py` on PR-scoped files before every AUDIT (`bugteam_code_rules_gate.py`). Lead runs gate; clean-coder clears failures; then bugfind audits.
|
|
230
211
|
|
|
212
|
+
**Pre-cycle: walk prior bugteam reviews end-first** (once per PR, after Step 2 and before iteration begins, when `last_action == "fresh"`). When `/bugteam` is re-invoked on a PR that already has prior loops, this walk detects whether the most recent loop already left this HEAD clean (short-circuit); otherwise it records that prior loops were dirty, so the AUDIT runs against the latest diff with that signal in mind:
|
|
213
|
+
|
|
214
|
+
```bash
|
|
215
|
+
dirty_review_count=0
|
|
216
|
+
gh api "repos/<owner>/<repo>/pulls/<number>/reviews" \
|
|
217
|
+
--jq '[.[] | select(.body | startswith("## /bugteam loop "))] | sort_by(.submitted_at) | reverse'
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
Iterate from index 0 (most recent) toward older entries:
|
|
221
|
+
|
|
222
|
+
- A bugteam review body that ends with `→ clean` is **clean**; any other `## /bugteam loop ...` body is **dirty**.
|
|
223
|
+
- For a dirty review, increment `dirty_review_count` by one. The review's specific finding bodies are not carried forward — bugteam's AUDIT regenerates findings against the current HEAD's diff each loop, so prior bodies are stale by definition. The count alone is the carried signal.
|
|
224
|
+
- Stop at the first clean review. Older reviews are presumed addressed at that clean checkpoint and are not re-read.
|
|
225
|
+
- When index 0 is itself clean AND its `commit_id` matches `git rev-parse HEAD`, the PR is already converged on this HEAD — set `last_action="audited"` and `last_findings='{"total": 0}'`, then fall through to step 1's `converged` exit and skip the Step 3 iteration entirely.
|
|
226
|
+
- When `dirty_review_count > 0`, log the count and proceed into the normal iteration; the next AUDIT regenerates anchored findings against the current HEAD so `loop_comment_index` stays correct. Unlike `pr-converge` — where Cursor Bugbot's prior dirty-review *bodies* are read back by the Fix protocol because each dirty body lists specific findings the loop must still address — bugteam's per-loop bodies are anchored to the diff at *that loop's* HEAD, so re-applying them against a newer diff would be incorrect. The count is sufficient signal that "prior loops did not converge here."
|
|
227
|
+
|
|
231
228
|
1. From `last_action` / `last_findings`:
|
|
232
229
|
- `last_action == "audited"` and `last_findings.total == 0` → exit `converged`
|
|
233
230
|
- `last_action == "fixed"` and `git rev-parse HEAD` unchanged since pre-FIX → exit `stuck`
|
|
@@ -304,19 +301,6 @@ Pass finding comment URLs/ids from `loop_comment_index` in XML. Replies: `Fixed
|
|
|
304
301
|
|
|
305
302
|
[`PROMPTS.md`](PROMPTS.md): fix XML + schema. Verify: `git rev-parse HEAD` advanced; `git fetch origin <branch> && git rev-parse origin/<branch>` matches `HEAD`. Unchanged HEAD → `stuck — bugfix teammate could not address findings`.
|
|
306
303
|
|
|
307
|
-
### Step 3.5: FINAL standards review (once, after convergence)
|
|
308
|
-
|
|
309
|
-
Run AFTER Step 3 exits with `converged`, `cap reached`, or `stuck`, and
|
|
310
|
-
BEFORE Step 4 teardown. Spawn one more fresh `code-quality-agent` teammate;
|
|
311
|
-
audit the cumulative PR diff against the K–N addendum a second time. Post
|
|
312
|
-
the review with body `## /bugteam FINAL standards review against PR #<N>
|
|
313
|
-
cumulative diff: <P0>P0 / <P1>P1 / <P2>P2`. When findings remain, the
|
|
314
|
-
exit reason is upgraded to `error: final standards review found <P0>+<P1>+<P2>
|
|
315
|
-
unresolved finding(s)` and the loop log gains an extra `final-review` line.
|
|
316
|
-
A clean FINAL review preserves the existing exit reason. Failure on this
|
|
317
|
-
phase logs the error and continues to Step 4 unchanged so teardown,
|
|
318
|
-
permission revoke, and the final report still run.
|
|
319
|
-
|
|
320
304
|
### Step 4: Teardown
|
|
321
305
|
|
|
322
306
|
1. For each live teammate: `SendMessage(to="<name>", message={"type": "shutdown_request", "reason": "bugteam cycle ending"})`. `approve: false` on cleanup → log and continue.
|
|
@@ -361,10 +345,8 @@ Final commit: <current_HEAD_sha7>
|
|
|
361
345
|
Net change: <total_files> files, +<total_add>/-<total_del>
|
|
362
346
|
|
|
363
347
|
Loop log:
|
|
364
|
-
initial standards review: 1P0 0P1 2P2
|
|
365
348
|
1 audit: 3P0 2P1 0P2
|
|
366
349
|
...
|
|
367
|
-
final standards review: 0P0 0P1 0P2
|
|
368
350
|
```
|
|
369
351
|
|
|
370
352
|
`cap reached` → suggest `/findbugs`. `stuck` → which findings. `error` → detail + loop.
|
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# Copilot gap analysis
|
|
2
2
|
|
|
3
|
+
> **Status: HISTORICAL — patch plan partially superseded.**
|
|
4
|
+
>
|
|
5
|
+
> This file documents the read-only investigation that motivated the K–N rubric addendum and the Step 2.6 / Step 3.5 standards-review phases. **The audit-rubric portion of the patch plan was reverted in [PR #292](https://github.com/jl-cmd/claude-code-config/pull/292):** PROMPTS.md no longer carries the K–N addendum or the `<copilot_derived_addendum_source>` block, and SKILL.md no longer runs the Step 2.6 / Step 3.5 standards-review phases.
|
|
6
|
+
>
|
|
7
|
+
> **What replaced K–N (partial coverage):** several judgment-heavy categories now have deterministic validators in `code_rules_enforcer.py` — `check_collection_prefix` (category K), `check_library_print` (category L), `check_inline_literal_collections`, `check_loop_variable_naming`, `check_parameter_annotations`, `check_return_annotations`. These validators are exercised by `bugteam_code_rules_gate.py` during the bugteam pre-audit / loop, and by the shipped repo `pre-commit` / `pre-push` git hooks that invoke that gate. They are **not** wired into the repo's default `PreToolUse` Write|Edit hook list in `packages/claude-dev-env/hooks/hooks.json`, so they do not block at Write/Edit time by default — enforcement comes from the gate (audit-time) and the git hooks (commit/push-time).
|
|
8
|
+
>
|
|
9
|
+
> **What is NOT a full deterministic replacement (remaining limitations):** the string-literal magic case in this doc is largely about snake_case column / key / table-name literals (`"theme_name"`, `"content_id"`, etc.). `check_string_literal_magic` in `code_rules_enforcer.py` still only flags ALL-CAPS-WITH-UNDERSCORE identifiers and dotted segments, so it does **not** cover those snake_case database-column literals by itself. However, the gap called out in this document is no longer accurately described as “unimplemented”: `check_database_column_string_magic` and `check_wrapper_plumb_through` now exist in `bugteam_code_rules_gate.py` and run in the `/bugteam` gate, including via the shipped repo `pre-commit` / `pre-push` hooks that invoke that gate. The remaining limitation is coverage/wiring scope: these checks are gate-time / git-hook-time validators, not part of the repo's default `PreToolUse` Write|Edit hook path, and any residual misses should be described in terms of validator scope rather than absence.
|
|
10
|
+
>
|
|
11
|
+
> Read this file as **investigation history**: the inventory, rubric/validator coverage diffs, and root-cause statement remain useful context for future bugteam improvements. The "Patch plan" section below describes what was originally proposed, not the current state — references to category K, L, M, N or to Step 2.6 / Step 3.5 in the patch plan are historical and should not be reintroduced.
|
|
12
|
+
|
|
3
13
|
This file is the reference record produced by the read-only investigation of why the `/bugteam` audit/fix loop and `bugteam_code_rules_gate.py` repeatedly miss the classes of code-quality violations that the GitHub Copilot reviewer raises on follow-up review rounds. It is written so future bugteam runs can skim the inventory, the rubric/validator coverage diffs, and the patch plan without re-deriving them.
|
|
4
14
|
|
|
5
15
|
Sources of truth cited below: `~/.claude/docs/CODE_RULES.md`, `~/.claude/CLAUDE.md`, `~/.claude/rules/file-global-constants.md`, `~/.claude/skills/bugteam/SKILL.md`, `~/.claude/skills/bugteam/PROMPTS.md`, `~/.claude/skills/bugteam/CONSTRAINTS.md`, `~/.claude/skills/bugteam/scripts/bugteam_code_rules_gate.py`, `~/.claude/skills/bugteam/scripts/bugteam_preflight.py`, `~/.claude/hooks/blocking/code_rules_enforcer.py`, plus `gh api repos/JonEcho/python-automation/pulls/{70,73}/comments` filtered to author `Copilot`.
|
|
@@ -85,6 +95,8 @@ The bugteam audit rubric (PROMPTS.md §bug_categories A–J) and the determinist
|
|
|
85
95
|
|
|
86
96
|
## Patch plan
|
|
87
97
|
|
|
98
|
+
> **Historical — the rubric/phases portion below was reverted in PR #292.** Sections (a) PROMPTS.md K–N addendum, (b) Step 2.6 INITIAL standards review, and the FINAL standards review (Step 3.5) were reverted and replaced by deterministic validators (see status banner at the top of this file). Sections that landed deterministic validators in `code_rules_enforcer.py` (collection prefix, library print, string-literal magic, inline literal collections, loop-variable naming, parameter/return annotations) remain in force. Treat the rubric/phase sections below as record-of-what-was-tried, not a current to-do list.
|
|
99
|
+
|
|
88
100
|
Each section names exactly one target file, the literal text or regex to add, and a verification step that re-runs the new detection against the PR #70 / PR #73 diffs.
|
|
89
101
|
|
|
90
102
|
### a. `~/.claude/skills/bugteam/PROMPTS.md`
|
|
@@ -58,12 +58,28 @@ Capture `number` (`<NUMBER>`), `headRefOid` (`current_head`), owner/repo (from `
|
|
|
58
58
|
|
|
59
59
|
#### `phase == BUGBOT`
|
|
60
60
|
|
|
61
|
-
a. Fetch the
|
|
61
|
+
a. Fetch Cursor Bugbot reviews newest-first and walk backwards until the first clean review:
|
|
62
|
+
|
|
62
63
|
```bash
|
|
63
64
|
gh api repos/<OWNER>/<REPO>/pulls/<NUMBER>/reviews \
|
|
64
|
-
--jq '[.[] | select(.user.login=="cursor[bot]")] | sort_by(.submitted_at) |
|
|
65
|
+
--jq '[.[] | select(.user.login=="cursor[bot]")] | sort_by(.submitted_at) | reverse'
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Track dirty reviews in a temp file as you walk; the Fix protocol reads it back later in this tick:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
dirty_reviews_path=$(mktemp "${TMPDIR:-/tmp}/pr-converge-bugbot.XXXXXX")
|
|
72
|
+
: > "$dirty_reviews_path"
|
|
65
73
|
```
|
|
66
|
-
|
|
74
|
+
|
|
75
|
+
Iterate from index 0 (most recent) toward older entries:
|
|
76
|
+
|
|
77
|
+
- Classify each review's body — **dirty** when it contains `Cursor Bugbot has reviewed your changes and found <N> potential issue`; **clean** otherwise.
|
|
78
|
+
- For a dirty review, append one JSON line to `$dirty_reviews_path` with `{review_id, commit_id, submitted_at, body}`.
|
|
79
|
+
- Stop at the first clean review. Older reviews are presumed addressed at that clean checkpoint and are not re-read.
|
|
80
|
+
- When index 0 is itself clean, `$dirty_reviews_path` stays empty.
|
|
81
|
+
|
|
82
|
+
Capture `commit_id`, `state`, `submitted_at`, and body of the index-0 review for the decision branches below. When a branch routes to the **Fix protocol**, read every entry from `$dirty_reviews_path` and address all of them — not just index 0.
|
|
67
83
|
|
|
68
84
|
b. Fetch unaddressed inline comments from `cursor[bot]` on `current_head`:
|
|
69
85
|
```bash
|
package/agents/agent-writer.md
DELETED
|
@@ -1,157 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: agent-writer
|
|
3
|
-
description: Use this agent when you need to create a new subagent for Claude Code. This agent guides you through designing, structuring, and writing effective subagents with proper frontmatter, system prompts, and best practices. Trigger when user asks to "create an agent", "write a new subagent", "design an agent", or "help me build an agent".
|
|
4
|
-
tools: Read,Write,Glob,AskUserQuestion
|
|
5
|
-
model: sonnet
|
|
6
|
-
color: blue
|
|
7
|
-
---
|
|
8
|
-
|
|
9
|
-
# Agent Writer - Subagent Creation Assistant
|
|
10
|
-
|
|
11
|
-
You are a specialized agent that helps create well-structured, effective subagents for Claude Code. Your role is to guide the user through the entire agent creation process, ensuring best practices and proper structure.
|
|
12
|
-
|
|
13
|
-
## Your Process
|
|
14
|
-
|
|
15
|
-
### Phase 1: Discovery & Design
|
|
16
|
-
|
|
17
|
-
Ask the user these key questions to understand their needs:
|
|
18
|
-
|
|
19
|
-
1. **Purpose & Scope**
|
|
20
|
-
- What specific problem or workflow will this agent handle?
|
|
21
|
-
- What makes this task worthy of a dedicated agent vs a skill or direct implementation?
|
|
22
|
-
- What is the expected trigger context? (e.g., "after code completion", "when user mentions X")
|
|
23
|
-
|
|
24
|
-
2. **Specialization Level**
|
|
25
|
-
- Is this a narrow, focused task (recommended) or broader capability?
|
|
26
|
-
- What expertise domain does this require? (e.g., testing, refactoring, documentation)
|
|
27
|
-
|
|
28
|
-
3. **Tool Requirements**
|
|
29
|
-
- Which tools does this agent need? (Read, Write, Edit, Bash, Grep, Glob, Task, etc.)
|
|
30
|
-
- Should it have restricted access for safety? (e.g., read-only agents)
|
|
31
|
-
- Will it need to invoke other agents via Task tool?
|
|
32
|
-
|
|
33
|
-
4. **Invocation Strategy**
|
|
34
|
-
- Should this be PROACTIVE (auto-invoked when context matches)?
|
|
35
|
-
- Or explicit-only (user must request it)?
|
|
36
|
-
- What keywords/patterns should trigger it?
|
|
37
|
-
|
|
38
|
-
5. **Examples & Edge Cases**
|
|
39
|
-
- What are 2-3 concrete examples of when this agent should be used?
|
|
40
|
-
- What are scenarios where it should NOT be used?
|
|
41
|
-
|
|
42
|
-
### Phase 2: Generate Agent Structure
|
|
43
|
-
|
|
44
|
-
Based on discovery, create the agent file with:
|
|
45
|
-
|
|
46
|
-
**Frontmatter Requirements:**
|
|
47
|
-
```yaml
|
|
48
|
-
---
|
|
49
|
-
name: agent-name-in-kebab-case
|
|
50
|
-
description: Clear description of when to use this agent, including trigger keywords and scenarios. For proactive agents, include "Use PROACTIVELY" or "MUST be used when".
|
|
51
|
-
tools: comma,separated,tool,list (or omit for all tools)
|
|
52
|
-
model: sonnet (or opus/haiku/inherit)
|
|
53
|
-
---
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
**System Prompt Structure:**
|
|
57
|
-
|
|
58
|
-
```markdown
|
|
59
|
-
# [Agent Name] - [Brief Subtitle]
|
|
60
|
-
|
|
61
|
-
You are a specialized agent that [primary purpose]. Your role is to [detailed responsibility].
|
|
62
|
-
|
|
63
|
-
## Your Responsibilities
|
|
64
|
-
|
|
65
|
-
[Bulleted list of what this agent does]
|
|
66
|
-
|
|
67
|
-
## Your Process
|
|
68
|
-
|
|
69
|
-
[Step-by-step workflow the agent should follow]
|
|
70
|
-
|
|
71
|
-
## Critical Rules
|
|
72
|
-
|
|
73
|
-
[Mandatory constraints and requirements]
|
|
74
|
-
|
|
75
|
-
## When to Use This Agent
|
|
76
|
-
|
|
77
|
-
[Specific triggering scenarios with examples]
|
|
78
|
-
|
|
79
|
-
## When NOT to Use This Agent
|
|
80
|
-
|
|
81
|
-
[Anti-patterns and exclusions]
|
|
82
|
-
|
|
83
|
-
## Examples
|
|
84
|
-
|
|
85
|
-
<example>
|
|
86
|
-
Context: [Scenario description]
|
|
87
|
-
user: "[Example user request]"
|
|
88
|
-
assistant: "[How this agent should respond]"
|
|
89
|
-
<commentary>
|
|
90
|
-
[Why this agent is appropriate here]
|
|
91
|
-
</commentary>
|
|
92
|
-
</example>
|
|
93
|
-
|
|
94
|
-
[2-3 more examples showing variety]
|
|
95
|
-
|
|
96
|
-
## Output Format
|
|
97
|
-
|
|
98
|
-
[Expected deliverables and communication style]
|
|
99
|
-
```
|
|
100
|
-
|
|
101
|
-
### Phase 3: Validation & Refinement
|
|
102
|
-
|
|
103
|
-
Before finalizing, check:
|
|
104
|
-
|
|
105
|
-
1. **Single Responsibility**: Does it do ONE thing well?
|
|
106
|
-
2. **Clear Triggers**: Is it obvious when to invoke this agent?
|
|
107
|
-
3. **Tool Minimization**: Does it have only necessary tools?
|
|
108
|
-
4. **Actionable Instructions**: Can another AI follow these instructions?
|
|
109
|
-
5. **Example Coverage**: Do examples show both use and non-use cases?
|
|
110
|
-
|
|
111
|
-
### Phase 4: Placement Decision
|
|
112
|
-
|
|
113
|
-
Determine where to save the agent:
|
|
114
|
-
|
|
115
|
-
- **Project-specific**: `.claude/agents/` (version controlled, team-shared)
|
|
116
|
-
- **Personal/global**: `~/.claude/agents/` (user-specific, cross-project)
|
|
117
|
-
- **Plugin**: For distribution to others
|
|
118
|
-
|
|
119
|
-
Default to project-specific unless user specifies otherwise.
|
|
120
|
-
|
|
121
|
-
## Best Practices You Must Follow
|
|
122
|
-
|
|
123
|
-
1. **Focused Purpose**: "Design focused subagents with single responsibilities"
|
|
124
|
-
2. **Detailed Prompts**: "Write detailed prompts with specific instructions and constraints"
|
|
125
|
-
3. **Tool Restriction**: "Limit tool access to only necessary ones"
|
|
126
|
-
4. **Proactive Language**: Use "Use PROACTIVELY" for auto-invocation
|
|
127
|
-
5. **Action-Oriented Descriptions**: "Make descriptions specific and action-oriented"
|
|
128
|
-
6. **Multiple Examples**: Always include 2-3 diverse examples showing when to use the agent
|
|
129
|
-
7. **Clear Boundaries**: Define both when to use AND when not to use
|
|
130
|
-
|
|
131
|
-
## Anti-Patterns to Avoid
|
|
132
|
-
|
|
133
|
-
- ❌ Vague descriptions like "helps with code"
|
|
134
|
-
- ❌ Agents that do multiple unrelated things
|
|
135
|
-
- ❌ Missing examples or edge cases
|
|
136
|
-
- ❌ Unclear tool requirements
|
|
137
|
-
- ❌ No guidance on when NOT to use
|
|
138
|
-
- ❌ Generic system prompts without specific instructions
|
|
139
|
-
|
|
140
|
-
## Your Communication Style
|
|
141
|
-
|
|
142
|
-
- Ask clarifying questions when requirements are unclear
|
|
143
|
-
- Propose concrete examples to validate understanding
|
|
144
|
-
- Suggest improvements to overly broad agent concepts
|
|
145
|
-
- Show the user the generated agent before saving
|
|
146
|
-
- Explain your design decisions
|
|
147
|
-
|
|
148
|
-
## Workflow Summary
|
|
149
|
-
|
|
150
|
-
1. **Ask discovery questions** (use AskUserQuestion for key decisions)
|
|
151
|
-
2. **Propose agent structure** based on answers
|
|
152
|
-
3. **Generate complete agent file** with frontmatter + system prompt
|
|
153
|
-
4. **Review with user** before saving
|
|
154
|
-
5. **Save to appropriate location**
|
|
155
|
-
6. **Provide usage guidance** (how to invoke, test scenarios)
|
|
156
|
-
|
|
157
|
-
Remember: A great agent is narrow, focused, and has crystal-clear triggering conditions. When in doubt, make it more specific, not more general.
|