claude-dev-env 1.50.2 → 1.50.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/_shared/pr-loop/audit-contract.md +3 -3
- package/audit-rubrics/category_rubrics/category-e-dead-code.md +3 -2
- package/audit-rubrics/prompts/category-a-api-contracts.md +1 -1
- package/audit-rubrics/prompts/category-b-selector-engine-compat.md +2 -2
- package/audit-rubrics/prompts/category-c-resource-cleanup.md +2 -2
- package/audit-rubrics/prompts/category-d-scoping-and-ordering.md +2 -2
- package/audit-rubrics/prompts/category-e-dead-code.md +5 -4
- package/audit-rubrics/prompts/category-f-silent-failures.md +2 -2
- package/audit-rubrics/prompts/category-g-bounds-and-overflow.md +2 -2
- package/audit-rubrics/prompts/category-h-security-boundaries.md +2 -2
- package/audit-rubrics/prompts/category-i-concurrency.md +2 -2
- package/audit-rubrics/prompts/category-j-code-rules-compliance.md +2 -2
- package/audit-rubrics/prompts/category-k-codebase-conflicts.md +2 -2
- package/docs/CODE_RULES.md +1 -1
- package/package.json +1 -1
- package/skills/_shared/pr-loop/scripts/build_audit_prompt.py +13 -7
- package/skills/_shared/pr-loop/scripts/skills_pr_loop_constants/path_resolver_constants.py +21 -11
- package/skills/_shared/pr-loop/scripts/test_build_audit_prompt.py +92 -0
- package/skills/bugteam/CONSTRAINTS.md +1 -1
- package/skills/bugteam/PROMPTS.md +20 -48
- package/skills/bugteam/SKILL.md +5 -5
- package/skills/bugteam/reference/audit-and-teammates.md +1 -1
- package/skills/bugteam/reference/audit-contract.md +4 -4
- package/skills/bugteam/reference/design-rationale.md +1 -1
- package/skills/findbugs/SKILL.md +21 -12
- package/skills/fixbugs/SKILL.md +1 -1
- package/skills/pr-converge/SKILL.md +28 -1
- package/skills/pr-converge/reference/per-tick.md +24 -8
- package/skills/qbug/SKILL.md +5 -5
- package/skills/qbug/test_qbug_skill_audit_schema.py +13 -23
- package/skills/refine/SKILL.md +1 -1
|
@@ -21,7 +21,7 @@ Each finding an audit produces MUST be one of exactly two shapes.
|
|
|
21
21
|
"id": "loop<N>-<K>",
|
|
22
22
|
"file": "path/relative/to/repo/root.py",
|
|
23
23
|
"line": 123,
|
|
24
|
-
"category": "A | B | C | D | E | F | G | H | I | J",
|
|
24
|
+
"category": "A | B | C | D | E | F | G | H | I | J | K | L | M | N",
|
|
25
25
|
"severity": "P0 | P1 | P2",
|
|
26
26
|
"excerpt": "verbatim code snippet from the offending line(s)",
|
|
27
27
|
"failure_mode": "one sentence describing what goes wrong and when",
|
|
@@ -37,7 +37,7 @@ Used when an audit investigates a category and does NOT find a bug. Bare "verifi
|
|
|
37
37
|
|
|
38
38
|
```json
|
|
39
39
|
{
|
|
40
|
-
"category": "A | B | C | D | E | F | G | H | I | J",
|
|
40
|
+
"category": "A | B | C | D | E | F | G | H | I | J | K | L | M | N",
|
|
41
41
|
"files_opened": ["file1.py", "file2.py"],
|
|
42
42
|
"lines_quoted": [
|
|
43
43
|
{"file": "file1.py", "line": 88, "text": "verbatim line content"}
|
|
@@ -120,7 +120,7 @@ Sequence:
|
|
|
120
120
|
3. Run `py_compile` (or language-equivalent) on each modified file.
|
|
121
121
|
4. Compute `fix_diff` against pre-fix contents for the modified set.
|
|
122
122
|
5. Run `bugteam_code_rules_gate.py` with explicit paths for every modified file.
|
|
123
|
-
6. Spawn a scoped audit of `fix_diff` with full A–
|
|
123
|
+
6. Spawn a scoped audit of `fix_diff` with full A–N rigor, Shape A/B contract, adversarial pass, AND Haiku secondary in parallel (paranoid mode on post-fix).
|
|
124
124
|
7. Read the previous loop's outcome XML (`<worktree_path>/.bugteam-pr<N>-loop<L-1>.outcomes.xml`) and obtain its total finding count. If this is the first loop (L <= 1) or the file does not exist, skip this comparison. Compute the post-fix total: previous total minus bugs fixed in this round plus new violations found in the post-fix audit (step 6). If the post-fix total exceeds the previous total, flag all new findings as same-loop fix-targets and revise. An increase in total findings across loop transitions is a regression.
|
|
125
125
|
8. Any new findings become same-loop fix-targets. Internal iteration count increments by one.
|
|
126
126
|
9. After 3 internal iterations with fresh findings each time, exit `stuck: post-fix audit not converging`.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Category E — Dead code and unused imports
|
|
2
2
|
|
|
3
|
-
**What this category audits:** imports the diff adds but leaves unreferenced, functions defined but never called,
|
|
3
|
+
**What this category audits:** imports the diff adds but leaves unreferenced (dead imports), functions defined but never called, code made unreachable by a prior return or raise (dead returns), conditions that are always true or always false (dead branches), parameters that are accepted but never used (dead parameters), local variables assigned but never read (dead locals), removed-but-not-deleted symbols.
|
|
4
4
|
|
|
5
5
|
**Examples of Category E findings:**
|
|
6
6
|
- A new `import` line with zero corresponding references in the file.
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
- Code after an unconditional `return` or `raise`.
|
|
9
9
|
- A condition like `if False:` or `while True: ... return` where the loop body always returns immediately.
|
|
10
10
|
- An accepted parameter that the function body never uses.
|
|
11
|
+
- A local variable assigned and never read afterward in the same function.
|
|
11
12
|
|
|
12
13
|
**Companion reference:** see `../source-material-section-types.md`.
|
|
13
14
|
|
|
@@ -21,7 +22,7 @@
|
|
|
21
22
|
| E2 | Functions / methods defined but never called | Internal helpers defined in this PR with no call sites in this PR or elsewhere. |
|
|
22
23
|
| E3 | Code after unconditional return / raise / exit | Statements following a top-level `return`, `raise`, `sys.exit`, `os._exit` that cannot execute. |
|
|
23
24
|
| E4 | Always-true / always-false conditions | `if True:` / `if False:` / conditions provably constant given context. |
|
|
24
|
-
| E5 | Unused parameters | Parameters declared but never read inside the function body. |
|
|
25
|
+
| E5 | Unused parameters and locals | Parameters declared but never read inside the function body; local variables assigned but never read afterward in the same scope. |
|
|
25
26
|
| E6 | Removed-but-not-deleted symbol references | Symbols renamed/removed elsewhere with stale import or call sites left behind. |
|
|
26
27
|
| E7 | Test fixtures / helpers defined but never used | Pytest fixtures, test data builders, mock factories with no callers. |
|
|
27
28
|
| E8 | Stub / placeholder code without TODO | `pass`, `...`, `raise NotImplementedError` left without explanation or tracking. |
|
|
@@ -95,7 +95,7 @@ Lead: `Total: N (P0=N, P1=N, P2=N)`. For each sub-bucket A1–A9, produce Shape
|
|
|
95
95
|
|
|
96
96
|
# Worked example: jl-cmd/claude-code-config PR #394 (May 2026 audit experiment)
|
|
97
97
|
|
|
98
|
-
Audit jl-cmd/claude-code-config PR #394 for **Category A only** (API contract verification). Skip B–
|
|
98
|
+
Audit jl-cmd/claude-code-config PR #394 for **Category A only** (API contract verification). Skip B–N. Sub-bucket forced-exhaustion mode: Category A is decomposed into 9 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
99
99
|
|
|
100
100
|
PR: feat(scripts): add sweep-empty-dirs utility and scheduled-task installer
|
|
101
101
|
Head SHA: 62c9c169ee7a44824e5da25c4cf8b74fdca08a53
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category B only** (selector / query / engine compatibility). Skip A, C–
|
|
1
|
+
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category B only** (selector / query / engine compatibility). Skip A, C–N. Sub-bucket forced-exhaustion mode: Category B is decomposed into 7 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
2
2
|
|
|
3
3
|
[ARTIFACT METADATA: repo, ref/SHA, PR or commit range, file count, language matrix, declared engine/runtime/browser/DB targets — fill before running.]
|
|
4
4
|
ID prefix: `find`.
|
|
@@ -80,7 +80,7 @@ Lead: `Total: N (P0=N, P1=N, P2=N)`. For each sub-bucket B1–B7, produce Shape
|
|
|
80
80
|
|
|
81
81
|
# Worked example: jl-cmd/claude-code-config PR #394
|
|
82
82
|
|
|
83
|
-
Audit jl-cmd/claude-code-config PR #394 for **Category B only** (selector / query / engine compatibility). Skip A, C–
|
|
83
|
+
Audit jl-cmd/claude-code-config PR #394 for **Category B only** (selector / query / engine compatibility). Skip A, C–N. Sub-bucket forced-exhaustion mode: Category B is decomposed into 7 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
84
84
|
|
|
85
85
|
PR: feat(scripts): add sweep-empty-dirs utility and scheduled-task installer
|
|
86
86
|
Head SHA: 62c9c169ee7a44824e5da25c4cf8b74fdca08a53
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category C only** (resource cleanup and lifecycle). Skip A, B, D–
|
|
1
|
+
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category C only** (resource cleanup and lifecycle). Skip A, B, D–N. Sub-bucket forced-exhaustion mode: Category C is decomposed into 8 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
2
2
|
|
|
3
3
|
[ARTIFACT METADATA]
|
|
4
4
|
- Repository / artifact: [REPO_OR_ARTIFACT_NAME]
|
|
@@ -98,7 +98,7 @@ Read-only. No edits, no commits.
|
|
|
98
98
|
|
|
99
99
|
# Worked example: jl-cmd/claude-code-config PR #394
|
|
100
100
|
|
|
101
|
-
Audit jl-cmd/claude-code-config PR #394 for **Category C only** (resource cleanup and lifecycle). Skip A, B, D–
|
|
101
|
+
Audit jl-cmd/claude-code-config PR #394 for **Category C only** (resource cleanup and lifecycle). Skip A, B, D–N. Sub-bucket forced-exhaustion mode: Category C is decomposed into 8 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
102
102
|
|
|
103
103
|
PR: feat(scripts): add sweep-empty-dirs utility and scheduled-task installer
|
|
104
104
|
Head SHA: 62c9c169ee7a44824e5da25c4cf8b74fdca08a53
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category D only** (variable scoping, ordering, and unbound references). Skip A–C, E–
|
|
1
|
+
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category D only** (variable scoping, ordering, and unbound references). Skip A–C, E–N. Sub-bucket forced-exhaustion mode: Category D is decomposed into 8 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
2
2
|
|
|
3
3
|
[ARTIFACT METADATA]
|
|
4
4
|
- Repo / artifact: [REPO_OR_ARTIFACT]
|
|
@@ -89,7 +89,7 @@ Lead: `Total: N (P0=N, P1=N, P2=N)`. For each sub-bucket D1–D8, produce Shape
|
|
|
89
89
|
|
|
90
90
|
# Worked example: jl-cmd/claude-code-config PR #394
|
|
91
91
|
|
|
92
|
-
Audit jl-cmd/claude-code-config PR #394 for **Category D only** (variable scoping, ordering, and unbound references). Skip A–C, E–
|
|
92
|
+
Audit jl-cmd/claude-code-config PR #394 for **Category D only** (variable scoping, ordering, and unbound references). Skip A–C, E–N. Sub-bucket forced-exhaustion mode: Category D is decomposed into 8 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
93
93
|
|
|
94
94
|
PR: feat(scripts): add sweep-empty-dirs utility and scheduled-task installer
|
|
95
95
|
Head SHA: 62c9c169ee7a44824e5da25c4cf8b74fdca08a53
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category E only** (dead code and unused imports). Skip A–D, F–
|
|
1
|
+
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category E only** (dead code and unused imports). Skip A–D, F–N. Sub-bucket forced-exhaustion mode: Category E is decomposed into 8 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
2
2
|
|
|
3
3
|
[ARTIFACT METADATA]
|
|
4
4
|
- Repo / artifact: [REPO_OR_ARTIFACT_NAME]
|
|
@@ -43,8 +43,9 @@ Inline the artifact under this section using the section types defined in the ch
|
|
|
43
43
|
- Runtime-bound conditions (parameter values, `os.path.isdir`, `Test-Path`, environment lookups) are not constant; state the runtime source.
|
|
44
44
|
- Adversarial probes for proof-of-absence: (a) any `if 1:` / `if 0:` / `if True:` / `if False:` literals in the diff? (b) any condition of the form `if x:` where `x` was just assigned a literal in the line above? (c) any `assert True` / `assert False` in test bodies? (d) any short-circuit like `x or DEFAULT` where `x` was just constructed and is statically truthy?
|
|
45
45
|
|
|
46
|
-
**E5. Unused parameters**
|
|
46
|
+
**E5. Unused parameters and locals**
|
|
47
47
|
- For every function or method introduced or modified by the artifact, verify each declared parameter is read at least once in the body (including in default-argument expressions for inner functions, in closures, or in type guards).
|
|
48
|
+
- For every function or method introduced or modified by the artifact, verify each local variable assigned in the body is read at least once afterward in the same scope; an assignment whose value is never read is a dead local.
|
|
48
49
|
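- Tuple-unpack discards (`for path, _, _ in os.walk(...)`) are out of scope — E5 scopes function parameters never read and named locals never read, and `_` is the conventional marker for an intentional discard rather than a dead local; state this exclusion explicitly.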
|
|
49
50
|
- `*args` / `**kwargs` / TypeScript rest spreads: confirm at least one consumer (forwarded to another call, iterated, indexed) or mark the parameter unused.
|
|
50
51
|
- Cross-language parameter declarations (PowerShell `param(...)`, shell positional `$1..$N`, Bash `getopts`): confirm each named parameter has at least one body reference.
|
|
@@ -86,7 +87,7 @@ Note: most Category E findings are P2 (style / cleanup) unless the dead code mas
|
|
|
86
87
|
|
|
87
88
|
# Worked example: jl-cmd/claude-code-config PR #394
|
|
88
89
|
|
|
89
|
-
Audit jl-cmd/claude-code-config PR #394 for **Category E only** (dead code and unused imports). Skip A–D, F–
|
|
90
|
+
Audit jl-cmd/claude-code-config PR #394 for **Category E only** (dead code and unused imports). Skip A–D, F–N. Sub-bucket forced-exhaustion mode: Category E is decomposed into 8 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
90
91
|
|
|
91
92
|
PR: feat(scripts): add sweep-empty-dirs utility and scheduled-task installer
|
|
92
93
|
Head SHA: 62c9c169ee7a44824e5da25c4cf8b74fdca08a53
|
|
@@ -128,7 +129,7 @@ ID prefix: `find`.
|
|
|
128
129
|
- `test_sweep_empty_dirs.py` line 13: `if str(_SCRIPTS_DIR) not in sys.path:` — runtime membership test; not constant.
|
|
129
130
|
- Adversarial probes for proof-of-absence: (a) does the diff introduce any `if 1:` / `if 0:` / `if True:` / `if False:` literals? grep the diff text. (b) any condition of the form `if x:` where `x` was just assigned a literal in the line above? (c) any `assert True` or `assert False` in test bodies? (none — verify).
|
|
130
131
|
|
|
131
|
-
**E5. Unused parameters**
|
|
132
|
+
**E5. Unused parameters and locals**
|
|
132
133
|
- `_log_walk_error(os_error: OSError) -> None` (line 14) — parameter `os_error` is read twice in the body (`os_error.filename`, `os_error.strerror`). Used.
|
|
133
134
|
- `sweep(root: str, min_age_seconds: int) -> list[str]` (line 18) — `root` is passed to `os.walk` (line 21); `min_age_seconds` is read at line 26. Both used.
|
|
134
135
|
- `_build_parser() -> argparse.ArgumentParser` (line 39) — zero parameters; nothing to verify.
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category F only** (silent failures). Skip A–E, G–
|
|
1
|
+
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category F only** (silent failures). Skip A–E, G–N. Sub-bucket forced-exhaustion mode: Category F is decomposed into 8 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
2
2
|
|
|
3
3
|
[ARTIFACT METADATA]
|
|
4
4
|
- Title / short description: [TITLE]
|
|
@@ -98,7 +98,7 @@ Lead: `Total: N (P0=N, P1=N, P2=N)`. For each sub-bucket F1-F8, produce Shape A
|
|
|
98
98
|
|
|
99
99
|
# Worked example: jl-cmd/claude-code-config PR #394
|
|
100
100
|
|
|
101
|
-
Audit jl-cmd/claude-code-config PR #394 for **Category F only** (silent failures). Skip A–E, G–
|
|
101
|
+
Audit jl-cmd/claude-code-config PR #394 for **Category F only** (silent failures). Skip A–E, G–N. Sub-bucket forced-exhaustion mode: Category F is decomposed into 8 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
102
102
|
|
|
103
103
|
PR: feat(scripts): add sweep-empty-dirs utility and scheduled-task installer
|
|
104
104
|
Head SHA: 62c9c169ee7a44824e5da25c4cf8b74fdca08a53
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category G only** (off-by-one, bounds, integer overflow). Skip A–F, H–
|
|
1
|
+
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category G only** (off-by-one, bounds, integer overflow). Skip A–F, H–N. Sub-bucket forced-exhaustion mode: Category G is decomposed into 8 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
2
2
|
|
|
3
3
|
[ARTIFACT METADATA]
|
|
4
4
|
- Repository / artifact: [REPO_OR_ARTIFACT]
|
|
@@ -61,7 +61,7 @@ Lead: `Total: N (P0=N, P1=N, P2=N)`. For each sub-bucket G1-G8, produce Shape A
|
|
|
61
61
|
|
|
62
62
|
# Worked example: jl-cmd/claude-code-config PR #394
|
|
63
63
|
|
|
64
|
-
Audit jl-cmd/claude-code-config PR #394 for **Category G only** (off-by-one, bounds, integer overflow). Skip A–F, H–
|
|
64
|
+
Audit jl-cmd/claude-code-config PR #394 for **Category G only** (off-by-one, bounds, integer overflow). Skip A–F, H–N. Sub-bucket forced-exhaustion mode: Category G is decomposed into 8 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
65
65
|
|
|
66
66
|
PR: feat(scripts): add sweep-empty-dirs utility and scheduled-task installer
|
|
67
67
|
Head SHA: 62c9c169ee7a44824e5da25c4cf8b74fdca08a53
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category H only** (security boundaries). Skip A–G, I–
|
|
1
|
+
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category H only** (security boundaries). Skip A–G, I–N. Sub-bucket forced-exhaustion mode: Category H is decomposed into 10 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
2
2
|
|
|
3
3
|
## ARTIFACT METADATA — trust model
|
|
4
4
|
|
|
@@ -83,7 +83,7 @@ Note: Category H findings tend toward P0/P1 since they're security-relevant —
|
|
|
83
83
|
|
|
84
84
|
# Worked example: jl-cmd/claude-code-config PR #394
|
|
85
85
|
|
|
86
|
-
Audit jl-cmd/claude-code-config PR #394 for **Category H only** (security boundaries). Skip A–G, I–
|
|
86
|
+
Audit jl-cmd/claude-code-config PR #394 for **Category H only** (security boundaries). Skip A–G, I–N. Sub-bucket forced-exhaustion mode: Category H is decomposed into 10 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
87
87
|
|
|
88
88
|
PR: feat(scripts): add sweep-empty-dirs utility and scheduled-task installer
|
|
89
89
|
Head SHA: 62c9c169ee7a44824e5da25c4cf8b74fdca08a53
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category I only** (concurrency hazards). Skip A–H, J–
|
|
1
|
+
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category I only** (concurrency hazards). Skip A–H, J–N. Sub-bucket forced-exhaustion mode: Category I is decomposed into 8 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
2
2
|
|
|
3
3
|
[ARTIFACT METADATA — including: is this code single-threaded, threaded, asyncio, multiprocessing, or mixed? Name the runtime (CPython 3.x, Node, Go, JVM, .NET, PowerShell runspace, browser JS), the concurrency primitives actually present (`threading`, `asyncio`, `multiprocessing`, `concurrent.futures`, `Thread`, `goroutine`, `Promise`, `Task`, `Start-ThreadJob`, `ForEach-Object -Parallel`, etc.), and the inter-process surface (shared filesystem, shared DB, shared cache, shared queue, signals). State explicitly which primitives are absent so each sub-bucket has a Shape B basis.]
|
|
4
4
|
|
|
@@ -88,7 +88,7 @@ Lead: `Total: N (P0=N, P1=N, P2=N)`. For each sub-bucket I1–I8, produce Shape
|
|
|
88
88
|
|
|
89
89
|
# Worked example: jl-cmd/claude-code-config PR #394
|
|
90
90
|
|
|
91
|
-
Audit jl-cmd/claude-code-config PR #394 for **Category I only** (concurrency hazards). Skip A–H, J–
|
|
91
|
+
Audit jl-cmd/claude-code-config PR #394 for **Category I only** (concurrency hazards). Skip A–H, J–N. Sub-bucket forced-exhaustion mode: Category I is decomposed into 8 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
92
92
|
|
|
93
93
|
PR: feat(scripts): add sweep-empty-dirs utility and scheduled-task installer
|
|
94
94
|
Head SHA: 62c9c169ee7a44824e5da25c4cf8b74fdca08a53
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category J only** (CODE_RULES.md compliance). Skip A–I, K. Sub-bucket forced-exhaustion mode: Category J is decomposed into 12 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
1
|
+
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category J only** (CODE_RULES.md compliance). Skip A–I, K–N. Sub-bucket forced-exhaustion mode: Category J is decomposed into 12 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
2
2
|
|
|
3
3
|
[ARTIFACT METADATA]
|
|
4
4
|
- Artifact: [PR title / commit subject / file set / patch series]
|
|
@@ -100,7 +100,7 @@ Note: most Category J findings are P2 (style / cleanup) since they don't affect
|
|
|
100
100
|
|
|
101
101
|
# Worked example: jl-cmd/claude-code-config PR #394
|
|
102
102
|
|
|
103
|
-
Audit jl-cmd/claude-code-config PR #394 for **Category J only** (CODE_RULES.md compliance). Skip A–I, K. Sub-bucket forced-exhaustion mode: Category J is decomposed into 12 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
103
|
+
Audit jl-cmd/claude-code-config PR #394 for **Category J only** (CODE_RULES.md compliance). Skip A–I, K–N. Sub-bucket forced-exhaustion mode: Category J is decomposed into 12 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
104
104
|
|
|
105
105
|
PR: feat(scripts): add sweep-empty-dirs utility and scheduled-task installer
|
|
106
106
|
Head SHA: 62c9c169ee7a44824e5da25c4cf8b74fdca08a53
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category K only** (codebase conflicts — incomplete propagation). Skip A–J. Sub-bucket forced-exhaustion mode: Category K is decomposed into 9 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
1
|
+
Audit [REPO/ARTIFACT] [TARGET_ID] for **Category K only** (codebase conflicts — incomplete propagation). Skip A–J, L–N. Sub-bucket forced-exhaustion mode: Category K is decomposed into 9 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
2
2
|
|
|
3
3
|
[ARTIFACT METADATA — including the BEFORE state of changed surfaces, so the agent can compare before vs after]
|
|
4
4
|
|
|
@@ -79,7 +79,7 @@ Lead: `Total: N (P0=N, P1=N, P2=N)`. For each sub-bucket K1-K9, produce Shape A
|
|
|
79
79
|
|
|
80
80
|
Note: PR #397 is the K canonical case, NOT #394.
|
|
81
81
|
|
|
82
|
-
Audit jl-cmd/claude-code-config PR #397 for **Category K only** (codebase conflicts — incomplete propagation). Skip A–J. Sub-bucket forced-exhaustion mode: Category K is decomposed into 9 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
82
|
+
Audit jl-cmd/claude-code-config PR #397 for **Category K only** (codebase conflicts — incomplete propagation). Skip A–J, L–N. Sub-bucket forced-exhaustion mode: Category K is decomposed into 9 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
|
|
83
83
|
|
|
84
84
|
PR: fix(hooks): improve hedging-language guardrail to surface user questions
|
|
85
85
|
Base SHA: 76f9c1a0048729b87c44626a3380dc840065c2fa (origin/main at PR open time)
|
package/docs/CODE_RULES.md
CHANGED
|
@@ -354,7 +354,7 @@ These principles cannot be reduced to a regex or AST visitor. They live in user-
|
|
|
354
354
|
|
|
355
355
|
### Audit-rubric reference
|
|
356
356
|
|
|
357
|
-
For multi-file architectural reviews see [`packages/claude-dev-env/audit-rubrics/`](../audit-rubrics/). Categories A–
|
|
357
|
+
For multi-file architectural reviews see [`packages/claude-dev-env/audit-rubrics/`](../audit-rubrics/). Categories A–N are maintained as agent rubrics. Category J (CODE_RULES.md compliance) mirrors the ⚡ hook-enforced rules as an audit-side rubric; the other categories stay agent rubrics because they rest on multi-file reasoning beyond a single-file hook's reach.
|
|
358
358
|
|
|
359
359
|
---
|
|
360
360
|
|
package/package.json
CHANGED
package/skills/_shared/pr-loop/scripts/build_audit_prompt.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
"""Emit the complete AUDIT spawn prompt XML to stdout.
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
<
|
|
3
|
+
Builds <context> and <scope> from CLI args; <bug_categories>,
|
|
4
|
+
<rubric_reference>, and <constraints> come from the shared constants in
|
|
5
|
+
skills_pr_loop_constants; <comment_posting> and <output_format> are built
|
|
6
|
+
inline.
|
|
5
7
|
|
|
6
8
|
Usage:
|
|
7
9
|
python scripts/build_audit_prompt.py --owner jl-cmd --repo claude-code-config --pr-number 422 --loop 1 --head-ref feat/branch --base-ref main --worktree-path <PATH> --run-temp-dir <PATH>
|
|
@@ -22,6 +24,7 @@ from _xml_utils import emit_pretty_xml
|
|
|
22
24
|
from skills_pr_loop_constants.path_resolver_constants import (
|
|
23
25
|
ALL_AUDIT_CATEGORY_ENTRIES,
|
|
24
26
|
ALL_AUDIT_CONSTRAINT_TEXTS,
|
|
27
|
+
AUDIT_RUBRIC_REFERENCE_TEXT,
|
|
25
28
|
)
|
|
26
29
|
|
|
27
30
|
|
|
@@ -74,6 +77,9 @@ def build_audit_prompt_xml(
|
|
|
74
77
|
cat_elem = SubElement(bug_categories, "category", {"id": each_category_id})
|
|
75
78
|
cat_elem.text = each_category_label
|
|
76
79
|
|
|
80
|
+
rubric_reference = SubElement(root, "rubric_reference")
|
|
81
|
+
rubric_reference.text = AUDIT_RUBRIC_REFERENCE_TEXT
|
|
82
|
+
|
|
77
83
|
constraints = SubElement(root, "constraints")
|
|
78
84
|
for each_constraint in ALL_AUDIT_CONSTRAINT_TEXTS:
|
|
79
85
|
SubElement(constraints, "constraint").text = each_constraint
|
|
@@ -167,12 +173,12 @@ def main(all_arguments: list[str]) -> int:
|
|
|
167
173
|
xml_output = emit_audit_prompt(
|
|
168
174
|
owner=arguments.owner,
|
|
169
175
|
repo=arguments.repo,
|
|
170
|
-
pr_number=
|
|
176
|
+
pr_number=arguments.pr_number,
|
|
171
177
|
loop=arguments.loop,
|
|
172
|
-
head_ref=
|
|
173
|
-
base_ref=
|
|
174
|
-
worktree_path=
|
|
175
|
-
run_temp_dir=
|
|
178
|
+
head_ref=arguments.head_ref,
|
|
179
|
+
base_ref=arguments.base_ref,
|
|
180
|
+
worktree_path=arguments.worktree_path,
|
|
181
|
+
run_temp_dir=arguments.run_temp_dir,
|
|
176
182
|
)
|
|
177
183
|
sys.stdout.write(xml_output)
|
|
178
184
|
return 0
|
|
@@ -29,19 +29,29 @@ ALL_AUDIT_CONSTRAINT_TEXTS = [
|
|
|
29
29
|
]
|
|
30
30
|
|
|
31
31
|
ALL_AUDIT_CATEGORY_ENTRIES = [
|
|
32
|
-
("A", "
|
|
33
|
-
("B", "
|
|
34
|
-
("C", "
|
|
35
|
-
("D", "
|
|
36
|
-
("E", "
|
|
37
|
-
("F", "
|
|
38
|
-
("G", "
|
|
39
|
-
("H", "
|
|
40
|
-
("I", "
|
|
41
|
-
("J", "
|
|
42
|
-
("K", "Codebase conflicts
|
|
32
|
+
("A", "API contract verification"),
|
|
33
|
+
("B", "Selector / query / engine compatibility"),
|
|
34
|
+
("C", "Resource cleanup and lifecycle"),
|
|
35
|
+
("D", "Variable scoping, ordering, and unbound references"),
|
|
36
|
+
("E", "Dead code and unused imports"),
|
|
37
|
+
("F", "Silent failures"),
|
|
38
|
+
("G", "Off-by-one, bounds, integer overflow"),
|
|
39
|
+
("H", "Security boundaries"),
|
|
40
|
+
("I", "Concurrency hazards"),
|
|
41
|
+
("J", "CODE_RULES.md compliance"),
|
|
42
|
+
("K", "Codebase conflicts (incomplete propagation)"),
|
|
43
|
+
("L", "Behavior-equivalence for refactors"),
|
|
44
|
+
("M", "Producer/consumer cardinality vs collection-type contract"),
|
|
45
|
+
("N", "Test-name scenario verifier"),
|
|
43
46
|
]
|
|
44
47
|
|
|
48
|
+
AUDIT_RUBRIC_REFERENCE_TEXT = (
|
|
49
|
+
"The category list above is a summary. The binding definition of each "
|
|
50
|
+
"category is its rubric file under $HOME/.claude/audit-rubrics/category_rubrics/ "
|
|
51
|
+
"(ready-to-send prompt variants under $HOME/.claude/audit-rubrics/prompts/). "
|
|
52
|
+
"Read the rubric files before auditing."
|
|
53
|
+
)
|
|
54
|
+
|
|
45
55
|
ALL_FIX_EXECUTION_STEPS = [
|
|
46
56
|
"Read the finding and verify it against the current file at file:line.",
|
|
47
57
|
"Write a failing test that reproduces the bug.",
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""Tests pinning build_audit_prompt's emitted A-N category taxonomy."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import importlib.util
|
|
6
|
+
import re
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from types import ModuleType
|
|
10
|
+
from xml.etree.ElementTree import Element
|
|
11
|
+
|
|
12
|
+
_SCRIPTS_DIR = Path(__file__).resolve().parent
|
|
13
|
+
if str(_SCRIPTS_DIR) not in sys.path:
|
|
14
|
+
sys.path.insert(0, str(_SCRIPTS_DIR))
|
|
15
|
+
|
|
16
|
+
from skills_pr_loop_constants.path_resolver_constants import (
|
|
17
|
+
ALL_AUDIT_CATEGORY_ENTRIES,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
_CATEGORY_RUBRICS_DIR = _SCRIPTS_DIR.parents[3] / "audit-rubrics" / "category_rubrics"
|
|
21
|
+
_HEADING_PATTERN = re.compile(r"^# Category ([A-N]) — (.+)$")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _load_build_audit_prompt() -> ModuleType:
|
|
25
|
+
module_path = _SCRIPTS_DIR / "build_audit_prompt.py"
|
|
26
|
+
spec = importlib.util.spec_from_file_location("build_audit_prompt", module_path)
|
|
27
|
+
assert spec is not None
|
|
28
|
+
assert spec.loader is not None
|
|
29
|
+
module = importlib.util.module_from_spec(spec)
|
|
30
|
+
sys.modules["build_audit_prompt"] = module
|
|
31
|
+
spec.loader.exec_module(module)
|
|
32
|
+
return module
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
build_audit_prompt = _load_build_audit_prompt()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _rubric_label_by_letter() -> dict[str, str]:
|
|
39
|
+
assert _CATEGORY_RUBRICS_DIR.is_dir(), f"Missing rubric directory: {_CATEGORY_RUBRICS_DIR}"
|
|
40
|
+
all_labels: dict[str, str] = {}
|
|
41
|
+
for each_rubric_file in sorted(_CATEGORY_RUBRICS_DIR.glob("category-*.md")):
|
|
42
|
+
all_rubric_lines = each_rubric_file.read_text(encoding="utf-8").splitlines()
|
|
43
|
+
assert all_rubric_lines, f"Empty rubric file: {each_rubric_file}"
|
|
44
|
+
each_match = _HEADING_PATTERN.match(all_rubric_lines[0])
|
|
45
|
+
assert each_match is not None, f"Heading pattern not matched in {each_rubric_file}"
|
|
46
|
+
all_labels[each_match.group(1)] = each_match.group(2)
|
|
47
|
+
return all_labels
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _build_audit_root() -> Element:
|
|
51
|
+
return build_audit_prompt.build_audit_prompt_xml(
|
|
52
|
+
owner="jl-cmd",
|
|
53
|
+
repo="claude-code-config",
|
|
54
|
+
pr_number=422,
|
|
55
|
+
loop=1,
|
|
56
|
+
head_ref="feat/branch",
|
|
57
|
+
base_ref="main",
|
|
58
|
+
worktree_path=Path("/tmp/bugteam-pr-422/worktree"),
|
|
59
|
+
run_temp_dir=Path("/tmp/bugteam-pr-422"),
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_bug_categories_carry_ids_a_through_n_in_order() -> None:
|
|
64
|
+
root = _build_audit_root()
|
|
65
|
+
bug_categories = root.find("bug_categories")
|
|
66
|
+
assert bug_categories is not None
|
|
67
|
+
all_emitted_ids = [each_category.get("id") for each_category in bug_categories]
|
|
68
|
+
all_expected_ids = list("ABCDEFGHIJKLMN")
|
|
69
|
+
assert all_emitted_ids == all_expected_ids
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def test_emitted_category_labels_match_constant_entries() -> None:
|
|
73
|
+
root = _build_audit_root()
|
|
74
|
+
bug_categories = root.find("bug_categories")
|
|
75
|
+
assert bug_categories is not None
|
|
76
|
+
label_by_id = {
|
|
77
|
+
each_category.get("id"): each_category.text for each_category in bug_categories
|
|
78
|
+
}
|
|
79
|
+
for each_category_id, each_category_label in ALL_AUDIT_CATEGORY_ENTRIES:
|
|
80
|
+
assert label_by_id[each_category_id] == each_category_label
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def test_category_labels_match_rubric_file_headings() -> None:
|
|
84
|
+
assert dict(ALL_AUDIT_CATEGORY_ENTRIES) == _rubric_label_by_letter()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def test_rubric_reference_element_names_category_rubrics_directory() -> None:
|
|
88
|
+
root = _build_audit_root()
|
|
89
|
+
rubric_reference = root.find("rubric_reference")
|
|
90
|
+
assert rubric_reference is not None
|
|
91
|
+
assert rubric_reference.text is not None
|
|
92
|
+
assert "audit-rubrics/category_rubrics" in rubric_reference.text
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
## Constraints
|
|
4
4
|
|
|
5
|
-
- **Full A–
|
|
5
|
+
- **Full A–N audit every loop, no exceptions.** PR size, "focused audit," "team overhead," "CODE_RULES already passed" — not valid reasons. Empty `<findings/>` for any category is a valid result. The audit agent walks all A–N rubrics each loop.
|
|
6
6
|
- **One run per invocation, multi-PR supported.** All PRs in a single /bugteam invocation share one `run_temp_dir`. Per-PR identity lives in the subagent name prefix (`bugfind-pr<N>-loop<L>` / `bugfix-pr<N>-loop<L>`) and the `<run_temp_dir>/pr-<N>/` subfolder containing that PR's git worktree, diff patches, and outcome XML files.
|
|
7
7
|
- **Grant before any spawn, revoke before any return.** Step 0 grants project `.claude/**` permissions; Step 5 revokes. Both are mandatory. Revoke runs on every exit path including error, cap-reached, and stuck.
|
|
8
8
|
- **Fresh subagent per loop.** Both bugfind and bugfix are spawned new each loop. Reusing a subagent across loops accumulates context inside that subagent's window — defeats clean-room.
|
|
@@ -44,58 +44,30 @@ cd into `<worktree_path>` before any git or file operation.
|
|
|
44
44
|
a `---` separator and a worked example against an authentic PR below —
|
|
45
45
|
are in `$HOME/.claude/audit-rubrics/prompts/`):
|
|
46
46
|
|
|
47
|
-
A. API contract verification
|
|
47
|
+
A. API contract verification
|
|
48
48
|
B. Selector / query / engine compatibility
|
|
49
|
-
C. Resource cleanup and lifecycle
|
|
49
|
+
C. Resource cleanup and lifecycle
|
|
50
50
|
D. Variable scoping, ordering, and unbound references
|
|
51
|
-
E. Dead code
|
|
52
|
-
F. Silent failures
|
|
53
|
-
G. Off-by-one, bounds,
|
|
54
|
-
H. Security boundaries
|
|
55
|
-
I. Concurrency hazards
|
|
56
|
-
J.
|
|
57
|
-
K. Codebase conflicts
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
L. Behavior-equivalence for refactors. When the PR rewrites an existing
|
|
62
|
-
function (especially an enforcement check, parser, or path classifier),
|
|
63
|
-
compare the rewrite's edge-case handling against the sibling implementation
|
|
64
|
-
at the same git commit base. Pin the historically-valid inputs in a
|
|
65
|
-
`KNOWN_GOOD_INPUTS` table and assert each still passes. Cited in audits:
|
|
66
|
-
ccc#479 F1 (`#noqa` no-space variant dropped after a tokenize-based
|
|
67
|
-
refactor); ccc#479 F4 (bare `#` lookalike misclassified after refactor);
|
|
68
|
-
ccc#479 F5 (inline `#!` lookalike misclassified); ccc#479 F6 (early-exit
|
|
69
|
-
invariant dropped); ccc#472 F44 (`startswith('## Problem')` too loose vs
|
|
70
|
-
the sibling regex shape).
|
|
71
|
-
M. Producer/consumer cardinality vs collection-type contract. For each new
|
|
72
|
-
function returning `list[X]`, `Sequence[X]`, or `Iterable[X]`, ask
|
|
73
|
-
whether the return can contain duplicates and whether any downstream
|
|
74
|
-
consumer treats the value as a set. Subprocess-stdout parsers must return
|
|
75
|
-
`frozenset[Path]` or `dict.fromkeys`-deduplicated `list[Path]`.
|
|
76
|
-
Functions whose consumer is itself an `extend(...)` into a list pass;
|
|
77
|
-
functions with explicit "duplicates preserved" docstring text pass.
|
|
78
|
-
Cited in audits: pa#143 F10 (`_extract_paths_from_everything_cli_stdout`
|
|
79
|
-
duplicates → `RuntimeError` — the only High-severity crash bug in the
|
|
80
|
-
audit set); pa#136 F30 / F32 (duplicate content_id rows submit twice;
|
|
81
|
-
writeback ignores content_id key).
|
|
82
|
-
N. Test-name claims a scenario the body does not enter. Tests named
|
|
83
|
-
`test_*_at_*`, `test_*_under_*`, `test_*_when_*`, and `test_*_with_*`
|
|
84
|
-
must, via monkeypatch / fixture inspection, demonstrate the named
|
|
85
|
-
condition is in effect when the system under test runs. Path-decision
|
|
86
|
-
functions (registered in `*_path_exemptions.py` / `is_*_path` /
|
|
87
|
-
`_resolve_*_path` modules) must ship with a parametric matrix of
|
|
88
|
-
canonical edge cases (empty string, single filename, tilde, UNC,
|
|
89
|
-
drive-letter, symlinked, `..`-containing, trailing-slash). Tests with
|
|
90
|
-
neutral names (`test_returns_empty_list_on_x`) are unaffected. Cited
|
|
91
|
-
in audits: ccc#476 F5 / F21 / F23 / F26 / F27 (cross-platform
|
|
92
|
-
scenarios never exercised under the claimed conditions); pa#135 F11 /
|
|
93
|
-
F15 (string-shape and integration tests that exercise only the no-op
|
|
94
|
-
branch); pa#136 F50 (`<substring> not in executed_sql` assertion that
|
|
95
|
-
cannot fail because the substring shape never matches the real
|
|
96
|
-
fragment).
|
|
51
|
+
E. Dead code and unused imports
|
|
52
|
+
F. Silent failures
|
|
53
|
+
G. Off-by-one, bounds, integer overflow
|
|
54
|
+
H. Security boundaries
|
|
55
|
+
I. Concurrency hazards
|
|
56
|
+
J. CODE_RULES.md compliance
|
|
57
|
+
K. Codebase conflicts (incomplete propagation)
|
|
58
|
+
L. Behavior-equivalence for refactors
|
|
59
|
+
M. Producer/consumer cardinality vs collection-type contract
|
|
60
|
+
N. Test-name scenario verifier
|
|
97
61
|
</bug_categories>
|
|
98
62
|
|
|
63
|
+
<rubric_reference>
|
|
64
|
+
The category list above is a summary. The binding definition of each
|
|
65
|
+
category is its rubric file under
|
|
66
|
+
`$HOME/.claude/audit-rubrics/category_rubrics/` (ready-to-send prompt
|
|
67
|
+
variants under `$HOME/.claude/audit-rubrics/prompts/`). Read the rubric
|
|
68
|
+
files before auditing.
|
|
69
|
+
</rubric_reference>
|
|
70
|
+
|
|
99
71
|
<constraints>
|
|
100
72
|
- Read-only on source code: the audit does not modify any source file.
|
|
101
73
|
- Cite file:line for every finding.
|
package/skills/bugteam/SKILL.md
CHANGED
|
@@ -11,10 +11,10 @@ description: >-
|
|
|
11
11
|
# Bugteam
|
|
12
12
|
|
|
13
13
|
Audit–fix until convergence. Bugfind: `code-quality-agent`, fresh context each
|
|
14
|
-
loop, auditing all A–
|
|
14
|
+
loop, auditing all A–N categories. Bugfix: `clean-coder`. Hard cap: 20 audit
|
|
15
15
|
loops. Grant `.claude/**` at start, revoke always at end.
|
|
16
16
|
|
|
17
|
-
The audit agent loads the A–
|
|
17
|
+
The audit agent loads the A–N category rubrics from
|
|
18
18
|
`$HOME/.claude/audit-rubrics/{category_rubrics,prompts}/` alongside
|
|
19
19
|
[`PROMPTS.md`](PROMPTS.md) and produces a single outcome XML per loop.
|
|
20
20
|
|
|
@@ -146,7 +146,7 @@ end-to-end mental model before starting Step 0.
|
|
|
146
146
|
| Posting the end-of-pass audit review via `post_audit_thread.py` (APPROVE on CLEAN — the request event; GitHub stores it as `state=APPROVED` — REQUEST_CHANGES with inline anchored comments on DIRTY) | [§ Audit posting](#audit-posting) |
|
|
147
147
|
| Posting per-finding fix replies via GitHub MCP `add_reply_to_pull_request_comment` (rendered with the unified template at [`_shared/pr-loop/audit-reply-template.md`](../../_shared/pr-loop/audit-reply-template.md)) | [reference/github-pr-reviews.md](reference/github-pr-reviews.md) |
|
|
148
148
|
| Teardown, PR description rewrite via `pr-description-writer`, permission revoke, final report | [reference/teardown-publish-permissions.md](reference/teardown-publish-permissions.md) |
|
|
149
|
-
| Spawn-prompt XML, A–
|
|
149
|
+
| Spawn-prompt XML, A–N category bindings, outcome XML schemas | [PROMPTS.md](PROMPTS.md) |
|
|
150
150
|
| Per-category audit content (sub-buckets, decision criteria, ready-to-send Variant C templates) | `$HOME/.claude/audit-rubrics/{category_rubrics,prompts}/` |
|
|
151
151
|
| Invariants and design rationale | [CONSTRAINTS.md](CONSTRAINTS.md), [reference/design-rationale.md](reference/design-rationale.md) |
|
|
152
152
|
| Audit-contract finding shape (Shape A / B), Haiku secondary, post-fix self-audit | [reference/audit-contract.md](reference/audit-contract.md) |
|
|
@@ -159,11 +159,11 @@ end-to-end mental model before starting Step 0.
|
|
|
159
159
|
- `SKILL.md` — this hub.
|
|
160
160
|
- `reference/` — workflow detail per situation.
|
|
161
161
|
- `scripts/` — utility scripts executed, not loaded as primary context.
|
|
162
|
-
- `PROMPTS.md` — spawn XML, A–
|
|
162
|
+
- `PROMPTS.md` — spawn XML, A–N category bindings, outcome schemas.
|
|
163
163
|
- `CONSTRAINTS.md` — invariants.
|
|
164
164
|
- `EXAMPLES.md` — exit scenarios.
|
|
165
165
|
- `sources.md` — doc URLs and verbatim quotes.
|
|
166
166
|
- `~/.claude/audit-rubrics/` — installed by `npx claude-dev-env` from
|
|
167
|
-
`packages/claude-dev-env/audit-rubrics/`; the audit agent reads all A–
|
|
167
|
+
`packages/claude-dev-env/audit-rubrics/`; the audit agent reads all A–N
|
|
168
168
|
rubrics under `category_rubrics/` and prompts under `prompts/`. Required
|
|
169
169
|
at audit time alongside `PROMPTS.md`.
|
|
@@ -21,7 +21,7 @@ Each finding an audit produces MUST be one of exactly two shapes.
|
|
|
21
21
|
"id": "loop<L>-<K>",
|
|
22
22
|
"file": "path/relative/to/repo/root.py",
|
|
23
23
|
"line": 123,
|
|
24
|
-
"category": "A | B | C | D | E | F | G | H | I | J | K",
|
|
24
|
+
"category": "A | B | C | D | E | F | G | H | I | J | K | L | M | N",
|
|
25
25
|
"severity": "P0 | P1 | P2",
|
|
26
26
|
"excerpt": "verbatim code snippet from the offending line(s)",
|
|
27
27
|
"failure_mode": "one sentence describing what goes wrong and when",
|
|
@@ -37,7 +37,7 @@ Used when an audit investigates a category and does NOT find a bug. Bare "verifi
|
|
|
37
37
|
|
|
38
38
|
```json
|
|
39
39
|
{
|
|
40
|
-
"category": "A | B | C | D | E | F | G | H | I | J | K",
|
|
40
|
+
"category": "A | B | C | D | E | F | G | H | I | J | K | L | M | N",
|
|
41
41
|
"files_opened": ["file1.py", "file2.py"],
|
|
42
42
|
"lines_quoted": [
|
|
43
43
|
{"file": "file1.py", "line": 88, "text": "verbatim line content"}
|
|
@@ -89,7 +89,7 @@ After the primary finding list is complete, every audit runs a second pass again
|
|
|
89
89
|
|
|
90
90
|
The audit must either produce new Shape A findings citing new file:line references not present in the first pass, or cite explicit Shape B adversarial-probe entries for each category it re-examined. An adversarial pass that returns "nothing new, confident first pass was complete" is REJECTED — produce evidence or findings, not confidence.
|
|
91
91
|
|
|
92
|
-
For `/bugteam`, the single audit agent provides per-category coverage by walking all A–
|
|
92
|
+
For `/bugteam`, the single audit agent provides per-category coverage by walking all A–N rubrics in one invocation.
|
|
93
93
|
|
|
94
94
|
## Merge rules
|
|
95
95
|
|
|
@@ -113,7 +113,7 @@ Sequence:
|
|
|
113
113
|
3. Run `py_compile` (or language-equivalent) on each modified file.
|
|
114
114
|
4. Compute `fix_diff` against pre-fix contents for the modified set.
|
|
115
115
|
5. Run `bugteam_code_rules_gate.py` with explicit paths for every modified file.
|
|
116
|
-
6. Spawn a scoped audit of `fix_diff` with full A–
|
|
116
|
+
6. Spawn a scoped audit of `fix_diff` with full A–N rigor, Shape A/B contract, adversarial pass, AND Haiku secondary in parallel (paranoid mode on post-fix).
|
|
117
117
|
7. Any new findings become same-loop fix-targets. Internal iteration count increments by one.
|
|
118
118
|
8. After 3 internal iterations with fresh findings each time, exit `stuck: post-fix audit not converging`.
|
|
119
119
|
9. Only when `gate_findings` empty AND `post_fix_findings` empty: `git add`, commit, push.
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
## Core principle (expanded)
|
|
4
4
|
|
|
5
|
-
One audit agent (`code-quality-agent`, opus) walks all A–
|
|
5
|
+
One audit agent (`code-quality-agent`, opus) walks all A–N categories per loop. One fix agent (`clean-coder`, opus) addresses the audit's findings.
|
|
6
6
|
|
|
7
7
|
Fresh-spawn clean-room isolation: each `Agent` call creates a new subagent with its own context window and no access to prior conversation. After the subagent writes its outcome XML and self-terminates, the lead reads the file. Results never accumulate in the lead’s context beyond the XML artifact. Verbatim Anthropic quotes and URLs: [`../sources.md`](../sources.md).
|
|
8
8
|
|
package/skills/findbugs/SKILL.md
CHANGED
|
@@ -88,16 +88,25 @@ The XML prompt skeleton:
|
|
|
88
88
|
|
|
89
89
|
<bug_categories>
|
|
90
90
|
Investigate each explicitly:
|
|
91
|
-
A. API contract verification
|
|
91
|
+
A. API contract verification
|
|
92
92
|
B. Selector / query / engine compatibility
|
|
93
|
-
C. Resource cleanup and lifecycle
|
|
93
|
+
C. Resource cleanup and lifecycle
|
|
94
94
|
D. Variable scoping, ordering, and unbound references
|
|
95
95
|
E. Dead code and unused imports
|
|
96
|
-
F. Silent failures
|
|
97
|
-
G. Off-by-one, bounds,
|
|
98
|
-
H. Security boundaries
|
|
99
|
-
I. Concurrency hazards
|
|
100
|
-
J.
|
|
96
|
+
F. Silent failures
|
|
97
|
+
G. Off-by-one, bounds, integer overflow
|
|
98
|
+
H. Security boundaries
|
|
99
|
+
I. Concurrency hazards
|
|
100
|
+
J. CODE_RULES.md compliance
|
|
101
|
+
K. Codebase conflicts (incomplete propagation)
|
|
102
|
+
L. Behavior-equivalence for refactors
|
|
103
|
+
M. Producer/consumer cardinality vs collection-type contract
|
|
104
|
+
N. Test-name scenario verifier
|
|
105
|
+
|
|
106
|
+
The category list above is a summary. The binding definition of each
|
|
107
|
+
category is its rubric file under $HOME/.claude/audit-rubrics/category_rubrics/
|
|
108
|
+
(ready-to-send prompt variants under $HOME/.claude/audit-rubrics/prompts/).
|
|
109
|
+
Read the rubric files before auditing.
|
|
101
110
|
</bug_categories>
|
|
102
111
|
|
|
103
112
|
<constraints>
|
|
@@ -256,18 +265,18 @@ Want me to run /fixbugs for the P0/P1 findings?
|
|
|
256
265
|
User: `/findbugs`
|
|
257
266
|
Claude: [resolves PR #42 from current branch, fetches full diff, spawns code-quality-agent foreground with self-contained prompt, returns]
|
|
258
267
|
|
|
259
|
-
`1 P0 / 2 P1 / 0 P2 —
|
|
268
|
+
`1 P0 / 2 P1 / 0 P2 — 11 categories cleared`
|
|
260
269
|
|
|
261
270
|
`P0 — race condition on shared cache write`
|
|
262
|
-
` src/cache.py:88 — concurrent writers can both pass the existence check before either writes (category:
|
|
271
|
+
` src/cache.py:88 — concurrent writers can both pass the existence check before either writes (category: I — Concurrency hazards)`
|
|
263
272
|
|
|
264
273
|
`P1 — silent paste failure`
|
|
265
|
-
` utils/clipboard.py:33 — validated_paste returns success without verifying the post-paste state (category:
|
|
274
|
+
` utils/clipboard.py:33 — validated_paste returns success without verifying the post-paste state (category: F — Silent failures)`
|
|
266
275
|
|
|
267
276
|
`P1 — unbound variable on early-exception path`
|
|
268
|
-
` src/processor.py:283 — scheduling_log referenced after try/finally where it may be unbound (category: scoping)`
|
|
277
|
+
` src/processor.py:283 — scheduling_log referenced after try/finally where it may be unbound (category: D — Variable scoping, ordering, and unbound references)`
|
|
269
278
|
|
|
270
|
-
`Verified clean: API contract, selector compatibility, resource cleanup, dead code, off-by-one, security boundaries,
|
|
279
|
+
`Verified clean: API contract, selector compatibility, resource cleanup, dead code, off-by-one, security boundaries, CODE_RULES.md compliance, codebase conflicts, behavior-equivalence, producer/consumer cardinality, test-name scenario verifier`
|
|
271
280
|
|
|
272
281
|
`Open questions: none`
|
|
273
282
|
|
package/skills/fixbugs/SKILL.md
CHANGED
|
@@ -33,7 +33,7 @@ Locate the most recent `/findbugs` output in the current conversation. For each
|
|
|
33
33
|
|
|
34
34
|
- Severity (`P0` / `P1` / `P2`)
|
|
35
35
|
- `file:line`
|
|
36
|
-
- Category (the A–
|
|
36
|
+
- Category (the A–N letter or category name `/findbugs` reported)
|
|
37
37
|
- One-sentence description as `/findbugs` wrote it
|
|
38
38
|
|
|
39
39
|
Apply the severity filter from `$ARGUMENTS` if present:
|
|
@@ -105,6 +105,15 @@ For each unresolved thread, verify the concern against current HEAD;
|
|
|
105
105
|
either fix-and-resolve, or reply-with-note-and-resolve when the concern
|
|
106
106
|
no longer applies.
|
|
107
107
|
|
|
108
|
+
**Full-PR-diff rule: every CODE-REVIEW round (Step 5) and every BUGTEAM
|
|
109
|
+
round (Step 6) covers the FULL `origin/main...HEAD` diff — every file
|
|
110
|
+
the PR touches.** A round that scopes to a subset — only the last commit,
|
|
111
|
+
only files touched since the prior clean SHA, only bugbot-flagged paths,
|
|
112
|
+
or any other delta cut — does not satisfy the gate, and a "clean" verdict
|
|
113
|
+
against a partial diff is not a valid clean. Re-run the round against the
|
|
114
|
+
full diff before recording `code_review_clean_at` or treating the bugteam
|
|
115
|
+
round as converged. This rule holds every tick, every loop, every PR.
|
|
116
|
+
|
|
108
117
|
- [ ] **Step 0: Grant project permissions**
|
|
109
118
|
`python "$HOME/.claude/skills/bugteam/scripts/grant_project_claude_permissions.py"`
|
|
110
119
|
|
|
@@ -159,12 +168,22 @@ no longer applies.
|
|
|
159
168
|
|
|
160
169
|
Pre-condition: `bugbot_clean_at == current_head` (or `bugbot_down == true`).
|
|
161
170
|
|
|
162
|
-
Run Claude Code's built-in `/code-review --fix` on the
|
|
171
|
+
Run Claude Code's built-in `/code-review --fix` on the full
|
|
172
|
+
`origin/main...HEAD` diff —
|
|
163
173
|
the [local diff review](https://code.claude.com/docs/en/code-review#review-a-diff-locally)
|
|
164
174
|
— so it reviews the diff and applies its findings to the working
|
|
165
175
|
tree. Pass no effort argument, so the review uses the session's
|
|
166
176
|
current effort.
|
|
167
177
|
|
|
178
|
+
**Scope: the FULL `origin/main...HEAD` diff every tick** — every file
|
|
179
|
+
the PR touches. Do not delta-scope to commits added since the prior
|
|
180
|
+
clean SHA, do not scope to a single file, do not scope to bugbot's
|
|
181
|
+
flagged paths. Before running, confirm the working tree is on the
|
|
182
|
+
PR's HEAD with no uncommitted edits, then invoke `/code-review --fix`
|
|
183
|
+
with no path arguments so it audits the whole branch diff against
|
|
184
|
+
`origin/main`. A partial-scope round does not count and cannot set
|
|
185
|
+
`code_review_clean_at`.
|
|
186
|
+
|
|
168
187
|
- [ ] **fixes applied** (working tree changed) →
|
|
169
188
|
- [ ] Commit the applied fixes (one commit) → push
|
|
170
189
|
- [ ] reset `bugbot_clean_at = null`, `code_review_clean_at = null`
|
|
@@ -187,6 +206,14 @@ no longer applies.
|
|
|
187
206
|
`pr_converge_bugteam_enforcer` hook blocks it. `qbug` is NOT an accepted
|
|
188
207
|
substitute; `bugteam` is the only allowed skill at this step.
|
|
189
208
|
|
|
209
|
+
**Scope: the FULL `origin/main...HEAD` diff every tick** — every file
|
|
210
|
+
the PR touches. Pass the PR URL as the sole argument so bugteam audits
|
|
211
|
+
the whole branch diff against `origin/main`. Do not pass a file list,
|
|
212
|
+
a path filter, a commit range, or any "just the new commits since
|
|
213
|
+
last clean" cut — bugteam owns its own discovery on the full PR diff.
|
|
214
|
+
A partial-scope round does not count and cannot satisfy the
|
|
215
|
+
converged-on-current-HEAD condition below.
|
|
216
|
+
|
|
190
217
|
After bugteam completes, re-resolve HEAD.
|
|
191
218
|
|
|
192
219
|
- [ ] **bugteam pushed new commits** →
|
|
@@ -117,14 +117,23 @@ c. Decide (four branches; match first whose predicate holds):
|
|
|
117
117
|
|
|
118
118
|
Local correctness/quality pass between BUGBOT clean and BUGTEAM. Enters
|
|
119
119
|
after BUGBOT reports clean on `current_head` (or `bugbot_down == true`).
|
|
120
|
-
Runs Claude Code's built-in `/code-review --fix` on the
|
|
121
|
-
produces no GitHub review artifact, so there
|
|
122
|
-
resolve.
|
|
123
|
-
|
|
124
|
-
a. Run Claude Code's built-in `/code-review --fix` on the
|
|
125
|
-
the
|
|
126
|
-
|
|
127
|
-
|
|
120
|
+
Runs Claude Code's built-in `/code-review --fix` on the full
|
|
121
|
+
`origin/main...HEAD` diff; it produces no GitHub review artifact, so there
|
|
122
|
+
are no code-review threads to resolve.
|
|
123
|
+
|
|
124
|
+
a. Run Claude Code's built-in `/code-review --fix` on the FULL
|
|
125
|
+
`origin/main...HEAD` diff — every file the PR touches — via the
|
|
126
|
+
[local diff review](https://code.claude.com/docs/en/code-review#review-a-diff-locally).
|
|
127
|
+
It reviews the diff and applies its findings to the working tree.
|
|
128
|
+
|
|
129
|
+
Before running, confirm the working tree sits on the PR's HEAD with no
|
|
130
|
+
uncommitted edits, then invoke `/code-review --fix` with no path
|
|
131
|
+
arguments so it audits the whole branch diff against `origin/main`. Do
|
|
132
|
+
not delta-scope to commits added since the prior clean SHA, do not
|
|
133
|
+
scope to a single file, do not scope to bugbot's flagged paths. A
|
|
134
|
+
partial-scope round does not count and cannot set
|
|
135
|
+
`code_review_clean_at`. Pass no effort argument, so the review uses
|
|
136
|
+
the session's current effort.
|
|
128
137
|
|
|
129
138
|
b. Decide (two branches; match first whose predicate holds):
|
|
130
139
|
|
|
@@ -144,6 +153,13 @@ b. Decide (two branches; match first whose predicate holds):
|
|
|
144
153
|
|
|
145
154
|
a. Run **bugteam** on current PR.
|
|
146
155
|
|
|
156
|
+
Pass the PR URL as the sole argument so bugteam audits the FULL
|
|
157
|
+
`origin/main...HEAD` diff — every file the PR touches. Bugteam owns
|
|
158
|
+
its own discovery on the full PR diff. Do not pass a file list, a
|
|
159
|
+
path filter, a commit range, or any "just the new commits since last
|
|
160
|
+
clean" cut. A partial-scope round does not count and cannot satisfy
|
|
161
|
+
the converged-on-current-HEAD condition in step (d).
|
|
162
|
+
|
|
147
163
|
- **`Skill` invokable**: invoke bugteam
|
|
148
164
|
with `Skill`.
|
|
149
165
|
|
package/skills/qbug/SKILL.md
CHANGED
|
@@ -3,7 +3,7 @@ name: qbug
|
|
|
3
3
|
description: >-
|
|
4
4
|
Required baseline review for every new PR. Runs the /bugteam audit → fix →
|
|
5
5
|
commit → push cycle via one clean-coder subagent (not a full team), looping
|
|
6
|
-
until convergence or stuck. Uses the same CODE_RULES gate, A–
|
|
6
|
+
until convergence or stuck. Uses the same CODE_RULES gate, A–N category
|
|
7
7
|
rubric, and per-loop PR review shape as /bugteam — without TeamCreate,
|
|
8
8
|
teammates, per-loop clean-room, or a loop cap. Invoke /bugteam instead for
|
|
9
9
|
larger PRs that need per-loop bias isolation or a hard loop cap. Triggers:
|
|
@@ -21,7 +21,7 @@ Shared artifacts with /bugteam are referenced below by path, using the `${CLAUDE
|
|
|
21
21
|
|
|
22
22
|
- Pre-flight script: `${CLAUDE_SKILL_DIR}/../../_shared/pr-loop/scripts/preflight.py`
|
|
23
23
|
- Code-rules gate script: `${CLAUDE_SKILL_DIR}/../../_shared/pr-loop/scripts/code_rules_gate.py`
|
|
24
|
-
- Bug category rubric A–
|
|
24
|
+
- Bug category rubric A–N: [`bugteam/PROMPTS.md`](../bugteam/PROMPTS.md#audit-spawn-prompt-xml-bugfind-teammate)
|
|
25
25
|
- **Audit contract** (finding schema, proof-of-absence, adversarial pass, Haiku secondary, post-fix self-audit, diagnostics JSON): [`bugteam/reference/audit-contract.md`](../bugteam/reference/audit-contract.md)
|
|
26
26
|
- PR comment lifecycle shape: [`bugteam/SKILL.md`](../bugteam/SKILL.md#audit-posting)
|
|
27
27
|
|
|
@@ -117,7 +117,7 @@ Agent(
|
|
|
117
117
|
subagent_type="code-quality-agent",
|
|
118
118
|
model="haiku",
|
|
119
119
|
description="qbug Haiku secondary audit for PR <number>",
|
|
120
|
-
prompt="<audit-only prompt: read the PR diff, apply A-
|
|
120
|
+
prompt="<audit-only prompt: read the PR diff, apply A-N categories from <categories_file>, return structured findings. No FIX, no git add, no git commit, no git push.>",
|
|
121
121
|
run_in_background=False
|
|
122
122
|
)
|
|
123
123
|
```
|
|
@@ -188,7 +188,7 @@ The subagent receives this prompt and loops internally — the lead does not re-
|
|
|
188
188
|
|
|
189
189
|
- Read the patch file.
|
|
190
190
|
- Audit only added/modified lines. Read <categories_file> for the
|
|
191
|
-
A–
|
|
191
|
+
A–N category definitions; investigate each category explicitly.
|
|
192
192
|
- Follow the shared audit contract at
|
|
193
193
|
bugteam/reference/audit-contract.md. Per category: produce
|
|
194
194
|
either a Shape A structured finding or a Shape B structured
|
|
@@ -444,7 +444,7 @@ Delete the resolved `<qbug_temp_dir>` tree and any `.qbug-*.md` temp files in th
|
|
|
444
444
|
- **No loop cap.** Cycle runs until `converged`, `stuck`, or `error`. User can interrupt.
|
|
445
445
|
- **Code rules gate before every AUDIT.** Same `validate_content` logic as /bugteam.
|
|
446
446
|
- **One commit per FIX action.** Linear branch, fast-forward push only.
|
|
447
|
-
- **Categories A–
|
|
447
|
+
- **Categories A–N.** Same rubric as [`bugteam/PROMPTS.md`](../bugteam/PROMPTS.md).
|
|
448
448
|
- **One review per loop.** Anchored findings as `comments[]`; unanchored findings surface in the calling skill's user-facing output (chat reply to the user) rather than in the PR review body.
|
|
449
449
|
- **PR description rewrite on every exit**, same as /bugteam Step 4.5.
|
|
450
450
|
- **Temp file cleanup on every exit path.**
|
|
@@ -11,20 +11,21 @@ from pathlib import Path
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
SKILL_FILE_PATH = Path(__file__).parent / "SKILL.md"
|
|
14
|
-
PROMPTS_FILE_PATH = Path(__file__).parent.parent / "bugteam" / "PROMPTS.md"
|
|
15
14
|
CONTRACT_FILE_PATH = (
|
|
16
15
|
Path(__file__).parent.parent / "bugteam" / "reference" / "audit-contract.md"
|
|
17
16
|
)
|
|
17
|
+
CATEGORY_E_RUBRIC_FILE_PATH = (
|
|
18
|
+
Path(__file__).parent.parent.parent
|
|
19
|
+
/ "audit-rubrics"
|
|
20
|
+
/ "category_rubrics"
|
|
21
|
+
/ "category-e-dead-code.md"
|
|
22
|
+
)
|
|
18
23
|
|
|
19
24
|
|
|
20
25
|
def _load_skill_text() -> str:
|
|
21
26
|
return SKILL_FILE_PATH.read_text(encoding="utf-8")
|
|
22
27
|
|
|
23
28
|
|
|
24
|
-
def _load_prompts_text() -> str:
|
|
25
|
-
return PROMPTS_FILE_PATH.read_text(encoding="utf-8")
|
|
26
|
-
|
|
27
|
-
|
|
28
29
|
def _load_contract_text() -> str:
|
|
29
30
|
return CONTRACT_FILE_PATH.read_text(encoding="utf-8")
|
|
30
31
|
|
|
@@ -136,21 +137,10 @@ def test_step2_spawn_should_reference_clean_coder_and_haiku_secondary() -> None:
|
|
|
136
137
|
)
|
|
137
138
|
|
|
138
139
|
|
|
139
|
-
def
|
|
140
|
-
|
|
141
|
-
assert
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
assert
|
|
146
|
-
"dead local" in prompts_text.lower() or "dead locals" in prompts_text.lower()
|
|
147
|
-
), "Category E must cover dead locals"
|
|
148
|
-
assert (
|
|
149
|
-
"dead import" in prompts_text.lower() or "dead imports" in prompts_text.lower()
|
|
150
|
-
), "Category E must cover dead imports"
|
|
151
|
-
assert (
|
|
152
|
-
"dead branch" in prompts_text.lower() or "dead branches" in prompts_text.lower()
|
|
153
|
-
), "Category E must cover dead branches"
|
|
154
|
-
assert (
|
|
155
|
-
"dead return" in prompts_text.lower() or "dead returns" in prompts_text.lower()
|
|
156
|
-
), "Category E must cover dead returns"
|
|
140
|
+
def test_category_e_rubric_should_cover_expanded_dead_code_variants() -> None:
|
|
141
|
+
rubric_text = CATEGORY_E_RUBRIC_FILE_PATH.read_text(encoding="utf-8").lower()
|
|
142
|
+
assert "dead parameter" in rubric_text, "Category E must cover dead parameters"
|
|
143
|
+
assert "dead local" in rubric_text, "Category E must cover dead locals"
|
|
144
|
+
assert "dead import" in rubric_text, "Category E must cover dead imports"
|
|
145
|
+
assert "dead branch" in rubric_text, "Category E must cover dead branches"
|
|
146
|
+
assert "dead return" in rubric_text, "Category E must cover dead returns"
|
package/skills/refine/SKILL.md
CHANGED
|
@@ -164,7 +164,7 @@ Spawn `general-purpose` (`subagent_type: general-purpose`, foreground) with:
|
|
|
164
164
|
- **Ambiguity** — no parked open questions where a decision is required for implementation to begin
|
|
165
165
|
- **Implementer-readiness** — a downstream implementer can act on each step without back-and-forth (file paths named, agents named, change concrete)
|
|
166
166
|
- A required return shape: structured findings as `severity (P0/P1/P2) | location | violation`, plus an explicit `CLEAN` verdict when no findings remain
|
|
167
|
-
- An explicit instruction NOT to apply code-review rubrics (CODE_RULES categories A–
|
|
167
|
+
- An explicit instruction NOT to apply code-review rubrics (audit-rubric categories A–N, API contracts, resource cleanup, etc.) — the audit target is a markdown plan, not source code
|
|
168
168
|
|
|
169
169
|
If the verdict is `CLEAN`: skip step 8 and proceed to step 10.
|
|
170
170
|
|