@event4u/agent-config 1.16.0 → 1.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. package/.agent-src/commands/{agents-audit.md → agents/audit.md} +4 -3
  2. package/.agent-src/commands/{agents-cleanup.md → agents/cleanup.md} +12 -6
  3. package/.agent-src/commands/{agents-prepare.md → agents/prepare.md} +4 -3
  4. package/.agent-src/commands/agents.md +46 -0
  5. package/.agent-src/commands/{chat-history-checkpoint.md → chat-history/checkpoint.md} +4 -4
  6. package/.agent-src/commands/{chat-history-clear.md → chat-history/clear.md} +4 -4
  7. package/.agent-src/commands/{chat-history-resume.md → chat-history/resume.md} +4 -4
  8. package/.agent-src/commands/chat-history/show.md +107 -0
  9. package/.agent-src/commands/chat-history.md +33 -89
  10. package/.agent-src/commands/{commit-in-chunks.md → commit/in-chunks.md} +15 -13
  11. package/.agent-src/commands/commit.md +22 -2
  12. package/.agent-src/commands/{context-create.md → context/create.md} +4 -3
  13. package/.agent-src/commands/{context-refactor.md → context/refactor.md} +4 -3
  14. package/.agent-src/commands/context.md +44 -0
  15. package/.agent-src/commands/{copilot-agents-init.md → copilot-agents/init.md} +4 -3
  16. package/.agent-src/commands/{copilot-agents-optimize.md → copilot-agents/optimize.md} +4 -3
  17. package/.agent-src/commands/copilot-agents.md +44 -0
  18. package/.agent-src/commands/council/default.md +221 -0
  19. package/.agent-src/commands/{council-design.md → council/design.md} +6 -5
  20. package/.agent-src/commands/{council-optimize.md → council/optimize.md} +7 -6
  21. package/.agent-src/commands/{council-pr.md → council/pr.md} +6 -5
  22. package/.agent-src/commands/council.md +47 -212
  23. package/.agent-src/commands/{create-pr-description.md → create-pr/description-only.md} +4 -2
  24. package/.agent-src/commands/create-pr.md +26 -5
  25. package/.agent-src/commands/{feature-dev.md → feature/dev.md} +5 -10
  26. package/.agent-src/commands/{feature-explore.md → feature/explore.md} +4 -8
  27. package/.agent-src/commands/{feature-plan.md → feature/plan.md} +4 -8
  28. package/.agent-src/commands/{feature-refactor.md → feature/refactor.md} +4 -8
  29. package/.agent-src/commands/{feature-roadmap.md → feature/roadmap.md} +6 -10
  30. package/.agent-src/commands/feature.md +6 -12
  31. package/.agent-src/commands/{fix-ci.md → fix/ci.md} +4 -8
  32. package/.agent-src/commands/{fix-portability.md → fix/portability.md} +4 -8
  33. package/.agent-src/commands/{fix-pr-bot-comments.md → fix/pr-bots.md} +4 -8
  34. package/.agent-src/commands/{fix-pr-developer-comments.md → fix/pr-developers.md} +4 -8
  35. package/.agent-src/commands/{fix-pr-comments.md → fix/pr.md} +7 -11
  36. package/.agent-src/commands/{fix-references.md → fix/refs.md} +4 -8
  37. package/.agent-src/commands/{fix-seeder.md → fix/seeder.md} +4 -8
  38. package/.agent-src/commands/fix.md +7 -13
  39. package/.agent-src/commands/{do-and-judge.md → judge/on-diff.md} +4 -3
  40. package/.agent-src/commands/judge/solo.md +90 -0
  41. package/.agent-src/commands/{do-in-steps.md → judge/steps.md} +4 -3
  42. package/.agent-src/commands/judge.md +35 -70
  43. package/.agent-src/commands/{memory-add.md → memory/add.md} +4 -3
  44. package/.agent-src/commands/{memory-full.md → memory/load.md} +4 -3
  45. package/.agent-src/commands/{memory-promote.md → memory/promote.md} +4 -3
  46. package/.agent-src/commands/{propose-memory.md → memory/propose.md} +4 -3
  47. package/.agent-src/commands/memory.md +48 -0
  48. package/.agent-src/commands/{module-create.md → module/create.md} +4 -3
  49. package/.agent-src/commands/{module-explore.md → module/explore.md} +4 -3
  50. package/.agent-src/commands/module.md +44 -0
  51. package/.agent-src/commands/{optimize-agents.md → optimize/agents.md} +4 -8
  52. package/.agent-src/commands/{optimize-augmentignore.md → optimize/augmentignore.md} +4 -9
  53. package/.agent-src/commands/{optimize-rtk-filters.md → optimize/rtk.md} +4 -8
  54. package/.agent-src/commands/{optimize-skills.md → optimize/skills.md} +4 -8
  55. package/.agent-src/commands/optimize.md +4 -10
  56. package/.agent-src/commands/{override-create.md → override/create.md} +4 -3
  57. package/.agent-src/commands/{override-manage.md → override/manage.md} +4 -3
  58. package/.agent-src/commands/override.md +44 -0
  59. package/.agent-src/commands/{roadmap-create.md → roadmap/create.md} +4 -3
  60. package/.agent-src/commands/{roadmap-execute.md → roadmap/execute.md} +4 -3
  61. package/.agent-src/commands/roadmap.md +44 -0
  62. package/.agent-src/commands/{tests-create.md → tests/create.md} +4 -3
  63. package/.agent-src/commands/{tests-execute.md → tests/execute.md} +4 -3
  64. package/.agent-src/commands/tests.md +44 -0
  65. package/.agent-src/contexts/communication/rules-auto/artifact-engagement-recording-mechanics.md +72 -0
  66. package/.agent-src/contexts/communication/rules-auto/augment-portability-mechanics.md +79 -0
  67. package/.agent-src/contexts/communication/rules-auto/augment-source-of-truth-mechanics.md +98 -0
  68. package/.agent-src/contexts/communication/rules-auto/cli-output-handling-mechanics.md +87 -0
  69. package/.agent-src/contexts/communication/rules-auto/command-suggestion-policy-mechanics.md +62 -0
  70. package/.agent-src/contexts/communication/rules-auto/docs-sync-mechanics.md +78 -0
  71. package/.agent-src/contexts/communication/rules-auto/package-ci-checks-mechanics.md +85 -0
  72. package/.agent-src/contexts/communication/rules-auto/review-routing-awareness-mechanics.md +65 -0
  73. package/.agent-src/contexts/communication/rules-auto/roadmap-progress-sync-mechanics.md +78 -0
  74. package/.agent-src/contexts/communication/rules-auto/skill-quality-mechanics.md +62 -0
  75. package/.agent-src/contexts/communication/rules-auto/slash-command-routing-policy-mechanics.md +55 -0
  76. package/.agent-src/contexts/communication/rules-auto/ui-audit-gate-mechanics.md +53 -0
  77. package/.agent-src/contexts/communication/rules-auto/user-interaction-mechanics.md +77 -0
  78. package/.agent-src/contexts/judges/no-consolidate-rationale.md +102 -0
  79. package/.agent-src/contexts/judges/persona-voice-rubric.md +140 -0
  80. package/.agent-src/rules/artifact-engagement-recording.md +13 -69
  81. package/.agent-src/rules/ask-when-uncertain.md +27 -42
  82. package/.agent-src/rules/augment-portability.md +15 -61
  83. package/.agent-src/rules/augment-source-of-truth.md +27 -93
  84. package/.agent-src/rules/cli-output-handling.md +10 -76
  85. package/.agent-src/rules/command-suggestion-policy.md +18 -59
  86. package/.agent-src/rules/commit-conventions.md +17 -14
  87. package/.agent-src/rules/direct-answers.md +34 -49
  88. package/.agent-src/rules/docker-commands.md +5 -5
  89. package/.agent-src/rules/docs-sync.md +15 -69
  90. package/.agent-src/rules/language-and-tone.md +48 -72
  91. package/.agent-src/rules/missing-tool-handling.md +28 -22
  92. package/.agent-src/rules/no-cheap-questions.md +45 -52
  93. package/.agent-src/rules/no-roadmap-references.md +73 -0
  94. package/.agent-src/rules/package-ci-checks.md +21 -61
  95. package/.agent-src/rules/preservation-guard.md +64 -29
  96. package/.agent-src/rules/review-routing-awareness.md +24 -43
  97. package/.agent-src/rules/roadmap-progress-sync.md +10 -71
  98. package/.agent-src/rules/security-sensitive-stop.md +8 -8
  99. package/.agent-src/rules/skill-quality.md +16 -48
  100. package/.agent-src/rules/slash-command-routing-policy.md +7 -4
  101. package/.agent-src/rules/think-before-action.md +52 -42
  102. package/.agent-src/rules/tool-safety.md +19 -16
  103. package/.agent-src/rules/ui-audit-gate.md +24 -38
  104. package/.agent-src/rules/user-interaction.md +13 -68
  105. package/.agent-src/skills/ai-council/SKILL.md +2 -0
  106. package/.agent-src/skills/api-testing/SKILL.md +1 -1
  107. package/.agent-src/skills/check-refs/SKILL.md +59 -40
  108. package/.agent-src/skills/conventional-commits-writing/SKILL.md +86 -28
  109. package/.agent-src/skills/copilot-agents-optimization/SKILL.md +5 -5
  110. package/.agent-src/skills/developer-like-execution/SKILL.md +4 -4
  111. package/.agent-src/skills/finishing-a-development-branch/SKILL.md +101 -65
  112. package/.agent-src/skills/flux/SKILL.md +30 -10
  113. package/.agent-src/skills/github-ci/SKILL.md +2 -2
  114. package/.agent-src/skills/judge-code-quality/SKILL.md +7 -8
  115. package/.agent-src/skills/judge-security-auditor/SKILL.md +4 -5
  116. package/.agent-src/skills/judge-test-coverage/SKILL.md +3 -4
  117. package/.agent-src/skills/lint-skills/SKILL.md +57 -39
  118. package/.agent-src/skills/md-language-check/SKILL.md +61 -39
  119. package/.agent-src/skills/override-management/SKILL.md +5 -5
  120. package/.agent-src/skills/quality-tools/SKILL.md +2 -2
  121. package/.agent-src/skills/react-shadcn-ui/SKILL.md +116 -43
  122. package/.agent-src/skills/readme-reviewer/SKILL.md +30 -29
  123. package/.agent-src/skills/readme-writing/SKILL.md +78 -53
  124. package/.agent-src/skills/readme-writing-package/SKILL.md +50 -47
  125. package/.agent-src/skills/receiving-code-review/SKILL.md +52 -47
  126. package/.agent-src/skills/refine-prompt/SKILL.md +0 -1
  127. package/.agent-src/skills/requesting-code-review/SKILL.md +35 -30
  128. package/.agent-src/skills/security/SKILL.md +7 -2
  129. package/.agent-src/skills/security-audit/SKILL.md +7 -3
  130. package/.agent-src/skills/systematic-debugging/SKILL.md +68 -60
  131. package/.agent-src/skills/test-driven-development/SKILL.md +59 -57
  132. package/.agent-src/skills/test-performance/SKILL.md +0 -1
  133. package/.agent-src/skills/traefik/SKILL.md +4 -4
  134. package/.agent-src/skills/verify-completion-evidence/SKILL.md +28 -26
  135. package/.claude-plugin/marketplace.json +22 -11
  136. package/AGENTS.md +2 -2
  137. package/CHANGELOG.md +90 -1
  138. package/README.md +18 -17
  139. package/docs/architecture.md +4 -6
  140. package/docs/catalog.md +67 -39
  141. package/docs/contracts/STABILITY.md +13 -7
  142. package/docs/contracts/adr-chat-history-split.md +1 -3
  143. package/docs/contracts/adr-command-suggestion.md +0 -2
  144. package/docs/contracts/adr-implement-ticket-runtime.md +1 -2
  145. package/docs/contracts/adr-product-ui-track.md +3 -6
  146. package/docs/contracts/adr-prompt-driven-execution.md +3 -4
  147. package/docs/contracts/agent-memory-contract.md +6 -11
  148. package/docs/contracts/artifact-engagement-flow.md +6 -9
  149. package/docs/contracts/command-clusters.md +56 -46
  150. package/docs/contracts/command-suggestion-flow.md +1 -3
  151. package/docs/contracts/context-paths.md +99 -0
  152. package/docs/contracts/file-ownership-matrix.json +6722 -0
  153. package/docs/contracts/file-ownership-matrix.md +134 -0
  154. package/docs/contracts/implement-ticket-flow.md +6 -9
  155. package/docs/contracts/linear-ai-rules-inclusion.md +0 -1
  156. package/docs/contracts/linear-ai-three-layers.md +0 -2
  157. package/docs/contracts/load-context-budget-model.md +178 -0
  158. package/docs/contracts/load-context-schema.md +1 -3
  159. package/docs/contracts/rule-interactions.md +0 -1
  160. package/docs/contracts/rule-priority-hierarchy.md +1 -1
  161. package/docs/contracts/ui-track-flow.md +7 -17
  162. package/docs/customization.md +2 -0
  163. package/docs/getting-started.md +5 -4
  164. package/docs/guidelines/agent-infra/asking-and-brevity-examples.md +100 -0
  165. package/package.json +1 -1
  166. package/scripts/_one_off_phase4_dispatch_latency.py +108 -0
  167. package/scripts/_one_off_phase6_trigger_jaccard.py +92 -0
  168. package/scripts/_phase2_shim_helper.py +109 -0
  169. package/scripts/agent-config +10 -0
  170. package/scripts/ai_council/_one_off_2a4_acceptance.py +208 -0
  171. package/scripts/ai_council/_one_off_context_layer_v1_estimate.py +67 -0
  172. package/scripts/ai_council/_one_off_context_layer_v1_review.py +292 -0
  173. package/scripts/ai_council/_one_off_followups_review.py +259 -0
  174. package/scripts/ai_council/_one_off_nondestructive_inline_audit.py +209 -0
  175. package/scripts/ai_council/_one_off_phase_2a_budget_rebalance.py +257 -0
  176. package/scripts/ai_council/_one_off_phase_2a_post_revert.py +197 -0
  177. package/scripts/ai_council/_one_off_rule_hardening_v1.py +251 -0
  178. package/scripts/ai_council/_one_off_structural_open_questions.py +232 -0
  179. package/scripts/ai_council/_one_off_structural_optimization.py +144 -0
  180. package/scripts/ai_council/_one_off_structural_v3_gaps.py +252 -0
  181. package/scripts/ai_council/_one_off_structural_v3_review.py +240 -0
  182. package/scripts/check_always_budget.py +363 -45
  183. package/scripts/check_cluster_patterns.py +159 -0
  184. package/scripts/check_command_count_messaging.py +14 -7
  185. package/scripts/check_context_paths.py +201 -0
  186. package/scripts/check_no_roadmap_refs.py +155 -0
  187. package/scripts/check_phase_coupling.py +148 -0
  188. package/scripts/check_portability.py +2 -0
  189. package/scripts/check_references.py +29 -2
  190. package/scripts/check_safety_floor_untouched.py +125 -0
  191. package/scripts/command_suggester/loader.py +4 -1
  192. package/scripts/compress.py +59 -13
  193. package/scripts/generate_index.py +6 -2
  194. package/scripts/generate_ownership_matrix.py +323 -0
  195. package/scripts/hooks/augment-roadmap-progress.sh +57 -0
  196. package/scripts/install.py +49 -28
  197. package/scripts/lint_no_new_atomic_commands.py +12 -11
  198. package/scripts/requirements-evals.txt +1 -0
  199. package/scripts/roadmap_progress_hook.py +159 -0
  200. package/scripts/schemas/command.schema.json +4 -3
  201. package/scripts/skill_linter.py +1 -0
  202. package/scripts/sync_agent_settings.py +25 -2
  203. package/scripts/update_counts.py +7 -0
@@ -0,0 +1,65 @@
1
+ # Review Routing Awareness — mechanics
2
+
3
+ Memory-lookup snippet, "do NOT overreach" guardrails, and anti-pattern
4
+ catalog for the
5
+ [`review-routing-awareness`](../../../rules/review-routing-awareness.md)
6
+ rule. The four required-behavior steps and the "when this rule applies"
7
+ trigger live in the rule; this file is the lookup material for the
8
+ fallback path and the failure-mode list.
9
+
10
+ ## Memory-lookup fallback
11
+
12
+ If neither `.github/ownership-map.yml` (or `agents/ownership-map.yml`)
13
+ nor `.github/historical-bug-patterns.yml` (or
14
+ `agents/historical-bug-patterns.yml`) exists, fall back to the
15
+ engineering-memory layer via
16
+ [`memory-access`](../../../../docs/guidelines/agent-infra/memory-access.md):
17
+
18
+ ```python
19
+ from scripts.memory_lookup import retrieve
20
+ extra = retrieve(
21
+ types=["ownership", "historical-patterns"],
22
+ keys=<changed file paths>,
23
+ limit=5,
24
+ )
25
+ ```
26
+
27
+ Curated memory (`agents/memory/ownership.yml`,
28
+ `agents/memory/historical-patterns.yml`) carries the same schema as the
29
+ project-local YAMLs and is merged into the routing output alongside
30
+ them. If both memory and project YAMLs are absent, skip the rule and
31
+ rely on [`reviewer-awareness`](../../../rules/reviewer-awareness.md)
32
+ defaults. **Do not invent owners or patterns** from context.
33
+
34
+ ## Surface findings — worked examples
35
+
36
+ When producing a review plan, include:
37
+
38
+ - **Owner-mapped roles** — explicitly preferred over generic roles. If
39
+ the ownership map says `app/Billing/**` is owned by
40
+ `finance-engineering + security`, use those, not "backend + security".
41
+ - **Historical-pattern warnings** — list every matched pattern with a
42
+ short label and the required control, e.g. _"Pattern: N+1 on tenant
43
+ listings → add an eager-load regression test"_.
44
+ - **Confidence note** — if the ownership map is stale (last updated > 6
45
+ months ago per the `updated` field), say so. Ownership maps rot.
46
+
47
+ ## Do NOT overreach
48
+
49
+ - **Never rename paths** or add ownership entries as a side effect of a
50
+ code change. Ownership map edits are a separate, explicit task.
51
+ - **Never mark a change safe** only because no pattern matched. Pattern
52
+ absence means "no known hit", not "no risk".
53
+ - **Never copy historical-pattern names into the diff** as code comments
54
+ or commit messages — they are routing metadata, not commentary.
55
+
56
+ ## Anti-patterns — reject them
57
+
58
+ - Suggesting owners "because this looks like billing code" without
59
+ consulting the ownership map when one exists.
60
+ - Inventing historical patterns from general knowledge — patterns must
61
+ come from the project's own registry.
62
+ - Downgrading a matched high-severity pattern because "the author said
63
+ it's fine" — the pattern was registered because it bit before.
64
+ - Treating an out-of-date map as absent. Flag staleness; do not silently
65
+ skip.
@@ -0,0 +1,78 @@
1
+ # Roadmap Progress Sync — mechanics
2
+
3
+ Mechanics, triggers, and failure-mode catalog for the
4
+ [`roadmap-progress-sync`](../../../rules/roadmap-progress-sync.md)
5
+ rule. The Iron Laws and status semantics live in the rule; this
6
+ file holds the lookup material the rule pulls when a trigger fires.
7
+
8
+ ## How to regenerate
9
+
10
+ ```bash
11
+ ./agent-config roadmap:progress
12
+ ```
13
+
14
+ The `./agent-config` wrapper is written into the project root by the
15
+ package installer and delegates to the master CLI inside
16
+ `node_modules/@event4u/agent-config/` or `vendor/event4u/agent-config/`.
17
+ No global tooling required.
18
+
19
+ ## Triggers
20
+
21
+ | Edit | Must run, same response |
22
+ |---|---|
23
+ | **Create a new roadmap file** | regenerate dashboard |
24
+ | **Rename or delete a roadmap file** | regenerate dashboard |
25
+ | Mark step `[x]`, `[~]`, `[-]`, or unmark back to `[ ]` | regenerate dashboard |
26
+ | Add, rename, or remove a phase | regenerate dashboard |
27
+ | **Last `[ ]` flips** — roadmap reaches `count_open == 0` | `git mv` → `archive/` (or `skipped/`) **then** regenerate dashboard |
28
+ | Move roadmap between `roadmaps/` ↔ `archive/` ↔ `skipped/` | regenerate dashboard |
29
+
30
+ **Batching rule:** if you edit multiple checkboxes in one response, a
31
+ **single** regeneration at the end of that response is enough — but
32
+ the response must not end without it. If one of those edits closes a
33
+ roadmap, archive it first, then run the single regen.
34
+
35
+ ## Pre-send self-check — MANDATORY
36
+
37
+ Before sending any reply that touched `agents/roadmaps/`, run this
38
+ silent gate:
39
+
40
+ 1. Did this turn create, rename, delete, or move a roadmap file? → regen MUST be in the reply.
41
+ 2. Did this turn flip any checkbox in a roadmap file? → regen MUST be in the reply.
42
+ 3. Did the regen output (`✅ Wrote agents/roadmaps-progress.md · …`) actually appear this turn? → if no, run it now before sending.
43
+ 4. **Autonomous roadmap execution gate** — did this turn complete a roadmap step (code saved + verification passed) without flipping its checkbox? → flip `[x]` (or `[~]` if multi-turn) and regen before sending.
44
+ 5. **Trackable-roadmap gate** — did this turn create or substantially edit a roadmap file? → does it now contain at least one `- [ ]` per non-intro phase, **or** carry `status: draft` in frontmatter? → if neither, add the checklist or the draft flag before sending.
45
+
46
+ Any change detected above + no regen run this turn = rule violation. Run the regen before sending.
47
+
48
+ ## Failure modes
49
+
50
+ - **Created the roadmap, marked Phase 1 done across multiple turns,
51
+ never regenerated** — dashboard silently lies "this roadmap does
52
+ not exist" to the next reader. Canonical failure of this rule;
53
+ the rule was hardened in response to it.
54
+ - **Regenerated yesterday, edited today, "I'll regen at session
55
+ end"** — session ends from a crash, regen never lands.
56
+ - **Closed a roadmap (last `[ ]` → `[x]`) and regenerated before
57
+ `git mv`** — the closed roadmap reappears in "Open roadmaps".
58
+ - **Edited the dashboard by hand to "fix it quickly"** — next regen
59
+ overwrites the manual edit; no audit trail of why.
60
+ - **Autonomous run, four steps shipped across four turns, dashboard
61
+ flat the whole time, single regen at the end** — user lost
62
+ progress visibility for the entire run. Each completed step must
63
+ flip its checkbox in the reply that ships it.
64
+ - **Decision-only roadmap shipped without checkboxes** — file
65
+ contains decisions / ICE / block-sequencing but zero `- [ ]`,
66
+ dashboard shows `0/0` or omits it. Pair with a `## Phase N`
67
+ section or mark `status: draft` (CI catches this now).
68
+ - **Headings off-canon (`### P0 #N`, `## Block A`, `### Sequencing
69
+ — Phase 1`)** — `PHASE_RE` skips them, roadmap invisible to the
70
+ dashboard. Rename to `## Phase <id>` or mark `status: draft`.
71
+
72
+ ## Do NOT
73
+
74
+ - Do NOT edit `agents/roadmaps-progress.md` by hand — always regenerate.
75
+ - Do NOT defer the regen to "next commit" or "before push" — same response.
76
+ - Do NOT rely on CI (`--check` mode) as the first line of defence — CI is last-line, not real-time.
77
+ - Do NOT skip the regen because "only one checkbox changed" — the dashboard aggregates counts and phase percentages that shift on single edits.
78
+ - Do NOT leave a 100%-complete roadmap in `agents/roadmaps/` "for review" — `git mv` to archive **before** regenerating, otherwise it reappears in "Open roadmaps".
@@ -0,0 +1,62 @@
1
+ # Skill quality — mechanics
2
+
3
+ Description-triggering recipe, merge-preservation invariants, and
4
+ compression-preservation invariants for the
5
+ [`skill-quality`](../../../rules/skill-quality.md) rule. The minimum
6
+ sharpness table, required sections, frontmatter contract, the
7
+ skill-independence Iron Law, and the refactor-safety NEVER list live
8
+ in the rule; this file is the lookup material when authoring or
9
+ refactoring a skill.
10
+
11
+ ## Description Triggering
12
+
13
+ Claude routes skills by reading the frontmatter `description`. Polite, generic,
14
+ or hedged descriptions cause **undertriggering** — the skill never loads when it
15
+ should, and the user never learns it exists.
16
+
17
+ Make descriptions "pushy" — explicit about when to fire:
18
+
19
+ - Start with a concrete verb phrase: `Use when ...`, `Creates ...`, `Reviews ...`.
20
+ - Name 2+ concrete triggers — domains, symptoms, file types, user phrasing.
21
+ - End with: ``... even if they don't explicitly ask for `<skill-name>`.``
22
+ - Avoid hedges: `may help with`, `can be useful for`, `covers various`.
23
+ - **Keep it ≤ 200 characters.** `scripts/skill_linter.py` raises a
24
+ `description_too_long` warning above this. If the pushy tail pushes you over, cut
25
+ adjectives, drop the second example phrasing, or collapse a list — do
26
+ **not** drop the trigger vocabulary or the `even if ...` tail.
27
+
28
+ Source: [`skills/skill-creator` in `anthropics/skills`](https://github.com/anthropics/skills/blob/main/skills/skill-creator/SKILL.md).
29
+
30
+ **Litmus test:** Read the description cold, without the skill's body. If you
31
+ cannot name at least two phrasings a user would realistically type that should
32
+ route to this skill, the description is too polite. Rewrite it.
33
+
34
+ ## Merge Preservation
35
+
36
+ When merging or refactoring skills, the merged result MUST preserve:
37
+
38
+ 1. **Strongest validation** from each source skill
39
+ 2. **Strongest example** (good/bad contrast) from each source
40
+ 3. **Strongest anti-pattern** from each source
41
+ 4. **All concrete decision criteria** that differ between sources
42
+
43
+ A merge is invalid if:
44
+ - Validation got weaker than the strongest source
45
+ - Examples were lost without replacement
46
+ - Anti-pattern coverage decreased
47
+ - The merged skill became a generic umbrella doc
48
+
49
+ ## Compression Preservation
50
+
51
+ When compressing a skill, the compressed version MUST preserve:
52
+
53
+ - Trigger quality (description + When to use)
54
+ - All procedure steps that contain decisions
55
+ - All concrete validation checks
56
+ - All gotchas and anti-patterns
57
+ - Strongest example (at minimum one good/bad contrast)
58
+
59
+ Compression may remove:
60
+ - Verbose explanations
61
+ - Redundant examples (keep the strongest)
62
+ - Commentary that doesn't affect execution
@@ -0,0 +1,55 @@
1
+ # Slash-command routing — cluster mechanics
2
+
3
+ Lookup table for the [`slash-command-routing-policy`](../../../rules/slash-command-routing-policy.md) rule. Lists the
4
+ locked clusters and their sub-commands so the rule itself can stay at
5
+ its current LOC while still reflecting the full surface. Source of
6
+ truth for the cluster names is
7
+ [`docs/contracts/command-clusters.md`](../../../../docs/contracts/command-clusters.md);
8
+ this file mirrors that contract for runtime lookup. Linter:
9
+ `scripts/check_cluster_patterns.py` (verifies dispatcher shape).
10
+
11
+ ## Locked clusters and sub-commands
12
+
13
+ | Cluster | Phase | Sub-commands | Replaces |
14
+ |---|:-:|---|---|
15
+ | `/fix` | 1 | `ci` · `pr` · `pr-bots` · `pr-developers` · `portability` · `refs` · `seeder` | `/fix-ci` · `/fix-pr-comments` · `/fix-pr-bot-comments` · `/fix-pr-developer-comments` · `/fix-portability` · `/fix-references` · `/fix-seeder` |
16
+ | `/optimize` | 1 | `agents` · `augmentignore` · `rtk` · `skills` | `/optimize-agents` · `/optimize-augmentignore` · `/optimize-rtk-filters` · `/optimize-skills` |
17
+ | `/feature` | 1 | `explore` · `plan` · `refactor` · `roadmap` | `/feature-explore` · `/feature-plan` · `/feature-refactor` · `/feature-roadmap` |
18
+ | `/chat-history` | 2 | `show` · `resume` · `clear` · `checkpoint` | `/chat-history` (legacy status) · `/chat-history-resume` · `/chat-history-clear` · `/chat-history-checkpoint` |
19
+ | `/agents` | 2 | `audit` · `cleanup` · `prepare` | `/agents-audit` · `/agents-cleanup` · `/agents-prepare` |
20
+ | `/memory` | 2 | `add` · `load` · `promote` · `propose` | `/memory-add` · `/memory-full` · `/memory-promote` · `/propose-memory` |
21
+ | `/roadmap` | 2 | `create` · `execute` | `/roadmap-create` · `/roadmap-execute` |
22
+ | `/module` | 2 | `create` · `explore` | `/module-create` · `/module-explore` |
23
+ | `/tests` | 2 | `create` · `execute` | `/tests-create` · `/tests-execute` |
24
+ | `/context` | 2 | `create` · `refactor` | `/context-create` · `/context-refactor` |
25
+ | `/override` | 2 | `create` · `manage` | `/override-create` · `/override-manage` |
26
+ | `/copilot-agents` | 2 | `init` · `optimize` | `/copilot-agents-init` · `/copilot-agents-optimize` |
27
+ | `/judge` | 2 | `solo` · `on-diff` · `steps` | `/judge` (legacy standalone) · `/do-and-judge` · `/do-in-steps` |
28
+ | `/commit` | 2 | flag: `--in-chunks` | `/commit-in-chunks` |
29
+ | `/create-pr` | 2 | flag: `--description-only` | `/create-pr-description` |
30
+
31
+ ## Routing semantics
32
+
33
+ 1. The user types `/<cluster> [<sub>] [args]`.
34
+ 2. Match the cluster against the table above. If the leading token is
35
+ a dispatcher cluster, route to the dispatcher's `commands/<cluster>.md`
36
+ and let the dispatcher's "Dispatch" section pick the sub-command.
37
+ 3. If the leading token is a flag-cluster (`/commit`, `/create-pr`),
38
+ the cluster file is the entry point itself; flags absorb the
39
+ former helper command.
40
+ 4. **Legacy atomic shims** (`/agents-audit`, `/memory-add`, …) keep working
41
+ for one release cycle (see "Removal cycle" below; Phase 1 atomics such as `/fix-ci` are already removed). They emit a deprecation warning and forward
42
+ to the cluster invocation. New invocations should always use the
43
+ cluster form.
44
+ 5. If a sub-command is unknown, the dispatcher prints the menu — never
45
+ guess.
46
+
47
+ ## Removal cycle
48
+
49
+ | Cycle | Active form | Shim form |
50
+ |---|---|---|
51
+ | `1.15.x` / `1.16.x` | Phase 1 cluster commands | Phase 1 atomic shims |
52
+ | `1.17.x` | Phase 1 + Phase 2 cluster commands | Phase 2 atomic shims (Phase 1 atomics removed) |
53
+ | next minor after `1.17.x` | Cluster commands only | — (Phase 2 atomics removed) |
54
+
55
+ Consumers see the canonical surface as the cluster form throughout.
@@ -0,0 +1,53 @@
1
+ # UI-audit gate — mechanics
2
+
3
+ Findings-shape spec, failure-mode catalog, and cloud-surface
4
+ adaptation for the
5
+ [`ui-audit-gate`](../../../rules/ui-audit-gate.md) rule. The Iron
6
+ Law, the activation triggers, the allow-list, and the action sequence
7
+ when the gate fires live in the rule; this file is the lookup
8
+ material when an agent has to verify what counts as findings or
9
+ recognise a failure mode.
10
+
11
+ ## What "audit findings" means
12
+
13
+ `state.ui_audit` is a non-empty dict carrying at least one of:
14
+
15
+ - `components_found` — `{path, name, kind, similarity?}` inventory
16
+ entries from [`existing-ui-audit`](../../../skills/existing-ui-audit/SKILL.md).
17
+ - `greenfield: true` plus `greenfield_decision` ∈
18
+ `{scaffold, bare, external_reference}`.
19
+ - Legacy `components` alias — back-compat for the same shape.
20
+
21
+ `null`, `{}`, or a dict without those keys is **not** findings;
22
+ emit `@agent-directive: existing-ui-audit` instead of writing code.
23
+
24
+ ## Failure modes
25
+
26
+ - Writing the component first and "thinking about reuse later".
27
+ - Citing a similar-looking component from memory without verifying
28
+ it via the audit.
29
+ - Treating `state.ui_audit = {}` as "audit ran, found nothing" —
30
+ empty dict is rejected on purpose; an audit must record either ≥1
31
+ `components_found` or, when it finds nothing, the greenfield branch.
32
+ - Bypassing the gate for "just one tile".
33
+
34
+ ## Interactions
35
+
36
+ - [`improve-before-implement`](../../../rules/improve-before-implement.md) — runs
37
+ first when the request is ambiguous; this rule is the next gate.
38
+ - [`ask-when-uncertain`](../../../rules/ask-when-uncertain.md) — "just build it"
39
+ does **not** drop the audit; acknowledge, run audit, continue.
40
+ - [`directives/ui/audit.py`](../../../templates/scripts/work_engine/directives/ui/audit.py)
41
+ — code-layer twin; this rule covers the cases where the engine
42
+ is not in the loop.
43
+ - [`existing-ui-audit`](../../../skills/existing-ui-audit/SKILL.md) — the
44
+ skill that produces the findings.
45
+
46
+ ## Cloud Behavior
47
+
48
+ On cloud surfaces the engine is not shipped, so `state.ui_audit`
49
+ does not exist. The Iron Law still applies: take the visible
50
+ inventory of files in conversation context as the audit, and
51
+ surface a one-line audit summary in the reply before writing the
52
+ component. The gate is satisfied by an explicit summary, not by
53
+ silently skipping.
@@ -0,0 +1,77 @@
1
+ # User Interaction — mechanics
2
+
3
+ Format examples, common failure modes, progress indicators, and
4
+ summary patterns for the [`user-interaction`](../../../rules/user-interaction.md)
5
+ rule. Iron Law 1 (single-source recommendation) and Iron Law 2
6
+ (pre-send self-check) live in the rule; this file is the lookup
7
+ material for the format details.
8
+
9
+ ## Common failure modes — known, named, no excuses
10
+
11
+ - **End-of-turn menu skipped.** Reply answers the question fine, then ends with `> 1. Foo > 2. Bar > 3. Stop` and no `Empfehlung:` / `Recommendation:` line. Iron Law 1 was violated — these are numbered options, position is irrelevant.
12
+ - **"Genuinely no preference" hedge.** Pick anyway. The agent has more context than the user on the trade-off; refusing to pick dumps the work back. Pick the safest option, name the flip-condition.
13
+ - **"User knows the project better" hedge.** Same failure mode, different costume. The user asked for an opinion by virtue of accepting the options block; deliver it.
14
+ - **Multi-block reply with one recommendation.** Two options blocks but only one `Empfehlung:` / `Recommendation:` line — the second block is unguarded. Rule 5 of Iron Law 2 closes this.
15
+
16
+ ## Examples
17
+
18
+ **Binary choice:**
19
+
20
+ ```
21
+ > 1. Interactive — ask before each comment
22
+ > 2. Automatic — handle all independently
23
+
24
+ **Recommendation: 1 — Interactive** — the comments touch security-sensitive code,
25
+ so a wrong auto-fix is more expensive than approving each one. Caveat: flip to 2
26
+ if the comments turn out to be pure formatting.
27
+ ```
28
+
29
+ **Multiple choice with skip:**
30
+
31
+ ```
32
+ > 1. Fix the code
33
+ > 2. Fix the test
34
+ > 3. Skip
35
+
36
+ **Recommendation: 1 — Fix the code** — the test asserts the documented behaviour;
37
+ the production code drifted from the contract. Caveat: pick 2 only if the contract
38
+ itself is wrong.
39
+ ```
40
+
41
+ **Confirmation with context:**
42
+
43
+ ```
44
+ > Found PR #1399 on branch `chore/refactor-agent-setup-2`.
45
+ >
46
+ > 1. Yes, that's the right PR
47
+ > 2. No, different PR — I'll provide the URL
48
+
49
+ **Recommendation: 1 — Yes** — the branch name matches the PR title exactly.
50
+ Caveat: flip to 2 if the PR was reopened from a different branch.
51
+ ```
52
+
53
+ ## When NOT to use numbered options
54
+
55
+ - **Open-ended questions** where the answer is free text (e.g., "What should the class be named?").
56
+ - **Simple yes/no** can use numbered options OR accept "ja"/"nein" directly.
57
+ Even for yes/no, prefer numbered options if there's additional context to show.
58
+
59
+ ## Progress Indicators
60
+
61
+ When processing multiple items (e.g., review comments, test failures), show progress:
62
+
63
+ ```
64
+ **Comment 3/7** — `filename.php:42`
65
+ ```
66
+
67
+ ## Summaries
68
+
69
+ After completing a batch of actions, provide a summary table:
70
+
71
+ ```
72
+ | # | File | Action |
73
+ |---|---|---|
74
+ | 1 | `file.php` | Fixed null check |
75
+ | 2 | `test.php` | Updated assertion |
76
+ | 3 | `config.php` | Skipped (intentional) |
77
+ ```
@@ -0,0 +1,102 @@
1
+ ---
2
+ audience: maintainers
3
+ status: stable
4
+ stability: stable
5
+ purpose: >
6
+ Locked decision rationale for Phase 3a of the
7
+ road-to-structural-optimization roadmap. The four `judge-*` skills
8
+ are intentionally **not** consolidated under a shared procedure
9
+ context. Future maintainers reading this file should not retry the
10
+ pattern without invalidating the data here.
11
+ ---
12
+
13
+ # Why `judge-*` skills are NOT consolidated
14
+
15
+ A Phase 3a spike evaluated extracting a shared procedure file
16
+ (`contexts/judges/judge-shared-procedure.md`) loaded by all four
17
+ `judge-*` skills. The locked Q1=A shape (separate skills + shared
18
+ procedure context) was implemented and benchmarked. **Outcome: net
19
+ LOC saving negative — total family LOC went up.** The pattern is rejected for this family.
20
+
21
+ ## The numbers
22
+
23
+ Spike measurements on `judge-test-coverage` (the smallest persona,
24
+ chosen per roadmap 3a.0):
25
+
26
+ | State | Skill LOC | Shared LOC | Total per family of 4 |
27
+ |---|---|---|---|
28
+ | Baseline (4 separate) | 153 + 157 + 157 + 166 = **633** | 0 | 633 |
29
+ | Slim attempt #1 | 138 | 134 | 4 × 138 + 134 = 686 ❌ |
30
+ | Slim attempt #2 (aggressive) | 133 | 117 | 4 × 133 + 117 = 649 ❌ |
31
+
32
+ The success-criteria threshold from the roadmap (≥ 30 % per-skill
33
+ LOC reduction) maps to 153 → ≤ 107 for `judge-test-coverage`. The
34
+ aggressive attempt landed at 133 — a 13 % reduction. To go further,
35
+ each skill would have to surrender persona-specific content (analysis
36
+ rubric, anti-pattern list, severity definitions, scope-boundary Do
37
+ NOTs) — exactly the content that makes each persona reviewable.
38
+
39
+ ## Why the math doesn't work
40
+
41
+ A judge skill is **not** procedurally complex. Its value is the
42
+ persona surface:
43
+
44
+ - a 6–11 row analysis table tailored to the lens (correctness, security,
45
+ test gaps, code quality)
46
+ - a list of persona-specific anti-patterns and gotchas
47
+ - a severity legend with thresholds calibrated to the lens
48
+ - scope-boundary Do NOTs that route to sibling judges
49
+
50
+ Procedural overhead (verdict semantics, validation scaffold,
51
+ output-format frame, runtime boundary, model fallback) is small in
52
+ absolute terms — extracting it adds a context file but only saves
53
+ ~15–25 LOC per skill. With four skills, the shared file's one-time cost
54
+ (117–134 LOC) is spread over only four per-skill savings (≈ 60–100 LOC);
55
+ below five or six skills, the structural tax dominates.
56
+
57
+ The same argument applies to procedural duplication in any small
58
+ skill family where the procedural skeleton is short and the persona
59
+ table is the bulk of the file.
60
+
61
+ ## Decision
62
+
63
+ Phase 3a is closed with status **DO NOT CONSOLIDATE**. The four
64
+ `judge-*` skills remain separate, self-contained, and free of
65
+ `load_context:` indirection. Each skill's "Procedure" section carries
66
+ its full procedural body inline. This trades minor maintenance
67
+ duplication for full persona isolation and zero structural tax.
68
+
69
+ Phase 3b (`project-analysis-*`, 8 skills, 959 LOC) and Phase 3c
70
+ (`skill-*`, 4 skills, 782 LOC) **continue independently** per the
71
+ roadmap's "3b/3c continue independently" abort note. The math there
72
+ is more favorable: 8 stack-specific analysis skills share a much
73
+ larger procedural skeleton (project discovery, version resolution,
74
+ docs loading, architecture mapping) than 4 judge skills do.
75
+
76
+ ## Reopening conditions
77
+
78
+ Reopen this decision **only** if at least one of these holds:
79
+
80
+ 1. The `judge-*` family grows to ≥ 6 skills (procedural amortization
81
+ crosses break-even).
82
+ 2. A new persona-orthogonal procedural step is introduced that all
83
+ judges must execute identically (e.g. a CI-integrated verdict
84
+ reporter), and that step is non-trivial (≥ 30 LOC).
85
+ 3. The success-criteria threshold in
86
+ `road-to-structural-optimization.md` § 3a is renegotiated to a
87
+ lower per-skill LOC bar (with explicit justification — the 30 %
88
+ bar exists because anything less is structural noise).
89
+
90
+ Until one of those holds, treat any "let's extract shared procedure
91
+ across the judges" proposal as a regression and cite this file.
92
+
93
+ ## See also
94
+
95
+ - [`persona-voice-rubric`](persona-voice-rubric.md) — the voice
96
+ preservation rubric used during the spike (still applies if a future
97
+ consolidation attempt needs the same check).
98
+ - Sibling judges, all kept inline:
99
+ [`judge-bug-hunter`](../../skills/judge-bug-hunter/SKILL.md),
100
+ [`judge-code-quality`](../../skills/judge-code-quality/SKILL.md),
101
+ [`judge-security-auditor`](../../skills/judge-security-auditor/SKILL.md),
102
+ [`judge-test-coverage`](../../skills/judge-test-coverage/SKILL.md).
@@ -0,0 +1,140 @@
1
+ ---
2
+ stability: beta
3
+ ---
4
+
5
+ # Persona-Voice Rubric for `judge-*` Skills
6
+
7
+ > **Audience:** authors and reviewers of any future `judge-*`
8
+ > consolidation spike. Also loaded via `load_context:` by future
9
+ > `judge-*` slim shapes when an A-shape ships.
10
+
11
+ This rubric defines **what "persona voice" means** for the four
12
+ `judge-*` skills (`judge-bug-hunter`, `judge-code-quality`,
13
+ `judge-security-auditor`, `judge-test-coverage`) and **how to score
14
+ voice preservation** when comparing a slimmed (Option-A: separate skill
15
+ + shared procedure context) output against the current single-file
16
+ baseline. The rubric is the measurement instrument behind the 3a.0.1
17
+ voice gate (≥ 4.0/5) and the 0.5.1 author + 2-reviewer protocol (avg ≥
18
+ 3.5/5).
19
+
20
+ ## The five dimensions
21
+
22
+ Each dimension scored independently on a 1–5 integer scale. Scorers
23
+ record one numeric score plus one short rationale per dimension.
24
+
25
+ ### 1. Tone
26
+
27
+ The judge's posture toward the diff. Distinctive markers per persona:
28
+
29
+ | Skill | Tone marker |
30
+ |---|---|
31
+ | `judge-bug-hunter` | Skeptical, edge-case-first ("what input breaks this?") |
32
+ | `judge-code-quality` | Reformist, convention-bound ("this clashes with the codebase pattern at X") |
33
+ | `judge-security-auditor` | Adversarial, threat-modeling ("what would an attacker do here?") |
34
+ | `judge-test-coverage` | Falsification-first ("does this test fail without the fix?") |
35
+
36
+ Score 5 = posture indistinguishable from baseline. Score 1 = generic
37
+ "reviewer voice" with no persona signal.
38
+
39
+ ### 2. Vocabulary
40
+
41
+ Domain-specific terms the persona reaches for unprompted. Captured by
42
+ keyword profile from the baseline SKILL.md (e.g., `judge-security-auditor`
43
+ uses *trust boundary*, *sink*, *SSRF*, *mass assignment*;
44
+ `judge-test-coverage` uses *uncovered branch*, *over-mocking*,
45
+ *regression test*, *tautological assertion*).
46
+
47
+ Score 5 = ≥ 80% of baseline keyword profile present in slimmed output
48
+ on a representative diff. Score 1 = < 20% present, or vocabulary leaks
49
+ in from a sibling persona (collision marker — see § 4 below).
50
+
51
+ ### 3. Prompt-shape preservation
52
+
53
+ The structural skeleton of the output (Verdict, Issues, Severity,
54
+ Required fields). Must match the baseline `Output format` block
55
+ verbatim in field order, label spelling, and severity icon set.
56
+
57
+ Score 5 = byte-identical field structure (only finding content
58
+ differs). Score 3 = minor reorder, no missing fields. Score 1 = a
59
+ required field dropped, renamed, or merged.
60
+
61
+ ### 4. Refusal patterns
62
+
63
+ How the judge handles out-of-scope concerns. Each `judge-*` baseline
64
+ explicitly **refuses** to comment on dimensions another judge owns
65
+ (e.g., `judge-test-coverage` refuses to flag correctness or style;
66
+ `judge-security-auditor` refuses to flag pure logic bugs).
67
+
68
+ Score 5 = refusal triggers fire on the same out-of-scope inputs as
69
+ baseline, with the same handoff phrasing ("route to `judge-X`"). Score
70
+ 1 = persona accepts an out-of-scope finding silently (mode-collision
71
+ risk realized).
72
+
73
+ ### 5. Evidence-citation style
74
+
75
+ How findings are anchored to the diff. Baselines all cite `path:LINE`
76
+ with a one-line reason and (where applicable) a "what the test should
77
+ assert" / "what the attacker would do" clause.
78
+
79
+ Score 5 = every finding has the same citation shape as baseline —
80
+ `path:LINE` plus the persona-specific clause (uncovered branch /
81
+ threat class / pattern violated / failing input). Score 1 = findings
82
+ are vague ("there's a problem in the validator") or use a sibling
83
+ persona's citation shape.
84
+
85
+ ## Scoring template
86
+
87
+ For each scored output, fill out the matrix below — once per scorer
88
+ (author + 2 reviewers, see scoring protocol):
89
+
90
+ ```
91
+ Skill under test: judge-<persona>
92
+ Diff sample id: <pr-number-or-fixture-id>
93
+ Baseline output: <attached or hash>
94
+ Slimmed output: <attached or hash>
95
+
96
+ | Dimension | Score (1–5) | Rationale (≤ 30 words) |
97
+ |----------------------------|-------------|--------------------------------|
98
+ | 1. Tone | | |
99
+ | 2. Vocabulary | | |
100
+ | 3. Prompt-shape preservation | | |
101
+ | 4. Refusal patterns | | |
102
+ | 5. Evidence-citation style | | |
103
+ | **Per-scorer mean** | | (sum / 5, one decimal) |
104
+ ```
105
+
106
+ ## Acceptance arithmetic
107
+
108
+ Three independent scorers produce three per-scorer means. The 3a.0.1
109
+ gate passes only when **two** thresholds hold simultaneously:
110
+
111
+ - **Aggregate mean** (all dimensions, all scorers) **≥ 3.5/5** —
112
+ matches the 0.5.1 protocol.
113
+ - **Tone dimension specifically** mean **≥ 4.0/5** — matches the
114
+ 3a.0.1 voice-preservation kill-criterion. Tone is the load-bearing
115
+ dimension; vocabulary, prompt-shape, and citation style mostly track
116
+ it.
117
+
118
+ Either threshold missed → escalate to council per 0.5.1 (any
119
+ individual per-scorer mean < 3.0 also escalates). Council either revises the
120
+ slim shape (one rework only) or invokes the 3a.0.2 abort branch and
121
+ files `contexts/judges/no-consolidate-rationale.md`.
122
+
123
+ ## What this rubric does NOT measure
124
+
125
+ - **Verdict parity** — covered separately by 3a.3 (cosine-token
126
+ similarity ≤ 10% drift, 100% verdict parity).
127
+ - **Latency budget** — covered by 3a.0.1 (≤ +50ms vs. baseline).
128
+ - **Mode-collision proxy** — explicit security-only-diff probe under
129
+ 3a.0.1. Failure here aborts 3a regardless of voice score.
130
+ - **Implementation correctness** — judges read diffs, this rubric
131
+ reads judges. The diff under test is a fixture, not the artefact
132
+ graded.
133
+
134
+ ## References
135
+
136
+ - [`docs/contracts/context-paths.md`](../../../docs/contracts/context-paths.md)
137
+ — locked path tree (this file lives at `contexts/judges/`).
138
+ - [`docs/contracts/load-context-schema.md`](../../../docs/contracts/load-context-schema.md)
139
+ — frontmatter contract for citing this rubric from a slimmed `judge-*`
140
+ skill in Phase 3a.2.