@event4u/agent-config 3.3.0 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (561) hide show
  1. package/.agent-src/README.md +2 -2
  2. package/.agent-src/commands/agent-handoff.md +31 -2
  3. package/.agent-src/commands/agent-status.md +5 -5
  4. package/.agent-src/commands/agents/audit.md +8 -8
  5. package/.agent-src/commands/agents/init.md +25 -1
  6. package/.agent-src/commands/agents/optimize.md +3 -3
  7. package/.agent-src/commands/agents/user.md +1 -1
  8. package/.agent-src/commands/agents.md +1 -1
  9. package/.agent-src/commands/analyze-reference-repo.md +1 -1
  10. package/.agent-src/commands/check-current-md.md +8 -8
  11. package/.agent-src/commands/{compress.md → condense.md} +55 -55
  12. package/.agent-src/commands/context/create.md +7 -4
  13. package/.agent-src/commands/context/refactor.md +3 -1
  14. package/.agent-src/commands/feature/dev.md +1 -1
  15. package/.agent-src/commands/feature/explore.md +1 -1
  16. package/.agent-src/commands/feature/plan.md +10 -8
  17. package/.agent-src/commands/feature/refactor.md +3 -1
  18. package/.agent-src/commands/feature/roadmap.md +7 -4
  19. package/.agent-src/commands/fix/portability.md +3 -3
  20. package/.agent-src/commands/fix/refs.md +4 -4
  21. package/.agent-src/commands/ghostwriter.md +2 -2
  22. package/.agent-src/commands/memory/learn-low-impact.md +3 -3
  23. package/.agent-src/commands/module/explore.md +34 -8
  24. package/.agent-src/commands/optimize/agents-dir.md +9 -7
  25. package/.agent-src/commands/optimize/augmentignore.md +2 -2
  26. package/.agent-src/commands/optimize/skills.md +9 -9
  27. package/.agent-src/commands/post-as.md +1 -1
  28. package/.agent-src/commands/project-analyze.md +2 -2
  29. package/.agent-src/commands/project-health.md +3 -2
  30. package/.agent-src/commands/research/deep.md +1 -1
  31. package/.agent-src/commands/research/report.md +1 -1
  32. package/.agent-src/commands/research.md +1 -1
  33. package/.agent-src/commands/roadmap/ai-council.md +1 -1
  34. package/.agent-src/commands/roadmap/create.md +9 -4
  35. package/.agent-src/commands/rule-compliance-audit.md +1 -1
  36. package/.agent-src/commands/upstream-contribute.md +14 -14
  37. package/.agent-src/commands/video/from-script.md +1 -1
  38. package/.agent-src/commands/video/scene.md +1 -1
  39. package/.agent-src/commands/video/stitch.md +1 -1
  40. package/.agent-src/commands/video/storyboard.md +1 -1
  41. package/.agent-src/commands/video.md +1 -1
  42. package/.agent-src/contexts/augment-infrastructure.md +1 -1
  43. package/.agent-src/contexts/authority/commit-mechanics.md +15 -0
  44. package/.agent-src/contexts/authority/kernel-rule-edits.md +3 -3
  45. package/.agent-src/contexts/authority/scope-mechanics.md +1 -1
  46. package/.agent-src/contexts/communication/rules-auto/augment-source-of-truth-mechanics.md +28 -28
  47. package/.agent-src/contexts/communication/rules-auto/skill-quality-mechanics.md +4 -4
  48. package/.agent-src/contexts/communication/rules-auto/think-before-action-mechanics.md +2 -2
  49. package/.agent-src/contexts/contracts/artifact-engagement-flow.md +6 -6
  50. package/.agent-src/contexts/contracts/command-suggestion-flow.md +3 -3
  51. package/.agent-src/contexts/contracts/emergency-triage-block.md +4 -4
  52. package/.agent-src/contexts/contracts/frugality-charter.md +3 -3
  53. package/.agent-src/contexts/documentation-hierarchy.md +14 -7
  54. package/.agent-src/contexts/execution/autonomy-examples.md +1 -1
  55. package/.agent-src/contexts/execution/cheap-question-mechanics.md +39 -2
  56. package/.agent-src/contexts/execution/roadmap-process-loop.md +28 -5
  57. package/.agent-src/contexts/override-system.md +5 -5
  58. package/.agent-src/ghostwriter/fictional-fixture-v1.md +1 -1
  59. package/.agent-src/personas/advisors/first-principles.md +1 -1
  60. package/.agent-src/personas/hollywood-director.md +1 -1
  61. package/.agent-src/rules/architecture.md +5 -1
  62. package/.agent-src/rules/augment-edit-discipline.md +5 -5
  63. package/.agent-src/rules/augment-source-of-truth.md +15 -15
  64. package/.agent-src/rules/commit-conventions.md +1 -1
  65. package/.agent-src/rules/commit-policy.md +10 -0
  66. package/.agent-src/rules/domain-adoption-policy.md +3 -3
  67. package/.agent-src/rules/fast-path-marker-visibility.md +3 -3
  68. package/.agent-src/rules/finance-safety-floor.md +1 -1
  69. package/.agent-src/rules/framework-neutrality-in-generic-skills.md +8 -8
  70. package/.agent-src/rules/git-history-discipline.md +1 -1
  71. package/.agent-src/rules/improve-before-implement.md +2 -2
  72. package/.agent-src/rules/language-and-tone.md +2 -2
  73. package/.agent-src/rules/media-governance-routing.md +5 -5
  74. package/.agent-src/rules/no-attribution-footers.md +1 -0
  75. package/.agent-src/rules/no-cheap-questions.md +3 -0
  76. package/.agent-src/rules/no-decorative-emojis-in-git-surfaces.md +111 -0
  77. package/.agent-src/rules/no-pr-progress-comments.md +118 -0
  78. package/.agent-src/rules/no-roadmap-references.md +3 -3
  79. package/.agent-src/rules/non-destructive-by-default.md +1 -1
  80. package/.agent-src/rules/persona-governance.md +3 -3
  81. package/.agent-src/rules/preservation-guard.md +15 -15
  82. package/.agent-src/rules/roadmap-ci-steps-policy.md +7 -3
  83. package/.agent-src/rules/rule-type-governance.md +1 -1
  84. package/.agent-src/rules/skill-quality.md +1 -1
  85. package/.agent-src/rules/{caveman-speak.md → telegraph-speak.md} +15 -15
  86. package/.agent-src/rules/token-optimizer-maintenance.md +6 -6
  87. package/.agent-src/skills/agent-docs-writing/SKILL.md +17 -11
  88. package/.agent-src/skills/agents-md-thin-root/SKILL.md +9 -9
  89. package/.agent-src/skills/check-refs/SKILL.md +2 -2
  90. package/.agent-src/skills/code-refactoring/SKILL.md +2 -2
  91. package/.agent-src/skills/command-writing/SKILL.md +19 -19
  92. package/.agent-src/skills/comp-banding/SKILL.md +1 -1
  93. package/.agent-src/skills/condense-memory/SKILL.md +131 -0
  94. package/.agent-src/skills/context-authoring/SKILL.md +2 -2
  95. package/.agent-src/skills/context-document/SKILL.md +5 -3
  96. package/.agent-src/skills/copilot-agents-optimization/SKILL.md +3 -3
  97. package/.agent-src/skills/description-assist/SKILL.md +2 -2
  98. package/.agent-src/skills/git-workflow/SKILL.md +1 -1
  99. package/.agent-src/skills/guideline-writing/SKILL.md +5 -5
  100. package/.agent-src/skills/learning-to-rule-or-skill/SKILL.md +4 -4
  101. package/.agent-src/skills/lint-skills/SKILL.md +3 -3
  102. package/.agent-src/skills/md-language-check/SKILL.md +2 -2
  103. package/.agent-src/skills/module-detect-on-the-fly/SKILL.md +138 -0
  104. package/.agent-src/skills/module-management/SKILL.md +166 -94
  105. package/.agent-src/skills/override-management/SKILL.md +1 -1
  106. package/.agent-src/skills/persona-writing/SKILL.md +5 -5
  107. package/.agent-src/skills/positioning-strategy/SKILL.md +1 -1
  108. package/.agent-src/skills/project-docs/SKILL.md +6 -4
  109. package/.agent-src/skills/readme-reviewer/SKILL.md +2 -2
  110. package/.agent-src/skills/roadmap-management/SKILL.md +13 -1
  111. package/.agent-src/skills/roadmap-writing/SKILL.md +4 -2
  112. package/.agent-src/skills/rule-refactor/SKILL.md +5 -5
  113. package/.agent-src/skills/rule-writing/SKILL.md +18 -18
  114. package/.agent-src/skills/script-writing/SKILL.md +1 -1
  115. package/.agent-src/skills/skill-improvement-pipeline/SKILL.md +6 -6
  116. package/.agent-src/skills/skill-management/SKILL.md +21 -21
  117. package/.agent-src/skills/skill-reviewer/SKILL.md +2 -2
  118. package/.agent-src/skills/skill-writing/SKILL.md +8 -8
  119. package/.agent-src/skills/skill-writing/evals/triggers.json +1 -1
  120. package/.agent-src/skills/token-optimizer/SKILL.md +4 -4
  121. package/.agent-src/skills/unit-economics-modeling/SKILL.md +1 -1
  122. package/.agent-src/skills/upstream-contribute/SKILL.md +17 -17
  123. package/.agent-src/templates/AGENTS.md +1 -0
  124. package/.agent-src/templates/agent-settings.md +24 -13
  125. package/.agent-src/templates/agents/agent-project-settings.example.yml +61 -2
  126. package/.agent-src/templates/command.md +5 -5
  127. package/.agent-src/templates/contexts.md +1 -1
  128. package/.agent-src/templates/copilot-instructions.md +8 -8
  129. package/.agent-src/templates/features.md +1 -1
  130. package/.agent-src/templates/hooks/pre-commit-frontmatter +2 -2
  131. package/.agent-src/templates/hooks/pre-commit-roadmap-progress +3 -3
  132. package/.agent-src/templates/persona.md +2 -2
  133. package/.agent-src/templates/roadmaps.md +1 -1
  134. package/.agent-src/templates/rule.md +13 -13
  135. package/.agent-src/templates/scripts/memory_lookup.py +1 -1
  136. package/.agent-src/templates/scripts/memory_status.py +2 -2
  137. package/.agent-src/templates/scripts/work_engine/_lib/agent_settings.py +195 -1
  138. package/.agent-src/templates/scripts/work_engine/orchestration.py +1 -1
  139. package/.agent-src/templates/skill-archive-note.md +5 -5
  140. package/.agent-src/templates/skill.md +1 -1
  141. package/.claude-plugin/marketplace.json +4 -4
  142. package/AGENTS.md +16 -17
  143. package/CHANGELOG.md +216 -3
  144. package/CONTRIBUTING.md +31 -12
  145. package/README.md +21 -12
  146. package/config/agent-settings.template.yml +22 -2
  147. package/config/discovery/unassigned-artefacts.yml +24 -24
  148. package/config/profiles/full.ini +1 -1
  149. package/dist/cli/agent-config.js +52 -3
  150. package/dist/cli/agent-config.js.map +1 -1
  151. package/dist/cli/commands/uiServe.js +9 -0
  152. package/dist/cli/commands/uiServe.js.map +1 -1
  153. package/dist/cli/registry.js +2 -1
  154. package/dist/cli/registry.js.map +1 -1
  155. package/dist/discovery/deprecation-report.md +1 -1
  156. package/dist/discovery/discovery-manifest.json +649 -606
  157. package/dist/discovery/discovery-manifest.json.sha256 +1 -1
  158. package/dist/discovery/discovery-manifest.summary.md +4 -4
  159. package/dist/discovery/orphan-report.md +1 -1
  160. package/dist/discovery/packs.json +439 -437
  161. package/dist/discovery/trust-report.md +5 -5
  162. package/dist/discovery/workspaces.json +450 -448
  163. package/dist/install/atomic.js +92 -0
  164. package/dist/install/atomic.js.map +1 -0
  165. package/dist/install/conflict.js +196 -0
  166. package/dist/install/conflict.js.map +1 -0
  167. package/dist/install/detect.js +218 -0
  168. package/dist/install/detect.js.map +1 -0
  169. package/dist/install/paths.js +82 -0
  170. package/dist/install/paths.js.map +1 -0
  171. package/dist/install/plan.js +157 -0
  172. package/dist/install/plan.js.map +1 -0
  173. package/dist/install/txlog.js +140 -0
  174. package/dist/install/txlog.js.map +1 -0
  175. package/dist/install/types.js +19 -0
  176. package/dist/install/types.js.map +1 -0
  177. package/dist/install/wizard-plan.js +184 -0
  178. package/dist/install/wizard-plan.js.map +1 -0
  179. package/dist/mcp/registry-manifest.json +4 -4
  180. package/dist/router.json +67 -19
  181. package/dist/server/app.js +6 -0
  182. package/dist/server/app.js.map +1 -1
  183. package/dist/server/routes/install.js +358 -0
  184. package/dist/server/routes/install.js.map +1 -0
  185. package/dist/server/routes/wizard.js +468 -32
  186. package/dist/server/routes/wizard.js.map +1 -1
  187. package/dist/server/routes/workspace.js +396 -0
  188. package/dist/server/routes/workspace.js.map +1 -0
  189. package/dist/server/schemas/settings.js +5 -3
  190. package/dist/server/schemas/settings.js.map +1 -1
  191. package/dist/ui/assets/index-BDAhhpDV.js +40 -0
  192. package/dist/ui/assets/index-BDAhhpDV.js.map +1 -0
  193. package/dist/ui/assets/index-BXZILUxe.css +1 -0
  194. package/dist/ui/index.html +2 -2
  195. package/docs/MIGRATION.md +1 -1
  196. package/docs/adrs/cost/0001-hard-stop-hook.md +1 -1
  197. package/docs/adrs/router/0001-three-tier-routing.md +4 -4
  198. package/docs/adrs/schema/0001-json-schema-frontmatter.md +1 -1
  199. package/docs/adrs/smoke/0001-per-tier-smoke-scripts.md +4 -4
  200. package/docs/adrs/{caveman → telegraph}/0001-default-off-until-bench.md +9 -9
  201. package/docs/adrs/telegraph/README.md +9 -0
  202. package/docs/architecture/augment-projection.md +4 -4
  203. package/docs/architecture/claude-bundle.md +1 -1
  204. package/docs/architecture/current-onboard-baseline.md +3 -3
  205. package/docs/architecture/multi-tool-projection.md +10 -10
  206. package/docs/architecture/source-projection.md +27 -27
  207. package/docs/architecture.md +19 -15
  208. package/docs/archive/CHANGELOG-pre-2.11.0.md +2 -2
  209. package/docs/archive/CHANGELOG-pre-2.15.0.md +3 -3
  210. package/docs/archive/CHANGELOG-pre-2.16.0.md +1 -1
  211. package/docs/archive/CHANGELOG-pre-2.2.0.md +70 -70
  212. package/docs/archive/CHANGELOG-pre-2.20.0.md +2 -2
  213. package/docs/archive/CHANGELOG-pre-2.25.0.md +15 -15
  214. package/docs/archive/CHANGELOG-pre-3.0.0.md +4 -4
  215. package/docs/archive/CHANGELOG-pre-3.1.0.md +2 -2
  216. package/docs/archive/CHANGELOG-pre-3.2.0.md +3 -3
  217. package/docs/benchmark.md +65 -0
  218. package/docs/benchmarks.md +16 -16
  219. package/docs/catalog.md +17 -15
  220. package/docs/contracts/CHANGELOG-conventions.md +1 -1
  221. package/docs/contracts/STABILITY.md +2 -2
  222. package/docs/contracts/adoption-signal-floor.md +110 -0
  223. package/docs/contracts/adr-chat-history-split.md +4 -4
  224. package/docs/contracts/adr-command-suggestion.md +4 -4
  225. package/docs/contracts/adr-gtm-context-spine.md +1 -1
  226. package/docs/contracts/adr-implement-ticket-runtime.md +4 -4
  227. package/docs/contracts/adr-install-user-type-axis.md +1 -1
  228. package/docs/contracts/adr-layout.md +2 -2
  229. package/docs/contracts/adr-product-ui-track.md +10 -10
  230. package/docs/contracts/adr-user-types-axis.md +3 -3
  231. package/docs/contracts/adr-wing4-context-spine.md +1 -1
  232. package/docs/contracts/agent-memory-contract.md +3 -3
  233. package/docs/contracts/agents-md-tech-stack.md +2 -2
  234. package/docs/contracts/ai-council-config.md +2 -2
  235. package/docs/contracts/at-rest-encryption.md +4 -0
  236. package/docs/contracts/audit-log-v1.md +1 -1
  237. package/docs/contracts/benchmark-ab-contract.md +101 -0
  238. package/docs/contracts/benchmark-corpus-spec.md +1 -1
  239. package/docs/contracts/branch-protection-policy.md +98 -0
  240. package/docs/contracts/ci-cost-budget.md +106 -0
  241. package/docs/contracts/ci-green-floor.md +141 -0
  242. package/docs/contracts/command-clusters.md +6 -6
  243. package/docs/contracts/command-surface-tiers.md +2 -2
  244. package/docs/contracts/command-taxonomy.md +2 -2
  245. package/docs/contracts/{compression-default-kill-criterion.md → condensation-default-kill-criterion.md} +29 -29
  246. package/docs/contracts/config-presets.md +1 -1
  247. package/docs/contracts/context-paths.md +3 -3
  248. package/docs/contracts/context-spine.md +1 -1
  249. package/docs/contracts/cost-summary-schema.md +12 -12
  250. package/docs/contracts/cross-wing-handoff.md +4 -4
  251. package/docs/contracts/daily-workspace.md +4 -0
  252. package/docs/contracts/decision-trace-v1.md +2 -2
  253. package/docs/contracts/discovery-manifest.md +4 -4
  254. package/docs/contracts/explain-modes.md +4 -0
  255. package/docs/contracts/file-ownership-matrix.json +3493 -3318
  256. package/docs/contracts/file-ownership-matrix.md +3 -3
  257. package/docs/contracts/frontmatter-contract.md +4 -4
  258. package/docs/contracts/ghostwriter-schema.md +3 -3
  259. package/docs/contracts/gui-wizard.md +110 -97
  260. package/docs/contracts/harness-expectations.md +123 -0
  261. package/docs/contracts/host-agent-protocol.md +4 -0
  262. package/docs/contracts/implement-ticket-flow.md +9 -9
  263. package/docs/contracts/install-scopes.md +77 -0
  264. package/docs/contracts/iron-law-overrides.txt +1 -1
  265. package/docs/contracts/kernel-membership.md +26 -26
  266. package/docs/contracts/linear-ai-rules-inclusion.md +1 -1
  267. package/docs/contracts/linter-structural-model.md +2 -2
  268. package/docs/contracts/load-context-budget-model.md +4 -4
  269. package/docs/contracts/load-context-schema.md +13 -13
  270. package/docs/contracts/local-analytics.md +4 -0
  271. package/docs/contracts/local-knowledge-ingestion.md +1 -1
  272. package/docs/contracts/mcp-cloud-scope.md +2 -2
  273. package/docs/contracts/mcp-phase-1-scope.md +3 -3
  274. package/docs/contracts/measurement-baseline.md +5 -5
  275. package/docs/contracts/mental-models.md +30 -30
  276. package/docs/contracts/multi-tool-projection-fidelity.md +4 -4
  277. package/docs/contracts/namespace.md +4 -4
  278. package/docs/contracts/orchestration-dsl-v1.md +7 -7
  279. package/docs/contracts/package-self-orientation.md +12 -12
  280. package/docs/contracts/persona-schema.md +6 -6
  281. package/docs/contracts/pilot/language-and-tone.md +1 -1
  282. package/docs/contracts/plain-language-surface.md +117 -0
  283. package/docs/contracts/profile-system.md +3 -3
  284. package/docs/contracts/release-pr-gating.md +103 -0
  285. package/docs/contracts/role-experience.md +3 -3
  286. package/docs/contracts/rule-classification.md +13 -13
  287. package/docs/contracts/rule-interactions.md +4 -4
  288. package/docs/contracts/rule-interactions.yml +30 -30
  289. package/docs/contracts/rule-priority-hierarchy.md +13 -13
  290. package/docs/contracts/rule-router.md +2 -2
  291. package/docs/contracts/safety-model.md +1 -1
  292. package/docs/contracts/skill-distribution-channels.md +61 -0
  293. package/docs/contracts/skill-domains.md +2 -2
  294. package/docs/contracts/smoke-contracts.md +5 -5
  295. package/docs/contracts/telegraph-telemetry.md +83 -0
  296. package/docs/contracts/trust-and-safety.md +5 -5
  297. package/docs/contracts/ui-stack-extension.md +7 -7
  298. package/docs/contracts/ui-track-flow.md +9 -9
  299. package/docs/contracts/user-type-schema.md +4 -4
  300. package/docs/contracts/workflow-packs.md +4 -4
  301. package/docs/contracts/workspace-documents.md +4 -0
  302. package/docs/customization.md +28 -8
  303. package/docs/decisions/ADR-001-kernel-swap-deferred.md +6 -6
  304. package/docs/decisions/ADR-002-kernel-bucket-overrides.md +11 -11
  305. package/docs/decisions/ADR-003-flat-cluster-subs-and-colon-syntax.md +2 -2
  306. package/docs/decisions/ADR-004-rule-governance-pruning.md +4 -4
  307. package/docs/decisions/ADR-005-subagent-worktrees.md +7 -7
  308. package/docs/decisions/ADR-011-domain-pack-readiness.md +6 -6
  309. package/docs/decisions/ADR-013-discovery-frontmatter-contract.md +3 -3
  310. package/docs/decisions/ADR-015-discovery-manifest-contract.md +3 -3
  311. package/docs/decisions/ADR-017-monorepo-physical-layout.md +10 -10
  312. package/docs/decisions/ADR-018-trust-and-safety-layer.md +6 -6
  313. package/docs/decisions/ADR-019-router-json-dist-location.md +2 -2
  314. package/docs/decisions/ADR-020-global-only-consumer-scope.md +2 -2
  315. package/docs/decisions/ADR-021-deployment-shape.md +3 -3
  316. package/docs/decisions/ADR-022-daily-workspace-decomposition.md +1 -1
  317. package/docs/decisions/ADR-027-changelog-machine-vs-manual.md +2 -2
  318. package/docs/decisions/ADR-028-root-layout.md +7 -7
  319. package/docs/decisions/ADR-029-multi-workspace-deferred.md +2 -2
  320. package/docs/decisions/ADR-rule-kernel-and-router.md +5 -5
  321. package/docs/deploy/connector-setup.md +2 -2
  322. package/docs/deploy/policy-cookbook.md +2 -2
  323. package/docs/deploy/team-deployment-posture.md +20 -0
  324. package/docs/development.md +17 -17
  325. package/docs/distribution/registries.md +32 -0
  326. package/docs/distribution/registry-submissions.md +85 -0
  327. package/docs/distribution/telemetry-schema.md +1 -1
  328. package/docs/getting-started-by-role.md +45 -3
  329. package/docs/getting-started.md +2 -2
  330. package/docs/guidelines/agent-infra/5w2h-analysis.md +3 -3
  331. package/docs/guidelines/agent-infra/ask-when-uncertain-demos.md +1 -1
  332. package/docs/guidelines/agent-infra/asking-and-brevity-examples.md +3 -3
  333. package/docs/guidelines/agent-infra/carve-out-predicates.md +3 -3
  334. package/docs/guidelines/agent-infra/critical-thinking.md +4 -4
  335. package/docs/guidelines/agent-infra/direct-answers-demos.md +1 -1
  336. package/docs/guidelines/agent-infra/first-principles.md +2 -2
  337. package/docs/guidelines/agent-infra/inversion-thinking.md +5 -5
  338. package/docs/guidelines/agent-infra/layered-settings.md +56 -2
  339. package/docs/guidelines/agent-infra/mental-models.md +3 -3
  340. package/docs/guidelines/agent-infra/roadmap-progress-mechanics.md +2 -2
  341. package/docs/guidelines/agent-infra/rule-type-governance.md +1 -1
  342. package/docs/guidelines/agent-infra/scqa-framework.md +5 -5
  343. package/docs/guidelines/agent-infra/self-improvement-pipeline.md +2 -2
  344. package/docs/guidelines/agent-infra/six-hats.md +3 -3
  345. package/docs/guidelines/agent-infra/skill-quality-checklist.md +5 -5
  346. package/docs/guidelines/agent-infra/systems-thinking.md +1 -1
  347. package/docs/guidelines/agent-infra/verify-before-complete-demos.md +1 -1
  348. package/docs/guidelines/augment-portability-patterns.md +4 -4
  349. package/docs/guidelines/cross-role-handoff.md +2 -2
  350. package/docs/guidelines/php/php-coding-patterns.md +1 -1
  351. package/docs/guidelines/prompt-templates.md +6 -6
  352. package/docs/maintainers/dev-mode.md +1 -1
  353. package/docs/mcp.md +1 -1
  354. package/docs/parity/bench.json +3 -3
  355. package/docs/parity/ruflo.md +2 -2
  356. package/docs/profiles.md +11 -11
  357. package/docs/quality.md +11 -11
  358. package/docs/safety.md +3 -3
  359. package/docs/setup/mcp-client-config.md +1 -1
  360. package/docs/setup/mcp-r2-bootstrap.md +1 -1
  361. package/docs/setup/mcp-server-docker.md +3 -3
  362. package/docs/setup/per-ide/windsurf.md +1 -1
  363. package/docs/skills-catalog.md +8 -7
  364. package/docs/troubleshooting.md +1 -1
  365. package/docs/walkthroughs/daily-workspace-a11y.md +87 -0
  366. package/llms.txt +7 -6
  367. package/package.json +1 -1
  368. package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
  369. package/scripts/_archive/README.md +2 -2
  370. package/scripts/_archive/_backfill_skill_domains.py +3 -3
  371. package/scripts/_archive/_bootstrap_tier_frontmatter.py +3 -3
  372. package/scripts/_archive/_p43_bodies.py +10 -10
  373. package/scripts/_archive/{_p43_compress.py → _p43_condense.py} +5 -5
  374. package/scripts/_archive/_p4_migrate.py +7 -7
  375. package/scripts/_archive/_phase2_shim_helper.py +1 -1
  376. package/scripts/_archive/_pilot_council_question.py +5 -5
  377. package/scripts/_cli/explain_last/inputs.py +1 -1
  378. package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
  379. package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
  380. package/scripts/_lib/agent_settings.py +195 -1
  381. package/scripts/_lib/agent_src.py +19 -19
  382. package/scripts/_lib/bench_ab_cache.py +162 -0
  383. package/scripts/_lib/bench_ab_scoring.py +209 -0
  384. package/scripts/_lib/{bench_caveman.py → bench_telegraph.py} +21 -21
  385. package/scripts/_lib/{bench_caveman_report.py → bench_telegraph_report.py} +21 -21
  386. package/scripts/_lib/claude_desktop_bundler.py +5 -5
  387. package/scripts/_lib/module_detection.py +223 -0
  388. package/scripts/_lib/scope_guard.sh +162 -0
  389. package/scripts/_phase4_bucket.py +3 -3
  390. package/scripts/_pilot_measure.py +4 -4
  391. package/scripts/_tmp_scan_framework_leakage.py +1 -1
  392. package/scripts/adoption_report.py +195 -0
  393. package/scripts/adoption_snapshot.py +219 -0
  394. package/scripts/adoption_status.py +166 -0
  395. package/scripts/ai-video/lib/parse-blueprint.sh +1 -1
  396. package/scripts/ai_council/advisors.py +5 -5
  397. package/scripts/ai_council/compile_corpus.py +1 -1
  398. package/scripts/ai_council/one_off_archive/2026-05/_one_off_budget_v2_audit.py +3 -3
  399. package/scripts/ai_council/one_off_archive/2026-05/_one_off_context_layer_v1_review.py +2 -2
  400. package/scripts/ai_council/one_off_archive/2026-05/_one_off_inject_quiet_flag.py +1 -1
  401. package/scripts/ai_council/one_off_archive/2026-05/_one_off_measure_v2.sh +1 -1
  402. package/scripts/ai_council/one_off_archive/2026-05/_one_off_measure_verbosity.sh +1 -1
  403. package/scripts/ai_council/one_off_archive/2026-05/_one_off_nondestructive_inline_audit.py +3 -3
  404. package/scripts/ai_council/one_off_archive/2026-05/_one_off_per_task.sh +1 -1
  405. package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase6_trigger_jaccard.py +1 -1
  406. package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase_2a_budget_rebalance.py +6 -6
  407. package/scripts/ai_council/one_off_archive/2026-05/_one_off_rebalancing_audit.py +1 -1
  408. package/scripts/ai_council/one_off_archive/2026-05/_one_off_tier_retrofit.py +6 -6
  409. package/scripts/annotate_discovery.py +13 -13
  410. package/scripts/apply_modules_config.py +290 -0
  411. package/scripts/audit_adr_coverage.py +2 -2
  412. package/scripts/audit_auto_rules.py +2 -2
  413. package/scripts/audit_cloud_compatibility.py +3 -3
  414. package/scripts/audit_command_surface.py +9 -9
  415. package/scripts/audit_likelihood.py +2 -2
  416. package/scripts/audit_user_type_axis.py +2 -2
  417. package/scripts/bench_ab_cache_dispatch.py +68 -0
  418. package/scripts/bench_ab_clone.py +170 -0
  419. package/scripts/bench_ab_diff.py +227 -0
  420. package/scripts/bench_ab_integrity.py +143 -0
  421. package/scripts/bench_ab_run.py +235 -0
  422. package/scripts/bench_ab_task_runner.py +369 -0
  423. package/scripts/bench_ab_tracka_run.py +202 -0
  424. package/scripts/{bench_compress_memory.py → bench_condense_memory.py} +16 -16
  425. package/scripts/bench_run.py +33 -33
  426. package/scripts/bench_runner.py +2 -2
  427. package/scripts/bootstrap.sh +99 -0
  428. package/scripts/build_cloud_bundle.py +6 -6
  429. package/scripts/build_discovery_manifest.py +7 -7
  430. package/scripts/build_linear_digest.py +3 -3
  431. package/scripts/build_rule_trigger_matrix.py +8 -8
  432. package/scripts/chat_history.py +5 -5
  433. package/scripts/check_always_budget.py +11 -5
  434. package/scripts/check_augment_description_cap.py +3 -3
  435. package/scripts/check_cluster_patterns.py +2 -2
  436. package/scripts/check_command_count_messaging.py +3 -3
  437. package/scripts/{check_compression.py → check_condensation.py} +34 -34
  438. package/scripts/{check_compressed_paths.py → check_condensed_paths.py} +8 -8
  439. package/scripts/check_context_paths.py +7 -7
  440. package/scripts/check_council_layout.py +2 -2
  441. package/scripts/check_council_references.py +9 -9
  442. package/scripts/check_iron_law_prominence.py +2 -2
  443. package/scripts/check_kernel_rule_bundle.py +2 -2
  444. package/scripts/check_module_management_neutral.py +149 -0
  445. package/scripts/check_no_roadmap_refs.py +9 -9
  446. package/scripts/check_portability.py +3 -3
  447. package/scripts/check_public_catalog_links.py +4 -4
  448. package/scripts/check_references.py +7 -6
  449. package/scripts/check_release_pr_shape.py +112 -0
  450. package/scripts/check_reply_consistency.py +3 -3
  451. package/scripts/check_safety_floor_untouched.py +1 -1
  452. package/scripts/check_template_pin_drift.py +5 -5
  453. package/scripts/check_token_optimizer_freshness.py +3 -3
  454. package/scripts/ci_status.py +301 -0
  455. package/scripts/ci_time_ratio.py +1 -1
  456. package/scripts/cleanup_other_scope.sh +146 -0
  457. package/scripts/compile_router.py +10 -10
  458. package/scripts/{compress.py → condense.py} +64 -64
  459. package/scripts/condense.sh +18 -0
  460. package/scripts/{compress_memory.py → condense_memory.py} +33 -33
  461. package/scripts/config/presets.py +2 -2
  462. package/scripts/config/profiles.py +1 -1
  463. package/scripts/cost_by_conversation.py +3 -3
  464. package/scripts/cost_summary.py +7 -7
  465. package/scripts/count_token_optimizer_usage.sh +1 -1
  466. package/scripts/gen_discovery_baseline.py +5 -5
  467. package/scripts/generate_index.py +6 -6
  468. package/scripts/generate_ownership_matrix.py +10 -10
  469. package/scripts/generate_pack_manifests.py +1 -1
  470. package/scripts/ghostwriter_fixture_allowlist.txt +1 -1
  471. package/scripts/install +3 -3
  472. package/scripts/install-hooks.sh +6 -6
  473. package/scripts/install.py +273 -45
  474. package/scripts/install.sh +187 -1
  475. package/scripts/inventory_frontmatter.py +2 -2
  476. package/scripts/iron_law_sha.py +3 -3
  477. package/scripts/lint_agents_layout.py +14 -7
  478. package/scripts/lint_agents_md.py +4 -4
  479. package/scripts/lint_archived_skills.py +3 -3
  480. package/scripts/lint_artefact_frontmatter.py +2 -2
  481. package/scripts/lint_bench_ab.py +172 -0
  482. package/scripts/lint_bench_corpus.py +1 -1
  483. package/scripts/lint_command_tiers.py +5 -5
  484. package/scripts/lint_context_spine_usage.py +1 -1
  485. package/scripts/lint_framework_leakage.py +7 -7
  486. package/scripts/lint_framework_leakage_allowlist.json +152 -84
  487. package/scripts/lint_ghostwriter_source.py +3 -3
  488. package/scripts/lint_handoffs.py +1 -1
  489. package/scripts/lint_load_context.py +11 -11
  490. package/scripts/lint_media_policy_linkage.py +5 -5
  491. package/scripts/lint_namespace.py +1 -1
  492. package/scripts/lint_no_new_atomic_commands.py +2 -2
  493. package/scripts/lint_orchestration_dsl.py +1 -1
  494. package/scripts/lint_pack_boundaries.py +2 -2
  495. package/scripts/lint_persona_governance.py +4 -4
  496. package/scripts/lint_role_experiences.py +237 -0
  497. package/scripts/lint_rule_interactions.py +2 -2
  498. package/scripts/lint_rule_tiers.py +1 -1
  499. package/scripts/lint_trust_coherence.py +2 -2
  500. package/scripts/mcp_registry_submit.sh +187 -0
  501. package/scripts/mcp_server/tools.py +1 -1
  502. package/scripts/measure_frugality_savings.py +10 -10
  503. package/scripts/measure_patterns.py +1 -1
  504. package/scripts/measure_projection_bytes.py +5 -5
  505. package/scripts/measure_rule_budget.py +3 -3
  506. package/scripts/measure_skill_reduction.py +1 -1
  507. package/scripts/memory_lookup.py +1 -1
  508. package/scripts/memory_status.py +2 -2
  509. package/scripts/migrate_command_suggestions.py +3 -3
  510. package/scripts/mine_session.py +1 -1
  511. package/scripts/move_artefact.py +3 -3
  512. package/scripts/new_skill.py +2 -2
  513. package/scripts/pack_mcp_content.py +9 -9
  514. package/scripts/plan_physical_move.py +6 -6
  515. package/scripts/print_required_checks.py +196 -0
  516. package/scripts/probe_skill_registration.py +413 -0
  517. package/scripts/propose_modules_config.py +145 -0
  518. package/scripts/prototype_lint_contradictions.py +1 -1
  519. package/scripts/recruit_preflight.sh +152 -0
  520. package/scripts/refine_ticket_detect.py +3 -3
  521. package/scripts/release.py +20 -0
  522. package/scripts/render_benchmark_md.py +308 -0
  523. package/scripts/roadmap_progress_hook.py +1 -1
  524. package/scripts/run_skill_evals.py +2 -2
  525. package/scripts/runtime_registry.py +4 -4
  526. package/scripts/schemas/command.schema.json +4 -4
  527. package/scripts/schemas/rule.schema.json +5 -5
  528. package/scripts/schemas/skill.schema.json +3 -3
  529. package/scripts/schemas/user-type.schema.json +1 -1
  530. package/scripts/score_skill_selection.py +1 -1
  531. package/scripts/skill_collision_clusters.py +2 -2
  532. package/scripts/skill_linter.py +81 -81
  533. package/scripts/skill_overlap.py +5 -5
  534. package/scripts/skill_tools/audit_persona_coverage.py +2 -2
  535. package/scripts/skill_tools/audit_user_type_coverage.py +2 -2
  536. package/scripts/skill_tools/run_block_d_eval.py +1 -1
  537. package/scripts/skill_tools/score_skill_relevance.py +1 -1
  538. package/scripts/skill_tools/suggest_skill_for_task.py +1 -1
  539. package/scripts/skill_trigger_eval.py +3 -3
  540. package/scripts/smoke/kernel.sh +7 -1
  541. package/scripts/smoke/router.sh +5 -5
  542. package/scripts/smoke/skills.sh +1 -1
  543. package/scripts/smoke_quickstart.py +1 -1
  544. package/scripts/snapshot_agent_outputs.py +3 -3
  545. package/scripts/spotcheck_thin_root.py +1 -1
  546. package/scripts/{caveman_stats.py → telegraph_stats.py} +18 -18
  547. package/scripts/update_counts.py +1 -1
  548. package/scripts/validate_decision_engine.py +1 -1
  549. package/scripts/validate_frontmatter.py +1 -1
  550. package/scripts/validate_safe_paths.py +3 -3
  551. package/scripts/{validate_caveman_carveouts.py → validate_telegraph_carveouts.py} +7 -7
  552. package/scripts/verify_roadmap_closure.py +6 -6
  553. package/templates/consumer-settings/ONBOARDING.md +41 -0
  554. package/.agent-src/commands/install-via-agent.md +0 -129
  555. package/.agent-src/skills/compress-memory/SKILL.md +0 -131
  556. package/dist/ui/assets/index-D-DY1ywI.js +0 -35
  557. package/dist/ui/assets/index-D-DY1ywI.js.map +0 -1
  558. package/dist/ui/assets/index-Dqfhmg-d.css +0 -1
  559. package/docs/adrs/caveman/README.md +0 -9
  560. package/docs/contracts/caveman-telemetry.md +0 -83
  561. package/scripts/compress.sh +0 -18
@@ -0,0 +1,170 @@
1
+ #!/usr/bin/env python3
2
+ """Materialise the `with` and `without` clones for the package-impact A/B bench.
3
+
4
+ Phase 1 Step 2 of `agents/roadmaps/road-to-package-impact-benchmark.md`.
5
+
6
+ The fixture lives at `internal/bench/ab/fixture/`. Both clones are byte-identical
7
+ copies of the fixture; the `with` clone additionally receives the agent-config
8
+ surface (`.claude/`, `.augment/`, `AGENTS.md`, `CLAUDE.md`) so a Claude Code
9
+ session run inside it sees the same files a consumer project would after
10
+ running the installer.
11
+
12
+ Idempotent: re-running without `--refresh` leaves an existing clone alone. With
13
+ `--refresh`, the target clone is removed and rebuilt from scratch.
14
+
15
+ The clones tree (`internal/bench/ab/clones/`) is gitignored — only this script's
16
+ output schema is committed.
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import argparse
21
+ import hashlib
22
+ import json
23
+ import os
24
+ import shutil
25
+ import sys
26
+ from pathlib import Path
27
+
28
+ REPO_ROOT = Path(__file__).resolve().parents[1]
29
+ AB_ROOT = REPO_ROOT / "internal" / "bench" / "ab"
30
+ FIXTURE = AB_ROOT / "fixture"
31
+ CLONES = AB_ROOT / "clones"
32
+
33
+ # Surfaces the `with` clone inherits from the package root.
34
+ WITH_SURFACES = (
35
+ ".claude",
36
+ ".augment",
37
+ "AGENTS.md",
38
+ "CLAUDE.md",
39
+ )
40
+
41
+
42
def die(msg: str) -> None:
    """Report a fatal error on stderr and terminate with exit status 1.

    The message is prefixed with the script name. This function never returns
    normally: it always raises ``SystemExit(1)``.
    """
    sys.stderr.write(f"bench_ab_clone: {msg}\n")
    sys.exit(1)
45
+
46
+
47
def copytree_preserve(src: Path, dst: Path) -> None:
    """Replace `dst` with a fresh copy of the tree at `src`, following symlinks.

    The package installs the `.claude/` surface as symlinks into `.agent-src/`.
    Copying those links verbatim would leave the clone pointing back at the
    package root, so a session inside the clone could not read the rule
    bodies. `symlinks=False` makes `copytree` copy the linked content instead,
    which yields standalone files inside the clone.

    Any pre-existing `dst` is removed first so no stale entries survive.
    """
    destination_present = dst.exists()
    if destination_present:
        shutil.rmtree(dst)
    shutil.copytree(src, dst, symlinks=False)
59
+
60
+
61
def materialise_clone(variant: str, target: Path) -> None:
    """Populate `target` with the fixture, plus the agent-config surface for `with`.

    Every top-level entry of the fixture is copied into `target`. When
    `variant` is ``"with"``, each entry of `WITH_SURFACES` found in the package
    root is then layered on top. A surface that is missing from the package
    root is reported on stderr and skipped; it does not abort the build.
    """

    def place(origin: Path, destination: Path) -> None:
        # Directories go through the symlink-dereferencing tree copy;
        # plain files are copied together with their metadata.
        if origin.is_dir():
            copytree_preserve(origin, destination)
        else:
            shutil.copy2(origin, destination)

    target.mkdir(parents=True, exist_ok=True)

    # Mirror the fixture, one top-level entry at a time.
    for item in FIXTURE.iterdir():
        place(item, target / item.name)

    # Only the `with` variant receives the agent-config surface.
    if variant != "with":
        return

    for surface in WITH_SURFACES:
        origin = REPO_ROOT / surface
        if origin.exists():
            place(origin, target / surface)
            continue
        # Best-effort: warn and carry on with the remaining surfaces.
        sys.stderr.write(
            f"bench_ab_clone: surface '{surface}' missing in package root; "
            "with-clone may not be representative\n"
        )
87
+
88
+
89
def target_shape_hash() -> str:
    """Return a 16-hex-character fingerprint of the fixture and surface list.

    The SHA-256 input is the JSON-encoded `WITH_SURFACES` names followed, for
    every regular file under `FIXTURE` in sorted path order, by its POSIX
    relative path and its bytes. Only the surface *names* are hashed here, not
    the contents of the surface directories. Phase 2 uses the value as a cache
    key, so the clone code and the cache code share one surface definition.
    """
    digest = hashlib.sha256()
    digest.update(b"with-surfaces:" + json.dumps(WITH_SURFACES).encode() + b"\n")

    fixture_files = (p for p in sorted(FIXTURE.rglob("*")) if p.is_file())
    for file_path in fixture_files:
        name_line = f"{file_path.relative_to(FIXTURE).as_posix()}\n"
        digest.update(name_line.encode())
        digest.update(file_path.read_bytes())
        digest.update(b"\n")

    full_hex = digest.hexdigest()
    return full_hex[:16]
105
+
106
+
107
def write_manifest(variant: str, target: Path) -> None:
    """Write `.bench-ab-manifest.json` into `target` describing the clone.

    The manifest records the variant name, the current target-shape hash, the
    surface list and the fixture's path relative to the repo root, so other
    scripts can verify the clone's shape.
    """
    payload = {
        "variant": variant,
        "target_shape_hash": target_shape_hash(),
        "with_surfaces": [*WITH_SURFACES],
        "fixture_relpath": FIXTURE.relative_to(REPO_ROOT).as_posix(),
    }
    serialised = json.dumps(payload, indent=2) + "\n"
    manifest_path = target / ".bench-ab-manifest.json"
    manifest_path.write_text(serialised)
118
+
119
+
120
def clone(variant: str, *, refresh: bool) -> Path:
    """Build (or reuse) the clone for `variant` and return its directory.

    An existing clone is reused only when `refresh` is false and its manifest
    file is present. The manifest is written as the last build step, so a
    directory without one is the residue of an interrupted build. Such a
    directory is removed and rebuilt instead of being reported as a finished
    clone, which would otherwise feed a partial tree into the bench.

    Args:
        variant: Clone name; also the directory name under `CLONES`.
        refresh: When true, always remove and rebuild an existing clone.

    Returns:
        The path of the clone directory.
    """
    target = CLONES / variant
    if target.exists():
        complete = (target / ".bench-ab-manifest.json").is_file()
        if complete and not refresh:
            sys.stdout.write(f"bench_ab_clone: {variant} clone already present at {target} (use --refresh to rebuild)\n")
            return target
        if not complete and not refresh:
            # No manifest means the previous build never reached its final step.
            sys.stdout.write(
                f"bench_ab_clone: {variant} clone at {target} has no manifest "
                "(incomplete build); rebuilding\n"
            )
        shutil.rmtree(target)
    materialise_clone(variant, target)
    write_manifest(variant, target)
    sys.stdout.write(f"bench_ab_clone: built {variant} clone at {target}\n")
    return target
131
+
132
+
133
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the clone script's command line.

    Recognised options: `--refresh` (flag), `--variant` (one of ``with``,
    ``without``, ``both``; default ``both``) and `--print-shape-hash` (flag).
    """
    cli = argparse.ArgumentParser(
        description="Materialise `with` and `without` clones for the A/B bench."
    )
    cli.add_argument(
        "--refresh",
        help="Force-rebuild even if the clone already exists.",
        action="store_true",
    )
    cli.add_argument(
        "--variant",
        help="Which clone to materialise (default: both).",
        default="both",
        choices=("with", "without", "both"),
    )
    cli.add_argument(
        "--print-shape-hash",
        help="Print the target-shape hash and exit without cloning.",
        action="store_true",
    )
    namespace = cli.parse_args(argv)
    return namespace
154
+
155
+
156
def main(argv: list[str] | None = None) -> int:
    """Entry point: print the shape hash or build the requested clone(s).

    `argv` defaults to the process arguments. A missing fixture aborts via
    `die` (exit status 1). Otherwise the function returns 0.
    """
    raw_args = sys.argv[1:] if argv is None else argv
    args = parse_args(raw_args)

    if not FIXTURE.exists():
        die(f"fixture missing at {FIXTURE}")

    if args.print_shape_hash:
        sys.stdout.write(target_shape_hash() + "\n")
        return 0

    if args.variant == "both":
        selected = ("with", "without")
    else:
        selected = (args.variant,)

    for name in selected:
        clone(name, refresh=args.refresh)
    return 0
167
+
168
+
169
+ if __name__ == "__main__":
170
+ raise SystemExit(main())
@@ -0,0 +1,227 @@
1
+ #!/usr/bin/env python3
2
+ """Diff two A/B reports (one per variant) into a comparison artefact.
3
+
4
+ Phase 2 Step 4 of `agents/roadmaps/road-to-package-impact-benchmark.md`.
5
+
6
+ Inputs: two report JSON paths. Output: a JSON artefact under
7
+ `internal/bench/reports/ab/diff/{stamp}-{corpus}-diff.json` plus a matching
8
+ `.md`. Phase 5's renderer consumes this artefact to populate `docs/benchmark.md`.
9
+
10
+ The diff content depends on the corpus:
11
+
12
+ - `ab-tracka` — trigger-accuracy %, false-positive count, per-rule lift.
13
+ - `ab-trackb` — completion-rate per category, wall-time, tokens, cost,
14
+ ask-vs-act ratio, tool-call count.
15
+
16
+ Phase 2 only writes the structural skeleton (delta object with `with`,
17
+ `without`, `delta` keys); Phases 3 and 4 plug their real metrics into
18
+ the `results` blocks the runners emit, and the diff is computed in
19
+ `compute_track_a_diff` / `compute_track_b_diff` here.
20
+ """
21
+ from __future__ import annotations
22
+
23
+ import argparse
24
+ import json
25
+ import sys
26
+ from datetime import datetime, timezone
27
+ from pathlib import Path
28
+
29
+ REPO_ROOT = Path(__file__).resolve().parent.parent
30
+ REPORTS_DIR = REPO_ROOT / "internal" / "bench" / "reports" / "ab"
31
+ DIFF_DIR = REPORTS_DIR / "diff"
32
+
33
+
34
def utc_stamp() -> str:
    """Return the current UTC time as a stamp such as ``2024-01-31T09-15-00Z``.

    The time fields are separated by hyphens rather than colons; `main` embeds
    the stamp in artefact file names.
    """
    now_utc = datetime.now(tz=timezone.utc)
    return now_utc.strftime("%Y-%m-%dT%H-%M-%SZ")
36
+
37
+
38
def load_report(path: Path) -> dict:
    """Parse the JSON report stored at `path`.

    The file is read as raw bytes and handed to `json.loads`, which detects
    UTF-8/UTF-16/UTF-32 (including a leading BOM) on its own. Decoding
    therefore does not depend on the platform's default text encoding.

    Raises:
        OSError: the file cannot be read.
        ValueError: the content is not valid JSON (`json.JSONDecodeError`) or
            cannot be decoded.
    """
    return json.loads(path.read_bytes())
40
+
41
+
42
def compute_track_a_diff(with_results: dict, without_results: dict) -> dict:
    """Build the Track A comparison: trigger accuracy plus per-rule data.

    `trigger_accuracy` is read from both `results` blocks, coerced to float
    (missing or non-numeric values become 0.0), and reported with the
    with-minus-without difference rounded to three decimals.
    `false_positives` and `per_rule_accuracy` are passed through side by side,
    defaulting to 0 and `{}` respectively. Absent values are zeroed, never
    invented.
    """

    def as_number(results: dict, key: str, fallback: float = 0.0) -> float:
        raw = results.get(key, fallback)
        try:
            number = float(raw)
        except (TypeError, ValueError):
            number = fallback
        return number

    accuracy = {
        "with": as_number(with_results, "trigger_accuracy"),
        "without": as_number(without_results, "trigger_accuracy"),
    }
    accuracy["delta_pct_points"] = round(accuracy["with"] - accuracy["without"], 3)

    sides = (("with", with_results), ("without", without_results))
    return {
        "trigger_accuracy": accuracy,
        "false_positives": {
            label: results.get("false_positives", 0) for label, results in sides
        },
        "per_rule": {
            label: results.get("per_rule_accuracy", {}) for label, results in sides
        },
    }
74
+
75
+
76
def compute_track_b_diff(with_results: dict, without_results: dict) -> dict:
    """Build the Track B comparison from the two `results` blocks.

    `per_category` lists, for every category named on either side (sorted),
    the raw with/without entries (`{}` when absent). Wall time, tokens and
    cost are reported as with/without floats plus a rounded with-minus-without
    delta (three decimals, four for cost). The ask-vs-act ratio and tool-call
    count are reported side by side without a delta. Missing or non-numeric
    metrics read as 0.0.
    """

    def metric(results: dict, key: str) -> float:
        try:
            return float(results.get(key, 0.0))
        except (TypeError, ValueError):
            return 0.0

    def side_by_side(key: str) -> dict:
        return {
            "with": metric(with_results, key),
            "without": metric(without_results, key),
        }

    def with_delta(key: str, digits: int) -> dict:
        pair = side_by_side(key)
        pair["delta"] = round(pair["with"] - pair["without"], digits)
        return pair

    with_cats = with_results.get("per_category", {})
    without_cats = without_results.get("per_category", {})
    per_category = {
        cat: {"with": with_cats.get(cat, {}), "without": without_cats.get(cat, {})}
        for cat in sorted(set(with_cats) | set(without_cats))
    }

    return {
        "per_category": per_category,
        "wall_time_seconds": with_delta("mean_wall_time", 3),
        "tokens": with_delta("mean_tokens", 3),
        "cost_usd": with_delta("mean_cost_usd", 4),
        "ask_vs_act_ratio": side_by_side("ask_vs_act_ratio"),
        "tool_calls_per_task": side_by_side("mean_tool_calls"),
    }
132
+
133
+
134
def render_markdown(diff: dict) -> str:
    """Render the diff artefact as a small Markdown document.

    The output has a title naming the corpus, a bullet list with the stamp and
    both report paths, and the `delta` object (``{}`` when absent) as an
    indented JSON code block. `corpus`, `stamp`, `with_report` and
    `without_report` are required keys.
    """
    title = f"# A/B Bench Diff — {diff['corpus']}"
    provenance = [
        f"- Stamp: `{diff['stamp']}`",
        f"- With: `{diff['with_report']}`",
        f"- Without: `{diff['without_report']}`",
    ]
    delta_json = json.dumps(diff.get("delta", {}), indent=2)
    # The trailing empty item makes the document end with a newline.
    sections = [
        title,
        "",
        *provenance,
        "",
        "## Delta",
        "",
        "```json",
        delta_json,
        "```",
        "",
    ]
    return "\n".join(sections)
150
+
151
+
152
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the diff script's command line.

    Two positional report paths (`with_report`, `without_report`) are
    required. `--out-dir` selects where the artefact is written and defaults
    to `DIFF_DIR`.
    """
    cli = argparse.ArgumentParser(
        description="Diff two A/B bench reports (one per variant)."
    )
    positionals = (
        ("with_report", "Report JSON for variant=with"),
        ("without_report", "Report JSON for variant=without"),
    )
    for dest, description in positionals:
        cli.add_argument(dest, type=Path, help=description)
    cli.add_argument(
        "--out-dir",
        default=DIFF_DIR,
        type=Path,
        help="Where to write the diff artefact (default: internal/bench/reports/ab/diff/)",
    )
    return cli.parse_args(argv)
165
+
166
+
167
def _repo_relative(path: Path) -> str:
    """Return `path` relative to the repo root, or its resolved form if outside it."""
    resolved = path.resolve()
    try:
        return str(resolved.relative_to(REPO_ROOT))
    except ValueError:
        # Reports and --out-dir may legitimately live outside the repository.
        return str(resolved)


def main(argv: list[str] | None = None) -> int:
    """Diff the two variant reports and write the JSON + Markdown artefact.

    Validation (each failure prints a message on stderr and returns 1):
    both report files must exist, be readable JSON objects, carry the
    expected `variant` value, and agree on `corpus`.

    The delta is computed by the strategy registered for the corpus
    (`ab-tracka`, `ab-trackb`). Any other corpus gets a note plus the raw
    results. Report paths and the written artefact path are shown relative to
    the repo root when they sit inside it and as absolute paths otherwise, so
    an external report or a relative/external `--out-dir` no longer raises
    `ValueError` (previously this happened after the files were written).

    Returns:
        0 on success, 1 on any validation or input error.
    """
    args = parse_args(argv if argv is not None else sys.argv[1:])

    for report_path in (args.with_report, args.without_report):
        if not report_path.exists():
            sys.stderr.write(f"bench_ab_diff: missing {report_path}\n")
            return 1

    loaded = []
    for report_path in (args.with_report, args.without_report):
        try:
            report = load_report(report_path)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and decoding failures.
            sys.stderr.write(f"bench_ab_diff: cannot read {report_path}: {exc}\n")
            return 1
        if not isinstance(report, dict):
            sys.stderr.write(f"bench_ab_diff: {report_path} is not a JSON object\n")
            return 1
        loaded.append(report)
    with_rep, without_rep = loaded

    if with_rep.get("variant") != "with":
        sys.stderr.write(
            f"bench_ab_diff: {args.with_report} variant is "
            f"{with_rep.get('variant')!r}, expected 'with'\n"
        )
        return 1
    if without_rep.get("variant") != "without":
        sys.stderr.write(
            f"bench_ab_diff: {args.without_report} variant is "
            f"{without_rep.get('variant')!r}, expected 'without'\n"
        )
        return 1
    if with_rep.get("corpus") != without_rep.get("corpus"):
        sys.stderr.write(
            f"bench_ab_diff: corpus mismatch — with={with_rep.get('corpus')} "
            f"without={without_rep.get('corpus')}\n"
        )
        return 1

    corpus = with_rep.get("corpus") or "unknown"
    with_results = with_rep.get("results", {})
    without_results = without_rep.get("results", {})
    if corpus == "ab-tracka":
        delta = compute_track_a_diff(with_results, without_results)
    elif corpus == "ab-trackb":
        delta = compute_track_b_diff(with_results, without_results)
    else:
        delta = {
            "note": f"no diff strategy registered for corpus {corpus!r}",
            "with_results": with_results,
            "without_results": without_results,
        }

    stamp = utc_stamp()
    diff = {
        "schema": "ab-bench-diff/0.1",
        "stamp": stamp,
        "corpus": corpus,
        "with_report": _repo_relative(args.with_report),
        "without_report": _repo_relative(args.without_report),
        "delta": delta,
    }
    args.out_dir.mkdir(parents=True, exist_ok=True)
    json_path = args.out_dir / f"{stamp}-{corpus}-diff.json"
    md_path = json_path.with_suffix(".md")
    json_path.write_text(json.dumps(diff, indent=2) + "\n")
    md_path.write_text(render_markdown(diff))
    sys.stdout.write(f"bench_ab_diff: wrote {_repo_relative(json_path)}\n")
    return 0
224
+
225
+
226
+ if __name__ == "__main__":
227
+ raise SystemExit(main())
@@ -0,0 +1,143 @@
1
+ #!/usr/bin/env python3
2
+ """Assert the two A/B clones differ only in the agent-config surface.
3
+
4
+ Phase 1 Step 3 of `agents/roadmaps/road-to-package-impact-benchmark.md`.
5
+
6
+ The bench's whole validity hinges on the two clones being identical except for
7
+ whether the agent-config surface is present. This script enumerates the file
8
+ trees of both clones and compares byte-by-byte, allowing differences only at
9
+ the documented surface paths (`.claude/`, `.augment/`, `AGENTS.md`,
10
+ `CLAUDE.md`) and the variant manifest.
11
+
12
+ Exit code:
13
+ 0 — clones are identical except at the allowed surface
14
+ 1 — clone is missing, or a task-target file diverges between variants
15
+ 2 — usage error
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import argparse
20
+ import hashlib
21
+ import sys
22
+ from pathlib import Path
23
+
24
+ REPO_ROOT = Path(__file__).resolve().parents[1]
25
+ AB_ROOT = REPO_ROOT / "internal" / "bench" / "ab"
26
+ CLONES = AB_ROOT / "clones"
27
+
28
+ # Surfaces where divergence is expected (variant-bearing).
29
+ ALLOWED_DELTA_PATHS = (
30
+ ".claude",
31
+ ".augment",
32
+ "AGENTS.md",
33
+ "CLAUDE.md",
34
+ )
35
+ # Variant-distinguishing manifest written by bench_ab_clone.
36
+ ALLOWED_DELTA_FILES = (
37
+ ".bench-ab-manifest.json",
38
+ )
39
+
40
+
41
def is_under_allowed_path(rel: Path) -> bool:
    """Tell whether the clone-relative path `rel` may differ between variants.

    A path qualifies when its first component is one of `ALLOWED_DELTA_PATHS`
    or when the whole path is one of `ALLOWED_DELTA_FILES`. An empty path
    never qualifies.
    """
    segments = rel.parts
    if not segments:
        return False
    return segments[0] in ALLOWED_DELTA_PATHS or rel.as_posix() in ALLOWED_DELTA_FILES
49
+
50
+
51
def file_hash(path: Path) -> str:
    """Return the SHA-256 hex digest of the file at `path`.

    The file is read in 64 KiB chunks so large files are not loaded whole.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while block := stream.read(65536):
            digest.update(block)
    return digest.hexdigest()
57
+
58
+
59
def index_clone(root: Path) -> dict[str, str]:
    """Map every regular file under `root` to its SHA-256 digest.

    Keys are POSIX-style paths relative to `root`, inserted in sorted path
    order. Directories and other non-file entries are skipped.
    """
    regular_files = (entry for entry in sorted(root.rglob("*")) if entry.is_file())
    return {
        entry.relative_to(root).as_posix(): file_hash(entry)
        for entry in regular_files
    }
67
+ return out
68
+
69
+
70
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the integrity script's command line (a single `--verbose` flag)."""
    cli = argparse.ArgumentParser(
        description="Verify the A/B clones differ only in the agent-config surface."
    )
    cli.add_argument(
        "--verbose",
        help="Print every checked file (default: only divergences)",
        action="store_true",
    )
    namespace = cli.parse_args(argv)
    return namespace
80
+
81
+
82
def main(argv: list[str] | None = None) -> int:
    """Check that the `with` and `without` clones differ only at the allowed surface.

    Both clone trees are indexed by relative path and SHA-256. A failure is
    any file outside the allowed surface that exists in only one clone, or
    that exists in both with different content.

    With `--verbose`, a count summary is printed, followed by one line per
    checked file, as the flag's help text promises: ``=`` identical in both,
    ``~`` present in both but different, ``+`` only in `with`, ``-`` only in
    `without`.

    Returns:
        0 when the clones differ only at the allowed surface, 1 when a clone
        is missing or a disallowed divergence is found.
    """
    args = parse_args(argv if argv is not None else sys.argv[1:])

    with_root = CLONES / "with"
    without_root = CLONES / "without"
    for label, root in (("with", with_root), ("without", without_root)):
        if not root.exists():
            sys.stderr.write(
                f"bench_ab_integrity: {label} clone missing at {root} — run scripts/bench_ab_clone.py first\n"
            )
            return 1

    with_index = index_clone(with_root)
    without_index = index_clone(without_root)
    with_files = set(with_index)
    without_files = set(without_index)

    def outside_surface(rels) -> list[str]:
        # Keep only the paths where a variant difference is NOT permitted.
        return [rel for rel in sorted(rels) if not is_under_allowed_path(Path(rel))]

    # Files only in `with` — must all sit under the allowed surface.
    bad_only_with = outside_surface(with_files - without_files)
    # Files only in `without` — there should be none outside the surface.
    bad_only_without = outside_surface(without_files - with_files)
    # Files present in both — must match byte-for-byte unless under the surface.
    shared = sorted(with_files & without_files)
    differing = {rel for rel in shared if with_index[rel] != without_index[rel]}
    bad_diff = outside_surface(differing)

    if args.verbose:
        sys.stdout.write(
            f"bench_ab_integrity: with={len(with_index)} files, without={len(without_index)} files, shared={len(shared)}\n"
        )
        for rel in sorted(with_files | without_files):
            if rel not in without_files:
                marker = "+"
            elif rel not in with_files:
                marker = "-"
            elif rel in differing:
                marker = "~"
            else:
                marker = "="
            sys.stdout.write(f" {marker} {rel}\n")

    if not bad_only_with and not bad_only_without and not bad_diff:
        sys.stdout.write(
            "bench_ab_integrity: clones differ only at the allowed surface (.claude, .augment, AGENTS.md, CLAUDE.md, manifest).\n"
        )
        return 0

    sys.stderr.write("bench_ab_integrity: INTEGRITY FAILURE\n")
    if bad_only_with:
        sys.stderr.write(" files only in `with` (NOT in allowed surface):\n")
        for rel in bad_only_with:
            sys.stderr.write(f" + {rel}\n")
    if bad_only_without:
        sys.stderr.write(" files only in `without` (NOT in allowed surface):\n")
        for rel in bad_only_without:
            sys.stderr.write(f" - {rel}\n")
    if bad_diff:
        sys.stderr.write(" files present in both but byte-divergent:\n")
        for rel in bad_diff:
            sys.stderr.write(f" ~ {rel}\n")
    return 1
140
+
141
+
142
+ if __name__ == "__main__":
143
+ raise SystemExit(main())