@event4u/agent-config 3.2.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (607) hide show
  1. package/.agent-src/README.md +2 -2
  2. package/.agent-src/commands/agent-handoff.md +31 -2
  3. package/.agent-src/commands/agent-status.md +6 -6
  4. package/.agent-src/commands/agents/audit.md +8 -8
  5. package/.agent-src/commands/agents/init.md +25 -1
  6. package/.agent-src/commands/agents/optimize.md +3 -3
  7. package/.agent-src/commands/agents/user.md +1 -1
  8. package/.agent-src/commands/agents.md +1 -1
  9. package/.agent-src/commands/analyze-reference-repo.md +1 -1
  10. package/.agent-src/commands/check-current-md.md +8 -8
  11. package/.agent-src/commands/{compress.md → condense.md} +55 -55
  12. package/.agent-src/commands/context/create.md +7 -4
  13. package/.agent-src/commands/context/refactor.md +3 -1
  14. package/.agent-src/commands/feature/dev.md +1 -1
  15. package/.agent-src/commands/feature/explore.md +1 -1
  16. package/.agent-src/commands/feature/plan.md +10 -8
  17. package/.agent-src/commands/feature/refactor.md +3 -1
  18. package/.agent-src/commands/feature/roadmap.md +7 -4
  19. package/.agent-src/commands/fix/portability.md +3 -3
  20. package/.agent-src/commands/fix/refs.md +4 -4
  21. package/.agent-src/commands/ghostwriter.md +2 -2
  22. package/.agent-src/commands/memory/learn-low-impact.md +3 -3
  23. package/.agent-src/commands/module/explore.md +34 -8
  24. package/.agent-src/commands/optimize/agents-dir.md +9 -7
  25. package/.agent-src/commands/optimize/augmentignore.md +2 -2
  26. package/.agent-src/commands/optimize/skills.md +9 -9
  27. package/.agent-src/commands/post-as.md +1 -1
  28. package/.agent-src/commands/project-analyze.md +2 -2
  29. package/.agent-src/commands/project-health.md +3 -2
  30. package/.agent-src/commands/research/deep.md +1 -1
  31. package/.agent-src/commands/research/report.md +1 -1
  32. package/.agent-src/commands/research.md +1 -1
  33. package/.agent-src/commands/roadmap/ai-council.md +1 -1
  34. package/.agent-src/commands/roadmap/create.md +9 -4
  35. package/.agent-src/commands/rule-compliance-audit.md +1 -1
  36. package/.agent-src/commands/upstream-contribute.md +14 -14
  37. package/.agent-src/commands/video/from-script.md +1 -1
  38. package/.agent-src/commands/video/scene.md +1 -1
  39. package/.agent-src/commands/video/stitch.md +1 -1
  40. package/.agent-src/commands/video/storyboard.md +1 -1
  41. package/.agent-src/commands/video.md +1 -1
  42. package/.agent-src/contexts/augment-infrastructure.md +1 -1
  43. package/.agent-src/contexts/authority/commit-mechanics.md +15 -0
  44. package/.agent-src/contexts/authority/kernel-rule-edits.md +3 -3
  45. package/.agent-src/contexts/authority/scope-mechanics.md +1 -1
  46. package/.agent-src/contexts/communication/rules-auto/augment-source-of-truth-mechanics.md +28 -28
  47. package/.agent-src/contexts/communication/rules-auto/skill-quality-mechanics.md +4 -4
  48. package/.agent-src/contexts/communication/rules-auto/think-before-action-mechanics.md +2 -2
  49. package/.agent-src/contexts/contracts/artifact-engagement-flow.md +6 -6
  50. package/.agent-src/contexts/contracts/command-suggestion-flow.md +3 -3
  51. package/.agent-src/contexts/contracts/emergency-triage-block.md +4 -4
  52. package/.agent-src/contexts/contracts/frugality-charter.md +3 -3
  53. package/.agent-src/contexts/documentation-hierarchy.md +14 -7
  54. package/.agent-src/contexts/execution/autonomy-examples.md +1 -1
  55. package/.agent-src/contexts/execution/cheap-question-mechanics.md +39 -2
  56. package/.agent-src/contexts/execution/roadmap-process-loop.md +28 -5
  57. package/.agent-src/contexts/override-system.md +5 -5
  58. package/.agent-src/ghostwriter/fictional-fixture-v1.md +1 -1
  59. package/.agent-src/personas/advisors/first-principles.md +1 -1
  60. package/.agent-src/personas/hollywood-director.md +1 -1
  61. package/.agent-src/rules/architecture.md +5 -1
  62. package/.agent-src/rules/augment-edit-discipline.md +5 -5
  63. package/.agent-src/rules/augment-source-of-truth.md +15 -15
  64. package/.agent-src/rules/commit-conventions.md +1 -1
  65. package/.agent-src/rules/commit-policy.md +10 -0
  66. package/.agent-src/rules/domain-adoption-policy.md +3 -3
  67. package/.agent-src/rules/fast-path-marker-visibility.md +3 -3
  68. package/.agent-src/rules/finance-safety-floor.md +1 -1
  69. package/.agent-src/rules/framework-neutrality-in-generic-skills.md +8 -8
  70. package/.agent-src/rules/git-history-discipline.md +1 -1
  71. package/.agent-src/rules/improve-before-implement.md +2 -2
  72. package/.agent-src/rules/language-and-tone.md +2 -2
  73. package/.agent-src/rules/media-governance-routing.md +5 -5
  74. package/.agent-src/rules/no-attribution-footers.md +1 -0
  75. package/.agent-src/rules/no-cheap-questions.md +3 -0
  76. package/.agent-src/rules/no-decorative-emojis-in-git-surfaces.md +111 -0
  77. package/.agent-src/rules/no-pr-progress-comments.md +118 -0
  78. package/.agent-src/rules/no-roadmap-references.md +3 -3
  79. package/.agent-src/rules/non-destructive-by-default.md +1 -1
  80. package/.agent-src/rules/persona-governance.md +3 -3
  81. package/.agent-src/rules/preservation-guard.md +15 -15
  82. package/.agent-src/rules/roadmap-ci-steps-policy.md +7 -3
  83. package/.agent-src/rules/rule-type-governance.md +1 -1
  84. package/.agent-src/rules/skill-quality.md +1 -1
  85. package/.agent-src/rules/{caveman-speak.md → telegraph-speak.md} +15 -15
  86. package/.agent-src/rules/token-optimizer-maintenance.md +6 -6
  87. package/.agent-src/skills/agent-docs-writing/SKILL.md +17 -11
  88. package/.agent-src/skills/agents-md-thin-root/SKILL.md +9 -9
  89. package/.agent-src/skills/check-refs/SKILL.md +2 -2
  90. package/.agent-src/skills/code-refactoring/SKILL.md +2 -2
  91. package/.agent-src/skills/command-writing/SKILL.md +19 -19
  92. package/.agent-src/skills/comp-banding/SKILL.md +1 -1
  93. package/.agent-src/skills/condense-memory/SKILL.md +131 -0
  94. package/.agent-src/skills/context-authoring/SKILL.md +2 -2
  95. package/.agent-src/skills/context-document/SKILL.md +5 -3
  96. package/.agent-src/skills/copilot-agents-optimization/SKILL.md +3 -3
  97. package/.agent-src/skills/description-assist/SKILL.md +2 -2
  98. package/.agent-src/skills/git-workflow/SKILL.md +1 -1
  99. package/.agent-src/skills/guideline-writing/SKILL.md +5 -5
  100. package/.agent-src/skills/learning-to-rule-or-skill/SKILL.md +4 -4
  101. package/.agent-src/skills/lint-skills/SKILL.md +3 -3
  102. package/.agent-src/skills/md-language-check/SKILL.md +2 -2
  103. package/.agent-src/skills/module-detect-on-the-fly/SKILL.md +138 -0
  104. package/.agent-src/skills/module-management/SKILL.md +166 -94
  105. package/.agent-src/skills/override-management/SKILL.md +1 -1
  106. package/.agent-src/skills/persona-writing/SKILL.md +5 -5
  107. package/.agent-src/skills/positioning-strategy/SKILL.md +1 -1
  108. package/.agent-src/skills/project-docs/SKILL.md +6 -4
  109. package/.agent-src/skills/readme-reviewer/SKILL.md +2 -2
  110. package/.agent-src/skills/roadmap-management/SKILL.md +13 -1
  111. package/.agent-src/skills/roadmap-writing/SKILL.md +4 -2
  112. package/.agent-src/skills/rule-refactor/SKILL.md +5 -5
  113. package/.agent-src/skills/rule-writing/SKILL.md +18 -18
  114. package/.agent-src/skills/script-writing/SKILL.md +1 -1
  115. package/.agent-src/skills/skill-improvement-pipeline/SKILL.md +6 -6
  116. package/.agent-src/skills/skill-management/SKILL.md +21 -21
  117. package/.agent-src/skills/skill-reviewer/SKILL.md +2 -2
  118. package/.agent-src/skills/skill-writing/SKILL.md +8 -8
  119. package/.agent-src/skills/skill-writing/evals/triggers.json +1 -1
  120. package/.agent-src/skills/token-optimizer/SKILL.md +4 -4
  121. package/.agent-src/skills/unit-economics-modeling/SKILL.md +1 -1
  122. package/.agent-src/skills/upstream-contribute/SKILL.md +17 -17
  123. package/.agent-src/templates/AGENTS.md +1 -0
  124. package/.agent-src/templates/agent-settings.md +24 -13
  125. package/.agent-src/templates/agents/agent-project-settings.example.yml +61 -2
  126. package/.agent-src/templates/command.md +5 -5
  127. package/.agent-src/templates/contexts.md +1 -1
  128. package/.agent-src/templates/copilot-instructions.md +8 -8
  129. package/.agent-src/templates/features.md +1 -1
  130. package/.agent-src/templates/hooks/pre-commit-frontmatter +2 -2
  131. package/.agent-src/templates/hooks/pre-commit-roadmap-progress +3 -3
  132. package/.agent-src/templates/persona.md +2 -2
  133. package/.agent-src/templates/roadmaps.md +1 -1
  134. package/.agent-src/templates/rule.md +13 -13
  135. package/.agent-src/templates/scripts/memory_lookup.py +1 -1
  136. package/.agent-src/templates/scripts/memory_status.py +2 -2
  137. package/.agent-src/templates/scripts/work_engine/_lib/agent_settings.py +195 -1
  138. package/.agent-src/templates/scripts/work_engine/orchestration.py +1 -1
  139. package/.agent-src/templates/skill-archive-note.md +5 -5
  140. package/.agent-src/templates/skill.md +1 -1
  141. package/.claude-plugin/marketplace.json +4 -4
  142. package/AGENTS.md +16 -16
  143. package/CHANGELOG.md +204 -2
  144. package/CONTRIBUTING.md +31 -12
  145. package/README.md +18 -10
  146. package/config/agent-settings.template.yml +22 -2
  147. package/config/discovery/unassigned-artefacts.yml +24 -24
  148. package/config/profiles/full.ini +1 -1
  149. package/dist/cli/agent-config.js +52 -3
  150. package/dist/cli/agent-config.js.map +1 -1
  151. package/dist/cli/commands/uiServe.js +9 -0
  152. package/dist/cli/commands/uiServe.js.map +1 -1
  153. package/dist/cli/registry.js +2 -1
  154. package/dist/cli/registry.js.map +1 -1
  155. package/dist/discovery/deprecation-report.md +1 -1
  156. package/dist/discovery/discovery-manifest.json +649 -606
  157. package/dist/discovery/discovery-manifest.json.sha256 +1 -1
  158. package/dist/discovery/discovery-manifest.summary.md +4 -4
  159. package/dist/discovery/orphan-report.md +1 -1
  160. package/dist/discovery/packs.json +439 -437
  161. package/dist/discovery/trust-report.md +5 -5
  162. package/dist/discovery/workspaces.json +450 -448
  163. package/dist/install/apply.js +238 -0
  164. package/dist/install/apply.js.map +1 -0
  165. package/dist/install/atomic.js +92 -0
  166. package/dist/install/atomic.js.map +1 -0
  167. package/dist/install/bridges/augment.js +20 -0
  168. package/dist/install/bridges/augment.js.map +1 -0
  169. package/dist/install/bridges/claude.js +44 -0
  170. package/dist/install/bridges/claude.js.map +1 -0
  171. package/dist/install/bridges/cline.js +69 -0
  172. package/dist/install/bridges/cline.js.map +1 -0
  173. package/dist/install/bridges/copilot.js +28 -0
  174. package/dist/install/bridges/copilot.js.map +1 -0
  175. package/dist/install/bridges/cursor.js +34 -0
  176. package/dist/install/bridges/cursor.js.map +1 -0
  177. package/dist/install/bridges/gemini.js +39 -0
  178. package/dist/install/bridges/gemini.js.map +1 -0
  179. package/dist/install/bridges/index.js +88 -0
  180. package/dist/install/bridges/index.js.map +1 -0
  181. package/dist/install/bridges/marker-content.js +153 -0
  182. package/dist/install/bridges/marker-content.js.map +1 -0
  183. package/dist/install/bridges/markers.js +42 -0
  184. package/dist/install/bridges/markers.js.map +1 -0
  185. package/dist/install/bridges/types.js +31 -0
  186. package/dist/install/bridges/types.js.map +1 -0
  187. package/dist/install/bridges/vscode.js +26 -0
  188. package/dist/install/bridges/vscode.js.map +1 -0
  189. package/dist/install/bridges/windsurf.js +35 -0
  190. package/dist/install/bridges/windsurf.js.map +1 -0
  191. package/dist/install/conflict.js +196 -0
  192. package/dist/install/conflict.js.map +1 -0
  193. package/dist/install/detect.js +218 -0
  194. package/dist/install/detect.js.map +1 -0
  195. package/dist/install/paths.js +82 -0
  196. package/dist/install/paths.js.map +1 -0
  197. package/dist/install/plan.js +157 -0
  198. package/dist/install/plan.js.map +1 -0
  199. package/dist/install/txlog.js +140 -0
  200. package/dist/install/txlog.js.map +1 -0
  201. package/dist/install/types.js +19 -0
  202. package/dist/install/types.js.map +1 -0
  203. package/dist/install/wizard-plan.js +184 -0
  204. package/dist/install/wizard-plan.js.map +1 -0
  205. package/dist/mcp/registry-manifest.json +4 -4
  206. package/dist/router.json +67 -19
  207. package/dist/server/app.js +6 -0
  208. package/dist/server/app.js.map +1 -1
  209. package/dist/server/routes/install.js +547 -0
  210. package/dist/server/routes/install.js.map +1 -0
  211. package/dist/server/routes/wizard.js +301 -6
  212. package/dist/server/routes/wizard.js.map +1 -1
  213. package/dist/server/routes/workspace.js +396 -0
  214. package/dist/server/routes/workspace.js.map +1 -0
  215. package/dist/server/schemas/settings.js +4 -3
  216. package/dist/server/schemas/settings.js.map +1 -1
  217. package/dist/ui/assets/index-BXZILUxe.css +1 -0
  218. package/dist/ui/assets/index-DLEuEW1V.js +35 -0
  219. package/dist/ui/assets/index-DLEuEW1V.js.map +1 -0
  220. package/dist/ui/index.html +2 -2
  221. package/docs/MIGRATION.md +1 -1
  222. package/docs/adrs/cost/0001-hard-stop-hook.md +1 -1
  223. package/docs/adrs/router/0001-three-tier-routing.md +4 -4
  224. package/docs/adrs/schema/0001-json-schema-frontmatter.md +1 -1
  225. package/docs/adrs/smoke/0001-per-tier-smoke-scripts.md +4 -4
  226. package/docs/adrs/{caveman → telegraph}/0001-default-off-until-bench.md +9 -9
  227. package/docs/adrs/telegraph/README.md +9 -0
  228. package/docs/architecture/augment-projection.md +4 -4
  229. package/docs/architecture/claude-bundle.md +1 -1
  230. package/docs/architecture/current-onboard-baseline.md +3 -3
  231. package/docs/architecture/multi-tool-projection.md +10 -10
  232. package/docs/architecture/source-projection.md +27 -27
  233. package/docs/architecture.md +19 -15
  234. package/docs/archive/CHANGELOG-pre-2.11.0.md +2 -2
  235. package/docs/archive/CHANGELOG-pre-2.15.0.md +3 -3
  236. package/docs/archive/CHANGELOG-pre-2.16.0.md +1 -1
  237. package/docs/archive/CHANGELOG-pre-2.2.0.md +70 -70
  238. package/docs/archive/CHANGELOG-pre-2.20.0.md +2 -2
  239. package/docs/archive/CHANGELOG-pre-2.25.0.md +15 -15
  240. package/docs/archive/CHANGELOG-pre-3.0.0.md +4 -4
  241. package/docs/archive/CHANGELOG-pre-3.1.0.md +2 -2
  242. package/docs/archive/CHANGELOG-pre-3.2.0.md +3 -3
  243. package/docs/benchmark.md +65 -0
  244. package/docs/benchmarks.md +18 -18
  245. package/docs/catalog.md +17 -15
  246. package/docs/contracts/CHANGELOG-conventions.md +2 -2
  247. package/docs/contracts/STABILITY.md +2 -2
  248. package/docs/contracts/adoption-signal-floor.md +110 -0
  249. package/docs/contracts/adr-chat-history-split.md +4 -4
  250. package/docs/contracts/adr-command-suggestion.md +4 -4
  251. package/docs/contracts/adr-gtm-context-spine.md +1 -1
  252. package/docs/contracts/adr-implement-ticket-runtime.md +4 -4
  253. package/docs/contracts/adr-install-user-type-axis.md +1 -1
  254. package/docs/contracts/adr-layout.md +2 -2
  255. package/docs/contracts/adr-mcp-runtime.md +1 -1
  256. package/docs/contracts/adr-product-ui-track.md +10 -10
  257. package/docs/contracts/adr-user-types-axis.md +3 -3
  258. package/docs/contracts/adr-wing4-context-spine.md +1 -1
  259. package/docs/contracts/agent-memory-contract.md +3 -3
  260. package/docs/contracts/agents-md-tech-stack.md +2 -2
  261. package/docs/contracts/ai-council-config.md +2 -2
  262. package/docs/contracts/at-rest-encryption.md +4 -0
  263. package/docs/contracts/audit-log-v1.md +1 -1
  264. package/docs/contracts/benchmark-ab-contract.md +101 -0
  265. package/docs/contracts/benchmark-corpus-spec.md +4 -4
  266. package/docs/contracts/benchmark-report-schema.md +5 -5
  267. package/docs/contracts/branch-protection-policy.md +98 -0
  268. package/docs/contracts/ci-cost-budget.md +106 -0
  269. package/docs/contracts/ci-green-floor.md +141 -0
  270. package/docs/contracts/command-clusters.md +6 -6
  271. package/docs/contracts/command-surface-tiers.md +2 -2
  272. package/docs/contracts/command-taxonomy.md +2 -2
  273. package/docs/contracts/{compression-default-kill-criterion.md → condensation-default-kill-criterion.md} +29 -29
  274. package/docs/contracts/config-presets.md +1 -1
  275. package/docs/contracts/context-paths.md +3 -3
  276. package/docs/contracts/context-spine.md +1 -1
  277. package/docs/contracts/cost-enforcement.md +1 -1
  278. package/docs/contracts/cost-summary-schema.md +12 -12
  279. package/docs/contracts/cross-wing-handoff.md +4 -4
  280. package/docs/contracts/daily-workspace.md +4 -0
  281. package/docs/contracts/decision-trace-v1.md +2 -2
  282. package/docs/contracts/discovery-manifest.md +4 -4
  283. package/docs/contracts/explain-modes.md +4 -0
  284. package/docs/contracts/file-ownership-matrix.json +3493 -3318
  285. package/docs/contracts/file-ownership-matrix.md +3 -3
  286. package/docs/contracts/frontmatter-contract.md +4 -4
  287. package/docs/contracts/ghostwriter-schema.md +3 -3
  288. package/docs/contracts/gui-wizard.md +1 -1
  289. package/docs/contracts/harness-expectations.md +123 -0
  290. package/docs/contracts/host-agent-protocol.md +4 -0
  291. package/docs/contracts/implement-ticket-flow.md +9 -9
  292. package/docs/contracts/install-scopes.md +77 -0
  293. package/docs/contracts/iron-law-overrides.txt +1 -1
  294. package/docs/contracts/kernel-membership.md +26 -26
  295. package/docs/contracts/linear-ai-rules-inclusion.md +1 -1
  296. package/docs/contracts/linter-structural-model.md +2 -2
  297. package/docs/contracts/load-context-budget-model.md +4 -4
  298. package/docs/contracts/load-context-schema.md +13 -13
  299. package/docs/contracts/local-analytics.md +4 -0
  300. package/docs/contracts/local-knowledge-ingestion.md +1 -1
  301. package/docs/contracts/mcp-beta-criteria.md +1 -1
  302. package/docs/contracts/mcp-cloud-scope.md +6 -6
  303. package/docs/contracts/mcp-phase-1-scope.md +3 -3
  304. package/docs/contracts/mcp-registry-manifest.schema.json +1 -1
  305. package/docs/contracts/mcp-tool-inventory.md +1 -1
  306. package/docs/contracts/mcp-tool-stub-envelope.md +1 -1
  307. package/docs/contracts/measurement-baseline.md +11 -11
  308. package/docs/contracts/mental-models.md +30 -30
  309. package/docs/contracts/multi-tool-projection-fidelity.md +4 -4
  310. package/docs/contracts/namespace.md +4 -4
  311. package/docs/contracts/orchestration-dsl-v1.md +7 -7
  312. package/docs/contracts/package-self-orientation.md +12 -12
  313. package/docs/contracts/persona-schema.md +6 -6
  314. package/docs/contracts/pilot/language-and-tone.md +1 -1
  315. package/docs/contracts/plain-language-surface.md +117 -0
  316. package/docs/contracts/profile-system.md +3 -3
  317. package/docs/contracts/release-pr-gating.md +103 -0
  318. package/docs/contracts/role-experience.md +3 -3
  319. package/docs/contracts/rule-classification.md +13 -13
  320. package/docs/contracts/rule-interactions.md +4 -4
  321. package/docs/contracts/rule-interactions.yml +30 -30
  322. package/docs/contracts/rule-priority-hierarchy.md +13 -13
  323. package/docs/contracts/rule-router.md +2 -2
  324. package/docs/contracts/safety-model.md +1 -1
  325. package/docs/contracts/skill-distribution-channels.md +61 -0
  326. package/docs/contracts/skill-domains.md +2 -2
  327. package/docs/contracts/smoke-contracts.md +5 -5
  328. package/docs/contracts/telegraph-telemetry.md +83 -0
  329. package/docs/contracts/trust-and-safety.md +5 -5
  330. package/docs/contracts/ui-stack-extension.md +7 -7
  331. package/docs/contracts/ui-track-flow.md +9 -9
  332. package/docs/contracts/user-type-schema.md +4 -4
  333. package/docs/contracts/workflow-packs.md +4 -4
  334. package/docs/contracts/workspace-documents.md +4 -0
  335. package/docs/customization.md +28 -8
  336. package/docs/decisions/ADR-001-kernel-swap-deferred.md +6 -6
  337. package/docs/decisions/ADR-002-kernel-bucket-overrides.md +11 -11
  338. package/docs/decisions/ADR-003-flat-cluster-subs-and-colon-syntax.md +2 -2
  339. package/docs/decisions/ADR-004-rule-governance-pruning.md +4 -4
  340. package/docs/decisions/ADR-005-subagent-worktrees.md +7 -7
  341. package/docs/decisions/ADR-011-domain-pack-readiness.md +6 -6
  342. package/docs/decisions/ADR-013-discovery-frontmatter-contract.md +3 -3
  343. package/docs/decisions/ADR-015-discovery-manifest-contract.md +3 -3
  344. package/docs/decisions/ADR-017-monorepo-physical-layout.md +10 -10
  345. package/docs/decisions/ADR-018-trust-and-safety-layer.md +6 -6
  346. package/docs/decisions/ADR-019-router-json-dist-location.md +2 -2
  347. package/docs/decisions/ADR-020-global-only-consumer-scope.md +2 -2
  348. package/docs/decisions/ADR-021-deployment-shape.md +1 -1
  349. package/docs/decisions/ADR-022-daily-workspace-decomposition.md +1 -1
  350. package/docs/decisions/ADR-027-changelog-machine-vs-manual.md +129 -0
  351. package/docs/decisions/ADR-028-root-layout.md +147 -0
  352. package/docs/decisions/ADR-029-multi-workspace-deferred.md +122 -0
  353. package/docs/decisions/ADR-rule-kernel-and-router.md +5 -5
  354. package/docs/decisions/INDEX.md +8 -0
  355. package/docs/deploy/team-deployment-posture.md +20 -0
  356. package/docs/development.md +17 -17
  357. package/docs/distribution/registries.md +32 -0
  358. package/docs/distribution/registry-submissions.md +85 -0
  359. package/docs/distribution/telemetry-schema.md +1 -1
  360. package/docs/getting-started-by-role.md +45 -3
  361. package/docs/getting-started.md +2 -2
  362. package/docs/guidelines/agent-infra/5w2h-analysis.md +3 -3
  363. package/docs/guidelines/agent-infra/ask-when-uncertain-demos.md +1 -1
  364. package/docs/guidelines/agent-infra/asking-and-brevity-examples.md +3 -3
  365. package/docs/guidelines/agent-infra/carve-out-predicates.md +3 -3
  366. package/docs/guidelines/agent-infra/critical-thinking.md +4 -4
  367. package/docs/guidelines/agent-infra/direct-answers-demos.md +1 -1
  368. package/docs/guidelines/agent-infra/first-principles.md +2 -2
  369. package/docs/guidelines/agent-infra/inversion-thinking.md +5 -5
  370. package/docs/guidelines/agent-infra/layered-settings.md +56 -2
  371. package/docs/guidelines/agent-infra/mental-models.md +3 -3
  372. package/docs/guidelines/agent-infra/roadmap-progress-mechanics.md +2 -2
  373. package/docs/guidelines/agent-infra/rule-type-governance.md +1 -1
  374. package/docs/guidelines/agent-infra/scqa-framework.md +5 -5
  375. package/docs/guidelines/agent-infra/self-improvement-pipeline.md +2 -2
  376. package/docs/guidelines/agent-infra/six-hats.md +3 -3
  377. package/docs/guidelines/agent-infra/skill-quality-checklist.md +5 -5
  378. package/docs/guidelines/agent-infra/systems-thinking.md +1 -1
  379. package/docs/guidelines/agent-infra/verify-before-complete-demos.md +1 -1
  380. package/docs/guidelines/augment-portability-patterns.md +4 -4
  381. package/docs/guidelines/cross-role-handoff.md +2 -2
  382. package/docs/guidelines/php/php-coding-patterns.md +1 -1
  383. package/docs/guidelines/prompt-templates.md +6 -6
  384. package/docs/maintainers/dev-mode.md +1 -1
  385. package/docs/mcp-server.md +1 -1
  386. package/docs/mcp.md +1 -1
  387. package/docs/parity/bench-ruflo.json +3 -3
  388. package/docs/parity/bench.json +3 -3
  389. package/docs/parity/ruflo.md +3 -3
  390. package/docs/profiles.md +11 -11
  391. package/docs/quality.md +11 -11
  392. package/docs/safety.md +3 -3
  393. package/docs/setup/mcp-client-config.md +2 -2
  394. package/docs/setup/mcp-cloud-endpoints.md +1 -1
  395. package/docs/setup/mcp-cloud-setup.md +2 -2
  396. package/docs/setup/mcp-r2-bootstrap.md +2 -2
  397. package/docs/setup/mcp-server-docker.md +3 -3
  398. package/docs/setup/per-ide/windsurf.md +1 -1
  399. package/docs/skills-catalog.md +8 -7
  400. package/docs/troubleshooting.md +1 -1
  401. package/docs/walkthroughs/daily-workspace-a11y.md +87 -0
  402. package/llms.txt +7 -6
  403. package/package.json +1 -1
  404. package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
  405. package/scripts/_archive/README.md +2 -2
  406. package/scripts/_archive/_backfill_skill_domains.py +3 -3
  407. package/scripts/_archive/_bootstrap_tier_frontmatter.py +3 -3
  408. package/scripts/_archive/_p43_bodies.py +10 -10
  409. package/scripts/_archive/{_p43_compress.py → _p43_condense.py} +5 -5
  410. package/scripts/_archive/_p4_migrate.py +7 -7
  411. package/scripts/_archive/_phase2_shim_helper.py +1 -1
  412. package/scripts/_archive/_pilot_council_question.py +5 -5
  413. package/scripts/_cli/explain_last/inputs.py +1 -1
  414. package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
  415. package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
  416. package/scripts/_lib/agent_settings.py +195 -1
  417. package/scripts/_lib/agent_src.py +19 -19
  418. package/scripts/_lib/bench_ab_cache.py +162 -0
  419. package/scripts/_lib/bench_ab_scoring.py +209 -0
  420. package/scripts/_lib/bench_cost.py +2 -2
  421. package/scripts/_lib/bench_report.py +2 -2
  422. package/scripts/_lib/{bench_caveman.py → bench_telegraph.py} +21 -21
  423. package/scripts/_lib/{bench_caveman_report.py → bench_telegraph_report.py} +22 -22
  424. package/scripts/_lib/claude_desktop_bundler.py +5 -5
  425. package/scripts/_lib/module_detection.py +223 -0
  426. package/scripts/_lib/scope_guard.sh +162 -0
  427. package/scripts/_phase4_bucket.py +3 -3
  428. package/scripts/_pilot_measure.py +4 -4
  429. package/scripts/_tmp_scan_framework_leakage.py +1 -1
  430. package/scripts/adoption_report.py +195 -0
  431. package/scripts/adoption_snapshot.py +219 -0
  432. package/scripts/adoption_status.py +166 -0
  433. package/scripts/ai-video/lib/parse-blueprint.sh +1 -1
  434. package/scripts/ai_council/advisors.py +5 -5
  435. package/scripts/ai_council/compile_corpus.py +1 -1
  436. package/scripts/ai_council/one_off_archive/2026-05/_one_off_budget_v2_audit.py +3 -3
  437. package/scripts/ai_council/one_off_archive/2026-05/_one_off_context_layer_v1_review.py +2 -2
  438. package/scripts/ai_council/one_off_archive/2026-05/_one_off_inject_quiet_flag.py +1 -1
  439. package/scripts/ai_council/one_off_archive/2026-05/_one_off_measure_v2.sh +1 -1
  440. package/scripts/ai_council/one_off_archive/2026-05/_one_off_measure_verbosity.sh +1 -1
  441. package/scripts/ai_council/one_off_archive/2026-05/_one_off_nondestructive_inline_audit.py +3 -3
  442. package/scripts/ai_council/one_off_archive/2026-05/_one_off_per_task.sh +1 -1
  443. package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase6_trigger_jaccard.py +1 -1
  444. package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase_2a_budget_rebalance.py +6 -6
  445. package/scripts/ai_council/one_off_archive/2026-05/_one_off_rebalancing_audit.py +1 -1
  446. package/scripts/ai_council/one_off_archive/2026-05/_one_off_tier_retrofit.py +6 -6
  447. package/scripts/annotate_discovery.py +13 -13
  448. package/scripts/apply_modules_config.py +290 -0
  449. package/scripts/audit_adr_coverage.py +2 -2
  450. package/scripts/audit_auto_rules.py +2 -2
  451. package/scripts/audit_cloud_compatibility.py +3 -3
  452. package/scripts/audit_command_surface.py +9 -9
  453. package/scripts/audit_likelihood.py +2 -2
  454. package/scripts/audit_mcp_tools.py +1 -1
  455. package/scripts/audit_user_type_axis.py +2 -2
  456. package/scripts/bench_ab_cache_dispatch.py +68 -0
  457. package/scripts/bench_ab_clone.py +170 -0
  458. package/scripts/bench_ab_diff.py +227 -0
  459. package/scripts/bench_ab_integrity.py +143 -0
  460. package/scripts/bench_ab_run.py +235 -0
  461. package/scripts/bench_ab_task_runner.py +369 -0
  462. package/scripts/bench_ab_tracka_run.py +202 -0
  463. package/scripts/bench_baseline_ready.py +3 -3
  464. package/scripts/{bench_compress_memory.py → bench_condense_memory.py} +16 -16
  465. package/scripts/bench_drift_check.py +2 -2
  466. package/scripts/bench_per_tool.py +2 -2
  467. package/scripts/bench_run.py +36 -36
  468. package/scripts/bench_runner.py +2 -2
  469. package/scripts/bootstrap.sh +99 -0
  470. package/scripts/build_cloud_bundle.py +6 -6
  471. package/scripts/build_discovery_manifest.py +7 -7
  472. package/scripts/build_linear_digest.py +3 -3
  473. package/scripts/build_mcp_registry_manifest.py +2 -2
  474. package/scripts/build_rule_trigger_matrix.py +8 -8
  475. package/scripts/chat_history.py +5 -5
  476. package/scripts/check_always_budget.py +11 -5
  477. package/scripts/check_augment_description_cap.py +3 -3
  478. package/scripts/check_cluster_patterns.py +2 -2
  479. package/scripts/check_command_count_messaging.py +3 -3
  480. package/scripts/{check_compression.py → check_condensation.py} +34 -34
  481. package/scripts/{check_compressed_paths.py → check_condensed_paths.py} +8 -8
  482. package/scripts/check_context_paths.py +7 -7
  483. package/scripts/check_council_layout.py +2 -2
  484. package/scripts/check_council_references.py +9 -9
  485. package/scripts/check_iron_law_prominence.py +2 -2
  486. package/scripts/check_kernel_rule_bundle.py +2 -2
  487. package/scripts/check_module_management_neutral.py +149 -0
  488. package/scripts/check_no_roadmap_refs.py +9 -9
  489. package/scripts/check_portability.py +3 -3
  490. package/scripts/check_public_catalog_links.py +4 -4
  491. package/scripts/check_references.py +7 -6
  492. package/scripts/check_release_pr_shape.py +112 -0
  493. package/scripts/check_reply_consistency.py +3 -3
  494. package/scripts/check_safety_floor_untouched.py +1 -1
  495. package/scripts/check_template_pin_drift.py +5 -5
  496. package/scripts/check_token_optimizer_freshness.py +3 -3
  497. package/scripts/ci_status.py +301 -0
  498. package/scripts/ci_time_ratio.py +1 -1
  499. package/scripts/cleanup_other_scope.sh +146 -0
  500. package/scripts/compile_router.py +10 -10
  501. package/scripts/{compress.py → condense.py} +64 -64
  502. package/scripts/condense.sh +18 -0
  503. package/scripts/{compress_memory.py → condense_memory.py} +33 -33
  504. package/scripts/config/presets.py +2 -2
  505. package/scripts/config/profiles.py +1 -1
  506. package/scripts/cost_by_conversation.py +3 -3
  507. package/scripts/cost_summary.py +7 -7
  508. package/scripts/count_token_optimizer_usage.sh +1 -1
  509. package/scripts/gen_discovery_baseline.py +5 -5
  510. package/scripts/generate_index.py +6 -6
  511. package/scripts/generate_ownership_matrix.py +10 -10
  512. package/scripts/generate_pack_manifests.py +1 -1
  513. package/scripts/ghostwriter_fixture_allowlist.txt +1 -1
  514. package/scripts/install +3 -3
  515. package/scripts/install-hooks.sh +6 -6
  516. package/scripts/install.py +76 -11
  517. package/scripts/install.sh +187 -1
  518. package/scripts/inventory_frontmatter.py +2 -2
  519. package/scripts/iron_law_sha.py +3 -3
  520. package/scripts/lint_agents_layout.py +14 -7
  521. package/scripts/lint_agents_md.py +4 -4
  522. package/scripts/lint_archived_skills.py +3 -3
  523. package/scripts/lint_artefact_frontmatter.py +2 -2
  524. package/scripts/lint_bench_ab.py +172 -0
  525. package/scripts/lint_bench_corpus.py +1 -1
  526. package/scripts/lint_command_tiers.py +5 -5
  527. package/scripts/lint_context_spine_usage.py +1 -1
  528. package/scripts/lint_framework_leakage.py +7 -7
  529. package/scripts/lint_framework_leakage_allowlist.json +144 -84
  530. package/scripts/lint_ghostwriter_source.py +3 -3
  531. package/scripts/lint_handoffs.py +1 -1
  532. package/scripts/lint_load_context.py +11 -11
  533. package/scripts/lint_media_policy_linkage.py +5 -5
  534. package/scripts/lint_namespace.py +1 -1
  535. package/scripts/lint_no_new_atomic_commands.py +2 -2
  536. package/scripts/lint_orchestration_dsl.py +1 -1
  537. package/scripts/lint_pack_boundaries.py +2 -2
  538. package/scripts/lint_persona_governance.py +4 -4
  539. package/scripts/lint_role_experiences.py +237 -0
  540. package/scripts/lint_rule_interactions.py +2 -2
  541. package/scripts/lint_rule_tiers.py +1 -1
  542. package/scripts/lint_trust_coherence.py +2 -2
  543. package/scripts/mcp_registry_submit.sh +187 -0
  544. package/scripts/mcp_server/__init__.py +1 -1
  545. package/scripts/mcp_server/catalog.py +1 -1
  546. package/scripts/mcp_server/consumer_tool_catalog.json +1 -1
  547. package/scripts/mcp_server/tools.py +2 -2
  548. package/scripts/measure_frugality_savings.py +10 -10
  549. package/scripts/measure_patterns.py +1 -1
  550. package/scripts/measure_projection_bytes.py +5 -5
  551. package/scripts/measure_rule_budget.py +3 -3
  552. package/scripts/measure_skill_reduction.py +1 -1
  553. package/scripts/memory_lookup.py +1 -1
  554. package/scripts/memory_status.py +2 -2
  555. package/scripts/migrate_command_suggestions.py +3 -3
  556. package/scripts/mine_session.py +1 -1
  557. package/scripts/move_artefact.py +3 -3
  558. package/scripts/new_skill.py +2 -2
  559. package/scripts/pack_mcp_content.py +14 -14
  560. package/scripts/plan_physical_move.py +6 -6
  561. package/scripts/print_required_checks.py +196 -0
  562. package/scripts/probe_skill_registration.py +413 -0
  563. package/scripts/propose_modules_config.py +145 -0
  564. package/scripts/prototype_lint_contradictions.py +1 -1
  565. package/scripts/recruit_preflight.sh +152 -0
  566. package/scripts/refine_ticket_detect.py +3 -3
  567. package/scripts/release.py +20 -0
  568. package/scripts/render_benchmark_md.py +308 -0
  569. package/scripts/roadmap_progress_hook.py +1 -1
  570. package/scripts/run_skill_evals.py +2 -2
  571. package/scripts/runtime_registry.py +4 -4
  572. package/scripts/schemas/command.schema.json +4 -4
  573. package/scripts/schemas/rule.schema.json +5 -5
  574. package/scripts/schemas/skill.schema.json +3 -3
  575. package/scripts/schemas/user-type.schema.json +1 -1
  576. package/scripts/score_skill_selection.py +1 -1
  577. package/scripts/skill_collision_clusters.py +2 -2
  578. package/scripts/skill_linter.py +81 -81
  579. package/scripts/skill_overlap.py +5 -5
  580. package/scripts/skill_tools/audit_persona_coverage.py +2 -2
  581. package/scripts/skill_tools/audit_user_type_coverage.py +2 -2
  582. package/scripts/skill_tools/run_block_d_eval.py +1 -1
  583. package/scripts/skill_tools/score_skill_relevance.py +1 -1
  584. package/scripts/skill_tools/suggest_skill_for_task.py +1 -1
  585. package/scripts/skill_trigger_eval.py +5 -5
  586. package/scripts/smoke/kernel.sh +7 -1
  587. package/scripts/smoke/router.sh +5 -5
  588. package/scripts/smoke/skills.sh +1 -1
  589. package/scripts/smoke_quickstart.py +1 -1
  590. package/scripts/snapshot_agent_outputs.py +3 -3
  591. package/scripts/spotcheck_thin_root.py +1 -1
  592. package/scripts/{caveman_stats.py → telegraph_stats.py} +18 -18
  593. package/scripts/update_counts.py +1 -1
  594. package/scripts/validate_decision_engine.py +1 -1
  595. package/scripts/validate_frontmatter.py +1 -1
  596. package/scripts/validate_safe_paths.py +3 -3
  597. package/scripts/{validate_caveman_carveouts.py → validate_telegraph_carveouts.py} +7 -7
  598. package/scripts/verify_roadmap_closure.py +6 -6
  599. package/templates/consumer-settings/ONBOARDING.md +41 -0
  600. package/.agent-src/commands/install-via-agent.md +0 -129
  601. package/.agent-src/skills/compress-memory/SKILL.md +0 -131
  602. package/dist/ui/assets/index-D-DY1ywI.js +0 -35
  603. package/dist/ui/assets/index-D-DY1ywI.js.map +0 -1
  604. package/dist/ui/assets/index-Dqfhmg-d.css +0 -1
  605. package/docs/adrs/caveman/README.md +0 -9
  606. package/docs/contracts/caveman-telemetry.md +0 -83
  607. package/scripts/compress.sh +0 -18
@@ -0,0 +1,235 @@
1
+ #!/usr/bin/env python3
2
+ """Top-level orchestrator for the package-impact A/B bench.
3
+
4
+ Phase 2 Step 1 of `agents/roadmaps/road-to-package-impact-benchmark.md`.
5
+
6
+ A thin wrapper around the per-track runners (Track A behavioural eval,
7
+ Track B task corpus). Owns:
8
+
9
+ - the `--variant {with,without}` axis,
10
+ - the cache lookup that decides whether the `without` arm runs at all,
11
+ - the report-header convention (cache key, variant, corpus, timestamp),
12
+ - the report-path convention `internal/bench/reports/ab/{stamp}-{corpus}-{variant}.json`.
13
+
14
+ Track A's actual runner lands in Phase 3; Track B's in Phase 4. Until then
15
+ this script writes stub reports so the cache and diff plumbing can be
16
+ exercised end-to-end.
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import argparse
21
+ import json
22
+ import sys
23
+ import time
24
+ from datetime import datetime, timezone
25
+ from pathlib import Path
26
+
27
+ REPO_ROOT = Path(__file__).resolve().parent.parent
28
+ sys.path.insert(0, str(REPO_ROOT / "scripts"))
29
+
30
+ from _lib import bench_ab_cache # type: ignore[import-not-found] # noqa: E402
31
+
32
+ REPORTS_DIR = REPO_ROOT / "internal" / "bench" / "reports" / "ab"
33
+ CORPUS_DIR = REPO_ROOT / "internal" / "bench" / "corpora"
34
+ CLONES_DIR = REPO_ROOT / "internal" / "bench" / "ab" / "clones"
35
+
36
+ # Supported corpora (created in Phases 3 + 4).
37
+ KNOWN_CORPORA = ("ab-tracka", "ab-trackb")
38
+
39
+ REPORT_SCHEMA_VERSION = "ab-bench/0.1"
40
+
41
+
42
def utc_stamp() -> str:
    """Return the current UTC time as a filename-safe stamp.

    The layout is ``YYYY-MM-DDTHH-MM-SSZ``; hyphens are used in the time
    part instead of colons.
    """
    now_utc = datetime.now(tz=timezone.utc)
    return f"{now_utc:%Y-%m-%dT%H-%M-%SZ}"
44
+
45
+
46
def corpus_path(corpus: str) -> Path:
    """Return the ``<corpus>.yaml`` file path inside ``CORPUS_DIR``."""
    filename = corpus + ".yaml"
    return CORPUS_DIR.joinpath(filename)
48
+
49
+
50
def report_path(stamp: str, corpus: str, variant: str) -> Path:
    """Return the JSON report path ``{stamp}-{corpus}-{variant}.json`` in ``REPORTS_DIR``."""
    filename = "-".join((stamp, corpus, variant)) + ".json"
    return REPORTS_DIR / filename
52
+
53
+
54
def ensure_clone(variant: str) -> Path:
    """Return the clone directory for *variant*, creating it when absent.

    An existing clone is returned untouched; refreshing it is left to the
    user. When the directory is missing, ``scripts/bench_ab_clone.py`` is
    loaded from its file path and its ``clone`` function is called with
    ``refresh=False``.

    Raises:
        RuntimeError: if no import spec (or loader) can be built for the
            clone helper script.
    """
    clone_dir = CLONES_DIR / variant
    if clone_dir.exists():
        return clone_dir

    # Imported lazily: only the clone-creation path needs it.
    from importlib import util as importlib_util

    helper_file = REPO_ROOT / "scripts" / "bench_ab_clone.py"
    helper_spec = importlib_util.spec_from_file_location("bench_ab_clone", helper_file)
    if helper_spec is None or helper_spec.loader is None:
        raise RuntimeError("cannot load bench_ab_clone helper")
    helper = importlib_util.module_from_spec(helper_spec)
    helper_spec.loader.exec_module(helper)
    helper.clone(variant, refresh=False)  # type: ignore[attr-defined]
    return clone_dir
70
+
71
+
72
def run_track_stub(variant: str, corpus: str, clone_root: Path) -> dict[str, object]:
    """Build the placeholder results block used until the real runners exist.

    Nothing is executed: the returned mapping only records the corpus name
    (as ``track``), a ``"stub"`` status, an explanatory note, the clone
    location relative to ``REPO_ROOT`` and the variant.

    Raises:
        ValueError: if *clone_root* is not located under ``REPO_ROOT``
            (raised by ``Path.relative_to``).
    """
    explanation = (
        "Phase 2 plumbing only. The real runner lands in Phase 3 (Track A) "
        "or Phase 4 (Track B). See road-to-package-impact-benchmark.md."
    )
    relative_clone = clone_root.relative_to(REPO_ROOT)
    return dict(
        track=corpus,
        status="stub",
        note=explanation,
        clone_root=str(relative_clone),
        variant=variant,
    )
88
+
89
+
90
def write_report(
    *,
    variant: str,
    corpus: str,
    stamp: str,
    cache_key: bench_ab_cache.CacheKey,
    results: dict[str, object],
    duration_seconds: float,
) -> Path:
    """Write the JSON report and its Markdown companion for one bench arm.

    Args:
        variant: Bench arm the report belongs to.
        corpus: Name of the corpus that was executed.
        stamp: Timestamp string used in the report body and the file name.
        cache_key: Cache key; serialised through its ``to_dict()`` method.
        results: Results block stored under the ``results`` key.
        duration_seconds: Run duration; rounded to three decimals.

    Returns:
        Path of the JSON report. The Markdown file is written next to it
        with the same stem and a ``.md`` suffix.
    """
    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
    report = {
        "schema": REPORT_SCHEMA_VERSION,
        "stamp": stamp,
        "variant": variant,
        "corpus": corpus,
        "cache_key": cache_key.to_dict(),
        "duration_seconds": round(duration_seconds, 3),
        "results": results,
    }
    json_path = report_path(stamp, corpus, variant)
    # Encoding is pinned: the Markdown header contains non-ASCII characters
    # and the platform default encoding is locale-dependent.
    json_path.write_text(json.dumps(report, indent=2) + "\n", encoding="utf-8")
    md_path = json_path.with_suffix(".md")
    md_path.write_text(render_markdown(report), encoding="utf-8")
    return json_path
114
+
115
+
116
def render_markdown(report: dict[str, object]) -> str:
    """Render *report* as a small Markdown document.

    The output has a title line (variant and corpus), the stamp and
    duration, one bullet per cache-key entry, and the ``results`` value
    dumped as indented JSON inside a fenced block. A missing or empty
    ``cache_key`` yields no bullets.

    Raises:
        KeyError: if ``variant``, ``corpus``, ``stamp`` or
            ``duration_seconds`` is absent from *report*.
    """
    header = [
        f"# A/B Bench Report — {report['variant']} · {report['corpus']}",
        "",
        f"- Stamp: `{report['stamp']}`",
        f"- Duration: {report['duration_seconds']}s",
        "",
        "## Cache key",
        "",
    ]
    cache_entries = report.get("cache_key") or {}
    key_bullets = [
        f"- `{name}`: `{value}`"
        for name, value in cache_entries.items()  # type: ignore[union-attr]
    ]
    results_section = [
        "",
        "## Results",
        "",
        "```json",
        json.dumps(report.get("results"), indent=2),
        "```",
        "",
    ]
    return "\n".join([*header, *key_bullets, *results_section])
136
+
137
+
138
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the command line for a single A/B bench arm.

    ``--variant`` and ``--corpus`` are required; ``--non-interactive`` and
    ``--reuse-cache`` are boolean switches that default to ``False``.
    """
    cli = argparse.ArgumentParser(
        description="Run one arm of the package-impact A/B bench."
    )

    # Required selectors.
    cli.add_argument(
        "--variant",
        required=True,
        choices=("with", "without"),
        help="Which target clone to run against.",
    )
    cli.add_argument(
        "--corpus",
        required=True,
        choices=KNOWN_CORPORA,
        help="Which corpus to execute.",
    )

    # Optional switches.
    cli.add_argument(
        "--non-interactive",
        action="store_true",
        help="Never prompt; assume defaults on cache decisions.",
    )
    reuse_help = (
        "If a fresh cached `without` report exists, skip re-running and "
        "exit 0 without writing a new report. Only meaningful for "
        "--variant without."
    )
    cli.add_argument("--reuse-cache", action="store_true", help=reuse_help)

    return cli.parse_args(argv)
169
+
170
+
171
def main(argv: list[str] | None = None) -> int:
    """Run one arm of the A/B bench and write its (stub) report.

    Flow:
      1. Parse arguments and resolve the corpus file; a missing corpus only
         produces a notice and a synthetic ``"missing-corpus"`` hash.
      2. Build the cache key.
      3. For ``--variant without --reuse-cache`` with an existing corpus,
         consult the cache: a fresh cached report ends the run; a stale one
         ends the run only when ``--non-interactive`` is also set.
      4. Otherwise ensure the clone exists, run the stub track and write
         the report.

    Args:
        argv: Arguments without the program name; ``sys.argv[1:]`` is used
            when ``None``.

    Returns:
        Process exit code; every path through this function returns ``0``.
    """
    args = parse_args(argv if argv is not None else sys.argv[1:])

    corpus_file = corpus_path(args.corpus)
    # Checked once so the notice, the cache key and the reuse gate all act
    # on the same observation.
    corpus_exists = corpus_file.exists()
    if not corpus_exists:
        sys.stdout.write(
            f"bench_ab_run: corpus '{args.corpus}' missing at {corpus_file} — "
            "Phase 3 (track A) or Phase 4 (track B) author it. Writing a "
            "placeholder run with the synthetic corpus hash so cache plumbing "
            "remains exercisable.\n"
        )

    cache_key_value = bench_ab_cache.CacheKey(
        corpus_hash=(
            bench_ab_cache.hash_file(corpus_file)
            if corpus_exists
            else "missing-corpus"
        ),
        claude_cli_version=bench_ab_cache.claude_cli_version(),
        target_shape_hash=bench_ab_cache.target_shape_hash(),
    )

    if args.variant == "without" and args.reuse_cache and corpus_exists:
        lookup = bench_ab_cache.lookup(corpus_file)
        if lookup.fresh and lookup.report_path is not None:
            sys.stdout.write(
                f"bench_ab_run: reusing fresh cached `without` report at "
                f"{lookup.report_path.relative_to(REPO_ROOT)}\n"
            )
            return 0
        if lookup.found and not lookup.fresh:
            sys.stdout.write(
                f"bench_ab_run: cached `without` report stale ({lookup.reason})\n"
            )
            if args.non_interactive:
                sys.stdout.write(
                    "bench_ab_run: --non-interactive — reusing stale baseline "
                    "and flagging the run.\n"
                )
                return 0
            # This branch is only reachable with --reuse-cache set, so the
            # hint must tell the user to add --non-interactive to it.
            sys.stdout.write(
                "bench_ab_run: continuing with a fresh run "
                "(pass --non-interactive together with --reuse-cache to keep "
                "the stale baseline)\n"
            )

    clone_root = ensure_clone(args.variant)
    started = time.monotonic()
    results = run_track_stub(args.variant, args.corpus, clone_root)
    duration = time.monotonic() - started
    path = write_report(
        variant=args.variant,
        corpus=args.corpus,
        stamp=utc_stamp(),
        cache_key=cache_key_value,
        results=results,
        duration_seconds=duration,
    )
    sys.stdout.write(
        f"bench_ab_run: wrote {path.relative_to(REPO_ROOT)}\n"
    )
    return 0
232
+
233
+
234
+ if __name__ == "__main__":
235
+ raise SystemExit(main())
@@ -0,0 +1,369 @@
1
+ #!/usr/bin/env python3
2
+ """Track B — task runner for the package-impact A/B bench.
3
+
4
+ Phase 4 Step 2 of `agents/roadmaps/road-to-package-impact-benchmark.md`.
5
+
6
+ For each task in `internal/bench/corpora/ab-trackb.yaml`, in each variant:
7
+
8
+ 1. Snapshot the variant clone's file tree.
9
+ 2. Invoke the `claude` CLI with the task prompt — OR dry-run, depending
10
+ on `--mode`.
11
+ 3. Capture the transcript, tool-call events, wall-time, and (if available)
12
+ token + cost counts.
13
+ 4. Snapshot the post-run tree.
14
+ 5. Score the task via scripts/_lib/bench_ab_scoring.py.
15
+
16
+ Modes:
17
+
18
+ - `dry-run` (default) — record the would-run shell command, write a stub
19
+ transcript naming the variant, score against the unchanged tree. The
20
+ result is structural-zero for every check that requires a file write,
21
+ but the scoring + reporting pipeline runs end-to-end. This is what the
22
+ bench produces in CI by default — fast, free, repeatable.
23
+ - `live` — actually invoke the `claude` CLI with `--print` (one-shot
24
+ mode) and the task prompt. Reads `CLAUDE_CLI` from env if set, falls
25
+ back to `claude` on PATH. Captures stdout and stderr as the transcript.
26
+ NOTE: `--samples N` for repeated runs is not implemented; this script's CLI defines no such flag.
27
+
28
+ The runner ALWAYS resets the clone to a clean state before each task and
29
+ ALWAYS records the mode in the report header so a reader can never mistake
30
+ a dry-run report for a real measurement.
31
+ """
32
+ from __future__ import annotations
33
+
34
+ import argparse
35
+ import hashlib
36
+ import json
37
+ import os
38
+ import shutil
39
+ import subprocess
40
+ import sys
41
+ import time
42
+ from datetime import datetime, timezone
43
+ from pathlib import Path
44
+
45
+ REPO_ROOT = Path(__file__).resolve().parent.parent
46
+ sys.path.insert(0, str(REPO_ROOT / "scripts"))
47
+
48
+ from _lib import bench_ab_cache # type: ignore[import-not-found] # noqa: E402
49
+ from _lib import bench_ab_scoring # type: ignore[import-not-found] # noqa: E402
50
+
51
+ try:
52
+ import yaml
53
+ except ImportError:
54
+ sys.stderr.write("bench_ab_task_runner: PyYAML required (pip install pyyaml)\n")
55
+ raise SystemExit(2)
56
+
57
+ CORPUS_PATH = REPO_ROOT / "internal" / "bench" / "corpora" / "ab-trackb.yaml"
58
+ CLONES_DIR = REPO_ROOT / "internal" / "bench" / "ab" / "clones"
59
+ REPORTS_DIR = REPO_ROOT / "internal" / "bench" / "reports" / "ab"
60
+
61
+ # How far we descend into a clone when snapshotting. The fixture is shallow.
62
+ SNAPSHOT_MAX_DEPTH = 6
63
+
64
+
65
def utc_stamp() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH-MM-SSZ``.

    The time fields are separated by hyphens rather than colons.
    """
    current = datetime.now(tz=timezone.utc)
    return f"{current:%Y-%m-%dT%H-%M-%SZ}"
67
+
68
+
69
def snapshot_clone(clone_root: Path, *, max_depth: int = SNAPSHOT_MAX_DEPTH) -> dict[str, str]:
    """Map every task-surface file under *clone_root* to a short content hash.

    Keys are POSIX-style paths relative to *clone_root*; values are the
    first 16 hex characters of the file's SHA-256 digest.

    Excluded from the snapshot:
      - anything below a top-level ``.claude`` or ``.augment`` directory,
      - ``AGENTS.md``, ``CLAUDE.md`` and ``.bench-ab-manifest.json`` at the
        clone root,
      - files whose relative path has more than *max_depth* components,
      - files that cannot be read (``OSError``).
    """
    ignored_dirs = {".claude", ".augment"}
    ignored_files = {"AGENTS.md", "CLAUDE.md", ".bench-ab-manifest.json"}
    digests: dict[str, str] = {}
    for candidate in sorted(clone_root.rglob("*")):
        if not candidate.is_file():
            continue
        relative = candidate.relative_to(clone_root)
        segments = relative.parts
        key = relative.as_posix()
        excluded = (
            (bool(segments) and segments[0] in ignored_dirs)
            or key in ignored_files
            or len(segments) > max_depth
        )
        if excluded:
            continue
        try:
            payload = candidate.read_bytes()
        except OSError:
            # Unreadable files are left out of the snapshot.
            continue
        digests[key] = hashlib.sha256(payload).hexdigest()[:16]
    return digests
96
+
97
+
98
def reset_clone(variant: str) -> Path:
    """Rebuild the clone for *variant* and return what the helper returns.

    ``scripts/bench_ab_clone.py`` is loaded from its file path on every
    call and its ``clone`` function is invoked with ``refresh=True``.

    Raises:
        RuntimeError: if no import spec (or loader) can be built for the
            clone helper script.
    """
    from importlib import util as importlib_util

    helper_file = REPO_ROOT / "scripts" / "bench_ab_clone.py"
    helper_spec = importlib_util.spec_from_file_location("bench_ab_clone", helper_file)
    if helper_spec is None or helper_spec.loader is None:
        raise RuntimeError("cannot load bench_ab_clone helper")
    helper = importlib_util.module_from_spec(helper_spec)
    helper_spec.loader.exec_module(helper)
    rebuilt = helper.clone(variant, refresh=True)  # type: ignore[attr-defined]
    return rebuilt
110
+
111
+
112
def claude_executable() -> str | None:
    """Pick the claude CLI command to run.

    A non-empty ``CLAUDE_CLI`` environment variable wins and is returned
    verbatim. Otherwise ``"claude"`` is returned when ``shutil.which`` can
    find it, and ``None`` when it cannot.
    """
    configured = os.environ.get("CLAUDE_CLI")
    if configured:
        return configured
    found_on_path = shutil.which("claude") is not None
    return "claude" if found_on_path else None
120
+
121
+
122
def run_live(task: dict, clone_root: Path, *, timeout_s: int) -> dict:
    """Invoke the claude CLI in ``--print`` mode with the task prompt.

    Args:
        task: Task mapping; its ``prompt`` value (default ``""``) is passed
            to the CLI after a ``--`` separator.
        clone_root: Working directory for the subprocess.
        timeout_s: Seconds after which the subprocess is abandoned.

    Returns:
        A dict with ``mode``, ``reason``, ``transcript``, ``exit_code`` and
        ``wall_time_seconds``. ``mode`` is ``"live-skipped"`` when no CLI
        is available or it cannot be launched, and ``"live"`` otherwise.
        On timeout ``exit_code`` is ``-1`` and the transcript ends with
        ``[TIMEOUT]``.
    """
    binary = claude_executable()
    if binary is None:
        return {
            "mode": "live-skipped",
            "reason": "claude CLI not found; set CLAUDE_CLI or install it",
            "transcript": "",
            "exit_code": None,
            "wall_time_seconds": 0.0,
        }
    prompt = task.get("prompt", "")
    cmd = [binary, "--print", "--", prompt]
    started = time.monotonic()
    try:
        proc = subprocess.run(
            cmd,
            cwd=clone_root,
            capture_output=True,
            text=True,
            timeout=timeout_s,
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        # TimeoutExpired.stdout is bytes whenever output was captured, even
        # with text=True, so it must be decoded before concatenation.
        partial = exc.stdout or ""
        if isinstance(partial, bytes):
            partial = partial.decode("utf-8", errors="replace")
        return {
            "mode": "live",
            "reason": f"timeout after {timeout_s}s",
            "transcript": partial + "\n[TIMEOUT]",
            "exit_code": -1,
            "wall_time_seconds": round(time.monotonic() - started, 3),
        }
    except OSError as exc:
        # E.g. CLAUDE_CLI points at a missing or non-executable file. Record
        # the failure per task instead of aborting the whole bench run.
        return {
            "mode": "live-skipped",
            "reason": f"could not launch {binary!r}: {exc}",
            "transcript": "",
            "exit_code": None,
            "wall_time_seconds": round(time.monotonic() - started, 3),
        }
    duration = time.monotonic() - started
    return {
        "mode": "live",
        "reason": "ok",
        "transcript": proc.stdout + "\n" + proc.stderr,
        "exit_code": proc.returncode,
        "wall_time_seconds": round(duration, 3),
    }
161
+
162
+
163
def run_dry(task: dict, clone_root: Path, variant: str) -> dict:
    """Produce the dry-run result for *task* without invoking the CLI.

    The stub transcript names the variant, the clone path and the task id
    but deliberately omits the prompt text, so transcript-keyword criteria
    cannot match against the prompt instead of an agent response.

    Returns:
        A dict with ``mode`` ``"dry-run"``, ``reason`` ``"ok"``, the stub
        ``transcript``, ``exit_code`` ``0`` and ``wall_time_seconds`` ``0.0``.
    """
    transcript_lines = [
        "[bench_ab_task_runner dry-run]",
        f"variant={variant}",
        f"clone={clone_root}",
        f"task_id={task.get('id')}",
        "[no claude invocation; --mode live to execute for real]",
    ]
    return dict(
        mode="dry-run",
        reason="ok",
        transcript="\n".join(transcript_lines) + "\n",
        exit_code=0,
        wall_time_seconds=0.0,
    )
185
+
186
+
187
def count_ask_events(transcript: str) -> dict[str, int]:
    """Count question phrases versus commit/push/PR commands in *transcript*.

    Matching is a case-insensitive substring count. ``ratio`` is
    ``asked / (asked + acted_with_commit)`` rounded to three decimals, or
    ``0`` when neither kind of marker occurs (or the transcript is empty).
    """
    if not transcript:
        return {"asked": 0, "acted_with_commit": 0, "ratio": 0}

    lowered = transcript.lower()
    question_phrases = ("should i", "do you want", "shall i", "soll ich", "möchtest du")
    action_phrases = ("git commit", "git push", "gh pr create", "gh pr merge")

    ask_count = 0
    for phrase in question_phrases:
        ask_count += lowered.count(phrase)
    act_count = 0
    for phrase in action_phrases:
        act_count += lowered.count(phrase)

    combined = ask_count + act_count
    if combined:
        share = round(ask_count / combined, 3)
    else:
        share = 0
    return {"asked": ask_count, "acted_with_commit": act_count, "ratio": share}
199
+
200
+
201
def per_category_aggregate(per_task: list[dict]) -> dict[str, dict]:
    """Group per-task entries by category and summarise each group.

    Args:
        per_task: Entries with optional ``category``, ``score`` (a mapping
            with a ``passed`` flag) and ``wall_time_seconds`` keys.

    Returns:
        ``{category: {"passed", "total", "completion_rate",
        "mean_wall_time"}}``. Entries whose category is missing, ``None``
        or empty are grouped under ``"unknown"``.
    """
    by_cat: dict[str, list[dict]] = {}
    for entry in per_task:
        # `or` instead of a .get() default: callers may store the key with
        # a None value, which would otherwise create a separate None bucket
        # (rendered as "null" in JSON and "None" in Markdown).
        category = entry.get("category") or "unknown"
        by_cat.setdefault(category, []).append(entry)

    out: dict[str, dict] = {}
    for cat, entries in by_cat.items():
        total = len(entries)  # every bucket holds at least one entry
        passed = sum(1 for e in entries if (e.get("score") or {}).get("passed"))
        wall_time = sum(e.get("wall_time_seconds", 0) for e in entries)
        out[cat] = {
            "passed": passed,
            "total": total,
            "completion_rate": round(passed / total, 4),
            "mean_wall_time": round(wall_time / total, 3),
        }
    return out
220
+
221
+
222
def write_report(
    variant: str,
    *,
    mode: str,
    per_task: list[dict],
    duration: float,
) -> Path:
    """Write the Track B JSON report and its Markdown summary for *variant*.

    Args:
        variant: Variant the tasks were run against.
        mode: Run mode recorded in the results block and the Markdown title.
        per_task: Per-task entries; aggregated into overall and
            per-category figures and embedded verbatim under ``per_task``.
        duration: Total run duration in seconds; rounded to three decimals.

    Returns:
        Path of the JSON report. The Markdown summary is written next to it
        with the same stem and a ``.md`` suffix.
    """
    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
    cache_key = bench_ab_cache.CacheKey(
        corpus_hash=bench_ab_cache.hash_file(CORPUS_PATH),
        claude_cli_version=bench_ab_cache.claude_cli_version(),
        target_shape_hash=bench_ab_cache.target_shape_hash(),
    )
    total = len(per_task)
    passed = sum(1 for e in per_task if e.get("score", {}).get("passed"))
    results = {
        "mode": mode,
        "completion_rate": round(passed / total, 4) if total else 0,
        "passed": passed,
        "total": total,
        "per_category": per_category_aggregate(per_task),
        "mean_wall_time": round(
            sum(e.get("wall_time_seconds", 0) for e in per_task) / total, 3
        )
        if total
        else 0,
        # Mean of the per-task ratios, not a ratio of pooled counts.
        "ask_vs_act_ratio": round(
            sum(e.get("ask_events", {}).get("ratio", 0) for e in per_task) / total, 3
        )
        if total
        else 0,
        "per_task": per_task,
    }
    stamp = utc_stamp()
    payload = {
        "schema": "ab-bench/0.1",
        "stamp": stamp,
        "variant": variant,
        "corpus": "ab-trackb",
        "cache_key": cache_key.to_dict(),
        "duration_seconds": round(duration, 3),
        "results": results,
    }
    path = REPORTS_DIR / f"{stamp}-ab-trackb-{variant}.json"
    # Encoding is pinned: the Markdown contains non-ASCII characters and the
    # platform default encoding is locale-dependent.
    path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
    md = path.with_suffix(".md")
    md.write_text(
        f"# Track B · {variant} · {mode}\n\n"
        f"- Stamp: `{stamp}`\n"
        f"- Completion rate: **{results['completion_rate'] * 100:.1f}%**"
        f" ({passed}/{total})\n"
        f"- Mean wall-time: {results['mean_wall_time']}s\n"
        f"- Ask vs. act ratio: {results['ask_vs_act_ratio']}\n"
        f"\n## Per-category\n\n"
        + "\n".join(
            f"- `{cat}` — {info['passed']}/{info['total']} "
            f"({info['completion_rate'] * 100:.1f}%)"
            for cat, info in results["per_category"].items()
        )
        + "\n",
        encoding="utf-8",
    )
    return path
284
+
285
+
286
def run_variant(variant: str, tasks: list[dict], *, mode: str, timeout_s: int) -> dict:
    """Run every task against *variant*, score it, and write the report.

    For each task the clone is rebuilt, snapshotted, the task is executed
    (``run_live`` when *mode* is ``"live"``, ``run_dry`` otherwise), the
    clone is snapshotted again and the task is scored.

    Args:
        variant: Variant whose clone the tasks run against.
        tasks: Task mappings from the corpus.
        mode: ``"live"`` to invoke the CLI; any other value dry-runs.
        timeout_s: Per-task timeout forwarded to ``run_live``.

    Returns:
        ``{"path": <report path>, "per_task": [...], "duration": <seconds>}``.
    """
    started = time.monotonic()
    per_task: list[dict] = []
    for task in tasks:
        clone_root = reset_clone(variant)
        pre = snapshot_clone(clone_root)
        if mode == "live":
            run_result = run_live(task, clone_root, timeout_s=timeout_s)
        else:
            run_result = run_dry(task, clone_root, variant)
        post = snapshot_clone(clone_root)
        transcript = run_result.get("transcript", "")
        score = bench_ab_scoring.score_task(
            task,
            pre_snapshot=pre,
            post_snapshot=post,
            clone_root=clone_root,
            transcript=transcript,
        )
        per_task.append(
            {
                "id": task.get("id"),
                "category": task.get("category"),
                "score": score,
                "wall_time_seconds": run_result.get("wall_time_seconds", 0.0),
                "exit_code": run_result.get("exit_code"),
                # run_live may report "live-skipped"; keep what it says.
                "mode": run_result.get("mode", mode),
                "reason": run_result.get("reason", ""),
                "ask_events": count_ask_events(transcript),
            }
        )
    duration = time.monotonic() - started
    path = write_report(variant, mode=mode, per_task=per_task, duration=duration)
    # Same tolerant lookup the report aggregation uses, so a score without a
    # "passed" key cannot raise after the report has already been written.
    passed = sum(1 for e in per_task if (e.get("score") or {}).get("passed"))
    sys.stdout.write(
        f"bench_ab_task_runner: {variant} ({mode}) → "
        f"{passed}/{len(per_task)} "
        f"passed — {path.relative_to(REPO_ROOT)}\n"
    )
    return {"path": path, "per_task": per_task, "duration": duration}
324
+
325
+
326
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the Track B runner command line.

    Options: ``--variant`` (``with``/``without``/``both``, default ``both``),
    ``--mode`` (``dry-run``/``live``, default ``dry-run``) and ``--timeout``
    (integer seconds, default ``120``).
    """
    option_specs = (
        (
            "--variant",
            {
                "choices": ("with", "without", "both"),
                "default": "both",
                "help": "Which variant to run (default: both).",
            },
        ),
        (
            "--mode",
            {
                "choices": ("dry-run", "live"),
                "default": "dry-run",
                "help": (
                    "dry-run: stub transcript, no CLI invocation (fast, free). "
                    "live: invoke `claude --print` per task (cost-bearing)."
                ),
            },
        ),
        (
            "--timeout",
            {
                "type": int,
                "default": 120,
                "help": "Live mode: per-task timeout in seconds (default 120).",
            },
        ),
    )
    cli = argparse.ArgumentParser(description="Run Track B tasks per variant.")
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args(argv)
350
+
351
+
352
def main(argv: list[str] | None = None) -> int:
    """Load the Track B corpus and run the requested variant(s).

    Args:
        argv: Arguments without the program name; ``sys.argv[1:]`` is used
            when ``None``.

    Returns:
        ``0`` on success; ``1`` when the corpus file is missing or yields
        no tasks.
    """
    args = parse_args(argv if argv is not None else sys.argv[1:])
    if not CORPUS_PATH.exists():
        sys.stderr.write(f"bench_ab_task_runner: corpus missing at {CORPUS_PATH}\n")
        return 1
    data = yaml.safe_load(CORPUS_PATH.read_text(encoding="utf-8"))
    # safe_load returns None for an empty document and may return a list or
    # scalar; only a mapping can carry a "tasks" key.
    tasks = (data.get("tasks") if isinstance(data, dict) else None) or []
    if not tasks:
        sys.stderr.write("bench_ab_task_runner: corpus has no tasks\n")
        return 1
    variants = ("with", "without") if args.variant == "both" else (args.variant,)
    for variant in variants:
        run_variant(variant, tasks, mode=args.mode, timeout_s=args.timeout)
    return 0
366
+
367
+
368
+ if __name__ == "__main__":
369
+ raise SystemExit(main())