npm - @event4u/agent-config - Versions diffs - 3.2.0 → 4.1.0 - Mend

@event4u/agent-config 3.2.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (607)

package/.agent-src/README.md +2 -2
package/.agent-src/commands/agent-handoff.md +31 -2
package/.agent-src/commands/agent-status.md +6 -6
package/.agent-src/commands/agents/audit.md +8 -8
package/.agent-src/commands/agents/init.md +25 -1
package/.agent-src/commands/agents/optimize.md +3 -3
package/.agent-src/commands/agents/user.md +1 -1
package/.agent-src/commands/agents.md +1 -1
package/.agent-src/commands/analyze-reference-repo.md +1 -1
package/.agent-src/commands/check-current-md.md +8 -8
package/.agent-src/commands/{compress.md → condense.md} +55 -55
package/.agent-src/commands/context/create.md +7 -4
package/.agent-src/commands/context/refactor.md +3 -1
package/.agent-src/commands/feature/dev.md +1 -1
package/.agent-src/commands/feature/explore.md +1 -1
package/.agent-src/commands/feature/plan.md +10 -8
package/.agent-src/commands/feature/refactor.md +3 -1
package/.agent-src/commands/feature/roadmap.md +7 -4
package/.agent-src/commands/fix/portability.md +3 -3
package/.agent-src/commands/fix/refs.md +4 -4
package/.agent-src/commands/ghostwriter.md +2 -2
package/.agent-src/commands/memory/learn-low-impact.md +3 -3
package/.agent-src/commands/module/explore.md +34 -8
package/.agent-src/commands/optimize/agents-dir.md +9 -7
package/.agent-src/commands/optimize/augmentignore.md +2 -2
package/.agent-src/commands/optimize/skills.md +9 -9
package/.agent-src/commands/post-as.md +1 -1
package/.agent-src/commands/project-analyze.md +2 -2
package/.agent-src/commands/project-health.md +3 -2
package/.agent-src/commands/research/deep.md +1 -1
package/.agent-src/commands/research/report.md +1 -1
package/.agent-src/commands/research.md +1 -1
package/.agent-src/commands/roadmap/ai-council.md +1 -1
package/.agent-src/commands/roadmap/create.md +9 -4
package/.agent-src/commands/rule-compliance-audit.md +1 -1
package/.agent-src/commands/upstream-contribute.md +14 -14
package/.agent-src/commands/video/from-script.md +1 -1
package/.agent-src/commands/video/scene.md +1 -1
package/.agent-src/commands/video/stitch.md +1 -1
package/.agent-src/commands/video/storyboard.md +1 -1
package/.agent-src/commands/video.md +1 -1
package/.agent-src/contexts/augment-infrastructure.md +1 -1
package/.agent-src/contexts/authority/commit-mechanics.md +15 -0
package/.agent-src/contexts/authority/kernel-rule-edits.md +3 -3
package/.agent-src/contexts/authority/scope-mechanics.md +1 -1
package/.agent-src/contexts/communication/rules-auto/augment-source-of-truth-mechanics.md +28 -28
package/.agent-src/contexts/communication/rules-auto/skill-quality-mechanics.md +4 -4
package/.agent-src/contexts/communication/rules-auto/think-before-action-mechanics.md +2 -2
package/.agent-src/contexts/contracts/artifact-engagement-flow.md +6 -6
package/.agent-src/contexts/contracts/command-suggestion-flow.md +3 -3
package/.agent-src/contexts/contracts/emergency-triage-block.md +4 -4
package/.agent-src/contexts/contracts/frugality-charter.md +3 -3
package/.agent-src/contexts/documentation-hierarchy.md +14 -7
package/.agent-src/contexts/execution/autonomy-examples.md +1 -1
package/.agent-src/contexts/execution/cheap-question-mechanics.md +39 -2
package/.agent-src/contexts/execution/roadmap-process-loop.md +28 -5
package/.agent-src/contexts/override-system.md +5 -5
package/.agent-src/ghostwriter/fictional-fixture-v1.md +1 -1
package/.agent-src/personas/advisors/first-principles.md +1 -1
package/.agent-src/personas/hollywood-director.md +1 -1
package/.agent-src/rules/architecture.md +5 -1
package/.agent-src/rules/augment-edit-discipline.md +5 -5
package/.agent-src/rules/augment-source-of-truth.md +15 -15
package/.agent-src/rules/commit-conventions.md +1 -1
package/.agent-src/rules/commit-policy.md +10 -0
package/.agent-src/rules/domain-adoption-policy.md +3 -3
package/.agent-src/rules/fast-path-marker-visibility.md +3 -3
package/.agent-src/rules/finance-safety-floor.md +1 -1
package/.agent-src/rules/framework-neutrality-in-generic-skills.md +8 -8
package/.agent-src/rules/git-history-discipline.md +1 -1
package/.agent-src/rules/improve-before-implement.md +2 -2
package/.agent-src/rules/language-and-tone.md +2 -2
package/.agent-src/rules/media-governance-routing.md +5 -5
package/.agent-src/rules/no-attribution-footers.md +1 -0
package/.agent-src/rules/no-cheap-questions.md +3 -0
package/.agent-src/rules/no-decorative-emojis-in-git-surfaces.md +111 -0
package/.agent-src/rules/no-pr-progress-comments.md +118 -0
package/.agent-src/rules/no-roadmap-references.md +3 -3
package/.agent-src/rules/non-destructive-by-default.md +1 -1
package/.agent-src/rules/persona-governance.md +3 -3
package/.agent-src/rules/preservation-guard.md +15 -15
package/.agent-src/rules/roadmap-ci-steps-policy.md +7 -3
package/.agent-src/rules/rule-type-governance.md +1 -1
package/.agent-src/rules/skill-quality.md +1 -1
package/.agent-src/rules/{caveman-speak.md → telegraph-speak.md} +15 -15
package/.agent-src/rules/token-optimizer-maintenance.md +6 -6
package/.agent-src/skills/agent-docs-writing/SKILL.md +17 -11
package/.agent-src/skills/agents-md-thin-root/SKILL.md +9 -9
package/.agent-src/skills/check-refs/SKILL.md +2 -2
package/.agent-src/skills/code-refactoring/SKILL.md +2 -2
package/.agent-src/skills/command-writing/SKILL.md +19 -19
package/.agent-src/skills/comp-banding/SKILL.md +1 -1
package/.agent-src/skills/condense-memory/SKILL.md +131 -0
package/.agent-src/skills/context-authoring/SKILL.md +2 -2
package/.agent-src/skills/context-document/SKILL.md +5 -3
package/.agent-src/skills/copilot-agents-optimization/SKILL.md +3 -3
package/.agent-src/skills/description-assist/SKILL.md +2 -2
package/.agent-src/skills/git-workflow/SKILL.md +1 -1
package/.agent-src/skills/guideline-writing/SKILL.md +5 -5
package/.agent-src/skills/learning-to-rule-or-skill/SKILL.md +4 -4
package/.agent-src/skills/lint-skills/SKILL.md +3 -3
package/.agent-src/skills/md-language-check/SKILL.md +2 -2
package/.agent-src/skills/module-detect-on-the-fly/SKILL.md +138 -0
package/.agent-src/skills/module-management/SKILL.md +166 -94
package/.agent-src/skills/override-management/SKILL.md +1 -1
package/.agent-src/skills/persona-writing/SKILL.md +5 -5
package/.agent-src/skills/positioning-strategy/SKILL.md +1 -1
package/.agent-src/skills/project-docs/SKILL.md +6 -4
package/.agent-src/skills/readme-reviewer/SKILL.md +2 -2
package/.agent-src/skills/roadmap-management/SKILL.md +13 -1
package/.agent-src/skills/roadmap-writing/SKILL.md +4 -2
package/.agent-src/skills/rule-refactor/SKILL.md +5 -5
package/.agent-src/skills/rule-writing/SKILL.md +18 -18
package/.agent-src/skills/script-writing/SKILL.md +1 -1
package/.agent-src/skills/skill-improvement-pipeline/SKILL.md +6 -6
package/.agent-src/skills/skill-management/SKILL.md +21 -21
package/.agent-src/skills/skill-reviewer/SKILL.md +2 -2
package/.agent-src/skills/skill-writing/SKILL.md +8 -8
package/.agent-src/skills/skill-writing/evals/triggers.json +1 -1
package/.agent-src/skills/token-optimizer/SKILL.md +4 -4
package/.agent-src/skills/unit-economics-modeling/SKILL.md +1 -1
package/.agent-src/skills/upstream-contribute/SKILL.md +17 -17
package/.agent-src/templates/AGENTS.md +1 -0
package/.agent-src/templates/agent-settings.md +24 -13
package/.agent-src/templates/agents/agent-project-settings.example.yml +61 -2
package/.agent-src/templates/command.md +5 -5
package/.agent-src/templates/contexts.md +1 -1
package/.agent-src/templates/copilot-instructions.md +8 -8
package/.agent-src/templates/features.md +1 -1
package/.agent-src/templates/hooks/pre-commit-frontmatter +2 -2
package/.agent-src/templates/hooks/pre-commit-roadmap-progress +3 -3
package/.agent-src/templates/persona.md +2 -2
package/.agent-src/templates/roadmaps.md +1 -1
package/.agent-src/templates/rule.md +13 -13
package/.agent-src/templates/scripts/memory_lookup.py +1 -1
package/.agent-src/templates/scripts/memory_status.py +2 -2
package/.agent-src/templates/scripts/work_engine/_lib/agent_settings.py +195 -1
package/.agent-src/templates/scripts/work_engine/orchestration.py +1 -1
package/.agent-src/templates/skill-archive-note.md +5 -5
package/.agent-src/templates/skill.md +1 -1
package/.claude-plugin/marketplace.json +4 -4
package/AGENTS.md +16 -16
package/CHANGELOG.md +204 -2
package/CONTRIBUTING.md +31 -12
package/README.md +18 -10
package/config/agent-settings.template.yml +22 -2
package/config/discovery/unassigned-artefacts.yml +24 -24
package/config/profiles/full.ini +1 -1
package/dist/cli/agent-config.js +52 -3
package/dist/cli/agent-config.js.map +1 -1
package/dist/cli/commands/uiServe.js +9 -0
package/dist/cli/commands/uiServe.js.map +1 -1
package/dist/cli/registry.js +2 -1
package/dist/cli/registry.js.map +1 -1
package/dist/discovery/deprecation-report.md +1 -1
package/dist/discovery/discovery-manifest.json +649 -606
package/dist/discovery/discovery-manifest.json.sha256 +1 -1
package/dist/discovery/discovery-manifest.summary.md +4 -4
package/dist/discovery/orphan-report.md +1 -1
package/dist/discovery/packs.json +439 -437
package/dist/discovery/trust-report.md +5 -5
package/dist/discovery/workspaces.json +450 -448
package/dist/install/apply.js +238 -0
package/dist/install/apply.js.map +1 -0
package/dist/install/atomic.js +92 -0
package/dist/install/atomic.js.map +1 -0
package/dist/install/bridges/augment.js +20 -0
package/dist/install/bridges/augment.js.map +1 -0
package/dist/install/bridges/claude.js +44 -0
package/dist/install/bridges/claude.js.map +1 -0
package/dist/install/bridges/cline.js +69 -0
package/dist/install/bridges/cline.js.map +1 -0
package/dist/install/bridges/copilot.js +28 -0
package/dist/install/bridges/copilot.js.map +1 -0
package/dist/install/bridges/cursor.js +34 -0
package/dist/install/bridges/cursor.js.map +1 -0
package/dist/install/bridges/gemini.js +39 -0
package/dist/install/bridges/gemini.js.map +1 -0
package/dist/install/bridges/index.js +88 -0
package/dist/install/bridges/index.js.map +1 -0
package/dist/install/bridges/marker-content.js +153 -0
package/dist/install/bridges/marker-content.js.map +1 -0
package/dist/install/bridges/markers.js +42 -0
package/dist/install/bridges/markers.js.map +1 -0
package/dist/install/bridges/types.js +31 -0
package/dist/install/bridges/types.js.map +1 -0
package/dist/install/bridges/vscode.js +26 -0
package/dist/install/bridges/vscode.js.map +1 -0
package/dist/install/bridges/windsurf.js +35 -0
package/dist/install/bridges/windsurf.js.map +1 -0
package/dist/install/conflict.js +196 -0
package/dist/install/conflict.js.map +1 -0
package/dist/install/detect.js +218 -0
package/dist/install/detect.js.map +1 -0
package/dist/install/paths.js +82 -0
package/dist/install/paths.js.map +1 -0
package/dist/install/plan.js +157 -0
package/dist/install/plan.js.map +1 -0
package/dist/install/txlog.js +140 -0
package/dist/install/txlog.js.map +1 -0
package/dist/install/types.js +19 -0
package/dist/install/types.js.map +1 -0
package/dist/install/wizard-plan.js +184 -0
package/dist/install/wizard-plan.js.map +1 -0
package/dist/mcp/registry-manifest.json +4 -4
package/dist/router.json +67 -19
package/dist/server/app.js +6 -0
package/dist/server/app.js.map +1 -1
package/dist/server/routes/install.js +547 -0
package/dist/server/routes/install.js.map +1 -0
package/dist/server/routes/wizard.js +301 -6
package/dist/server/routes/wizard.js.map +1 -1
package/dist/server/routes/workspace.js +396 -0
package/dist/server/routes/workspace.js.map +1 -0
package/dist/server/schemas/settings.js +4 -3
package/dist/server/schemas/settings.js.map +1 -1
package/dist/ui/assets/index-BXZILUxe.css +1 -0
package/dist/ui/assets/index-DLEuEW1V.js +35 -0
package/dist/ui/assets/index-DLEuEW1V.js.map +1 -0
package/dist/ui/index.html +2 -2
package/docs/MIGRATION.md +1 -1
package/docs/adrs/cost/0001-hard-stop-hook.md +1 -1
package/docs/adrs/router/0001-three-tier-routing.md +4 -4
package/docs/adrs/schema/0001-json-schema-frontmatter.md +1 -1
package/docs/adrs/smoke/0001-per-tier-smoke-scripts.md +4 -4
package/docs/adrs/{caveman → telegraph}/0001-default-off-until-bench.md +9 -9
package/docs/adrs/telegraph/README.md +9 -0
package/docs/architecture/augment-projection.md +4 -4
package/docs/architecture/claude-bundle.md +1 -1
package/docs/architecture/current-onboard-baseline.md +3 -3
package/docs/architecture/multi-tool-projection.md +10 -10
package/docs/architecture/source-projection.md +27 -27
package/docs/architecture.md +19 -15
package/docs/archive/CHANGELOG-pre-2.11.0.md +2 -2
package/docs/archive/CHANGELOG-pre-2.15.0.md +3 -3
package/docs/archive/CHANGELOG-pre-2.16.0.md +1 -1
package/docs/archive/CHANGELOG-pre-2.2.0.md +70 -70
package/docs/archive/CHANGELOG-pre-2.20.0.md +2 -2
package/docs/archive/CHANGELOG-pre-2.25.0.md +15 -15
package/docs/archive/CHANGELOG-pre-3.0.0.md +4 -4
package/docs/archive/CHANGELOG-pre-3.1.0.md +2 -2
package/docs/archive/CHANGELOG-pre-3.2.0.md +3 -3
package/docs/benchmark.md +65 -0
package/docs/benchmarks.md +18 -18
package/docs/catalog.md +17 -15
package/docs/contracts/CHANGELOG-conventions.md +2 -2
package/docs/contracts/STABILITY.md +2 -2
package/docs/contracts/adoption-signal-floor.md +110 -0
package/docs/contracts/adr-chat-history-split.md +4 -4
package/docs/contracts/adr-command-suggestion.md +4 -4
package/docs/contracts/adr-gtm-context-spine.md +1 -1
package/docs/contracts/adr-implement-ticket-runtime.md +4 -4
package/docs/contracts/adr-install-user-type-axis.md +1 -1
package/docs/contracts/adr-layout.md +2 -2
package/docs/contracts/adr-mcp-runtime.md +1 -1
package/docs/contracts/adr-product-ui-track.md +10 -10
package/docs/contracts/adr-user-types-axis.md +3 -3
package/docs/contracts/adr-wing4-context-spine.md +1 -1
package/docs/contracts/agent-memory-contract.md +3 -3
package/docs/contracts/agents-md-tech-stack.md +2 -2
package/docs/contracts/ai-council-config.md +2 -2
package/docs/contracts/at-rest-encryption.md +4 -0
package/docs/contracts/audit-log-v1.md +1 -1
package/docs/contracts/benchmark-ab-contract.md +101 -0
package/docs/contracts/benchmark-corpus-spec.md +4 -4
package/docs/contracts/benchmark-report-schema.md +5 -5
package/docs/contracts/branch-protection-policy.md +98 -0
package/docs/contracts/ci-cost-budget.md +106 -0
package/docs/contracts/ci-green-floor.md +141 -0
package/docs/contracts/command-clusters.md +6 -6
package/docs/contracts/command-surface-tiers.md +2 -2
package/docs/contracts/command-taxonomy.md +2 -2
package/docs/contracts/{compression-default-kill-criterion.md → condensation-default-kill-criterion.md} +29 -29
package/docs/contracts/config-presets.md +1 -1
package/docs/contracts/context-paths.md +3 -3
package/docs/contracts/context-spine.md +1 -1
package/docs/contracts/cost-enforcement.md +1 -1
package/docs/contracts/cost-summary-schema.md +12 -12
package/docs/contracts/cross-wing-handoff.md +4 -4
package/docs/contracts/daily-workspace.md +4 -0
package/docs/contracts/decision-trace-v1.md +2 -2
package/docs/contracts/discovery-manifest.md +4 -4
package/docs/contracts/explain-modes.md +4 -0
package/docs/contracts/file-ownership-matrix.json +3493 -3318
package/docs/contracts/file-ownership-matrix.md +3 -3
package/docs/contracts/frontmatter-contract.md +4 -4
package/docs/contracts/ghostwriter-schema.md +3 -3
package/docs/contracts/gui-wizard.md +1 -1
package/docs/contracts/harness-expectations.md +123 -0
package/docs/contracts/host-agent-protocol.md +4 -0
package/docs/contracts/implement-ticket-flow.md +9 -9
package/docs/contracts/install-scopes.md +77 -0
package/docs/contracts/iron-law-overrides.txt +1 -1
package/docs/contracts/kernel-membership.md +26 -26
package/docs/contracts/linear-ai-rules-inclusion.md +1 -1
package/docs/contracts/linter-structural-model.md +2 -2
package/docs/contracts/load-context-budget-model.md +4 -4
package/docs/contracts/load-context-schema.md +13 -13
package/docs/contracts/local-analytics.md +4 -0
package/docs/contracts/local-knowledge-ingestion.md +1 -1
package/docs/contracts/mcp-beta-criteria.md +1 -1
package/docs/contracts/mcp-cloud-scope.md +6 -6
package/docs/contracts/mcp-phase-1-scope.md +3 -3
package/docs/contracts/mcp-registry-manifest.schema.json +1 -1
package/docs/contracts/mcp-tool-inventory.md +1 -1
package/docs/contracts/mcp-tool-stub-envelope.md +1 -1
package/docs/contracts/measurement-baseline.md +11 -11
package/docs/contracts/mental-models.md +30 -30
package/docs/contracts/multi-tool-projection-fidelity.md +4 -4
package/docs/contracts/namespace.md +4 -4
package/docs/contracts/orchestration-dsl-v1.md +7 -7
package/docs/contracts/package-self-orientation.md +12 -12
package/docs/contracts/persona-schema.md +6 -6
package/docs/contracts/pilot/language-and-tone.md +1 -1
package/docs/contracts/plain-language-surface.md +117 -0
package/docs/contracts/profile-system.md +3 -3
package/docs/contracts/release-pr-gating.md +103 -0
package/docs/contracts/role-experience.md +3 -3
package/docs/contracts/rule-classification.md +13 -13
package/docs/contracts/rule-interactions.md +4 -4
package/docs/contracts/rule-interactions.yml +30 -30
package/docs/contracts/rule-priority-hierarchy.md +13 -13
package/docs/contracts/rule-router.md +2 -2
package/docs/contracts/safety-model.md +1 -1
package/docs/contracts/skill-distribution-channels.md +61 -0
package/docs/contracts/skill-domains.md +2 -2
package/docs/contracts/smoke-contracts.md +5 -5
package/docs/contracts/telegraph-telemetry.md +83 -0
package/docs/contracts/trust-and-safety.md +5 -5
package/docs/contracts/ui-stack-extension.md +7 -7
package/docs/contracts/ui-track-flow.md +9 -9
package/docs/contracts/user-type-schema.md +4 -4
package/docs/contracts/workflow-packs.md +4 -4
package/docs/contracts/workspace-documents.md +4 -0
package/docs/customization.md +28 -8
package/docs/decisions/ADR-001-kernel-swap-deferred.md +6 -6
package/docs/decisions/ADR-002-kernel-bucket-overrides.md +11 -11
package/docs/decisions/ADR-003-flat-cluster-subs-and-colon-syntax.md +2 -2
package/docs/decisions/ADR-004-rule-governance-pruning.md +4 -4
package/docs/decisions/ADR-005-subagent-worktrees.md +7 -7
package/docs/decisions/ADR-011-domain-pack-readiness.md +6 -6
package/docs/decisions/ADR-013-discovery-frontmatter-contract.md +3 -3
package/docs/decisions/ADR-015-discovery-manifest-contract.md +3 -3
package/docs/decisions/ADR-017-monorepo-physical-layout.md +10 -10
package/docs/decisions/ADR-018-trust-and-safety-layer.md +6 -6
package/docs/decisions/ADR-019-router-json-dist-location.md +2 -2
package/docs/decisions/ADR-020-global-only-consumer-scope.md +2 -2
package/docs/decisions/ADR-021-deployment-shape.md +1 -1
package/docs/decisions/ADR-022-daily-workspace-decomposition.md +1 -1
package/docs/decisions/ADR-027-changelog-machine-vs-manual.md +129 -0
package/docs/decisions/ADR-028-root-layout.md +147 -0
package/docs/decisions/ADR-029-multi-workspace-deferred.md +122 -0
package/docs/decisions/ADR-rule-kernel-and-router.md +5 -5
package/docs/decisions/INDEX.md +8 -0
package/docs/deploy/team-deployment-posture.md +20 -0
package/docs/development.md +17 -17
package/docs/distribution/registries.md +32 -0
package/docs/distribution/registry-submissions.md +85 -0
package/docs/distribution/telemetry-schema.md +1 -1
package/docs/getting-started-by-role.md +45 -3
package/docs/getting-started.md +2 -2
package/docs/guidelines/agent-infra/5w2h-analysis.md +3 -3
package/docs/guidelines/agent-infra/ask-when-uncertain-demos.md +1 -1
package/docs/guidelines/agent-infra/asking-and-brevity-examples.md +3 -3
package/docs/guidelines/agent-infra/carve-out-predicates.md +3 -3
package/docs/guidelines/agent-infra/critical-thinking.md +4 -4
package/docs/guidelines/agent-infra/direct-answers-demos.md +1 -1
package/docs/guidelines/agent-infra/first-principles.md +2 -2
package/docs/guidelines/agent-infra/inversion-thinking.md +5 -5
package/docs/guidelines/agent-infra/layered-settings.md +56 -2
package/docs/guidelines/agent-infra/mental-models.md +3 -3
package/docs/guidelines/agent-infra/roadmap-progress-mechanics.md +2 -2
package/docs/guidelines/agent-infra/rule-type-governance.md +1 -1
package/docs/guidelines/agent-infra/scqa-framework.md +5 -5
package/docs/guidelines/agent-infra/self-improvement-pipeline.md +2 -2
package/docs/guidelines/agent-infra/six-hats.md +3 -3
package/docs/guidelines/agent-infra/skill-quality-checklist.md +5 -5
package/docs/guidelines/agent-infra/systems-thinking.md +1 -1
package/docs/guidelines/agent-infra/verify-before-complete-demos.md +1 -1
package/docs/guidelines/augment-portability-patterns.md +4 -4
package/docs/guidelines/cross-role-handoff.md +2 -2
package/docs/guidelines/php/php-coding-patterns.md +1 -1
package/docs/guidelines/prompt-templates.md +6 -6
package/docs/maintainers/dev-mode.md +1 -1
package/docs/mcp-server.md +1 -1
package/docs/mcp.md +1 -1
package/docs/parity/bench-ruflo.json +3 -3
package/docs/parity/bench.json +3 -3
package/docs/parity/ruflo.md +3 -3
package/docs/profiles.md +11 -11
package/docs/quality.md +11 -11
package/docs/safety.md +3 -3
package/docs/setup/mcp-client-config.md +2 -2
package/docs/setup/mcp-cloud-endpoints.md +1 -1
package/docs/setup/mcp-cloud-setup.md +2 -2
package/docs/setup/mcp-r2-bootstrap.md +2 -2
package/docs/setup/mcp-server-docker.md +3 -3
package/docs/setup/per-ide/windsurf.md +1 -1
package/docs/skills-catalog.md +8 -7
package/docs/troubleshooting.md +1 -1
package/docs/walkthroughs/daily-workspace-a11y.md +87 -0
package/llms.txt +7 -6
package/package.json +1 -1
package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
package/scripts/_archive/README.md +2 -2
package/scripts/_archive/_backfill_skill_domains.py +3 -3
package/scripts/_archive/_bootstrap_tier_frontmatter.py +3 -3
package/scripts/_archive/_p43_bodies.py +10 -10
package/scripts/_archive/{_p43_compress.py → _p43_condense.py} +5 -5
package/scripts/_archive/_p4_migrate.py +7 -7
package/scripts/_archive/_phase2_shim_helper.py +1 -1
package/scripts/_archive/_pilot_council_question.py +5 -5
package/scripts/_cli/explain_last/inputs.py +1 -1
package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
package/scripts/_lib/agent_settings.py +195 -1
package/scripts/_lib/agent_src.py +19 -19
package/scripts/_lib/bench_ab_cache.py +162 -0
package/scripts/_lib/bench_ab_scoring.py +209 -0
package/scripts/_lib/bench_cost.py +2 -2
package/scripts/_lib/bench_report.py +2 -2
package/scripts/_lib/{bench_caveman.py → bench_telegraph.py} +21 -21
package/scripts/_lib/{bench_caveman_report.py → bench_telegraph_report.py} +22 -22
package/scripts/_lib/claude_desktop_bundler.py +5 -5
package/scripts/_lib/module_detection.py +223 -0
package/scripts/_lib/scope_guard.sh +162 -0
package/scripts/_phase4_bucket.py +3 -3
package/scripts/_pilot_measure.py +4 -4
package/scripts/_tmp_scan_framework_leakage.py +1 -1
package/scripts/adoption_report.py +195 -0
package/scripts/adoption_snapshot.py +219 -0
package/scripts/adoption_status.py +166 -0
package/scripts/ai-video/lib/parse-blueprint.sh +1 -1
package/scripts/ai_council/advisors.py +5 -5
package/scripts/ai_council/compile_corpus.py +1 -1
package/scripts/ai_council/one_off_archive/2026-05/_one_off_budget_v2_audit.py +3 -3
package/scripts/ai_council/one_off_archive/2026-05/_one_off_context_layer_v1_review.py +2 -2
package/scripts/ai_council/one_off_archive/2026-05/_one_off_inject_quiet_flag.py +1 -1
package/scripts/ai_council/one_off_archive/2026-05/_one_off_measure_v2.sh +1 -1
package/scripts/ai_council/one_off_archive/2026-05/_one_off_measure_verbosity.sh +1 -1
package/scripts/ai_council/one_off_archive/2026-05/_one_off_nondestructive_inline_audit.py +3 -3
package/scripts/ai_council/one_off_archive/2026-05/_one_off_per_task.sh +1 -1
package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase6_trigger_jaccard.py +1 -1
package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase_2a_budget_rebalance.py +6 -6
package/scripts/ai_council/one_off_archive/2026-05/_one_off_rebalancing_audit.py +1 -1
package/scripts/ai_council/one_off_archive/2026-05/_one_off_tier_retrofit.py +6 -6
package/scripts/annotate_discovery.py +13 -13
package/scripts/apply_modules_config.py +290 -0
package/scripts/audit_adr_coverage.py +2 -2
package/scripts/audit_auto_rules.py +2 -2
package/scripts/audit_cloud_compatibility.py +3 -3
package/scripts/audit_command_surface.py +9 -9
package/scripts/audit_likelihood.py +2 -2
package/scripts/audit_mcp_tools.py +1 -1
package/scripts/audit_user_type_axis.py +2 -2
package/scripts/bench_ab_cache_dispatch.py +68 -0
package/scripts/bench_ab_clone.py +170 -0
package/scripts/bench_ab_diff.py +227 -0
package/scripts/bench_ab_integrity.py +143 -0
package/scripts/bench_ab_run.py +235 -0
package/scripts/bench_ab_task_runner.py +369 -0
package/scripts/bench_ab_tracka_run.py +202 -0
package/scripts/bench_baseline_ready.py +3 -3
package/scripts/{bench_compress_memory.py → bench_condense_memory.py} +16 -16
package/scripts/bench_drift_check.py +2 -2
package/scripts/bench_per_tool.py +2 -2
package/scripts/bench_run.py +36 -36
package/scripts/bench_runner.py +2 -2
package/scripts/bootstrap.sh +99 -0
package/scripts/build_cloud_bundle.py +6 -6
package/scripts/build_discovery_manifest.py +7 -7
package/scripts/build_linear_digest.py +3 -3
package/scripts/build_mcp_registry_manifest.py +2 -2
package/scripts/build_rule_trigger_matrix.py +8 -8
package/scripts/chat_history.py +5 -5
package/scripts/check_always_budget.py +11 -5
package/scripts/check_augment_description_cap.py +3 -3
package/scripts/check_cluster_patterns.py +2 -2
package/scripts/check_command_count_messaging.py +3 -3
package/scripts/{check_compression.py → check_condensation.py} +34 -34
package/scripts/{check_compressed_paths.py → check_condensed_paths.py} +8 -8
package/scripts/check_context_paths.py +7 -7
package/scripts/check_council_layout.py +2 -2
package/scripts/check_council_references.py +9 -9
package/scripts/check_iron_law_prominence.py +2 -2
package/scripts/check_kernel_rule_bundle.py +2 -2
package/scripts/check_module_management_neutral.py +149 -0
package/scripts/check_no_roadmap_refs.py +9 -9
package/scripts/check_portability.py +3 -3
package/scripts/check_public_catalog_links.py +4 -4
package/scripts/check_references.py +7 -6
package/scripts/check_release_pr_shape.py +112 -0
package/scripts/check_reply_consistency.py +3 -3
package/scripts/check_safety_floor_untouched.py +1 -1
package/scripts/check_template_pin_drift.py +5 -5
package/scripts/check_token_optimizer_freshness.py +3 -3
package/scripts/ci_status.py +301 -0
package/scripts/ci_time_ratio.py +1 -1
package/scripts/cleanup_other_scope.sh +146 -0
package/scripts/compile_router.py +10 -10
package/scripts/{compress.py → condense.py} +64 -64
package/scripts/condense.sh +18 -0
package/scripts/{compress_memory.py → condense_memory.py} +33 -33
package/scripts/config/presets.py +2 -2
package/scripts/config/profiles.py +1 -1
package/scripts/cost_by_conversation.py +3 -3
package/scripts/cost_summary.py +7 -7
package/scripts/count_token_optimizer_usage.sh +1 -1
package/scripts/gen_discovery_baseline.py +5 -5
package/scripts/generate_index.py +6 -6
package/scripts/generate_ownership_matrix.py +10 -10
package/scripts/generate_pack_manifests.py +1 -1
package/scripts/ghostwriter_fixture_allowlist.txt +1 -1
package/scripts/install +3 -3
package/scripts/install-hooks.sh +6 -6
package/scripts/install.py +76 -11
package/scripts/install.sh +187 -1
package/scripts/inventory_frontmatter.py +2 -2
package/scripts/iron_law_sha.py +3 -3
package/scripts/lint_agents_layout.py +14 -7
package/scripts/lint_agents_md.py +4 -4
package/scripts/lint_archived_skills.py +3 -3
package/scripts/lint_artefact_frontmatter.py +2 -2
package/scripts/lint_bench_ab.py +172 -0
package/scripts/lint_bench_corpus.py +1 -1
package/scripts/lint_command_tiers.py +5 -5
package/scripts/lint_context_spine_usage.py +1 -1
package/scripts/lint_framework_leakage.py +7 -7
package/scripts/lint_framework_leakage_allowlist.json +144 -84
package/scripts/lint_ghostwriter_source.py +3 -3
package/scripts/lint_handoffs.py +1 -1
package/scripts/lint_load_context.py +11 -11
package/scripts/lint_media_policy_linkage.py +5 -5
package/scripts/lint_namespace.py +1 -1
package/scripts/lint_no_new_atomic_commands.py +2 -2
package/scripts/lint_orchestration_dsl.py +1 -1
package/scripts/lint_pack_boundaries.py +2 -2
package/scripts/lint_persona_governance.py +4 -4
package/scripts/lint_role_experiences.py +237 -0
package/scripts/lint_rule_interactions.py +2 -2
package/scripts/lint_rule_tiers.py +1 -1
package/scripts/lint_trust_coherence.py +2 -2
package/scripts/mcp_registry_submit.sh +187 -0
package/scripts/mcp_server/__init__.py +1 -1
package/scripts/mcp_server/catalog.py +1 -1
package/scripts/mcp_server/consumer_tool_catalog.json +1 -1
package/scripts/mcp_server/tools.py +2 -2
package/scripts/measure_frugality_savings.py +10 -10
package/scripts/measure_patterns.py +1 -1
package/scripts/measure_projection_bytes.py +5 -5
package/scripts/measure_rule_budget.py +3 -3
package/scripts/measure_skill_reduction.py +1 -1
package/scripts/memory_lookup.py +1 -1
package/scripts/memory_status.py +2 -2
package/scripts/migrate_command_suggestions.py +3 -3
package/scripts/mine_session.py +1 -1
package/scripts/move_artefact.py +3 -3
package/scripts/new_skill.py +2 -2
package/scripts/pack_mcp_content.py +14 -14
package/scripts/plan_physical_move.py +6 -6
package/scripts/print_required_checks.py +196 -0
package/scripts/probe_skill_registration.py +413 -0
package/scripts/propose_modules_config.py +145 -0
package/scripts/prototype_lint_contradictions.py +1 -1
package/scripts/recruit_preflight.sh +152 -0
package/scripts/refine_ticket_detect.py +3 -3
package/scripts/release.py +20 -0
package/scripts/render_benchmark_md.py +308 -0
package/scripts/roadmap_progress_hook.py +1 -1
package/scripts/run_skill_evals.py +2 -2
package/scripts/runtime_registry.py +4 -4
package/scripts/schemas/command.schema.json +4 -4
package/scripts/schemas/rule.schema.json +5 -5
package/scripts/schemas/skill.schema.json +3 -3
package/scripts/schemas/user-type.schema.json +1 -1
package/scripts/score_skill_selection.py +1 -1
package/scripts/skill_collision_clusters.py +2 -2
package/scripts/skill_linter.py +81 -81
package/scripts/skill_overlap.py +5 -5
package/scripts/skill_tools/audit_persona_coverage.py +2 -2
package/scripts/skill_tools/audit_user_type_coverage.py +2 -2
package/scripts/skill_tools/run_block_d_eval.py +1 -1
package/scripts/skill_tools/score_skill_relevance.py +1 -1
package/scripts/skill_tools/suggest_skill_for_task.py +1 -1
package/scripts/skill_trigger_eval.py +5 -5
package/scripts/smoke/kernel.sh +7 -1
package/scripts/smoke/router.sh +5 -5
package/scripts/smoke/skills.sh +1 -1
package/scripts/smoke_quickstart.py +1 -1
package/scripts/snapshot_agent_outputs.py +3 -3
package/scripts/spotcheck_thin_root.py +1 -1
package/scripts/{caveman_stats.py → telegraph_stats.py} +18 -18
package/scripts/update_counts.py +1 -1
package/scripts/validate_decision_engine.py +1 -1
package/scripts/validate_frontmatter.py +1 -1
package/scripts/validate_safe_paths.py +3 -3
package/scripts/{validate_caveman_carveouts.py → validate_telegraph_carveouts.py} +7 -7
package/scripts/verify_roadmap_closure.py +6 -6
package/templates/consumer-settings/ONBOARDING.md +41 -0
package/.agent-src/commands/install-via-agent.md +0 -129
package/.agent-src/skills/compress-memory/SKILL.md +0 -131
package/dist/ui/assets/index-D-DY1ywI.js +0 -35
package/dist/ui/assets/index-D-DY1ywI.js.map +0 -1
package/dist/ui/assets/index-Dqfhmg-d.css +0 -1
package/docs/adrs/caveman/README.md +0 -9
package/docs/contracts/caveman-telemetry.md +0 -83
package/scripts/compress.sh +0 -18

package/scripts/bench_ab_run.py ADDED

@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+"""Top-level orchestrator for the package-impact A/B bench.
+Phase 2 Step 1 of `agents/roadmaps/road-to-package-impact-benchmark.md`.
+A thin wrapper around the per-track runners (Track A behavioural eval,
+Track B task corpus). Owns:
+- the `--variant {with,without}` axis,
+- the cache lookup that decides whether the `without` arm runs at all,
+- the report-header convention (cache key, variant, corpus, timestamp),
+- the report-path convention `internal/bench/reports/ab/{stamp}-{corpus}-{variant}.json`.
+Track A's actual runner lands in Phase 3; Track B's in Phase 4. Until then
+this script writes stub reports so the cache and diff plumbing can be
+exercised end-to-end.
+"""
+from __future__ import annotations
+import argparse
+import json
+import sys
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+REPO_ROOT = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(REPO_ROOT / "scripts"))
+from _lib import bench_ab_cache  # type: ignore[import-not-found]  # noqa: E402
+REPORTS_DIR = REPO_ROOT / "internal" / "bench" / "reports" / "ab"
+CORPUS_DIR = REPO_ROOT / "internal" / "bench" / "corpora"
+CLONES_DIR = REPO_ROOT / "internal" / "bench" / "ab" / "clones"
+# Supported corpora (created in Phases 3 + 4).
+KNOWN_CORPORA = ("ab-tracka", "ab-trackb")
+REPORT_SCHEMA_VERSION = "ab-bench/0.1"
+def utc_stamp() -> str:
+    """Return the current UTC time as a `YYYY-MM-DDTHH-MM-SSZ` stamp."""
+    now_utc = datetime.now(timezone.utc)
+    return now_utc.strftime("%Y-%m-%dT%H-%M-%SZ")
+def corpus_path(corpus: str) -> Path:
+    """Return the `<corpus>.yaml` path inside CORPUS_DIR."""
+    filename = f"{corpus}.yaml"
+    return CORPUS_DIR / filename
+def report_path(stamp: str, corpus: str, variant: str) -> Path:
+    """Return the JSON report path `{stamp}-{corpus}-{variant}.json` inside REPORTS_DIR."""
+    stem = f"{stamp}-{corpus}-{variant}"
+    return REPORTS_DIR / f"{stem}.json"
+def ensure_clone(variant: str) -> Path:
+    """Return the clone directory for `variant`, building it when it is absent.
+    An existing clone is returned untouched; refreshing is left to the user.
+    Raises RuntimeError when the bench_ab_clone helper script cannot be loaded.
+    """
+    clone_dir = CLONES_DIR / variant
+    if clone_dir.exists():
+        return clone_dir
+    # Imported here, at the only point of use, and loaded by file path.
+    import importlib.util
+    helper_file = REPO_ROOT / "scripts" / "bench_ab_clone.py"
+    helper_spec = importlib.util.spec_from_file_location("bench_ab_clone", helper_file)
+    if helper_spec is None or helper_spec.loader is None:
+        raise RuntimeError("cannot load bench_ab_clone helper")
+    helper = importlib.util.module_from_spec(helper_spec)
+    helper_spec.loader.exec_module(helper)
+    helper.clone(variant, refresh=False)  # type: ignore[attr-defined]
+    return clone_dir
+def run_track_stub(variant: str, corpus: str, clone_root: Path) -> dict[str, object]:
+    """Build the Phase-2 placeholder results block.
+    No runner is executed; the block only records which corpus and variant
+    were requested and where the clone lives relative to REPO_ROOT. Phase 3
+    and Phase 4 are expected to replace this with real runners.
+    """
+    placeholder_note = (
+        "Phase 2 plumbing only. The real runner lands in Phase 3 (Track A) "
+        "or Phase 4 (Track B). See road-to-package-impact-benchmark.md."
+    )
+    relative_clone = clone_root.relative_to(REPO_ROOT)
+    stub: dict[str, object] = {"track": corpus, "status": "stub"}
+    stub["note"] = placeholder_note
+    stub["clone_root"] = str(relative_clone)
+    stub["variant"] = variant
+    return stub
+def write_report(
+    *,
+    variant: str,
+    corpus: str,
+    stamp: str,
+    cache_key: bench_ab_cache.CacheKey,
+    results: dict[str, object],
+    duration_seconds: float,
+) -> Path:
+    """Write the JSON report and its Markdown companion for one run.
+    Both files are written into REPORTS_DIR (created on demand) and share a
+    stem; the Markdown file is the JSON path with a `.md` suffix.
+    Args:
+        variant: Arm that was run; stored in the report and the filename.
+        corpus: Corpus name; stored in the report and the filename.
+        stamp: Timestamp string; stored in the report and the filename.
+        cache_key: Cache key whose `to_dict()` output is embedded in the report.
+        results: Results block stored under the `results` key.
+        duration_seconds: Run duration, stored rounded to three decimals.
+    Returns:
+        Path of the JSON report.
+    """
+    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
+    report = {
+        "schema": REPORT_SCHEMA_VERSION,
+        "stamp": stamp,
+        "variant": variant,
+        "corpus": corpus,
+        "cache_key": cache_key.to_dict(),
+        "duration_seconds": round(duration_seconds, 3),
+        "results": results,
+    }
+    json_path = report_path(stamp, corpus, variant)
+    # Pin UTF-8: the Markdown heading contains non-ASCII punctuation, and the
+    # default text encoding otherwise follows the platform locale.
+    json_path.write_text(json.dumps(report, indent=2) + "\n", encoding="utf-8")
+    md_path = json_path.with_suffix(".md")
+    md_path.write_text(render_markdown(report), encoding="utf-8")
+    return json_path
+def render_markdown(report: dict[str, object]) -> str:
+    """Render a report dict as Markdown: header, cache-key bullets, JSON results.
+    `variant`, `corpus`, `stamp` and `duration_seconds` must be present;
+    `cache_key` and `results` are optional.
+    """
+    header = [
+        f"# A/B Bench Report — {report['variant']} · {report['corpus']}",
+        "",
+        f"- Stamp: `{report['stamp']}`",
+        f"- Duration: {report['duration_seconds']}s",
+        "",
+        "## Cache key",
+        "",
+    ]
+    cache_key = report.get("cache_key") or {}
+    key_bullets = [
+        f"- `{name}`: `{value}`"
+        for name, value in cache_key.items()  # type: ignore[union-attr]
+    ]
+    results_json = json.dumps(report.get("results"), indent=2)
+    footer = ["", "## Results", "", "```json", results_json, "```", ""]
+    return "\n".join([*header, *key_bullets, *footer])
+def parse_args(argv: list[str]) -> argparse.Namespace:
+    """Parse the command-line options for a single bench arm.
+    `--variant` and `--corpus` are mandatory; `--non-interactive` and
+    `--reuse-cache` are boolean switches that default to off.
+    """
+    reuse_cache_help = (
+        "If a fresh cached `without` report exists, skip re-running and "
+        "exit 0 without writing a new report. Only meaningful for "
+        "--variant without."
+    )
+    cli = argparse.ArgumentParser(
+        description="Run one arm of the package-impact A/B bench."
+    )
+    cli.add_argument(
+        "--variant",
+        required=True,
+        choices=("with", "without"),
+        help="Which target clone to run against.",
+    )
+    cli.add_argument(
+        "--corpus",
+        required=True,
+        choices=KNOWN_CORPORA,
+        help="Which corpus to execute.",
+    )
+    cli.add_argument(
+        "--non-interactive",
+        action="store_true",
+        help="Never prompt; assume defaults on cache decisions.",
+    )
+    cli.add_argument("--reuse-cache", action="store_true", help=reuse_cache_help)
+    return cli.parse_args(argv)
+def main(argv: list[str] | None = None) -> int:
+    """Run one arm of the A/B bench and write its report.
+    Flow: resolve the corpus file, build the cache key, optionally reuse a
+    cached `without` report (only with `--reuse-cache` and an existing
+    corpus), otherwise make sure the clone exists, run the stub track and
+    write the JSON + Markdown report.
+    Args:
+        argv: Argument list without the program name; defaults to `sys.argv[1:]`.
+    Returns:
+        0 on every path visible here (report written or cached report reused).
+    """
+    args = parse_args(argv if argv is not None else sys.argv[1:])
+    corpus_file = corpus_path(args.corpus)
+    # Checked once so the warning, the cache key and the cache lookup agree.
+    corpus_present = corpus_file.exists()
+    if not corpus_present:
+        sys.stdout.write(
+            f"bench_ab_run: corpus '{args.corpus}' missing at {corpus_file} — "
+            "Phase 3 (track A) or Phase 4 (track B) author it. Writing a "
+            "placeholder run with the synthetic corpus hash so cache plumbing "
+            "remains exercisable.\n"
+        )
+    cache_key_value = bench_ab_cache.CacheKey(
+        corpus_hash=(
+            bench_ab_cache.hash_file(corpus_file)
+            if corpus_present
+            else "missing-corpus"
+        ),
+        claude_cli_version=bench_ab_cache.claude_cli_version(),
+        target_shape_hash=bench_ab_cache.target_shape_hash(),
+    )
+    if args.variant == "without" and args.reuse_cache and corpus_present:
+        lookup = bench_ab_cache.lookup(corpus_file)
+        if lookup.fresh and lookup.report_path is not None:
+            sys.stdout.write(
+                f"bench_ab_run: reusing fresh cached `without` report at "
+                f"{lookup.report_path.relative_to(REPO_ROOT)}\n"
+            )
+            return 0
+        if lookup.found and not lookup.fresh:
+            sys.stdout.write(
+                f"bench_ab_run: cached `without` report stale ({lookup.reason})\n"
+            )
+            if args.non_interactive:
+                # NOTE(review): the message promises the run is flagged, but
+                # nothing is recorded here before returning — confirm where
+                # the flag is meant to be persisted.
+                sys.stdout.write(
+                    "bench_ab_run: --non-interactive — reusing stale baseline "
+                    "and flagging the run.\n"
+                )
+                return 0
+            # This branch is only reachable with --reuse-cache set, so keeping
+            # the stale baseline means adding --non-interactive to it.
+            sys.stdout.write(
+                "bench_ab_run: continuing with a fresh run "
+                "(pass --non-interactive together with --reuse-cache to keep "
+                "the stale baseline)\n"
+            )
+    clone_root = ensure_clone(args.variant)
+    started = time.monotonic()
+    results = run_track_stub(args.variant, args.corpus, clone_root)
+    duration = time.monotonic() - started
+    path = write_report(
+        variant=args.variant,
+        corpus=args.corpus,
+        stamp=utc_stamp(),
+        cache_key=cache_key_value,
+        results=results,
+        duration_seconds=duration,
+    )
+    sys.stdout.write(
+        f"bench_ab_run: wrote {path.relative_to(REPO_ROOT)}\n"
+    )
+    return 0
+if __name__ == "__main__":
+    raise SystemExit(main())

package/scripts/bench_ab_task_runner.py ADDED Viewed

@@ -0,0 +1,369 @@
+#!/usr/bin/env python3
+"""Track B — task runner for the package-impact A/B bench.
+Phase 4 Step 2 of `agents/roadmaps/road-to-package-impact-benchmark.md`.
+For each task in `internal/bench/corpora/ab-trackb.yaml`, in each variant:
+1. Snapshot the variant clone's file tree.
+2. Invoke the `claude` CLI with the task prompt — OR dry-run, depending
+   on `--mode`.
+3. Capture the transcript, tool-call events, wall-time, and (if available)
+   token + cost counts.
+4. Snapshot the post-run tree.
+5. Score the task via scripts/_lib/bench_ab_scoring.py.
+Modes:
+- `dry-run` (default) — record the would-run shell command, write a stub
+  transcript naming the variant, score against the unchanged tree. The
+  result is structural-zero for every check that requires a file write,
+  but the scoring + reporting pipeline runs end-to-end. This is what the
+  bench produces in CI by default — fast, free, repeatable.
+- `live` — actually invoke the `claude` CLI with `--print` (one-shot
+  mode) and the task prompt. Reads `CLAUDE_CLI` from env if set, falls
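+  back to `claude` on PATH. Captures stdout followed by stderr as the
+  transcript. Note: a `--samples N` option for repeated runs is not
+  defined by this script's argument parser; each task runs once per variant.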
+The runner ALWAYS resets the clone to a clean state before each task and
+ALWAYS records the mode in the report header so a reader can never mistake
+a dry-run report for a real measurement.
+"""
+from __future__ import annotations
+import argparse
+import hashlib
+import json
+import os
+import shutil
+import subprocess
+import sys
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+REPO_ROOT = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(REPO_ROOT / "scripts"))
+from _lib import bench_ab_cache  # type: ignore[import-not-found]  # noqa: E402
+from _lib import bench_ab_scoring  # type: ignore[import-not-found]  # noqa: E402
+try:
+    import yaml
+except ImportError:
+    sys.stderr.write("bench_ab_task_runner: PyYAML required (pip install pyyaml)\n")
+    raise SystemExit(2)
+CORPUS_PATH = REPO_ROOT / "internal" / "bench" / "corpora" / "ab-trackb.yaml"
+CLONES_DIR = REPO_ROOT / "internal" / "bench" / "ab" / "clones"
+REPORTS_DIR = REPO_ROOT / "internal" / "bench" / "reports" / "ab"
+# How far we descend into a clone when snapshotting. The fixture is shallow.
+SNAPSHOT_MAX_DEPTH = 6
+def utc_stamp() -> str:
+    """Return the current UTC time formatted for use in report filenames."""
+    stamp_format = "%Y-%m-%dT%H-%M-%SZ"
+    return datetime.now(timezone.utc).strftime(stamp_format)
+def snapshot_clone(clone_root: Path, *, max_depth: int = SNAPSHOT_MAX_DEPTH) -> dict[str, str]:
+    """Map each fixture file under the clone to a 16-hex-char SHA-256 prefix.
+    Keys are POSIX-style paths relative to `clone_root`, visited in sorted order.
+    Left out: anything below a top-level `.claude` or `.augment` directory,
+    the root-level `AGENTS.md`, `CLAUDE.md` and `.bench-ab-manifest.json`,
+    files nested deeper than `max_depth` path components, and files that
+    cannot be read.
+    """
+    ignored_roots = {".claude", ".augment"}
+    ignored_files = {"AGENTS.md", "CLAUDE.md", ".bench-ab-manifest.json"}
+    digests: dict[str, str] = {}
+    for candidate in sorted(clone_root.rglob("*")):
+        if candidate.is_file():
+            relative = candidate.relative_to(clone_root)
+            segments = relative.parts
+            key = relative.as_posix()
+            in_ignored_root = bool(segments) and segments[0] in ignored_roots
+            if in_ignored_root or key in ignored_files or len(segments) > max_depth:
+                continue
+            try:
+                payload = candidate.read_bytes()
+            except OSError:
+                # Unreadable files are skipped rather than failing the snapshot.
+                continue
+            digests[key] = hashlib.sha256(payload).hexdigest()[:16]
+    return digests
+def reset_clone(variant: str) -> Path:
+    """Rebuild the clone for `variant` via the bench_ab_clone helper script.
+    The helper is loaded by file path and called with `refresh=True`; its
+    return value is passed straight through. Raises RuntimeError when the
+    helper cannot be loaded.
+    """
+    from importlib import util as importlib_util
+    helper_file = REPO_ROOT / "scripts" / "bench_ab_clone.py"
+    helper_spec = importlib_util.spec_from_file_location("bench_ab_clone", helper_file)
+    if helper_spec is None or helper_spec.loader is None:
+        raise RuntimeError("cannot load bench_ab_clone helper")
+    helper = importlib_util.module_from_spec(helper_spec)
+    helper_spec.loader.exec_module(helper)
+    return helper.clone(variant, refresh=True)  # type: ignore[attr-defined]
+def claude_executable() -> str | None:
+    """Pick the claude CLI command: a non-empty `CLAUDE_CLI` wins, then PATH.
+    Returns None when neither source yields a binary.
+    """
+    configured = os.environ.get("CLAUDE_CLI")
+    if configured:
+        return configured
+    return "claude" if shutil.which("claude") is not None else None
+def run_live(task: dict, clone_root: Path, *, timeout_s: int) -> dict:
+    """Invoke claude in print/one-shot mode against the task prompt.
+    Args:
+        task: Task mapping; its `prompt` entry (default "") is sent to the CLI.
+        clone_root: Working directory for the subprocess.
+        timeout_s: Per-invocation timeout in seconds.
+    Returns:
+        A dict with `mode`, `reason`, `transcript`, `exit_code` and
+        `wall_time_seconds`. `mode` is "live-skipped" when the CLI is not
+        found or cannot be launched, otherwise "live". On timeout
+        `exit_code` is -1 and the transcript ends with "[TIMEOUT]".
+    """
+    binary = claude_executable()
+    if binary is None:
+        return {
+            "mode": "live-skipped",
+            "reason": "claude CLI not found; set CLAUDE_CLI or install it",
+            "transcript": "",
+            "exit_code": None,
+            "wall_time_seconds": 0.0,
+        }
+    prompt = task.get("prompt", "")
+    cmd = [binary, "--print", "--", prompt]
+    started = time.monotonic()
+    try:
+        proc = subprocess.run(
+            cmd,
+            cwd=clone_root,
+            capture_output=True,
+            text=True,
+            timeout=timeout_s,
+            check=False,
+        )
+    except subprocess.TimeoutExpired as exc:
+        # Captured output on TimeoutExpired is bytes even with text=True, so
+        # decode before concatenating with the marker string.
+        partial = exc.stdout or ""
+        if isinstance(partial, bytes):
+            partial = partial.decode("utf-8", errors="replace")
+        return {
+            "mode": "live",
+            "reason": f"timeout after {timeout_s}s",
+            "transcript": partial + "\n[TIMEOUT]",
+            "exit_code": -1,
+            "wall_time_seconds": round(time.monotonic() - started, 3),
+        }
+    except OSError as exc:
+        # E.g. CLAUDE_CLI names a missing or non-executable file. Report it
+        # like the not-found case instead of aborting the whole bench run.
+        return {
+            "mode": "live-skipped",
+            "reason": f"could not launch {binary!r}: {exc}",
+            "transcript": "",
+            "exit_code": None,
+            "wall_time_seconds": round(time.monotonic() - started, 3),
+        }
+    duration = time.monotonic() - started
+    return {
+        "mode": "live",
+        "reason": "ok",
+        "transcript": proc.stdout + "\n" + proc.stderr,
+        "exit_code": proc.returncode,
+        "wall_time_seconds": round(duration, 3),
+    }
+def run_dry(task: dict, clone_root: Path, variant: str) -> dict:
+    """Produce the dry-run result: a fixed stub transcript and zero wall time.
+    The prompt text is deliberately kept out of the stub. Echoing it would
+    let transcript-keyword criteria match the prompt instead of an agent
+    response, so the stub stays inert for `transcript_contains_*` checks.
+    """
+    stub_lines = [
+        "[bench_ab_task_runner dry-run]",
+        f"variant={variant}",
+        f"clone={clone_root}",
+        f"task_id={task.get('id')}",
+        "[no claude invocation; --mode live to execute for real]",
+    ]
+    transcript = "".join(line + "\n" for line in stub_lines)
+    outcome = {"mode": "dry-run", "reason": "ok"}
+    outcome["transcript"] = transcript
+    outcome["exit_code"] = 0
+    outcome["wall_time_seconds"] = 0.0
+    return outcome
+def count_ask_events(transcript: str) -> dict[str, int]:
+    """Count ask phrases versus commit/push/PR commands in a transcript.
+    Matching is case-insensitive substring counting. `ratio` is
+    asked / (asked + acted) rounded to three decimals, or 0 when neither
+    kind of marker occurs (so it may be fractional despite the annotation).
+    """
+    ask_phrases = ("should i", "do you want", "shall i", "soll ich", "möchtest du")
+    act_phrases = ("git commit", "git push", "gh pr create", "gh pr merge")
+    if not transcript:
+        return {"asked": 0, "acted_with_commit": 0, "ratio": 0}
+    lowered = transcript.lower()
+    ask_hits = 0
+    for phrase in ask_phrases:
+        ask_hits += lowered.count(phrase)
+    act_hits = 0
+    for phrase in act_phrases:
+        act_hits += lowered.count(phrase)
+    combined = ask_hits + act_hits
+    if combined:
+        share = round(ask_hits / combined, 3)
+    else:
+        share = 0
+    return {"asked": ask_hits, "acted_with_commit": act_hits, "ratio": share}
+def per_category_aggregate(per_task: list[dict]) -> dict[str, dict]:
+    """Aggregate pass counts and mean wall time per task category.
+    Args:
+        per_task: Per-task result dicts; `category`, `score` (with a
+            `passed` flag) and `wall_time_seconds` are read from each.
+    Returns:
+        `{category: {"passed", "total", "completion_rate", "mean_wall_time"}}`
+        in first-seen category order. Entries whose category is missing,
+        None or empty are grouped under "unknown".
+    """
+    by_cat: dict[str, list[dict]] = {}
+    for entry in per_task:
+        # `or` rather than a .get default: the key can be present with a None
+        # value, which a default would not replace.
+        category = entry.get("category") or "unknown"
+        by_cat.setdefault(category, []).append(entry)
+    out: dict[str, dict] = {}
+    for cat, entries in by_cat.items():
+        # Each group holds at least one entry, so the divisions are safe.
+        total = len(entries)
+        passed = sum(1 for e in entries if (e.get("score") or {}).get("passed"))
+        wall_time = sum(e.get("wall_time_seconds", 0) for e in entries)
+        out[cat] = {
+            "passed": passed,
+            "total": total,
+            "completion_rate": round(passed / total, 4),
+            "mean_wall_time": round(wall_time / total, 3),
+        }
+    return out
+def write_report(
+    variant: str,
+    *,
+    mode: str,
+    per_task: list[dict],
+    duration: float,
+) -> Path:
+    """Write the Track B JSON report and its Markdown summary for one variant.
+    Args:
+        variant: Variant that was run; stored in the report and the filename.
+        mode: Run mode, recorded in the results block and the Markdown title.
+        per_task: Per-task result dicts as assembled by the runner.
+        duration: Total run duration in seconds, stored rounded to three decimals.
+    Returns:
+        Path of the JSON report inside REPORTS_DIR; the Markdown summary
+        sits next to it with a `.md` suffix.
+    """
+    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
+    cache_key = bench_ab_cache.CacheKey(
+        corpus_hash=bench_ab_cache.hash_file(CORPUS_PATH),
+        claude_cli_version=bench_ab_cache.claude_cli_version(),
+        target_shape_hash=bench_ab_cache.target_shape_hash(),
+    )
+    total = len(per_task)
+    passed = sum(1 for e in per_task if e.get("score", {}).get("passed"))
+    results = {
+        "mode": mode,
+        "completion_rate": round(passed / total, 4) if total else 0,
+        "passed": passed,
+        "total": total,
+        "per_category": per_category_aggregate(per_task),
+        "mean_wall_time": round(
+            sum(e.get("wall_time_seconds", 0) for e in per_task) / total, 3
+        )
+        if total
+        else 0,
+        "ask_vs_act_ratio": round(
+            sum(e.get("ask_events", {}).get("ratio", 0) for e in per_task) / total, 3
+        )
+        if total
+        else 0,
+        "per_task": per_task,
+    }
+    stamp = utc_stamp()
+    payload = {
+        "schema": "ab-bench/0.1",
+        "stamp": stamp,
+        "variant": variant,
+        "corpus": "ab-trackb",
+        "cache_key": cache_key.to_dict(),
+        "duration_seconds": round(duration, 3),
+        "results": results,
+    }
+    path = REPORTS_DIR / f"{stamp}-ab-trackb-{variant}.json"
+    # Pin UTF-8: the Markdown summary contains non-ASCII punctuation, and the
+    # default text encoding otherwise follows the platform locale.
+    path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
+    md = path.with_suffix(".md")
+    category_lines = "\n".join(
+        f"- `{cat}` — {info['passed']}/{info['total']} "
+        f"({info['completion_rate'] * 100:.1f}%)"
+        for cat, info in results["per_category"].items()
+    )
+    md.write_text(
+        f"# Track B · {variant} · {mode}\n\n"
+        f"- Stamp: `{stamp}`\n"
+        f"- Completion rate: **{results['completion_rate'] * 100:.1f}%**"
+        f" ({passed}/{total})\n"
+        f"- Mean wall-time: {results['mean_wall_time']}s\n"
+        f"- Ask vs. act ratio: {results['ask_vs_act_ratio']}\n"
+        f"\n## Per-category\n\n"
+        + category_lines
+        + "\n",
+        encoding="utf-8",
+    )
+    return path
+def run_variant(variant: str, tasks: list[dict], *, mode: str, timeout_s: int) -> dict:
+    """Run every task for one variant, write the report, print a summary line.
+    For each task the clone is rebuilt, snapshotted, the task is executed
+    (live when `mode == "live"`, otherwise dry), the clone is snapshotted
+    again and the task is scored from both snapshots plus the transcript.
+    Returns a dict with the report `path`, the `per_task` entries and the
+    total `duration` in seconds.
+    """
+    clock_start = time.monotonic()
+    entries: list[dict] = []
+    for task in tasks:
+        clone_root = reset_clone(variant)
+        before = snapshot_clone(clone_root)
+        outcome = (
+            run_live(task, clone_root, timeout_s=timeout_s)
+            if mode == "live"
+            else run_dry(task, clone_root, variant)
+        )
+        after = snapshot_clone(clone_root)
+        task_score = bench_ab_scoring.score_task(
+            task,
+            pre_snapshot=before,
+            post_snapshot=after,
+            clone_root=clone_root,
+            transcript=outcome.get("transcript", ""),
+        )
+        entry = {
+            "id": task.get("id"),
+            "category": task.get("category"),
+            "score": task_score,
+            "wall_time_seconds": outcome.get("wall_time_seconds", 0.0),
+            "exit_code": outcome.get("exit_code"),
+            # The runner may report a more specific mode than the one requested.
+            "mode": outcome.get("mode", mode),
+            "reason": outcome.get("reason", ""),
+            "ask_events": count_ask_events(outcome.get("transcript", "")),
+        }
+        entries.append(entry)
+    elapsed = time.monotonic() - clock_start
+    report_file = write_report(variant, mode=mode, per_task=entries, duration=elapsed)
+    passed_count = sum(1 for item in entries if item["score"]["passed"])
+    sys.stdout.write(
+        f"bench_ab_task_runner: {variant} ({mode}) → "
+        f"{passed_count}/{len(entries)} "
+        f"passed — {report_file.relative_to(REPO_ROOT)}\n"
+    )
+    return {"path": report_file, "per_task": entries, "duration": elapsed}
+def parse_args(argv: list[str]) -> argparse.Namespace:
+    """Parse the Track B runner options.
+    Defaults: `--variant both`, `--mode dry-run`, `--timeout 120`.
+    """
+    mode_help = (
+        "dry-run: stub transcript, no CLI invocation (fast, free). "
+        "live: invoke `claude --print` per task (cost-bearing)."
+    )
+    cli = argparse.ArgumentParser(description="Run Track B tasks per variant.")
+    cli.add_argument(
+        "--variant",
+        default="both",
+        choices=("with", "without", "both"),
+        help="Which variant to run (default: both).",
+    )
+    cli.add_argument(
+        "--mode",
+        default="dry-run",
+        choices=("dry-run", "live"),
+        help=mode_help,
+    )
+    cli.add_argument(
+        "--timeout",
+        default=120,
+        type=int,
+        help="Live mode: per-task timeout in seconds (default 120).",
+    )
+    return cli.parse_args(argv)
+def main(argv: list[str] | None = None) -> int:
+    """Load the Track B corpus and run the requested variant(s).
+    Args:
+        argv: Argument list without the program name; defaults to `sys.argv[1:]`.
+    Returns:
+        0 after all requested variants ran; 1 when the corpus file is
+        missing, is not a mapping, or defines no tasks.
+    """
+    args = parse_args(argv if argv is not None else sys.argv[1:])
+    if not CORPUS_PATH.exists():
+        sys.stderr.write(f"bench_ab_task_runner: corpus missing at {CORPUS_PATH}\n")
+        return 1
+    data = yaml.safe_load(CORPUS_PATH.read_text(encoding="utf-8"))
+    # safe_load yields None for an empty document and a list/scalar for a
+    # non-mapping root; neither has `.get`, so reject them explicitly.
+    if data is not None and not isinstance(data, dict):
+        sys.stderr.write(
+            "bench_ab_task_runner: corpus root must be a mapping with a `tasks` key\n"
+        )
+        return 1
+    tasks = (data or {}).get("tasks") or []
+    if not tasks:
+        sys.stderr.write("bench_ab_task_runner: corpus has no tasks\n")
+        return 1
+    variants = ("with", "without") if args.variant == "both" else (args.variant,)
+    for variant in variants:
+        run_variant(variant, tasks, mode=args.mode, timeout_s=args.timeout)
+    return 0
+if __name__ == "__main__":
+    raise SystemExit(main())