@event4u/agent-config 3.3.0 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/README.md +2 -2
- package/.agent-src/commands/agent-handoff.md +31 -2
- package/.agent-src/commands/agent-status.md +5 -5
- package/.agent-src/commands/agents/audit.md +8 -8
- package/.agent-src/commands/agents/init.md +25 -1
- package/.agent-src/commands/agents/optimize.md +3 -3
- package/.agent-src/commands/agents/user.md +1 -1
- package/.agent-src/commands/agents.md +1 -1
- package/.agent-src/commands/analyze-reference-repo.md +1 -1
- package/.agent-src/commands/check-current-md.md +8 -8
- package/.agent-src/commands/{compress.md → condense.md} +55 -55
- package/.agent-src/commands/context/create.md +7 -4
- package/.agent-src/commands/context/refactor.md +3 -1
- package/.agent-src/commands/feature/dev.md +1 -1
- package/.agent-src/commands/feature/explore.md +1 -1
- package/.agent-src/commands/feature/plan.md +10 -8
- package/.agent-src/commands/feature/refactor.md +3 -1
- package/.agent-src/commands/feature/roadmap.md +7 -4
- package/.agent-src/commands/fix/portability.md +3 -3
- package/.agent-src/commands/fix/refs.md +4 -4
- package/.agent-src/commands/ghostwriter.md +2 -2
- package/.agent-src/commands/memory/learn-low-impact.md +3 -3
- package/.agent-src/commands/module/explore.md +34 -8
- package/.agent-src/commands/optimize/agents-dir.md +9 -7
- package/.agent-src/commands/optimize/augmentignore.md +2 -2
- package/.agent-src/commands/optimize/skills.md +9 -9
- package/.agent-src/commands/post-as.md +1 -1
- package/.agent-src/commands/project-analyze.md +2 -2
- package/.agent-src/commands/project-health.md +3 -2
- package/.agent-src/commands/research/deep.md +1 -1
- package/.agent-src/commands/research/report.md +1 -1
- package/.agent-src/commands/research.md +1 -1
- package/.agent-src/commands/roadmap/ai-council.md +1 -1
- package/.agent-src/commands/roadmap/create.md +9 -4
- package/.agent-src/commands/rule-compliance-audit.md +1 -1
- package/.agent-src/commands/upstream-contribute.md +14 -14
- package/.agent-src/commands/video/from-script.md +1 -1
- package/.agent-src/commands/video/scene.md +1 -1
- package/.agent-src/commands/video/stitch.md +1 -1
- package/.agent-src/commands/video/storyboard.md +1 -1
- package/.agent-src/commands/video.md +1 -1
- package/.agent-src/contexts/augment-infrastructure.md +1 -1
- package/.agent-src/contexts/authority/commit-mechanics.md +15 -0
- package/.agent-src/contexts/authority/kernel-rule-edits.md +3 -3
- package/.agent-src/contexts/authority/scope-mechanics.md +1 -1
- package/.agent-src/contexts/communication/rules-auto/augment-source-of-truth-mechanics.md +28 -28
- package/.agent-src/contexts/communication/rules-auto/skill-quality-mechanics.md +4 -4
- package/.agent-src/contexts/communication/rules-auto/think-before-action-mechanics.md +2 -2
- package/.agent-src/contexts/contracts/artifact-engagement-flow.md +6 -6
- package/.agent-src/contexts/contracts/command-suggestion-flow.md +3 -3
- package/.agent-src/contexts/contracts/emergency-triage-block.md +4 -4
- package/.agent-src/contexts/contracts/frugality-charter.md +3 -3
- package/.agent-src/contexts/documentation-hierarchy.md +14 -7
- package/.agent-src/contexts/execution/autonomy-examples.md +1 -1
- package/.agent-src/contexts/execution/cheap-question-mechanics.md +39 -2
- package/.agent-src/contexts/execution/roadmap-process-loop.md +28 -5
- package/.agent-src/contexts/override-system.md +5 -5
- package/.agent-src/ghostwriter/fictional-fixture-v1.md +1 -1
- package/.agent-src/personas/advisors/first-principles.md +1 -1
- package/.agent-src/personas/hollywood-director.md +1 -1
- package/.agent-src/rules/architecture.md +5 -1
- package/.agent-src/rules/augment-edit-discipline.md +5 -5
- package/.agent-src/rules/augment-source-of-truth.md +15 -15
- package/.agent-src/rules/commit-conventions.md +1 -1
- package/.agent-src/rules/commit-policy.md +10 -0
- package/.agent-src/rules/domain-adoption-policy.md +3 -3
- package/.agent-src/rules/fast-path-marker-visibility.md +3 -3
- package/.agent-src/rules/finance-safety-floor.md +1 -1
- package/.agent-src/rules/framework-neutrality-in-generic-skills.md +8 -8
- package/.agent-src/rules/git-history-discipline.md +1 -1
- package/.agent-src/rules/improve-before-implement.md +2 -2
- package/.agent-src/rules/language-and-tone.md +2 -2
- package/.agent-src/rules/media-governance-routing.md +5 -5
- package/.agent-src/rules/no-attribution-footers.md +1 -0
- package/.agent-src/rules/no-cheap-questions.md +3 -0
- package/.agent-src/rules/no-decorative-emojis-in-git-surfaces.md +111 -0
- package/.agent-src/rules/no-pr-progress-comments.md +118 -0
- package/.agent-src/rules/no-roadmap-references.md +3 -3
- package/.agent-src/rules/non-destructive-by-default.md +1 -1
- package/.agent-src/rules/persona-governance.md +3 -3
- package/.agent-src/rules/preservation-guard.md +15 -15
- package/.agent-src/rules/roadmap-ci-steps-policy.md +7 -3
- package/.agent-src/rules/rule-type-governance.md +1 -1
- package/.agent-src/rules/skill-quality.md +1 -1
- package/.agent-src/rules/{caveman-speak.md → telegraph-speak.md} +15 -15
- package/.agent-src/rules/token-optimizer-maintenance.md +6 -6
- package/.agent-src/skills/agent-docs-writing/SKILL.md +17 -11
- package/.agent-src/skills/agents-md-thin-root/SKILL.md +9 -9
- package/.agent-src/skills/check-refs/SKILL.md +2 -2
- package/.agent-src/skills/code-refactoring/SKILL.md +2 -2
- package/.agent-src/skills/command-writing/SKILL.md +19 -19
- package/.agent-src/skills/comp-banding/SKILL.md +1 -1
- package/.agent-src/skills/condense-memory/SKILL.md +131 -0
- package/.agent-src/skills/context-authoring/SKILL.md +2 -2
- package/.agent-src/skills/context-document/SKILL.md +5 -3
- package/.agent-src/skills/copilot-agents-optimization/SKILL.md +3 -3
- package/.agent-src/skills/description-assist/SKILL.md +2 -2
- package/.agent-src/skills/git-workflow/SKILL.md +1 -1
- package/.agent-src/skills/guideline-writing/SKILL.md +5 -5
- package/.agent-src/skills/learning-to-rule-or-skill/SKILL.md +4 -4
- package/.agent-src/skills/lint-skills/SKILL.md +3 -3
- package/.agent-src/skills/md-language-check/SKILL.md +2 -2
- package/.agent-src/skills/module-detect-on-the-fly/SKILL.md +138 -0
- package/.agent-src/skills/module-management/SKILL.md +166 -94
- package/.agent-src/skills/override-management/SKILL.md +1 -1
- package/.agent-src/skills/persona-writing/SKILL.md +5 -5
- package/.agent-src/skills/positioning-strategy/SKILL.md +1 -1
- package/.agent-src/skills/project-docs/SKILL.md +6 -4
- package/.agent-src/skills/readme-reviewer/SKILL.md +2 -2
- package/.agent-src/skills/roadmap-management/SKILL.md +13 -1
- package/.agent-src/skills/roadmap-writing/SKILL.md +4 -2
- package/.agent-src/skills/rule-refactor/SKILL.md +5 -5
- package/.agent-src/skills/rule-writing/SKILL.md +18 -18
- package/.agent-src/skills/script-writing/SKILL.md +1 -1
- package/.agent-src/skills/skill-improvement-pipeline/SKILL.md +6 -6
- package/.agent-src/skills/skill-management/SKILL.md +21 -21
- package/.agent-src/skills/skill-reviewer/SKILL.md +2 -2
- package/.agent-src/skills/skill-writing/SKILL.md +8 -8
- package/.agent-src/skills/skill-writing/evals/triggers.json +1 -1
- package/.agent-src/skills/token-optimizer/SKILL.md +4 -4
- package/.agent-src/skills/unit-economics-modeling/SKILL.md +1 -1
- package/.agent-src/skills/upstream-contribute/SKILL.md +17 -17
- package/.agent-src/templates/AGENTS.md +1 -0
- package/.agent-src/templates/agent-settings.md +24 -13
- package/.agent-src/templates/agents/agent-project-settings.example.yml +61 -2
- package/.agent-src/templates/command.md +5 -5
- package/.agent-src/templates/contexts.md +1 -1
- package/.agent-src/templates/copilot-instructions.md +8 -8
- package/.agent-src/templates/features.md +1 -1
- package/.agent-src/templates/hooks/pre-commit-frontmatter +2 -2
- package/.agent-src/templates/hooks/pre-commit-roadmap-progress +3 -3
- package/.agent-src/templates/persona.md +2 -2
- package/.agent-src/templates/roadmaps.md +1 -1
- package/.agent-src/templates/rule.md +13 -13
- package/.agent-src/templates/scripts/memory_lookup.py +1 -1
- package/.agent-src/templates/scripts/memory_status.py +2 -2
- package/.agent-src/templates/scripts/work_engine/_lib/agent_settings.py +195 -1
- package/.agent-src/templates/scripts/work_engine/orchestration.py +1 -1
- package/.agent-src/templates/skill-archive-note.md +5 -5
- package/.agent-src/templates/skill.md +1 -1
- package/.claude-plugin/marketplace.json +4 -4
- package/AGENTS.md +16 -17
- package/CHANGELOG.md +181 -3
- package/CONTRIBUTING.md +31 -12
- package/README.md +18 -10
- package/config/agent-settings.template.yml +22 -2
- package/config/discovery/unassigned-artefacts.yml +24 -24
- package/config/profiles/full.ini +1 -1
- package/dist/cli/agent-config.js +52 -3
- package/dist/cli/agent-config.js.map +1 -1
- package/dist/cli/commands/uiServe.js +9 -0
- package/dist/cli/commands/uiServe.js.map +1 -1
- package/dist/cli/registry.js +2 -1
- package/dist/cli/registry.js.map +1 -1
- package/dist/discovery/deprecation-report.md +1 -1
- package/dist/discovery/discovery-manifest.json +649 -606
- package/dist/discovery/discovery-manifest.json.sha256 +1 -1
- package/dist/discovery/discovery-manifest.summary.md +4 -4
- package/dist/discovery/orphan-report.md +1 -1
- package/dist/discovery/packs.json +439 -437
- package/dist/discovery/trust-report.md +5 -5
- package/dist/discovery/workspaces.json +450 -448
- package/dist/install/apply.js +238 -0
- package/dist/install/apply.js.map +1 -0
- package/dist/install/atomic.js +92 -0
- package/dist/install/atomic.js.map +1 -0
- package/dist/install/bridges/augment.js +20 -0
- package/dist/install/bridges/augment.js.map +1 -0
- package/dist/install/bridges/claude.js +44 -0
- package/dist/install/bridges/claude.js.map +1 -0
- package/dist/install/bridges/cline.js +69 -0
- package/dist/install/bridges/cline.js.map +1 -0
- package/dist/install/bridges/copilot.js +28 -0
- package/dist/install/bridges/copilot.js.map +1 -0
- package/dist/install/bridges/cursor.js +34 -0
- package/dist/install/bridges/cursor.js.map +1 -0
- package/dist/install/bridges/gemini.js +39 -0
- package/dist/install/bridges/gemini.js.map +1 -0
- package/dist/install/bridges/index.js +88 -0
- package/dist/install/bridges/index.js.map +1 -0
- package/dist/install/bridges/marker-content.js +153 -0
- package/dist/install/bridges/marker-content.js.map +1 -0
- package/dist/install/bridges/markers.js +42 -0
- package/dist/install/bridges/markers.js.map +1 -0
- package/dist/install/bridges/types.js +31 -0
- package/dist/install/bridges/types.js.map +1 -0
- package/dist/install/bridges/vscode.js +26 -0
- package/dist/install/bridges/vscode.js.map +1 -0
- package/dist/install/bridges/windsurf.js +35 -0
- package/dist/install/bridges/windsurf.js.map +1 -0
- package/dist/install/conflict.js +196 -0
- package/dist/install/conflict.js.map +1 -0
- package/dist/install/detect.js +218 -0
- package/dist/install/detect.js.map +1 -0
- package/dist/install/paths.js +82 -0
- package/dist/install/paths.js.map +1 -0
- package/dist/install/plan.js +157 -0
- package/dist/install/plan.js.map +1 -0
- package/dist/install/txlog.js +140 -0
- package/dist/install/txlog.js.map +1 -0
- package/dist/install/types.js +19 -0
- package/dist/install/types.js.map +1 -0
- package/dist/install/wizard-plan.js +184 -0
- package/dist/install/wizard-plan.js.map +1 -0
- package/dist/mcp/registry-manifest.json +4 -4
- package/dist/router.json +67 -19
- package/dist/server/app.js +6 -0
- package/dist/server/app.js.map +1 -1
- package/dist/server/routes/install.js +547 -0
- package/dist/server/routes/install.js.map +1 -0
- package/dist/server/routes/wizard.js +301 -6
- package/dist/server/routes/wizard.js.map +1 -1
- package/dist/server/routes/workspace.js +396 -0
- package/dist/server/routes/workspace.js.map +1 -0
- package/dist/server/schemas/settings.js +4 -3
- package/dist/server/schemas/settings.js.map +1 -1
- package/dist/ui/assets/index-BXZILUxe.css +1 -0
- package/dist/ui/assets/index-DLEuEW1V.js +35 -0
- package/dist/ui/assets/index-DLEuEW1V.js.map +1 -0
- package/dist/ui/index.html +2 -2
- package/docs/MIGRATION.md +1 -1
- package/docs/adrs/cost/0001-hard-stop-hook.md +1 -1
- package/docs/adrs/router/0001-three-tier-routing.md +4 -4
- package/docs/adrs/schema/0001-json-schema-frontmatter.md +1 -1
- package/docs/adrs/smoke/0001-per-tier-smoke-scripts.md +4 -4
- package/docs/adrs/{caveman → telegraph}/0001-default-off-until-bench.md +9 -9
- package/docs/adrs/telegraph/README.md +9 -0
- package/docs/architecture/augment-projection.md +4 -4
- package/docs/architecture/claude-bundle.md +1 -1
- package/docs/architecture/current-onboard-baseline.md +3 -3
- package/docs/architecture/multi-tool-projection.md +10 -10
- package/docs/architecture/source-projection.md +27 -27
- package/docs/architecture.md +19 -15
- package/docs/archive/CHANGELOG-pre-2.11.0.md +2 -2
- package/docs/archive/CHANGELOG-pre-2.15.0.md +3 -3
- package/docs/archive/CHANGELOG-pre-2.16.0.md +1 -1
- package/docs/archive/CHANGELOG-pre-2.2.0.md +70 -70
- package/docs/archive/CHANGELOG-pre-2.20.0.md +2 -2
- package/docs/archive/CHANGELOG-pre-2.25.0.md +15 -15
- package/docs/archive/CHANGELOG-pre-3.0.0.md +4 -4
- package/docs/archive/CHANGELOG-pre-3.1.0.md +2 -2
- package/docs/archive/CHANGELOG-pre-3.2.0.md +3 -3
- package/docs/benchmark.md +65 -0
- package/docs/benchmarks.md +16 -16
- package/docs/catalog.md +17 -15
- package/docs/contracts/CHANGELOG-conventions.md +1 -1
- package/docs/contracts/STABILITY.md +2 -2
- package/docs/contracts/adoption-signal-floor.md +110 -0
- package/docs/contracts/adr-chat-history-split.md +4 -4
- package/docs/contracts/adr-command-suggestion.md +4 -4
- package/docs/contracts/adr-gtm-context-spine.md +1 -1
- package/docs/contracts/adr-implement-ticket-runtime.md +4 -4
- package/docs/contracts/adr-install-user-type-axis.md +1 -1
- package/docs/contracts/adr-layout.md +2 -2
- package/docs/contracts/adr-product-ui-track.md +10 -10
- package/docs/contracts/adr-user-types-axis.md +3 -3
- package/docs/contracts/adr-wing4-context-spine.md +1 -1
- package/docs/contracts/agent-memory-contract.md +3 -3
- package/docs/contracts/agents-md-tech-stack.md +2 -2
- package/docs/contracts/ai-council-config.md +2 -2
- package/docs/contracts/at-rest-encryption.md +4 -0
- package/docs/contracts/audit-log-v1.md +1 -1
- package/docs/contracts/benchmark-ab-contract.md +101 -0
- package/docs/contracts/benchmark-corpus-spec.md +1 -1
- package/docs/contracts/branch-protection-policy.md +98 -0
- package/docs/contracts/ci-cost-budget.md +106 -0
- package/docs/contracts/ci-green-floor.md +141 -0
- package/docs/contracts/command-clusters.md +6 -6
- package/docs/contracts/command-surface-tiers.md +2 -2
- package/docs/contracts/command-taxonomy.md +2 -2
- package/docs/contracts/{compression-default-kill-criterion.md → condensation-default-kill-criterion.md} +29 -29
- package/docs/contracts/config-presets.md +1 -1
- package/docs/contracts/context-paths.md +3 -3
- package/docs/contracts/context-spine.md +1 -1
- package/docs/contracts/cost-summary-schema.md +12 -12
- package/docs/contracts/cross-wing-handoff.md +4 -4
- package/docs/contracts/daily-workspace.md +4 -0
- package/docs/contracts/decision-trace-v1.md +2 -2
- package/docs/contracts/discovery-manifest.md +4 -4
- package/docs/contracts/explain-modes.md +4 -0
- package/docs/contracts/file-ownership-matrix.json +3493 -3318
- package/docs/contracts/file-ownership-matrix.md +3 -3
- package/docs/contracts/frontmatter-contract.md +4 -4
- package/docs/contracts/ghostwriter-schema.md +3 -3
- package/docs/contracts/gui-wizard.md +1 -1
- package/docs/contracts/harness-expectations.md +123 -0
- package/docs/contracts/host-agent-protocol.md +4 -0
- package/docs/contracts/implement-ticket-flow.md +9 -9
- package/docs/contracts/install-scopes.md +77 -0
- package/docs/contracts/iron-law-overrides.txt +1 -1
- package/docs/contracts/kernel-membership.md +26 -26
- package/docs/contracts/linear-ai-rules-inclusion.md +1 -1
- package/docs/contracts/linter-structural-model.md +2 -2
- package/docs/contracts/load-context-budget-model.md +4 -4
- package/docs/contracts/load-context-schema.md +13 -13
- package/docs/contracts/local-analytics.md +4 -0
- package/docs/contracts/local-knowledge-ingestion.md +1 -1
- package/docs/contracts/mcp-cloud-scope.md +2 -2
- package/docs/contracts/mcp-phase-1-scope.md +3 -3
- package/docs/contracts/measurement-baseline.md +5 -5
- package/docs/contracts/mental-models.md +30 -30
- package/docs/contracts/multi-tool-projection-fidelity.md +4 -4
- package/docs/contracts/namespace.md +4 -4
- package/docs/contracts/orchestration-dsl-v1.md +7 -7
- package/docs/contracts/package-self-orientation.md +12 -12
- package/docs/contracts/persona-schema.md +6 -6
- package/docs/contracts/pilot/language-and-tone.md +1 -1
- package/docs/contracts/plain-language-surface.md +117 -0
- package/docs/contracts/profile-system.md +3 -3
- package/docs/contracts/release-pr-gating.md +103 -0
- package/docs/contracts/role-experience.md +3 -3
- package/docs/contracts/rule-classification.md +13 -13
- package/docs/contracts/rule-interactions.md +4 -4
- package/docs/contracts/rule-interactions.yml +30 -30
- package/docs/contracts/rule-priority-hierarchy.md +13 -13
- package/docs/contracts/rule-router.md +2 -2
- package/docs/contracts/safety-model.md +1 -1
- package/docs/contracts/skill-distribution-channels.md +61 -0
- package/docs/contracts/skill-domains.md +2 -2
- package/docs/contracts/smoke-contracts.md +5 -5
- package/docs/contracts/telegraph-telemetry.md +83 -0
- package/docs/contracts/trust-and-safety.md +5 -5
- package/docs/contracts/ui-stack-extension.md +7 -7
- package/docs/contracts/ui-track-flow.md +9 -9
- package/docs/contracts/user-type-schema.md +4 -4
- package/docs/contracts/workflow-packs.md +4 -4
- package/docs/contracts/workspace-documents.md +4 -0
- package/docs/customization.md +28 -8
- package/docs/decisions/ADR-001-kernel-swap-deferred.md +6 -6
- package/docs/decisions/ADR-002-kernel-bucket-overrides.md +11 -11
- package/docs/decisions/ADR-003-flat-cluster-subs-and-colon-syntax.md +2 -2
- package/docs/decisions/ADR-004-rule-governance-pruning.md +4 -4
- package/docs/decisions/ADR-005-subagent-worktrees.md +7 -7
- package/docs/decisions/ADR-011-domain-pack-readiness.md +6 -6
- package/docs/decisions/ADR-013-discovery-frontmatter-contract.md +3 -3
- package/docs/decisions/ADR-015-discovery-manifest-contract.md +3 -3
- package/docs/decisions/ADR-017-monorepo-physical-layout.md +10 -10
- package/docs/decisions/ADR-018-trust-and-safety-layer.md +6 -6
- package/docs/decisions/ADR-019-router-json-dist-location.md +2 -2
- package/docs/decisions/ADR-020-global-only-consumer-scope.md +2 -2
- package/docs/decisions/ADR-021-deployment-shape.md +1 -1
- package/docs/decisions/ADR-022-daily-workspace-decomposition.md +1 -1
- package/docs/decisions/ADR-027-changelog-machine-vs-manual.md +2 -2
- package/docs/decisions/ADR-028-root-layout.md +7 -7
- package/docs/decisions/ADR-029-multi-workspace-deferred.md +2 -2
- package/docs/decisions/ADR-rule-kernel-and-router.md +5 -5
- package/docs/deploy/team-deployment-posture.md +20 -0
- package/docs/development.md +17 -17
- package/docs/distribution/registries.md +32 -0
- package/docs/distribution/registry-submissions.md +85 -0
- package/docs/distribution/telemetry-schema.md +1 -1
- package/docs/getting-started-by-role.md +45 -3
- package/docs/getting-started.md +2 -2
- package/docs/guidelines/agent-infra/5w2h-analysis.md +3 -3
- package/docs/guidelines/agent-infra/ask-when-uncertain-demos.md +1 -1
- package/docs/guidelines/agent-infra/asking-and-brevity-examples.md +3 -3
- package/docs/guidelines/agent-infra/carve-out-predicates.md +3 -3
- package/docs/guidelines/agent-infra/critical-thinking.md +4 -4
- package/docs/guidelines/agent-infra/direct-answers-demos.md +1 -1
- package/docs/guidelines/agent-infra/first-principles.md +2 -2
- package/docs/guidelines/agent-infra/inversion-thinking.md +5 -5
- package/docs/guidelines/agent-infra/layered-settings.md +56 -2
- package/docs/guidelines/agent-infra/mental-models.md +3 -3
- package/docs/guidelines/agent-infra/roadmap-progress-mechanics.md +2 -2
- package/docs/guidelines/agent-infra/rule-type-governance.md +1 -1
- package/docs/guidelines/agent-infra/scqa-framework.md +5 -5
- package/docs/guidelines/agent-infra/self-improvement-pipeline.md +2 -2
- package/docs/guidelines/agent-infra/six-hats.md +3 -3
- package/docs/guidelines/agent-infra/skill-quality-checklist.md +5 -5
- package/docs/guidelines/agent-infra/systems-thinking.md +1 -1
- package/docs/guidelines/agent-infra/verify-before-complete-demos.md +1 -1
- package/docs/guidelines/augment-portability-patterns.md +4 -4
- package/docs/guidelines/cross-role-handoff.md +2 -2
- package/docs/guidelines/php/php-coding-patterns.md +1 -1
- package/docs/guidelines/prompt-templates.md +6 -6
- package/docs/maintainers/dev-mode.md +1 -1
- package/docs/mcp.md +1 -1
- package/docs/parity/bench.json +3 -3
- package/docs/parity/ruflo.md +2 -2
- package/docs/profiles.md +11 -11
- package/docs/quality.md +11 -11
- package/docs/safety.md +3 -3
- package/docs/setup/mcp-client-config.md +1 -1
- package/docs/setup/mcp-r2-bootstrap.md +1 -1
- package/docs/setup/mcp-server-docker.md +3 -3
- package/docs/setup/per-ide/windsurf.md +1 -1
- package/docs/skills-catalog.md +8 -7
- package/docs/troubleshooting.md +1 -1
- package/docs/walkthroughs/daily-workspace-a11y.md +87 -0
- package/llms.txt +7 -6
- package/package.json +1 -1
- package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
- package/scripts/_archive/README.md +2 -2
- package/scripts/_archive/_backfill_skill_domains.py +3 -3
- package/scripts/_archive/_bootstrap_tier_frontmatter.py +3 -3
- package/scripts/_archive/_p43_bodies.py +10 -10
- package/scripts/_archive/{_p43_compress.py → _p43_condense.py} +5 -5
- package/scripts/_archive/_p4_migrate.py +7 -7
- package/scripts/_archive/_phase2_shim_helper.py +1 -1
- package/scripts/_archive/_pilot_council_question.py +5 -5
- package/scripts/_cli/explain_last/inputs.py +1 -1
- package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
- package/scripts/_lib/agent_settings.py +195 -1
- package/scripts/_lib/agent_src.py +19 -19
- package/scripts/_lib/bench_ab_cache.py +162 -0
- package/scripts/_lib/bench_ab_scoring.py +209 -0
- package/scripts/_lib/{bench_caveman.py → bench_telegraph.py} +21 -21
- package/scripts/_lib/{bench_caveman_report.py → bench_telegraph_report.py} +21 -21
- package/scripts/_lib/claude_desktop_bundler.py +5 -5
- package/scripts/_lib/module_detection.py +223 -0
- package/scripts/_lib/scope_guard.sh +162 -0
- package/scripts/_phase4_bucket.py +3 -3
- package/scripts/_pilot_measure.py +4 -4
- package/scripts/_tmp_scan_framework_leakage.py +1 -1
- package/scripts/adoption_report.py +195 -0
- package/scripts/adoption_snapshot.py +219 -0
- package/scripts/adoption_status.py +166 -0
- package/scripts/ai-video/lib/parse-blueprint.sh +1 -1
- package/scripts/ai_council/advisors.py +5 -5
- package/scripts/ai_council/compile_corpus.py +1 -1
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_budget_v2_audit.py +3 -3
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_context_layer_v1_review.py +2 -2
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_inject_quiet_flag.py +1 -1
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_measure_v2.sh +1 -1
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_measure_verbosity.sh +1 -1
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_nondestructive_inline_audit.py +3 -3
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_per_task.sh +1 -1
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase6_trigger_jaccard.py +1 -1
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase_2a_budget_rebalance.py +6 -6
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_rebalancing_audit.py +1 -1
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_tier_retrofit.py +6 -6
- package/scripts/annotate_discovery.py +13 -13
- package/scripts/apply_modules_config.py +290 -0
- package/scripts/audit_adr_coverage.py +2 -2
- package/scripts/audit_auto_rules.py +2 -2
- package/scripts/audit_cloud_compatibility.py +3 -3
- package/scripts/audit_command_surface.py +9 -9
- package/scripts/audit_likelihood.py +2 -2
- package/scripts/audit_user_type_axis.py +2 -2
- package/scripts/bench_ab_cache_dispatch.py +68 -0
- package/scripts/bench_ab_clone.py +170 -0
- package/scripts/bench_ab_diff.py +227 -0
- package/scripts/bench_ab_integrity.py +143 -0
- package/scripts/bench_ab_run.py +235 -0
- package/scripts/bench_ab_task_runner.py +369 -0
- package/scripts/bench_ab_tracka_run.py +202 -0
- package/scripts/{bench_compress_memory.py → bench_condense_memory.py} +16 -16
- package/scripts/bench_run.py +33 -33
- package/scripts/bench_runner.py +2 -2
- package/scripts/bootstrap.sh +99 -0
- package/scripts/build_cloud_bundle.py +6 -6
- package/scripts/build_discovery_manifest.py +7 -7
- package/scripts/build_linear_digest.py +3 -3
- package/scripts/build_rule_trigger_matrix.py +8 -8
- package/scripts/chat_history.py +5 -5
- package/scripts/check_always_budget.py +11 -5
- package/scripts/check_augment_description_cap.py +3 -3
- package/scripts/check_cluster_patterns.py +2 -2
- package/scripts/check_command_count_messaging.py +3 -3
- package/scripts/{check_compression.py → check_condensation.py} +34 -34
- package/scripts/{check_compressed_paths.py → check_condensed_paths.py} +8 -8
- package/scripts/check_context_paths.py +7 -7
- package/scripts/check_council_layout.py +2 -2
- package/scripts/check_council_references.py +9 -9
- package/scripts/check_iron_law_prominence.py +2 -2
- package/scripts/check_kernel_rule_bundle.py +2 -2
- package/scripts/check_module_management_neutral.py +149 -0
- package/scripts/check_no_roadmap_refs.py +9 -9
- package/scripts/check_portability.py +3 -3
- package/scripts/check_public_catalog_links.py +4 -4
- package/scripts/check_references.py +7 -6
- package/scripts/check_release_pr_shape.py +112 -0
- package/scripts/check_reply_consistency.py +3 -3
- package/scripts/check_safety_floor_untouched.py +1 -1
- package/scripts/check_template_pin_drift.py +5 -5
- package/scripts/check_token_optimizer_freshness.py +3 -3
- package/scripts/ci_status.py +301 -0
- package/scripts/ci_time_ratio.py +1 -1
- package/scripts/cleanup_other_scope.sh +146 -0
- package/scripts/compile_router.py +10 -10
- package/scripts/{compress.py → condense.py} +64 -64
- package/scripts/condense.sh +18 -0
- package/scripts/{compress_memory.py → condense_memory.py} +33 -33
- package/scripts/config/presets.py +2 -2
- package/scripts/config/profiles.py +1 -1
- package/scripts/cost_by_conversation.py +3 -3
- package/scripts/cost_summary.py +7 -7
- package/scripts/count_token_optimizer_usage.sh +1 -1
- package/scripts/gen_discovery_baseline.py +5 -5
- package/scripts/generate_index.py +6 -6
- package/scripts/generate_ownership_matrix.py +10 -10
- package/scripts/generate_pack_manifests.py +1 -1
- package/scripts/ghostwriter_fixture_allowlist.txt +1 -1
- package/scripts/install +3 -3
- package/scripts/install-hooks.sh +6 -6
- package/scripts/install.py +76 -11
- package/scripts/install.sh +187 -1
- package/scripts/inventory_frontmatter.py +2 -2
- package/scripts/iron_law_sha.py +3 -3
- package/scripts/lint_agents_layout.py +14 -7
- package/scripts/lint_agents_md.py +4 -4
- package/scripts/lint_archived_skills.py +3 -3
- package/scripts/lint_artefact_frontmatter.py +2 -2
- package/scripts/lint_bench_ab.py +172 -0
- package/scripts/lint_bench_corpus.py +1 -1
- package/scripts/lint_command_tiers.py +5 -5
- package/scripts/lint_context_spine_usage.py +1 -1
- package/scripts/lint_framework_leakage.py +7 -7
- package/scripts/lint_framework_leakage_allowlist.json +144 -84
- package/scripts/lint_ghostwriter_source.py +3 -3
- package/scripts/lint_handoffs.py +1 -1
- package/scripts/lint_load_context.py +11 -11
- package/scripts/lint_media_policy_linkage.py +5 -5
- package/scripts/lint_namespace.py +1 -1
- package/scripts/lint_no_new_atomic_commands.py +2 -2
- package/scripts/lint_orchestration_dsl.py +1 -1
- package/scripts/lint_pack_boundaries.py +2 -2
- package/scripts/lint_persona_governance.py +4 -4
- package/scripts/lint_role_experiences.py +237 -0
- package/scripts/lint_rule_interactions.py +2 -2
- package/scripts/lint_rule_tiers.py +1 -1
- package/scripts/lint_trust_coherence.py +2 -2
- package/scripts/mcp_registry_submit.sh +187 -0
- package/scripts/mcp_server/tools.py +1 -1
- package/scripts/measure_frugality_savings.py +10 -10
- package/scripts/measure_patterns.py +1 -1
- package/scripts/measure_projection_bytes.py +5 -5
- package/scripts/measure_rule_budget.py +3 -3
- package/scripts/measure_skill_reduction.py +1 -1
- package/scripts/memory_lookup.py +1 -1
- package/scripts/memory_status.py +2 -2
- package/scripts/migrate_command_suggestions.py +3 -3
- package/scripts/mine_session.py +1 -1
- package/scripts/move_artefact.py +3 -3
- package/scripts/new_skill.py +2 -2
- package/scripts/pack_mcp_content.py +9 -9
- package/scripts/plan_physical_move.py +6 -6
- package/scripts/print_required_checks.py +196 -0
- package/scripts/probe_skill_registration.py +413 -0
- package/scripts/propose_modules_config.py +145 -0
- package/scripts/prototype_lint_contradictions.py +1 -1
- package/scripts/recruit_preflight.sh +152 -0
- package/scripts/refine_ticket_detect.py +3 -3
- package/scripts/release.py +20 -0
- package/scripts/render_benchmark_md.py +308 -0
- package/scripts/roadmap_progress_hook.py +1 -1
- package/scripts/run_skill_evals.py +2 -2
- package/scripts/runtime_registry.py +4 -4
- package/scripts/schemas/command.schema.json +4 -4
- package/scripts/schemas/rule.schema.json +5 -5
- package/scripts/schemas/skill.schema.json +3 -3
- package/scripts/schemas/user-type.schema.json +1 -1
- package/scripts/score_skill_selection.py +1 -1
- package/scripts/skill_collision_clusters.py +2 -2
- package/scripts/skill_linter.py +81 -81
- package/scripts/skill_overlap.py +5 -5
- package/scripts/skill_tools/audit_persona_coverage.py +2 -2
- package/scripts/skill_tools/audit_user_type_coverage.py +2 -2
- package/scripts/skill_tools/run_block_d_eval.py +1 -1
- package/scripts/skill_tools/score_skill_relevance.py +1 -1
- package/scripts/skill_tools/suggest_skill_for_task.py +1 -1
- package/scripts/skill_trigger_eval.py +3 -3
- package/scripts/smoke/kernel.sh +7 -1
- package/scripts/smoke/router.sh +5 -5
- package/scripts/smoke/skills.sh +1 -1
- package/scripts/smoke_quickstart.py +1 -1
- package/scripts/snapshot_agent_outputs.py +3 -3
- package/scripts/spotcheck_thin_root.py +1 -1
- package/scripts/{caveman_stats.py → telegraph_stats.py} +18 -18
- package/scripts/update_counts.py +1 -1
- package/scripts/validate_decision_engine.py +1 -1
- package/scripts/validate_frontmatter.py +1 -1
- package/scripts/validate_safe_paths.py +3 -3
- package/scripts/{validate_caveman_carveouts.py → validate_telegraph_carveouts.py} +7 -7
- package/scripts/verify_roadmap_closure.py +6 -6
- package/templates/consumer-settings/ONBOARDING.md +41 -0
- package/.agent-src/commands/install-via-agent.md +0 -129
- package/.agent-src/skills/compress-memory/SKILL.md +0 -131
- package/dist/ui/assets/index-D-DY1ywI.js +0 -35
- package/dist/ui/assets/index-D-DY1ywI.js.map +0 -1
- package/dist/ui/assets/index-Dqfhmg-d.css +0 -1
- package/docs/adrs/caveman/README.md +0 -9
- package/docs/contracts/caveman-telemetry.md +0 -83
- package/scripts/compress.sh +0 -18
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Materialise the `with` and `without` clones for the package-impact A/B bench.
|
|
3
|
+
|
|
4
|
+
Phase 1 Step 2 of `agents/roadmaps/road-to-package-impact-benchmark.md`.
|
|
5
|
+
|
|
6
|
+
The fixture lives at `internal/bench/ab/fixture/`. Both clones are byte-identical
|
|
7
|
+
copies of the fixture; the `with` clone additionally receives the agent-config
|
|
8
|
+
surface (`.claude/`, `.augment/`, `AGENTS.md`, `CLAUDE.md`) so a Claude Code
|
|
9
|
+
session run inside it sees the same files a consumer project would after
|
|
10
|
+
running the installer.
|
|
11
|
+
|
|
12
|
+
Idempotent: re-running without `--refresh` leaves an existing clone alone. With
|
|
13
|
+
`--refresh`, the target clone is removed and rebuilt from scratch.
|
|
14
|
+
|
|
15
|
+
The clones tree (`internal/bench/ab/clones/`) is gitignored — only this script's
|
|
16
|
+
output schema is committed.
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import argparse
|
|
21
|
+
import hashlib
|
|
22
|
+
import json
|
|
23
|
+
import os
|
|
24
|
+
import shutil
|
|
25
|
+
import sys
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
REPO_ROOT = Path(__file__).resolve().parents[1]
|
|
29
|
+
AB_ROOT = REPO_ROOT / "internal" / "bench" / "ab"
|
|
30
|
+
FIXTURE = AB_ROOT / "fixture"
|
|
31
|
+
CLONES = AB_ROOT / "clones"
|
|
32
|
+
|
|
33
|
+
# Surfaces the `with` clone inherits from the package root.
|
|
34
|
+
WITH_SURFACES = (
|
|
35
|
+
".claude",
|
|
36
|
+
".augment",
|
|
37
|
+
"AGENTS.md",
|
|
38
|
+
"CLAUDE.md",
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def die(msg: str) -> None:
    """Report a fatal error on stderr and abort the script.

    The message is prefixed with the script name. This function never
    returns normally: it always raises ``SystemExit`` with status 1.
    """
    print(f"bench_ab_clone: {msg}", file=sys.stderr)
    raise SystemExit(1)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def copytree_preserve(src: Path, dst: Path) -> None:
    """Replace `dst` with a fresh copy of the `src` tree, following symlinks.

    The package installs the `.claude/` surface as a tree of symlinks into
    `.agent-src/`. Copying those links verbatim would leave pointers that
    resolve against the package root rather than the clone, so a Claude Code
    session inside the clone could not read the rule bodies. Passing
    `symlinks=False` makes `shutil.copytree` copy the link targets' contents,
    producing standalone files inside the clone.

    Any existing tree at `dst` is removed first.
    """
    already_there = dst.exists()
    if already_there:
        shutil.rmtree(dst)
    shutil.copytree(src, dst, symlinks=False)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def materialise_clone(variant: str, target: Path) -> None:
    """Populate `target` with the fixture, plus the surface for `with`.

    Every top-level entry of FIXTURE is copied into `target` (directories via
    `copytree_preserve`, everything else via `shutil.copy2`). When `variant`
    is "with", each entry of WITH_SURFACES found under REPO_ROOT is then
    copied on top; a surface that does not exist is reported on stderr and
    skipped.
    """
    target.mkdir(parents=True, exist_ok=True)

    # Step 1: mirror the fixture, one top-level entry at a time.
    for item in FIXTURE.iterdir():
        mirrored = target / item.name
        if not item.is_dir():
            shutil.copy2(item, mirrored)
            continue
        copytree_preserve(item, mirrored)

    # Step 2: only the `with` variant receives the agent-config surface.
    if variant != "with":
        return

    for surface in WITH_SURFACES:
        origin = REPO_ROOT / surface
        if not origin.exists():
            # Best-effort: a missing surface is reported but does not fail
            sys.stderr.write(
                f"bench_ab_clone: surface '{surface}' missing in package root; "
                "with-clone may not be representative\n"
            )
            continue
        layered = target / surface
        if origin.is_dir():
            copytree_preserve(origin, layered)
        else:
            shutil.copy2(origin, layered)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def target_shape_hash() -> str:
    """Return a 16-hex-character digest of the fixture tree and surface list.

    Used by Phase 2's cache key. Computing it here keeps the cache code and
    the clone code reading the same surface definition.

    The digest covers the JSON-encoded WITH_SURFACES names followed by, for
    every regular file under FIXTURE in sorted path order, its POSIX relative
    path and its bytes. Only the surface *names* are hashed; the contents of
    the surface directories under the package root are not read here.
    """
    digest = hashlib.sha256()
    digest.update(b"with-surfaces:" + json.dumps(WITH_SURFACES).encode() + b"\n")
    regular_files = (p for p in sorted(FIXTURE.rglob("*")) if p.is_file())
    for file_path in regular_files:
        rel = file_path.relative_to(FIXTURE).as_posix()
        # One record per file: "<relpath>\n<bytes>\n".
        digest.update(f"{rel}\n".encode() + file_path.read_bytes() + b"\n")
    return digest.hexdigest()[:16]
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def write_manifest(variant: str, target: Path) -> None:
    """Write `.bench-ab-manifest.json` into `target` describing the clone.

    The manifest records the variant name, the current target-shape hash,
    the WITH_SURFACES list and the fixture path relative to the repo root,
    so other scripts can verify the clone shape.
    """
    payload = json.dumps(
        {
            "variant": variant,
            "target_shape_hash": target_shape_hash(),
            "with_surfaces": list(WITH_SURFACES),
            "fixture_relpath": FIXTURE.relative_to(REPO_ROOT).as_posix(),
        },
        indent=2,
    )
    manifest_path = target / ".bench-ab-manifest.json"
    manifest_path.write_text(payload + "\n")
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def clone(variant: str, *, refresh: bool) -> Path:
    """Build (or reuse) the clone for `variant` and return its directory.

    An existing clone is left untouched unless `refresh` is true, in which
    case it is deleted and rebuilt. A freshly built clone also gets a
    manifest file. Progress is reported on stdout.
    """
    target = CLONES / variant
    if target.exists():
        if not refresh:
            sys.stdout.write(f"bench_ab_clone: {variant} clone already present at {target} (use --refresh to rebuild)\n")
            return target
        # --refresh: start from a clean slate.
        shutil.rmtree(target)

    materialise_clone(variant, target)
    write_manifest(variant, target)
    sys.stdout.write(f"bench_ab_clone: built {variant} clone at {target}\n")
    return target
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the clone script's command line.

    Options: `--refresh` (flag), `--variant` (one of with/without/both,
    default "both") and `--print-shape-hash` (flag).
    """
    cli = argparse.ArgumentParser(
        description="Materialise `with` and `without` clones for the A/B bench."
    )
    add = cli.add_argument
    add(
        "--refresh",
        action="store_true",
        help="Force-rebuild even if the clone already exists.",
    )
    add(
        "--variant",
        choices=("with", "without", "both"),
        default="both",
        help="Which clone to materialise (default: both).",
    )
    add(
        "--print-shape-hash",
        action="store_true",
        help="Print the target-shape hash and exit without cloning.",
    )
    return cli.parse_args(argv)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def main(argv: list[str] | None = None) -> int:
    """Entry point: build the requested clone(s) or print the shape hash.

    `argv` defaults to `sys.argv[1:]`. Aborts via `die` when the fixture
    directory is missing. Returns 0 on success.
    """
    args = parse_args(sys.argv[1:] if argv is None else argv)

    if not FIXTURE.exists():
        die(f"fixture missing at {FIXTURE}")

    if args.print_shape_hash:
        sys.stdout.write(target_shape_hash() + "\n")
        return 0

    # "both" expands to the two concrete variants, `with` first.
    selected = [args.variant] if args.variant != "both" else ["with", "without"]
    for name in selected:
        clone(name, refresh=args.refresh)
    return 0
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
if __name__ == "__main__":
|
|
170
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Diff two A/B reports (one per variant) into a comparison artefact.
|
|
3
|
+
|
|
4
|
+
Phase 2 Step 4 of `agents/roadmaps/road-to-package-impact-benchmark.md`.
|
|
5
|
+
|
|
6
|
+
Inputs: two report JSON paths. Output: a JSON artefact under
|
|
7
|
+
`internal/bench/reports/ab/diff/{stamp}-{corpus}-diff.json` plus a matching
|
|
8
|
+
`.md`. Phase 5's renderer consumes this artefact to populate `docs/benchmark.md`.
|
|
9
|
+
|
|
10
|
+
The diff content depends on the corpus:
|
|
11
|
+
|
|
12
|
+
- `ab-tracka` — trigger-accuracy %, false-positive count, per-rule lift.
|
|
13
|
+
- `ab-trackb` — completion-rate per category, wall-time, tokens, cost,
|
|
14
|
+
ask-vs-act ratio, tool-call count.
|
|
15
|
+
|
|
16
|
+
Phase 2 only writes the structural skeleton (delta object with `with`,
|
|
17
|
+
`without`, `delta` keys); Phases 3 and 4 plug their real metrics into
|
|
18
|
+
the `results` blocks the runners emit, and the diff is computed in
|
|
19
|
+
`compute_track_a_diff` / `compute_track_b_diff` here.
|
|
20
|
+
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import argparse
|
|
24
|
+
import json
|
|
25
|
+
import sys
|
|
26
|
+
from datetime import datetime, timezone
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
|
|
29
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
30
|
+
REPORTS_DIR = REPO_ROOT / "internal" / "bench" / "reports" / "ab"
|
|
31
|
+
DIFF_DIR = REPORTS_DIR / "diff"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def utc_stamp() -> str:
    """Return the current UTC time as a stamp like `2024-05-01T12-30-00Z`.

    Hyphens replace the usual colons in the time part.
    """
    now = datetime.now(tz=timezone.utc)
    return now.strftime("%Y-%m-%dT%H-%M-%SZ")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def load_report(path: Path) -> dict:
    """Read and parse the JSON report stored at `path`.

    The file is handed to `json.loads` as raw bytes so the decoder picks the
    encoding (UTF-8, UTF-16 or UTF-32) itself instead of relying on the
    platform's default text encoding, which is not UTF-8 everywhere.

    Raises:
        OSError: if the file cannot be read.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    return json.loads(path.read_bytes())
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def compute_track_a_diff(with_results: dict, without_results: dict) -> dict:
    """Build the Track A comparison from the two variants' `results` blocks.

    Phase 3 populates `triggers`, `per_rule_accuracy`, `false_positives` in
    the `results` block. While Phase 3 is not yet landed, this surfaces what
    is present and zeroes what is not — it never invents numbers.

    Returns a dict with three sections:
      * `trigger_accuracy`: both values coerced to float (0.0 when missing
        or not convertible) plus their difference rounded to 3 places;
      * `false_positives`: the raw values, defaulting to 0;
      * `per_rule`: the raw `per_rule_accuracy` mappings, defaulting to {}.
    """

    def _as_number(block: dict, field: str, fallback: float = 0.0) -> float:
        # Missing, None or non-numeric values collapse to the fallback.
        raw = block.get(field, fallback)
        try:
            number = float(raw)
        except (TypeError, ValueError):
            number = fallback
        return number

    accuracy = {
        "with": _as_number(with_results, "trigger_accuracy"),
        "without": _as_number(without_results, "trigger_accuracy"),
    }
    accuracy["delta_pct_points"] = round(accuracy["with"] - accuracy["without"], 3)

    false_positives = {
        "with": with_results.get("false_positives", 0),
        "without": without_results.get("false_positives", 0),
    }
    per_rule = {
        "with": with_results.get("per_rule_accuracy", {}),
        "without": without_results.get("per_rule_accuracy", {}),
    }
    return {
        "trigger_accuracy": accuracy,
        "false_positives": false_positives,
        "per_rule": per_rule,
    }
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def compute_track_b_diff(with_results: dict, without_results: dict) -> dict:
    """Build the Track B comparison from the two variants' `results` blocks.

    Returns a dict with:
      * `per_category`: for every category seen in either variant (sorted),
        the raw per-category entries, defaulting to {};
      * `wall_time_seconds`, `tokens`, `cost_usd`: both values coerced to
        float (0.0 when missing or not convertible) plus a rounded `delta`
        (3, 3 and 4 decimal places respectively);
      * `ask_vs_act_ratio`, `tool_calls_per_task`: both values coerced to
        float, without a delta.
    """

    def _number(block: dict, field: str) -> float:
        # Reads an already-aggregated value; nothing is averaged here.
        try:
            return float(block.get(field, 0.0))
        except (TypeError, ValueError):
            return 0.0

    def _pair(field: str) -> dict:
        return {
            "with": _number(with_results, field),
            "without": _number(without_results, field),
        }

    def _pair_with_delta(field: str, ndigits: int) -> dict:
        entry = _pair(field)
        entry["delta"] = round(entry["with"] - entry["without"], ndigits)
        return entry

    cats_with = with_results.get("per_category", {})
    cats_without = without_results.get("per_category", {})
    per_category = {
        name: {
            "with": cats_with.get(name, {}),
            "without": cats_without.get(name, {}),
        }
        for name in sorted(set(cats_with) | set(cats_without))
    }

    return {
        "per_category": per_category,
        "wall_time_seconds": _pair_with_delta("mean_wall_time", 3),
        "tokens": _pair_with_delta("mean_tokens", 3),
        "cost_usd": _pair_with_delta("mean_cost_usd", 4),
        "ask_vs_act_ratio": _pair("ask_vs_act_ratio"),
        "tool_calls_per_task": _pair("mean_tool_calls"),
    }
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def render_markdown(diff: dict) -> str:
    """Render the diff artefact as a small Markdown document.

    The output has a title naming the corpus, a bullet list with the stamp
    and the two report paths, and a `## Delta` section containing the
    `delta` entry (or `{}` when absent) as an indented JSON code block.
    The text ends with a single trailing newline.

    Raises KeyError if `corpus`, `stamp`, `with_report` or `without_report`
    is missing from `diff`.
    """
    heading = f"# A/B Bench Diff — {diff['corpus']}"
    provenance = [
        f"- Stamp: `{diff['stamp']}`",
        f"- With: `{diff['with_report']}`",
        f"- Without: `{diff['without_report']}`",
    ]
    delta_json = json.dumps(diff.get("delta", {}), indent=2)
    body = [
        heading,
        "",
        *provenance,
        "",
        "## Delta",
        "",
        "```json",
        delta_json,
        "```",
    ]
    return "\n".join(body) + "\n"
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the diff script's command line.

    Two positional report paths (`with_report`, `without_report`) and an
    optional `--out-dir`, which defaults to DIFF_DIR. All three are parsed
    as `Path` objects.
    """
    cli = argparse.ArgumentParser(
        description="Diff two A/B bench reports (one per variant)."
    )
    cli.add_argument(
        "with_report",
        type=Path,
        help="Report JSON for variant=with",
    )
    cli.add_argument(
        "without_report",
        type=Path,
        help="Report JSON for variant=without",
    )
    cli.add_argument(
        "--out-dir",
        type=Path,
        default=DIFF_DIR,
        help="Where to write the diff artefact (default: internal/bench/reports/ab/diff/)",
    )
    return cli.parse_args(argv)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _repo_relative(path: Path) -> str:
    """Return `path` relative to REPO_ROOT, or its resolved form if outside it."""
    resolved = path.resolve()
    try:
        return str(resolved.relative_to(REPO_ROOT))
    except ValueError:
        # `relative_to` raises when the path is not located under REPO_ROOT;
        # a report stored elsewhere is still a valid input.
        return str(resolved)


def main(argv: list[str] | None = None) -> int:
    """Entry point: validate two variant reports and write their diff.

    `argv` defaults to `sys.argv[1:]`. Returns 1 (with a message on stderr)
    when a report file is missing, when a report's `variant` field is not the
    expected one, or when the two reports name different corpora. Otherwise
    writes `{stamp}-{corpus}-diff.json` and a matching `.md` into the output
    directory and returns 0.

    Report paths are recorded relative to the repo root when they live inside
    it and as absolute paths otherwise. The final status line likewise falls
    back to the path as given, so a relative or out-of-repo `--out-dir` no
    longer raises ValueError after the artefacts have been written. Both
    artefacts are written as UTF-8 (the Markdown title contains an em dash).
    """
    args = parse_args(argv if argv is not None else sys.argv[1:])
    if not args.with_report.exists():
        sys.stderr.write(f"bench_ab_diff: missing {args.with_report}\n")
        return 1
    if not args.without_report.exists():
        sys.stderr.write(f"bench_ab_diff: missing {args.without_report}\n")
        return 1
    with_rep = load_report(args.with_report)
    without_rep = load_report(args.without_report)
    if with_rep.get("variant") != "with":
        sys.stderr.write(
            f"bench_ab_diff: {args.with_report} variant is "
            f"{with_rep.get('variant')!r}, expected 'with'\n"
        )
        return 1
    if without_rep.get("variant") != "without":
        sys.stderr.write(
            f"bench_ab_diff: {args.without_report} variant is "
            f"{without_rep.get('variant')!r}, expected 'without'\n"
        )
        return 1
    if with_rep.get("corpus") != without_rep.get("corpus"):
        sys.stderr.write(
            f"bench_ab_diff: corpus mismatch — with={with_rep.get('corpus')} "
            f"without={without_rep.get('corpus')}\n"
        )
        return 1
    corpus = with_rep.get("corpus") or "unknown"
    with_results = with_rep.get("results", {})
    without_results = without_rep.get("results", {})
    if corpus == "ab-tracka":
        delta = compute_track_a_diff(with_results, without_results)
    elif corpus == "ab-trackb":
        delta = compute_track_b_diff(with_results, without_results)
    else:
        # Unknown corpus: pass both result blocks through untouched.
        delta = {
            "note": f"no diff strategy registered for corpus {corpus!r}",
            "with_results": with_results,
            "without_results": without_results,
        }
    stamp = utc_stamp()
    diff = {
        "schema": "ab-bench-diff/0.1",
        "stamp": stamp,
        "corpus": corpus,
        "with_report": _repo_relative(args.with_report),
        "without_report": _repo_relative(args.without_report),
        "delta": delta,
    }
    args.out_dir.mkdir(parents=True, exist_ok=True)
    json_path = args.out_dir / f"{stamp}-{corpus}-diff.json"
    md_path = json_path.with_suffix(".md")
    json_path.write_text(json.dumps(diff, indent=2) + "\n", encoding="utf-8")
    md_path.write_text(render_markdown(diff), encoding="utf-8")
    try:
        shown = json_path.relative_to(REPO_ROOT)
    except ValueError:
        # Relative or out-of-repo --out-dir: report the path as given.
        shown = json_path
    sys.stdout.write(f"bench_ab_diff: wrote {shown}\n")
    return 0
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
if __name__ == "__main__":
|
|
227
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Assert the two A/B clones differ only in the agent-config surface.
|
|
3
|
+
|
|
4
|
+
Phase 1 Step 3 of `agents/roadmaps/road-to-package-impact-benchmark.md`.
|
|
5
|
+
|
|
6
|
+
The bench's whole validity hinges on the two clones being identical except for
|
|
7
|
+
whether the agent-config surface is present. This script enumerates the file
|
|
8
|
+
trees of both clones and compares byte-by-byte, allowing differences only at
|
|
9
|
+
the documented surface paths (`.claude/`, `.augment/`, `AGENTS.md`,
|
|
10
|
+
`CLAUDE.md`) and the variant manifest.
|
|
11
|
+
|
|
12
|
+
Exit code:
|
|
13
|
+
0 — clones are identical except at the allowed surface
|
|
14
|
+
1 — clone is missing, or a task-target file diverges between variants
|
|
15
|
+
2 — usage error
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import argparse
|
|
20
|
+
import hashlib
|
|
21
|
+
import sys
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
REPO_ROOT = Path(__file__).resolve().parents[1]
|
|
25
|
+
AB_ROOT = REPO_ROOT / "internal" / "bench" / "ab"
|
|
26
|
+
CLONES = AB_ROOT / "clones"
|
|
27
|
+
|
|
28
|
+
# Surfaces where divergence is expected (variant-bearing).
|
|
29
|
+
ALLOWED_DELTA_PATHS = (
|
|
30
|
+
".claude",
|
|
31
|
+
".augment",
|
|
32
|
+
"AGENTS.md",
|
|
33
|
+
"CLAUDE.md",
|
|
34
|
+
)
|
|
35
|
+
# Variant-distinguishing manifest written by bench_ab_clone.
|
|
36
|
+
ALLOWED_DELTA_FILES = (
|
|
37
|
+
".bench-ab-manifest.json",
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def is_under_allowed_path(rel: Path) -> bool:
    """Tell whether a clone-relative path may legitimately differ.

    True when the path's first component is one of ALLOWED_DELTA_PATHS, or
    when the whole path (POSIX form) is one of ALLOWED_DELTA_FILES. A path
    with no components is never allowed.
    """
    components = rel.parts
    if len(components) == 0:
        return False
    return (
        components[0] in ALLOWED_DELTA_PATHS
        or rel.as_posix() in ALLOWED_DELTA_FILES
    )
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def file_hash(path: Path) -> str:
    """Return the SHA-256 hex digest of the file at `path`.

    The file is read in 64 KiB chunks rather than loaded whole.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(65536)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def index_clone(root: Path) -> dict[str, str]:
    """Map every regular file under `root` to its SHA-256 digest.

    Keys are POSIX-style paths relative to `root`; entries are inserted in
    sorted path order. Anything for which `is_file()` is false is skipped.
    """
    return {
        entry.relative_to(root).as_posix(): file_hash(entry)
        for entry in sorted(root.rglob("*"))
        if entry.is_file()
    }
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the integrity script's command line (a single `--verbose` flag)."""
    cli = argparse.ArgumentParser(
        description="Verify the A/B clones differ only in the agent-config surface."
    )
    cli.add_argument(
        "--verbose",
        help="Print every checked file (default: only divergences)",
        action="store_true",
    )
    return cli.parse_args(argv)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def main(argv: list[str] | None = None) -> int:
    """Entry point: compare the `with` and `without` clones.

    `argv` defaults to `sys.argv[1:]`. Returns 1 when either clone directory
    is missing or when any file outside the allowed surface exists in only
    one clone or differs between them; returns 0 otherwise. Offending paths
    are listed on stderr.
    """
    args = parse_args(sys.argv[1:] if argv is None else argv)

    roots = {"with": CLONES / "with", "without": CLONES / "without"}
    for label, root in roots.items():
        if not root.exists():
            sys.stderr.write(
                f"bench_ab_integrity: {label} clone missing at {root} — run scripts/bench_ab_clone.py first\n"
            )
            return 1

    with_index = index_clone(roots["with"])
    without_index = index_clone(roots["without"])
    with_names = set(with_index)
    without_names = set(without_index)

    def _outside_surface(names: set[str]) -> list[str]:
        # Sorted relpaths that are NOT covered by the allowed surface.
        return [rel for rel in sorted(names) if not is_under_allowed_path(Path(rel))]

    # Files present in only one clone must all sit under the allowed surface.
    bad_only_with = _outside_surface(with_names - without_names)
    bad_only_without = _outside_surface(without_names - with_names)

    # Files present in both must match byte-for-byte unless under the surface.
    shared = sorted(with_names & without_names)
    bad_diff = [
        rel
        for rel in shared
        if with_index[rel] != without_index[rel] and not is_under_allowed_path(Path(rel))
    ]

    # NOTE(review): the --verbose help text promises one line per checked
    # file, but only this count summary is printed — confirm which is intended.
    if args.verbose:
        sys.stdout.write(
            f"bench_ab_integrity: with={len(with_index)} files, without={len(without_index)} files, shared={len(shared)}\n"
        )

    findings = (
        (" files only in `with` (NOT in allowed surface):\n", " + ", bad_only_with),
        (" files only in `without` (NOT in allowed surface):\n", " - ", bad_only_without),
        (" files present in both but byte-divergent:\n", " ~ ", bad_diff),
    )
    if not any(paths for _, _, paths in findings):
        sys.stdout.write(
            "bench_ab_integrity: clones differ only at the allowed surface (.claude, .augment, AGENTS.md, CLAUDE.md, manifest).\n"
        )
        return 0

    sys.stderr.write("bench_ab_integrity: INTEGRITY FAILURE\n")
    for heading, bullet, paths in findings:
        if not paths:
            continue
        sys.stderr.write(heading)
        for rel in paths:
            sys.stderr.write(f"{bullet}{rel}\n")
    return 1
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
if __name__ == "__main__":
|
|
143
|
+
raise SystemExit(main())
|