@event4u/agent-config 3.3.0 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/README.md +2 -2
- package/.agent-src/commands/agent-handoff.md +31 -2
- package/.agent-src/commands/agent-status.md +5 -5
- package/.agent-src/commands/agents/audit.md +8 -8
- package/.agent-src/commands/agents/init.md +25 -1
- package/.agent-src/commands/agents/optimize.md +3 -3
- package/.agent-src/commands/agents/user.md +1 -1
- package/.agent-src/commands/agents.md +1 -1
- package/.agent-src/commands/analyze-reference-repo.md +1 -1
- package/.agent-src/commands/check-current-md.md +8 -8
- package/.agent-src/commands/{compress.md → condense.md} +55 -55
- package/.agent-src/commands/context/create.md +7 -4
- package/.agent-src/commands/context/refactor.md +3 -1
- package/.agent-src/commands/feature/dev.md +1 -1
- package/.agent-src/commands/feature/explore.md +1 -1
- package/.agent-src/commands/feature/plan.md +10 -8
- package/.agent-src/commands/feature/refactor.md +3 -1
- package/.agent-src/commands/feature/roadmap.md +7 -4
- package/.agent-src/commands/fix/portability.md +3 -3
- package/.agent-src/commands/fix/refs.md +4 -4
- package/.agent-src/commands/ghostwriter.md +2 -2
- package/.agent-src/commands/memory/learn-low-impact.md +3 -3
- package/.agent-src/commands/module/explore.md +34 -8
- package/.agent-src/commands/optimize/agents-dir.md +9 -7
- package/.agent-src/commands/optimize/augmentignore.md +2 -2
- package/.agent-src/commands/optimize/skills.md +9 -9
- package/.agent-src/commands/post-as.md +1 -1
- package/.agent-src/commands/project-analyze.md +2 -2
- package/.agent-src/commands/project-health.md +3 -2
- package/.agent-src/commands/research/deep.md +1 -1
- package/.agent-src/commands/research/report.md +1 -1
- package/.agent-src/commands/research.md +1 -1
- package/.agent-src/commands/roadmap/ai-council.md +1 -1
- package/.agent-src/commands/roadmap/create.md +9 -4
- package/.agent-src/commands/rule-compliance-audit.md +1 -1
- package/.agent-src/commands/upstream-contribute.md +14 -14
- package/.agent-src/commands/video/from-script.md +1 -1
- package/.agent-src/commands/video/scene.md +1 -1
- package/.agent-src/commands/video/stitch.md +1 -1
- package/.agent-src/commands/video/storyboard.md +1 -1
- package/.agent-src/commands/video.md +1 -1
- package/.agent-src/contexts/augment-infrastructure.md +1 -1
- package/.agent-src/contexts/authority/commit-mechanics.md +15 -0
- package/.agent-src/contexts/authority/kernel-rule-edits.md +3 -3
- package/.agent-src/contexts/authority/scope-mechanics.md +1 -1
- package/.agent-src/contexts/communication/rules-auto/augment-source-of-truth-mechanics.md +28 -28
- package/.agent-src/contexts/communication/rules-auto/skill-quality-mechanics.md +4 -4
- package/.agent-src/contexts/communication/rules-auto/think-before-action-mechanics.md +2 -2
- package/.agent-src/contexts/contracts/artifact-engagement-flow.md +6 -6
- package/.agent-src/contexts/contracts/command-suggestion-flow.md +3 -3
- package/.agent-src/contexts/contracts/emergency-triage-block.md +4 -4
- package/.agent-src/contexts/contracts/frugality-charter.md +3 -3
- package/.agent-src/contexts/documentation-hierarchy.md +14 -7
- package/.agent-src/contexts/execution/autonomy-examples.md +1 -1
- package/.agent-src/contexts/execution/cheap-question-mechanics.md +39 -2
- package/.agent-src/contexts/execution/roadmap-process-loop.md +28 -5
- package/.agent-src/contexts/override-system.md +5 -5
- package/.agent-src/ghostwriter/fictional-fixture-v1.md +1 -1
- package/.agent-src/personas/advisors/first-principles.md +1 -1
- package/.agent-src/personas/hollywood-director.md +1 -1
- package/.agent-src/rules/architecture.md +5 -1
- package/.agent-src/rules/augment-edit-discipline.md +5 -5
- package/.agent-src/rules/augment-source-of-truth.md +15 -15
- package/.agent-src/rules/commit-conventions.md +1 -1
- package/.agent-src/rules/commit-policy.md +10 -0
- package/.agent-src/rules/domain-adoption-policy.md +3 -3
- package/.agent-src/rules/fast-path-marker-visibility.md +3 -3
- package/.agent-src/rules/finance-safety-floor.md +1 -1
- package/.agent-src/rules/framework-neutrality-in-generic-skills.md +8 -8
- package/.agent-src/rules/git-history-discipline.md +1 -1
- package/.agent-src/rules/improve-before-implement.md +2 -2
- package/.agent-src/rules/language-and-tone.md +2 -2
- package/.agent-src/rules/media-governance-routing.md +5 -5
- package/.agent-src/rules/no-attribution-footers.md +1 -0
- package/.agent-src/rules/no-cheap-questions.md +3 -0
- package/.agent-src/rules/no-decorative-emojis-in-git-surfaces.md +111 -0
- package/.agent-src/rules/no-pr-progress-comments.md +118 -0
- package/.agent-src/rules/no-roadmap-references.md +3 -3
- package/.agent-src/rules/non-destructive-by-default.md +1 -1
- package/.agent-src/rules/persona-governance.md +3 -3
- package/.agent-src/rules/preservation-guard.md +15 -15
- package/.agent-src/rules/roadmap-ci-steps-policy.md +7 -3
- package/.agent-src/rules/rule-type-governance.md +1 -1
- package/.agent-src/rules/skill-quality.md +1 -1
- package/.agent-src/rules/{caveman-speak.md → telegraph-speak.md} +15 -15
- package/.agent-src/rules/token-optimizer-maintenance.md +6 -6
- package/.agent-src/skills/agent-docs-writing/SKILL.md +17 -11
- package/.agent-src/skills/agents-md-thin-root/SKILL.md +9 -9
- package/.agent-src/skills/check-refs/SKILL.md +2 -2
- package/.agent-src/skills/code-refactoring/SKILL.md +2 -2
- package/.agent-src/skills/command-writing/SKILL.md +19 -19
- package/.agent-src/skills/comp-banding/SKILL.md +1 -1
- package/.agent-src/skills/condense-memory/SKILL.md +131 -0
- package/.agent-src/skills/context-authoring/SKILL.md +2 -2
- package/.agent-src/skills/context-document/SKILL.md +5 -3
- package/.agent-src/skills/copilot-agents-optimization/SKILL.md +3 -3
- package/.agent-src/skills/description-assist/SKILL.md +2 -2
- package/.agent-src/skills/git-workflow/SKILL.md +1 -1
- package/.agent-src/skills/guideline-writing/SKILL.md +5 -5
- package/.agent-src/skills/learning-to-rule-or-skill/SKILL.md +4 -4
- package/.agent-src/skills/lint-skills/SKILL.md +3 -3
- package/.agent-src/skills/md-language-check/SKILL.md +2 -2
- package/.agent-src/skills/module-detect-on-the-fly/SKILL.md +138 -0
- package/.agent-src/skills/module-management/SKILL.md +166 -94
- package/.agent-src/skills/override-management/SKILL.md +1 -1
- package/.agent-src/skills/persona-writing/SKILL.md +5 -5
- package/.agent-src/skills/positioning-strategy/SKILL.md +1 -1
- package/.agent-src/skills/project-docs/SKILL.md +6 -4
- package/.agent-src/skills/readme-reviewer/SKILL.md +2 -2
- package/.agent-src/skills/roadmap-management/SKILL.md +13 -1
- package/.agent-src/skills/roadmap-writing/SKILL.md +4 -2
- package/.agent-src/skills/rule-refactor/SKILL.md +5 -5
- package/.agent-src/skills/rule-writing/SKILL.md +18 -18
- package/.agent-src/skills/script-writing/SKILL.md +1 -1
- package/.agent-src/skills/skill-improvement-pipeline/SKILL.md +6 -6
- package/.agent-src/skills/skill-management/SKILL.md +21 -21
- package/.agent-src/skills/skill-reviewer/SKILL.md +2 -2
- package/.agent-src/skills/skill-writing/SKILL.md +8 -8
- package/.agent-src/skills/skill-writing/evals/triggers.json +1 -1
- package/.agent-src/skills/token-optimizer/SKILL.md +4 -4
- package/.agent-src/skills/unit-economics-modeling/SKILL.md +1 -1
- package/.agent-src/skills/upstream-contribute/SKILL.md +17 -17
- package/.agent-src/templates/AGENTS.md +1 -0
- package/.agent-src/templates/agent-settings.md +24 -13
- package/.agent-src/templates/agents/agent-project-settings.example.yml +61 -2
- package/.agent-src/templates/command.md +5 -5
- package/.agent-src/templates/contexts.md +1 -1
- package/.agent-src/templates/copilot-instructions.md +8 -8
- package/.agent-src/templates/features.md +1 -1
- package/.agent-src/templates/hooks/pre-commit-frontmatter +2 -2
- package/.agent-src/templates/hooks/pre-commit-roadmap-progress +3 -3
- package/.agent-src/templates/persona.md +2 -2
- package/.agent-src/templates/roadmaps.md +1 -1
- package/.agent-src/templates/rule.md +13 -13
- package/.agent-src/templates/scripts/memory_lookup.py +1 -1
- package/.agent-src/templates/scripts/memory_status.py +2 -2
- package/.agent-src/templates/scripts/work_engine/_lib/agent_settings.py +195 -1
- package/.agent-src/templates/scripts/work_engine/orchestration.py +1 -1
- package/.agent-src/templates/skill-archive-note.md +5 -5
- package/.agent-src/templates/skill.md +1 -1
- package/.claude-plugin/marketplace.json +4 -4
- package/AGENTS.md +16 -17
- package/CHANGELOG.md +216 -3
- package/CONTRIBUTING.md +31 -12
- package/README.md +21 -12
- package/config/agent-settings.template.yml +22 -2
- package/config/discovery/unassigned-artefacts.yml +24 -24
- package/config/profiles/full.ini +1 -1
- package/dist/cli/agent-config.js +52 -3
- package/dist/cli/agent-config.js.map +1 -1
- package/dist/cli/commands/uiServe.js +9 -0
- package/dist/cli/commands/uiServe.js.map +1 -1
- package/dist/cli/registry.js +2 -1
- package/dist/cli/registry.js.map +1 -1
- package/dist/discovery/deprecation-report.md +1 -1
- package/dist/discovery/discovery-manifest.json +649 -606
- package/dist/discovery/discovery-manifest.json.sha256 +1 -1
- package/dist/discovery/discovery-manifest.summary.md +4 -4
- package/dist/discovery/orphan-report.md +1 -1
- package/dist/discovery/packs.json +439 -437
- package/dist/discovery/trust-report.md +5 -5
- package/dist/discovery/workspaces.json +450 -448
- package/dist/install/atomic.js +92 -0
- package/dist/install/atomic.js.map +1 -0
- package/dist/install/conflict.js +196 -0
- package/dist/install/conflict.js.map +1 -0
- package/dist/install/detect.js +218 -0
- package/dist/install/detect.js.map +1 -0
- package/dist/install/paths.js +82 -0
- package/dist/install/paths.js.map +1 -0
- package/dist/install/plan.js +157 -0
- package/dist/install/plan.js.map +1 -0
- package/dist/install/txlog.js +140 -0
- package/dist/install/txlog.js.map +1 -0
- package/dist/install/types.js +19 -0
- package/dist/install/types.js.map +1 -0
- package/dist/install/wizard-plan.js +184 -0
- package/dist/install/wizard-plan.js.map +1 -0
- package/dist/mcp/registry-manifest.json +4 -4
- package/dist/router.json +67 -19
- package/dist/server/app.js +6 -0
- package/dist/server/app.js.map +1 -1
- package/dist/server/routes/install.js +358 -0
- package/dist/server/routes/install.js.map +1 -0
- package/dist/server/routes/wizard.js +468 -32
- package/dist/server/routes/wizard.js.map +1 -1
- package/dist/server/routes/workspace.js +396 -0
- package/dist/server/routes/workspace.js.map +1 -0
- package/dist/server/schemas/settings.js +5 -3
- package/dist/server/schemas/settings.js.map +1 -1
- package/dist/ui/assets/index-BDAhhpDV.js +40 -0
- package/dist/ui/assets/index-BDAhhpDV.js.map +1 -0
- package/dist/ui/assets/index-BXZILUxe.css +1 -0
- package/dist/ui/index.html +2 -2
- package/docs/MIGRATION.md +1 -1
- package/docs/adrs/cost/0001-hard-stop-hook.md +1 -1
- package/docs/adrs/router/0001-three-tier-routing.md +4 -4
- package/docs/adrs/schema/0001-json-schema-frontmatter.md +1 -1
- package/docs/adrs/smoke/0001-per-tier-smoke-scripts.md +4 -4
- package/docs/adrs/{caveman → telegraph}/0001-default-off-until-bench.md +9 -9
- package/docs/adrs/telegraph/README.md +9 -0
- package/docs/architecture/augment-projection.md +4 -4
- package/docs/architecture/claude-bundle.md +1 -1
- package/docs/architecture/current-onboard-baseline.md +3 -3
- package/docs/architecture/multi-tool-projection.md +10 -10
- package/docs/architecture/source-projection.md +27 -27
- package/docs/architecture.md +19 -15
- package/docs/archive/CHANGELOG-pre-2.11.0.md +2 -2
- package/docs/archive/CHANGELOG-pre-2.15.0.md +3 -3
- package/docs/archive/CHANGELOG-pre-2.16.0.md +1 -1
- package/docs/archive/CHANGELOG-pre-2.2.0.md +70 -70
- package/docs/archive/CHANGELOG-pre-2.20.0.md +2 -2
- package/docs/archive/CHANGELOG-pre-2.25.0.md +15 -15
- package/docs/archive/CHANGELOG-pre-3.0.0.md +4 -4
- package/docs/archive/CHANGELOG-pre-3.1.0.md +2 -2
- package/docs/archive/CHANGELOG-pre-3.2.0.md +3 -3
- package/docs/benchmark.md +65 -0
- package/docs/benchmarks.md +16 -16
- package/docs/catalog.md +17 -15
- package/docs/contracts/CHANGELOG-conventions.md +1 -1
- package/docs/contracts/STABILITY.md +2 -2
- package/docs/contracts/adoption-signal-floor.md +110 -0
- package/docs/contracts/adr-chat-history-split.md +4 -4
- package/docs/contracts/adr-command-suggestion.md +4 -4
- package/docs/contracts/adr-gtm-context-spine.md +1 -1
- package/docs/contracts/adr-implement-ticket-runtime.md +4 -4
- package/docs/contracts/adr-install-user-type-axis.md +1 -1
- package/docs/contracts/adr-layout.md +2 -2
- package/docs/contracts/adr-product-ui-track.md +10 -10
- package/docs/contracts/adr-user-types-axis.md +3 -3
- package/docs/contracts/adr-wing4-context-spine.md +1 -1
- package/docs/contracts/agent-memory-contract.md +3 -3
- package/docs/contracts/agents-md-tech-stack.md +2 -2
- package/docs/contracts/ai-council-config.md +2 -2
- package/docs/contracts/at-rest-encryption.md +4 -0
- package/docs/contracts/audit-log-v1.md +1 -1
- package/docs/contracts/benchmark-ab-contract.md +101 -0
- package/docs/contracts/benchmark-corpus-spec.md +1 -1
- package/docs/contracts/branch-protection-policy.md +98 -0
- package/docs/contracts/ci-cost-budget.md +106 -0
- package/docs/contracts/ci-green-floor.md +141 -0
- package/docs/contracts/command-clusters.md +6 -6
- package/docs/contracts/command-surface-tiers.md +2 -2
- package/docs/contracts/command-taxonomy.md +2 -2
- package/docs/contracts/{compression-default-kill-criterion.md → condensation-default-kill-criterion.md} +29 -29
- package/docs/contracts/config-presets.md +1 -1
- package/docs/contracts/context-paths.md +3 -3
- package/docs/contracts/context-spine.md +1 -1
- package/docs/contracts/cost-summary-schema.md +12 -12
- package/docs/contracts/cross-wing-handoff.md +4 -4
- package/docs/contracts/daily-workspace.md +4 -0
- package/docs/contracts/decision-trace-v1.md +2 -2
- package/docs/contracts/discovery-manifest.md +4 -4
- package/docs/contracts/explain-modes.md +4 -0
- package/docs/contracts/file-ownership-matrix.json +3493 -3318
- package/docs/contracts/file-ownership-matrix.md +3 -3
- package/docs/contracts/frontmatter-contract.md +4 -4
- package/docs/contracts/ghostwriter-schema.md +3 -3
- package/docs/contracts/gui-wizard.md +110 -97
- package/docs/contracts/harness-expectations.md +123 -0
- package/docs/contracts/host-agent-protocol.md +4 -0
- package/docs/contracts/implement-ticket-flow.md +9 -9
- package/docs/contracts/install-scopes.md +77 -0
- package/docs/contracts/iron-law-overrides.txt +1 -1
- package/docs/contracts/kernel-membership.md +26 -26
- package/docs/contracts/linear-ai-rules-inclusion.md +1 -1
- package/docs/contracts/linter-structural-model.md +2 -2
- package/docs/contracts/load-context-budget-model.md +4 -4
- package/docs/contracts/load-context-schema.md +13 -13
- package/docs/contracts/local-analytics.md +4 -0
- package/docs/contracts/local-knowledge-ingestion.md +1 -1
- package/docs/contracts/mcp-cloud-scope.md +2 -2
- package/docs/contracts/mcp-phase-1-scope.md +3 -3
- package/docs/contracts/measurement-baseline.md +5 -5
- package/docs/contracts/mental-models.md +30 -30
- package/docs/contracts/multi-tool-projection-fidelity.md +4 -4
- package/docs/contracts/namespace.md +4 -4
- package/docs/contracts/orchestration-dsl-v1.md +7 -7
- package/docs/contracts/package-self-orientation.md +12 -12
- package/docs/contracts/persona-schema.md +6 -6
- package/docs/contracts/pilot/language-and-tone.md +1 -1
- package/docs/contracts/plain-language-surface.md +117 -0
- package/docs/contracts/profile-system.md +3 -3
- package/docs/contracts/release-pr-gating.md +103 -0
- package/docs/contracts/role-experience.md +3 -3
- package/docs/contracts/rule-classification.md +13 -13
- package/docs/contracts/rule-interactions.md +4 -4
- package/docs/contracts/rule-interactions.yml +30 -30
- package/docs/contracts/rule-priority-hierarchy.md +13 -13
- package/docs/contracts/rule-router.md +2 -2
- package/docs/contracts/safety-model.md +1 -1
- package/docs/contracts/skill-distribution-channels.md +61 -0
- package/docs/contracts/skill-domains.md +2 -2
- package/docs/contracts/smoke-contracts.md +5 -5
- package/docs/contracts/telegraph-telemetry.md +83 -0
- package/docs/contracts/trust-and-safety.md +5 -5
- package/docs/contracts/ui-stack-extension.md +7 -7
- package/docs/contracts/ui-track-flow.md +9 -9
- package/docs/contracts/user-type-schema.md +4 -4
- package/docs/contracts/workflow-packs.md +4 -4
- package/docs/contracts/workspace-documents.md +4 -0
- package/docs/customization.md +28 -8
- package/docs/decisions/ADR-001-kernel-swap-deferred.md +6 -6
- package/docs/decisions/ADR-002-kernel-bucket-overrides.md +11 -11
- package/docs/decisions/ADR-003-flat-cluster-subs-and-colon-syntax.md +2 -2
- package/docs/decisions/ADR-004-rule-governance-pruning.md +4 -4
- package/docs/decisions/ADR-005-subagent-worktrees.md +7 -7
- package/docs/decisions/ADR-011-domain-pack-readiness.md +6 -6
- package/docs/decisions/ADR-013-discovery-frontmatter-contract.md +3 -3
- package/docs/decisions/ADR-015-discovery-manifest-contract.md +3 -3
- package/docs/decisions/ADR-017-monorepo-physical-layout.md +10 -10
- package/docs/decisions/ADR-018-trust-and-safety-layer.md +6 -6
- package/docs/decisions/ADR-019-router-json-dist-location.md +2 -2
- package/docs/decisions/ADR-020-global-only-consumer-scope.md +2 -2
- package/docs/decisions/ADR-021-deployment-shape.md +3 -3
- package/docs/decisions/ADR-022-daily-workspace-decomposition.md +1 -1
- package/docs/decisions/ADR-027-changelog-machine-vs-manual.md +2 -2
- package/docs/decisions/ADR-028-root-layout.md +7 -7
- package/docs/decisions/ADR-029-multi-workspace-deferred.md +2 -2
- package/docs/decisions/ADR-rule-kernel-and-router.md +5 -5
- package/docs/deploy/connector-setup.md +2 -2
- package/docs/deploy/policy-cookbook.md +2 -2
- package/docs/deploy/team-deployment-posture.md +20 -0
- package/docs/development.md +17 -17
- package/docs/distribution/registries.md +32 -0
- package/docs/distribution/registry-submissions.md +85 -0
- package/docs/distribution/telemetry-schema.md +1 -1
- package/docs/getting-started-by-role.md +45 -3
- package/docs/getting-started.md +2 -2
- package/docs/guidelines/agent-infra/5w2h-analysis.md +3 -3
- package/docs/guidelines/agent-infra/ask-when-uncertain-demos.md +1 -1
- package/docs/guidelines/agent-infra/asking-and-brevity-examples.md +3 -3
- package/docs/guidelines/agent-infra/carve-out-predicates.md +3 -3
- package/docs/guidelines/agent-infra/critical-thinking.md +4 -4
- package/docs/guidelines/agent-infra/direct-answers-demos.md +1 -1
- package/docs/guidelines/agent-infra/first-principles.md +2 -2
- package/docs/guidelines/agent-infra/inversion-thinking.md +5 -5
- package/docs/guidelines/agent-infra/layered-settings.md +56 -2
- package/docs/guidelines/agent-infra/mental-models.md +3 -3
- package/docs/guidelines/agent-infra/roadmap-progress-mechanics.md +2 -2
- package/docs/guidelines/agent-infra/rule-type-governance.md +1 -1
- package/docs/guidelines/agent-infra/scqa-framework.md +5 -5
- package/docs/guidelines/agent-infra/self-improvement-pipeline.md +2 -2
- package/docs/guidelines/agent-infra/six-hats.md +3 -3
- package/docs/guidelines/agent-infra/skill-quality-checklist.md +5 -5
- package/docs/guidelines/agent-infra/systems-thinking.md +1 -1
- package/docs/guidelines/agent-infra/verify-before-complete-demos.md +1 -1
- package/docs/guidelines/augment-portability-patterns.md +4 -4
- package/docs/guidelines/cross-role-handoff.md +2 -2
- package/docs/guidelines/php/php-coding-patterns.md +1 -1
- package/docs/guidelines/prompt-templates.md +6 -6
- package/docs/maintainers/dev-mode.md +1 -1
- package/docs/mcp.md +1 -1
- package/docs/parity/bench.json +3 -3
- package/docs/parity/ruflo.md +2 -2
- package/docs/profiles.md +11 -11
- package/docs/quality.md +11 -11
- package/docs/safety.md +3 -3
- package/docs/setup/mcp-client-config.md +1 -1
- package/docs/setup/mcp-r2-bootstrap.md +1 -1
- package/docs/setup/mcp-server-docker.md +3 -3
- package/docs/setup/per-ide/windsurf.md +1 -1
- package/docs/skills-catalog.md +8 -7
- package/docs/troubleshooting.md +1 -1
- package/docs/walkthroughs/daily-workspace-a11y.md +87 -0
- package/llms.txt +7 -6
- package/package.json +1 -1
- package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
- package/scripts/_archive/README.md +2 -2
- package/scripts/_archive/_backfill_skill_domains.py +3 -3
- package/scripts/_archive/_bootstrap_tier_frontmatter.py +3 -3
- package/scripts/_archive/_p43_bodies.py +10 -10
- package/scripts/_archive/{_p43_compress.py → _p43_condense.py} +5 -5
- package/scripts/_archive/_p4_migrate.py +7 -7
- package/scripts/_archive/_phase2_shim_helper.py +1 -1
- package/scripts/_archive/_pilot_council_question.py +5 -5
- package/scripts/_cli/explain_last/inputs.py +1 -1
- package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
- package/scripts/_lib/agent_settings.py +195 -1
- package/scripts/_lib/agent_src.py +19 -19
- package/scripts/_lib/bench_ab_cache.py +162 -0
- package/scripts/_lib/bench_ab_scoring.py +209 -0
- package/scripts/_lib/{bench_caveman.py → bench_telegraph.py} +21 -21
- package/scripts/_lib/{bench_caveman_report.py → bench_telegraph_report.py} +21 -21
- package/scripts/_lib/claude_desktop_bundler.py +5 -5
- package/scripts/_lib/module_detection.py +223 -0
- package/scripts/_lib/scope_guard.sh +162 -0
- package/scripts/_phase4_bucket.py +3 -3
- package/scripts/_pilot_measure.py +4 -4
- package/scripts/_tmp_scan_framework_leakage.py +1 -1
- package/scripts/adoption_report.py +195 -0
- package/scripts/adoption_snapshot.py +219 -0
- package/scripts/adoption_status.py +166 -0
- package/scripts/ai-video/lib/parse-blueprint.sh +1 -1
- package/scripts/ai_council/advisors.py +5 -5
- package/scripts/ai_council/compile_corpus.py +1 -1
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_budget_v2_audit.py +3 -3
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_context_layer_v1_review.py +2 -2
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_inject_quiet_flag.py +1 -1
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_measure_v2.sh +1 -1
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_measure_verbosity.sh +1 -1
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_nondestructive_inline_audit.py +3 -3
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_per_task.sh +1 -1
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase6_trigger_jaccard.py +1 -1
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_phase_2a_budget_rebalance.py +6 -6
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_rebalancing_audit.py +1 -1
- package/scripts/ai_council/one_off_archive/2026-05/_one_off_tier_retrofit.py +6 -6
- package/scripts/annotate_discovery.py +13 -13
- package/scripts/apply_modules_config.py +290 -0
- package/scripts/audit_adr_coverage.py +2 -2
- package/scripts/audit_auto_rules.py +2 -2
- package/scripts/audit_cloud_compatibility.py +3 -3
- package/scripts/audit_command_surface.py +9 -9
- package/scripts/audit_likelihood.py +2 -2
- package/scripts/audit_user_type_axis.py +2 -2
- package/scripts/bench_ab_cache_dispatch.py +68 -0
- package/scripts/bench_ab_clone.py +170 -0
- package/scripts/bench_ab_diff.py +227 -0
- package/scripts/bench_ab_integrity.py +143 -0
- package/scripts/bench_ab_run.py +235 -0
- package/scripts/bench_ab_task_runner.py +369 -0
- package/scripts/bench_ab_tracka_run.py +202 -0
- package/scripts/{bench_compress_memory.py → bench_condense_memory.py} +16 -16
- package/scripts/bench_run.py +33 -33
- package/scripts/bench_runner.py +2 -2
- package/scripts/bootstrap.sh +99 -0
- package/scripts/build_cloud_bundle.py +6 -6
- package/scripts/build_discovery_manifest.py +7 -7
- package/scripts/build_linear_digest.py +3 -3
- package/scripts/build_rule_trigger_matrix.py +8 -8
- package/scripts/chat_history.py +5 -5
- package/scripts/check_always_budget.py +11 -5
- package/scripts/check_augment_description_cap.py +3 -3
- package/scripts/check_cluster_patterns.py +2 -2
- package/scripts/check_command_count_messaging.py +3 -3
- package/scripts/{check_compression.py → check_condensation.py} +34 -34
- package/scripts/{check_compressed_paths.py → check_condensed_paths.py} +8 -8
- package/scripts/check_context_paths.py +7 -7
- package/scripts/check_council_layout.py +2 -2
- package/scripts/check_council_references.py +9 -9
- package/scripts/check_iron_law_prominence.py +2 -2
- package/scripts/check_kernel_rule_bundle.py +2 -2
- package/scripts/check_module_management_neutral.py +149 -0
- package/scripts/check_no_roadmap_refs.py +9 -9
- package/scripts/check_portability.py +3 -3
- package/scripts/check_public_catalog_links.py +4 -4
- package/scripts/check_references.py +7 -6
- package/scripts/check_release_pr_shape.py +112 -0
- package/scripts/check_reply_consistency.py +3 -3
- package/scripts/check_safety_floor_untouched.py +1 -1
- package/scripts/check_template_pin_drift.py +5 -5
- package/scripts/check_token_optimizer_freshness.py +3 -3
- package/scripts/ci_status.py +301 -0
- package/scripts/ci_time_ratio.py +1 -1
- package/scripts/cleanup_other_scope.sh +146 -0
- package/scripts/compile_router.py +10 -10
- package/scripts/{compress.py → condense.py} +64 -64
- package/scripts/condense.sh +18 -0
- package/scripts/{compress_memory.py → condense_memory.py} +33 -33
- package/scripts/config/presets.py +2 -2
- package/scripts/config/profiles.py +1 -1
- package/scripts/cost_by_conversation.py +3 -3
- package/scripts/cost_summary.py +7 -7
- package/scripts/count_token_optimizer_usage.sh +1 -1
- package/scripts/gen_discovery_baseline.py +5 -5
- package/scripts/generate_index.py +6 -6
- package/scripts/generate_ownership_matrix.py +10 -10
- package/scripts/generate_pack_manifests.py +1 -1
- package/scripts/ghostwriter_fixture_allowlist.txt +1 -1
- package/scripts/install +3 -3
- package/scripts/install-hooks.sh +6 -6
- package/scripts/install.py +273 -45
- package/scripts/install.sh +187 -1
- package/scripts/inventory_frontmatter.py +2 -2
- package/scripts/iron_law_sha.py +3 -3
- package/scripts/lint_agents_layout.py +14 -7
- package/scripts/lint_agents_md.py +4 -4
- package/scripts/lint_archived_skills.py +3 -3
- package/scripts/lint_artefact_frontmatter.py +2 -2
- package/scripts/lint_bench_ab.py +172 -0
- package/scripts/lint_bench_corpus.py +1 -1
- package/scripts/lint_command_tiers.py +5 -5
- package/scripts/lint_context_spine_usage.py +1 -1
- package/scripts/lint_framework_leakage.py +7 -7
- package/scripts/lint_framework_leakage_allowlist.json +152 -84
- package/scripts/lint_ghostwriter_source.py +3 -3
- package/scripts/lint_handoffs.py +1 -1
- package/scripts/lint_load_context.py +11 -11
- package/scripts/lint_media_policy_linkage.py +5 -5
- package/scripts/lint_namespace.py +1 -1
- package/scripts/lint_no_new_atomic_commands.py +2 -2
- package/scripts/lint_orchestration_dsl.py +1 -1
- package/scripts/lint_pack_boundaries.py +2 -2
- package/scripts/lint_persona_governance.py +4 -4
- package/scripts/lint_role_experiences.py +237 -0
- package/scripts/lint_rule_interactions.py +2 -2
- package/scripts/lint_rule_tiers.py +1 -1
- package/scripts/lint_trust_coherence.py +2 -2
- package/scripts/mcp_registry_submit.sh +187 -0
- package/scripts/mcp_server/tools.py +1 -1
- package/scripts/measure_frugality_savings.py +10 -10
- package/scripts/measure_patterns.py +1 -1
- package/scripts/measure_projection_bytes.py +5 -5
- package/scripts/measure_rule_budget.py +3 -3
- package/scripts/measure_skill_reduction.py +1 -1
- package/scripts/memory_lookup.py +1 -1
- package/scripts/memory_status.py +2 -2
- package/scripts/migrate_command_suggestions.py +3 -3
- package/scripts/mine_session.py +1 -1
- package/scripts/move_artefact.py +3 -3
- package/scripts/new_skill.py +2 -2
- package/scripts/pack_mcp_content.py +9 -9
- package/scripts/plan_physical_move.py +6 -6
- package/scripts/print_required_checks.py +196 -0
- package/scripts/probe_skill_registration.py +413 -0
- package/scripts/propose_modules_config.py +145 -0
- package/scripts/prototype_lint_contradictions.py +1 -1
- package/scripts/recruit_preflight.sh +152 -0
- package/scripts/refine_ticket_detect.py +3 -3
- package/scripts/release.py +20 -0
- package/scripts/render_benchmark_md.py +308 -0
- package/scripts/roadmap_progress_hook.py +1 -1
- package/scripts/run_skill_evals.py +2 -2
- package/scripts/runtime_registry.py +4 -4
- package/scripts/schemas/command.schema.json +4 -4
- package/scripts/schemas/rule.schema.json +5 -5
- package/scripts/schemas/skill.schema.json +3 -3
- package/scripts/schemas/user-type.schema.json +1 -1
- package/scripts/score_skill_selection.py +1 -1
- package/scripts/skill_collision_clusters.py +2 -2
- package/scripts/skill_linter.py +81 -81
- package/scripts/skill_overlap.py +5 -5
- package/scripts/skill_tools/audit_persona_coverage.py +2 -2
- package/scripts/skill_tools/audit_user_type_coverage.py +2 -2
- package/scripts/skill_tools/run_block_d_eval.py +1 -1
- package/scripts/skill_tools/score_skill_relevance.py +1 -1
- package/scripts/skill_tools/suggest_skill_for_task.py +1 -1
- package/scripts/skill_trigger_eval.py +3 -3
- package/scripts/smoke/kernel.sh +7 -1
- package/scripts/smoke/router.sh +5 -5
- package/scripts/smoke/skills.sh +1 -1
- package/scripts/smoke_quickstart.py +1 -1
- package/scripts/snapshot_agent_outputs.py +3 -3
- package/scripts/spotcheck_thin_root.py +1 -1
- package/scripts/{caveman_stats.py → telegraph_stats.py} +18 -18
- package/scripts/update_counts.py +1 -1
- package/scripts/validate_decision_engine.py +1 -1
- package/scripts/validate_frontmatter.py +1 -1
- package/scripts/validate_safe_paths.py +3 -3
- package/scripts/{validate_caveman_carveouts.py → validate_telegraph_carveouts.py} +7 -7
- package/scripts/verify_roadmap_closure.py +6 -6
- package/templates/consumer-settings/ONBOARDING.md +41 -0
- package/.agent-src/commands/install-via-agent.md +0 -129
- package/.agent-src/skills/compress-memory/SKILL.md +0 -131
- package/dist/ui/assets/index-D-DY1ywI.js +0 -35
- package/dist/ui/assets/index-D-DY1ywI.js.map +0 -1
- package/dist/ui/assets/index-Dqfhmg-d.css +0 -1
- package/docs/adrs/caveman/README.md +0 -9
- package/docs/contracts/caveman-telemetry.md +0 -83
- package/scripts/compress.sh +0 -18
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
"""Structural success criteria for Track B.
|
|
2
|
+
|
|
3
|
+
Phase 4 Step 3 of `agents/roadmaps/road-to-package-impact-benchmark.md`.
|
|
4
|
+
|
|
5
|
+
No LLM-judge. Each criterion is a syntactic or behavioural check executable
|
|
6
|
+
against the post-run working tree + the captured transcript. If the structural
|
|
7
|
+
signal turns out too weak, a separate follow-up roadmap adds an LLM judge —
|
|
8
|
+
not this one.
|
|
9
|
+
|
|
10
|
+
Per-category criteria, expressed as keys in the task's `success_criteria`
|
|
11
|
+
dict (see internal/bench/corpora/ab-trackb.yaml):
|
|
12
|
+
|
|
13
|
+
- `target_file_modified`: <path> — file at <path> changed between
|
|
14
|
+
the pre-run snapshot and the post-run snapshot.
|
|
15
|
+
- `regex_in_target`: <pattern> — pattern found in the named
|
|
16
|
+
target_file (case-insensitive).
|
|
17
|
+
- `regex_in_any`: <pattern> — pattern found in any modified file.
|
|
18
|
+
- `new_test_file_exists`: <path> — new test file present after the run.
|
|
19
|
+
- `test_assertion_added`: <path> — file contains at least one
|
|
20
|
+
`assert` / `expect(` / `test(` call.
|
|
21
|
+
- `one_of_files_modified`: [<paths>] — at least one path modified.
|
|
22
|
+
- `preserves_public_api`: [<names>] — each name still exported / present.
|
|
23
|
+
- `transcript_contains_one_of`: [<strings>] — any string appears in the
|
|
24
|
+
transcript (case-insensitive).
|
|
25
|
+
- `no_file_write_before_audit`: bool — if true, transcript shows an
|
|
26
|
+
audit reference before the first write tool call (UI-audit category).
|
|
27
|
+
- `no_existing_test_removed`: [<names>] — pre-existing test names still
|
|
28
|
+
present in the file.
|
|
29
|
+
- `min_test_count`: int — at least N `test(` /
|
|
30
|
+
`it(` / `describe(` calls.
|
|
31
|
+
"""
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
import re
|
|
35
|
+
from pathlib import Path
|
|
36
|
+
from typing import Any
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _read(path: Path) -> str:
    """Return the text content of *path*, or ``""`` if it cannot be read.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the host locale; undecodable bytes are replaced rather than raising.
    Any ``OSError`` (missing file, directory, permission problem) yields an
    empty string, so callers can treat "unreadable" as "no content".
    """
    try:
        return path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return ""
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _file_changed(pre: dict[str, str], post: dict[str, str], rel: str) -> bool:
    """Report whether the snapshot entry for *rel* differs between runs.

    A path missing from a snapshot is looked up as ``None``, so a file that
    appears in only one of the two snapshots counts as changed, while a path
    absent from both counts as unchanged.
    """
    before = pre.get(rel)
    after = post.get(rel)
    return before != after
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _has_regex(text: str, pattern: str) -> bool:
    """Return True when *pattern* matches anywhere in *text*.

    The search is case-insensitive and multi-line (``^``/``$`` anchor at
    every line boundary).
    """
    flags = re.IGNORECASE | re.MULTILINE
    return re.search(pattern, text, flags) is not None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _count_regex(text: str, pattern: str) -> int:
    """Count the non-overlapping matches of *pattern* in *text*.

    Matching is case-insensitive and multi-line, mirroring ``_has_regex``.
    """
    flags = re.IGNORECASE | re.MULTILINE
    return sum(1 for _ in re.finditer(pattern, text, flags))
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def score_task(
    task: dict[str, Any],
    *,
    pre_snapshot: dict[str, str],
    post_snapshot: dict[str, str],
    clone_root: Path,
    transcript: str,
) -> dict[str, Any]:
    """Score one task against its structural ``success_criteria``.

    Args:
        task: Task definition; only its ``success_criteria`` mapping is read.
        pre_snapshot: Per-path snapshot taken before the run, keyed by the
            repo-relative path (values are only compared for equality).
        post_snapshot: Same shape as ``pre_snapshot``, taken after the run.
        clone_root: Root of the post-run working tree; criterion paths are
            resolved relative to it.
        transcript: Captured run transcript (may be empty).

    Returns:
        ``{"passed": bool, "checks": [{"name", "ok", "reason"}, ...]}``.
        ``passed`` is True only when at least one check ran and every check
        succeeded, so a task without any evaluated criterion never passes.
    """
    crit = task.get("success_criteria") or {}
    checks: list[dict[str, Any]] = []

    def add(name: str, ok: bool, reason: str = "") -> None:
        checks.append({"name": name, "ok": bool(ok), "reason": reason})

    target_modified_path = crit.get("target_file_modified")
    new_test = crit.get("new_test_file_exists")
    test_target = crit.get("test_assertion_added")

    # target_file_modified
    if target_modified_path:
        ok = _file_changed(pre_snapshot, post_snapshot, target_modified_path)
        add("target_file_modified", ok, f"file: {target_modified_path}")

    # regex_in_target — inspects the target_file_modified path, or failing
    # that the new_test_file_exists path. With neither, the body is empty
    # and the check fails unless the pattern matches the empty string.
    regex_target_pattern = crit.get("regex_in_target")
    if regex_target_pattern:
        target_rel = target_modified_path or new_test or ""
        body = _read(clone_root / target_rel) if target_rel else ""
        ok = _has_regex(body, regex_target_pattern)
        add(
            "regex_in_target",
            ok,
            f"pattern={regex_target_pattern!r} in {target_rel!r}",
        )

    # regex_in_any — only files present after the run can contain the pattern.
    regex_any_pattern = crit.get("regex_in_any")
    if regex_any_pattern:
        modified_files = [
            rel for rel in post_snapshot if _file_changed(pre_snapshot, post_snapshot, rel)
        ]
        ok = any(
            _has_regex(_read(clone_root / rel), regex_any_pattern)
            for rel in modified_files
        )
        add(
            "regex_in_any",
            ok,
            f"pattern={regex_any_pattern!r} across {len(modified_files)} modified files",
        )

    # new_test_file_exists — must exist now AND be absent from the pre-run snapshot.
    if new_test:
        ok = (clone_root / new_test).exists() and new_test not in pre_snapshot
        add("new_test_file_exists", ok, f"path={new_test}")

    # test_assertion_added — call-style markers are anchored on a word
    # boundary so identifiers such as `submit(`, `emit(` or `latest(` are not
    # mistaken for `it(` / `test(`; this matches the min_test_count pattern.
    if test_target:
        body = _read(clone_root / test_target)
        ok = _has_regex(body, r"assert|\bexpect\s*\(|\btest\s*\(|\bit\s*\(")
        add("test_assertion_added", ok, f"in {test_target}")

    # one_of_files_modified
    one_of = crit.get("one_of_files_modified")
    if isinstance(one_of, list) and one_of:
        ok = any(_file_changed(pre_snapshot, post_snapshot, rel) for rel in one_of)
        add("one_of_files_modified", ok, f"any of: {one_of}")

    # preserves_public_api — plain substring check in the target file.
    # NOTE(review): silently skipped when target_file_modified is not set.
    api = crit.get("preserves_public_api")
    if isinstance(api, list) and api and target_modified_path:
        body = _read(clone_root / target_modified_path)
        missing = [name for name in api if name not in body]
        add(
            "preserves_public_api",
            not missing,
            f"missing: {missing}" if missing else "all present",
        )

    # transcript_contains_one_of — case-insensitive substring match.
    transcript_one_of = crit.get("transcript_contains_one_of")
    if isinstance(transcript_one_of, list) and transcript_one_of:
        lt = (transcript or "").lower()
        ok = any(s.lower() in lt for s in transcript_one_of)
        add(
            "transcript_contains_one_of",
            ok,
            f"any of: {transcript_one_of}",
        )

    # no_file_write_before_audit
    audit_first = crit.get("no_file_write_before_audit")
    if audit_first:
        ok = _no_write_before_audit(transcript)
        add(
            "no_file_write_before_audit",
            ok,
            "audit reference precedes any write tool call",
        )

    # no_existing_test_removed — plain substring check in the target file.
    # NOTE(review): silently skipped when target_file_modified is not set.
    keep_tests = crit.get("no_existing_test_removed")
    if isinstance(keep_tests, list) and keep_tests and target_modified_path:
        body = _read(clone_root / target_modified_path)
        missing = [name for name in keep_tests if name not in body]
        add(
            "no_existing_test_removed",
            not missing,
            f"missing: {missing}" if missing else "all present",
        )

    # min_test_count — counted in the new test file, else the assertion target.
    min_tests = crit.get("min_test_count")
    if isinstance(min_tests, int) and min_tests > 0 and (new_test or test_target):
        path = new_test or test_target
        body = _read(clone_root / path)
        count = _count_regex(body, r"\btest\s*\(|\bit\s*\(|\bdescribe\s*\(")
        add(
            "min_test_count",
            count >= min_tests,
            f"found={count}, required={min_tests}",
        )

    passed = bool(checks) and all(c["ok"] for c in checks)
    return {"passed": passed, "checks": checks}
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _no_write_before_audit(transcript: str) -> bool:
    """Heuristically check that an audit reference precedes the first write.

    The transcript is searched case-insensitively for audit markers and for
    write/edit tool markers. The check passes only when an audit marker is
    present and its first occurrence comes before the first write marker
    (or no write marker occurs at all).

    This is a plain-text heuristic: it works on the flat transcript string,
    not on a structured tool-call log.
    """
    if not transcript:
        # No transcript means no audit reference can be found: the check fails.
        return False

    haystack = transcript.lower()

    def first_hit(markers: tuple[str, ...]) -> int:
        # Earliest position of any marker, or -1 when none occurs.
        positions = [haystack.find(marker) for marker in markers]
        hits = [pos for pos in positions if pos >= 0]
        return min(hits) if hits else -1

    audit_at = first_hit(("existing-ui-audit", "ui_audit", "audit"))
    if audit_at < 0:
        return False

    write_at = first_hit(("str-replace-editor", "save-file", "edit(", "write("))
    return write_at < 0 or audit_at < write_at
|
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
#
|
|
1
|
+
# Telegraph condensation bench — step-16 Phase 1 Step 4.
|
|
2
2
|
#
|
|
3
|
-
# Three-arm live bench against internal/bench/corpora/
|
|
4
|
-
#
|
|
3
|
+
# Three-arm live bench against internal/bench/corpora/telegraph/prompts.yaml:
|
|
4
|
+
# condensed — system prompt embeds telegraph-speak rule (aggressive).
|
|
5
5
|
# terse_control — system prompt = "Answer concisely. …" (carve-out-free baseline).
|
|
6
|
-
#
|
|
6
|
+
# uncondensed — generic helpful-assistant system prompt.
|
|
7
7
|
#
|
|
8
8
|
# Token counts come from Anthropic API `usage` (authoritative). Carve-out
|
|
9
9
|
# share is measured via regex extraction on the reply text; chars/4 yields
|
|
10
10
|
# an estimated carve-out-token figure for the carve-out-tax accounting.
|
|
11
11
|
#
|
|
12
12
|
# Cost-touch: 10 prompts × 3 arms × claude-sonnet-4-5 (~$3/M in, ~$15/M out).
|
|
13
|
-
"""
|
|
13
|
+
"""Telegraph condensation bench runner."""
|
|
14
14
|
from __future__ import annotations
|
|
15
15
|
|
|
16
16
|
import re
|
|
@@ -24,16 +24,16 @@ import yaml
|
|
|
24
24
|
|
|
25
25
|
# ── system prompts per arm ──────────────────────────────────────────────
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
SYSTEM_PROMPT_CONDENSED = """You are speaking in TELEGRAPH-SPEAK mode (speak_scope=aggressive).
|
|
28
28
|
|
|
29
|
-
|
|
29
|
+
Condense all body prose to telegraph grammar:
|
|
30
30
|
- Drop articles (the, a, an).
|
|
31
31
|
- Drop linking auxiliaries (is, are, was, be) where unambiguous.
|
|
32
32
|
- Drop pronouns when context is clear.
|
|
33
33
|
- Keep nouns, verbs, key adjectives, negation, numbers.
|
|
34
34
|
- Example: "I will now check the file and see if it exists" -> "Check file. Exists?"
|
|
35
35
|
|
|
36
|
-
Carve-outs — preserve BYTE-FOR-BYTE (do NOT
|
|
36
|
+
Carve-outs — preserve BYTE-FOR-BYTE (do NOT condense these):
|
|
37
37
|
1. Triple-backtick code/literal blocks (any language, including ALL-CAPS Iron-Law fences).
|
|
38
38
|
2. Numbered-options lines matching ^\\d+\\.\\s + a **Recommendation:** label.
|
|
39
39
|
3. Backtick spans (file paths, command names, identifiers).
|
|
@@ -43,22 +43,22 @@ Carve-outs — preserve BYTE-FOR-BYTE (do NOT compress these):
|
|
|
43
43
|
7. Deliverables (PR titles, commit messages, ticket summaries, articles, the prompt
|
|
44
44
|
line of any single question asked to the user).
|
|
45
45
|
|
|
46
|
-
Apply
|
|
46
|
+
Apply telegraph condensation aggressively to every other prose surface."""
|
|
47
47
|
|
|
48
48
|
SYSTEM_PROMPT_TERSE = (
|
|
49
49
|
"Answer concisely. Skip preamble. Do not restate the question. "
|
|
50
50
|
"Avoid filler phrases ('Let me', 'Here is', 'I will'). Get to the answer."
|
|
51
51
|
)
|
|
52
52
|
|
|
53
|
-
|
|
53
|
+
SYSTEM_PROMPT_UNCONDENSED = (
|
|
54
54
|
"You are a helpful AI assistant. Answer the user's question clearly and completely."
|
|
55
55
|
)
|
|
56
56
|
|
|
57
|
-
ARMS: tuple[str, ...] = ("
|
|
57
|
+
ARMS: tuple[str, ...] = ("condensed", "terse_control", "uncondensed")
|
|
58
58
|
ARM_SYSTEM_PROMPT: dict[str, str] = {
|
|
59
|
-
"
|
|
59
|
+
"condensed": SYSTEM_PROMPT_CONDENSED,
|
|
60
60
|
"terse_control": SYSTEM_PROMPT_TERSE,
|
|
61
|
-
"
|
|
61
|
+
"uncondensed": SYSTEM_PROMPT_UNCONDENSED,
|
|
62
62
|
}
|
|
63
63
|
|
|
64
64
|
# ── carve-out detection ────────────────────────────────────────────────
|
|
@@ -113,15 +113,15 @@ class PromptResult:
|
|
|
113
113
|
|
|
114
114
|
@property
|
|
115
115
|
def savings_vs_raw(self) -> float | None:
|
|
116
|
-
c = self.arms.get("
|
|
117
|
-
u = self.arms.get("
|
|
116
|
+
c = self.arms.get("condensed")
|
|
117
|
+
u = self.arms.get("uncondensed")
|
|
118
118
|
if not c or not u or u.output_tokens == 0:
|
|
119
119
|
return None
|
|
120
120
|
return 1.0 - (c.output_tokens / u.output_tokens)
|
|
121
121
|
|
|
122
122
|
@property
|
|
123
123
|
def savings_vs_terse(self) -> float | None:
|
|
124
|
-
c = self.arms.get("
|
|
124
|
+
c = self.arms.get("condensed")
|
|
125
125
|
t = self.arms.get("terse_control")
|
|
126
126
|
if not c or not t or t.output_tokens == 0:
|
|
127
127
|
return None
|
|
@@ -131,7 +131,7 @@ class PromptResult:
|
|
|
131
131
|
# ── corpus + runner ────────────────────────────────────────────────────
|
|
132
132
|
|
|
133
133
|
def load_corpus(corpus_path: Path) -> list[dict[str, Any]]:
|
|
134
|
-
"""Read internal/bench/corpora/
|
|
134
|
+
"""Read internal/bench/corpora/telegraph/prompts.yaml → list of prompt dicts."""
|
|
135
135
|
data = yaml.safe_load(corpus_path.read_text(encoding="utf-8")) or {}
|
|
136
136
|
prompts = data.get("prompts") or []
|
|
137
137
|
if not prompts:
|
|
@@ -191,12 +191,12 @@ def _stats(values: list[float]) -> dict[str, float]:
|
|
|
191
191
|
|
|
192
192
|
|
|
193
193
|
def aggregate_results(results: list[PromptResult]) -> dict[str, Any]:
|
|
194
|
-
"""Compute median/p10/p90 for
|
|
194
|
+
"""Compute median/p10/p90 for condensation metrics across the corpus."""
|
|
195
195
|
vs_raw = [r.savings_vs_raw for r in results if r.savings_vs_raw is not None]
|
|
196
196
|
vs_terse = [r.savings_vs_terse for r in results if r.savings_vs_terse is not None]
|
|
197
197
|
realised_carve_pct = [
|
|
198
|
-
r.arms["
|
|
199
|
-
for r in results if "
|
|
198
|
+
r.arms["condensed"].realised_carve_out_pct
|
|
199
|
+
for r in results if "condensed" in r.arms and r.arms["condensed"].output_chars
|
|
200
200
|
]
|
|
201
201
|
expected_carve_pct = [r.expected_carve_out_pct for r in results]
|
|
202
202
|
|
|
@@ -242,7 +242,7 @@ def compute_cost(results: list[PromptResult], pricing: dict[str, float]) -> dict
|
|
|
242
242
|
|
|
243
243
|
# ── orchestrator ───────────────────────────────────────────────────────────
|
|
244
244
|
|
|
245
|
-
def
|
|
245
|
+
def run_telegraph_bench(
|
|
246
246
|
client: Any,
|
|
247
247
|
corpus_path: Path,
|
|
248
248
|
*,
|
|
@@ -1,18 +1,18 @@
|
|
|
1
|
-
#
|
|
1
|
+
# Telegraph bench report serializer — step-16 Phase 1 Step 5.
|
|
2
2
|
#
|
|
3
|
-
# Emits the
|
|
4
|
-
# ("
|
|
5
|
-
# blocks are disjoint:
|
|
6
|
-
# selection bench has no three-arm
|
|
7
|
-
"""
|
|
3
|
+
# Emits the telegraph-v1 JSON + Markdown shape. Distinct schema_version
|
|
4
|
+
# ("telegraph-v1") from the selection-accuracy bench (v1) because the
|
|
5
|
+
# blocks are disjoint: telegraph has no `selection`/`quality`, and the
|
|
6
|
+
# selection bench has no three-arm condensation metrics.
|
|
7
|
+
"""Telegraph bench report serializer."""
|
|
8
8
|
from __future__ import annotations
|
|
9
9
|
|
|
10
10
|
from typing import Any
|
|
11
11
|
|
|
12
|
-
from _lib.
|
|
12
|
+
from _lib.bench_telegraph import ARMS, PromptResult, aggregate_results, compute_cost
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
def
|
|
15
|
+
def build_telegraph_report(
|
|
16
16
|
*,
|
|
17
17
|
results: list[PromptResult],
|
|
18
18
|
corpus_path_rel: str,
|
|
@@ -30,10 +30,10 @@ def build_caveman_report(
|
|
|
30
30
|
cost["pricing_sourced_on"] = pricing_sourced_on
|
|
31
31
|
errors = cost["totals"]["errors"]
|
|
32
32
|
return {
|
|
33
|
-
"schema_version": "
|
|
33
|
+
"schema_version": "telegraph-v1",
|
|
34
34
|
"generated_at": generated_at,
|
|
35
35
|
"corpus": {
|
|
36
|
-
"id": "
|
|
36
|
+
"id": "telegraph",
|
|
37
37
|
"path": corpus_path_rel,
|
|
38
38
|
"prompt_count": len(results),
|
|
39
39
|
},
|
|
@@ -42,7 +42,7 @@ def build_caveman_report(
|
|
|
42
42
|
"transport": transport,
|
|
43
43
|
"model": model,
|
|
44
44
|
},
|
|
45
|
-
"
|
|
45
|
+
"telegraph": {
|
|
46
46
|
"arms": list(ARMS),
|
|
47
47
|
"aggregate": aggregate,
|
|
48
48
|
"per_prompt": [_prompt_block(r) for r in results],
|
|
@@ -61,8 +61,8 @@ def _prompt_block(r: PromptResult) -> dict[str, Any]:
|
|
|
61
61
|
"category": r.category,
|
|
62
62
|
"expected_carve_out_pct": r.expected_carve_out_pct,
|
|
63
63
|
"realised_carve_out_pct": (
|
|
64
|
-
r.arms["
|
|
65
|
-
if "
|
|
64
|
+
r.arms["condensed"].realised_carve_out_pct
|
|
65
|
+
if "condensed" in r.arms else None
|
|
66
66
|
),
|
|
67
67
|
"savings_vs_raw": r.savings_vs_raw,
|
|
68
68
|
"savings_vs_terse": r.savings_vs_terse,
|
|
@@ -85,12 +85,12 @@ def _fmt_pct(x: float | None) -> str:
|
|
|
85
85
|
return f"{x:.2%}" if isinstance(x, (int, float)) else "—"
|
|
86
86
|
|
|
87
87
|
|
|
88
|
-
def
|
|
89
|
-
cv = report["
|
|
88
|
+
def render_telegraph_markdown(report: dict[str, Any]) -> str:
|
|
89
|
+
cv = report["telegraph"]
|
|
90
90
|
agg = cv["aggregate"]
|
|
91
91
|
cost = report["cost"]
|
|
92
92
|
head = [
|
|
93
|
-
f"#
|
|
93
|
+
f"# Telegraph Bench Report — `telegraph` · {report['generated_at']}",
|
|
94
94
|
"",
|
|
95
95
|
"## Headline",
|
|
96
96
|
"",
|
|
@@ -102,7 +102,7 @@ def render_caveman_markdown(report: dict[str, Any]) -> str:
|
|
|
102
102
|
f"(p10 {_fmt_pct(agg['savings_vs_raw']['p10'])} · p90 {_fmt_pct(agg['savings_vs_raw']['p90'])})",
|
|
103
103
|
f"- median savings vs terse-control: **{_fmt_pct(agg['savings_vs_terse']['median'])}** "
|
|
104
104
|
f"(p10 {_fmt_pct(agg['savings_vs_terse']['p10'])} · p90 {_fmt_pct(agg['savings_vs_terse']['p90'])})",
|
|
105
|
-
f"- median realised carve-out share (
|
|
105
|
+
f"- median realised carve-out share (condensed arm): **{_fmt_pct(agg['realised_carve_out_pct']['median'])}** "
|
|
106
106
|
f"(expected median {_fmt_pct(agg['expected_carve_out_pct']['median'])})",
|
|
107
107
|
f"- total cost: **${cost['totals']['total_cost_usd']:.6f}** "
|
|
108
108
|
f"(calls {cost['totals']['calls']} · errors {cost['totals']['errors']})",
|
|
@@ -125,14 +125,14 @@ def render_caveman_markdown(report: dict[str, Any]) -> str:
|
|
|
125
125
|
per_prompt = [
|
|
126
126
|
"## Per-prompt results",
|
|
127
127
|
"",
|
|
128
|
-
"| id | category | exp.carve | real.carve | out.
|
|
128
|
+
"| id | category | exp.carve | real.carve | out.condensed | out.terse | out.uncondensed | vs raw | vs terse |",
|
|
129
129
|
"|---|---|---:|---:|---:|---:|---:|---:|---:|",
|
|
130
130
|
]
|
|
131
131
|
for r in cv["per_prompt"]:
|
|
132
132
|
arms = r["arms"]
|
|
133
|
-
oc = arms.get("
|
|
133
|
+
oc = arms.get("condensed", {}).get("output_tokens", "—")
|
|
134
134
|
ot = arms.get("terse_control", {}).get("output_tokens", "—")
|
|
135
|
-
ou = arms.get("
|
|
135
|
+
ou = arms.get("uncondensed", {}).get("output_tokens", "—")
|
|
136
136
|
per_prompt.append(
|
|
137
137
|
f"| `{r['id']}` | {r['category']} | "
|
|
138
138
|
f"{_fmt_pct(r['expected_carve_out_pct'])} | {_fmt_pct(r['realised_carve_out_pct'])} | "
|
|
@@ -145,7 +145,7 @@ def render_caveman_markdown(report: dict[str, Any]) -> str:
|
|
|
145
145
|
"",
|
|
146
146
|
f"- corpus: `{report['corpus']['path']}`",
|
|
147
147
|
f"- pricing: `internal/bench/pricing.yaml` (sourced {cost.get('pricing_sourced_on') or '—'})",
|
|
148
|
-
f"- schema: `
|
|
148
|
+
f"- schema: `telegraph-v1` (see `docs/contracts/benchmark-report-schema.md`)",
|
|
149
149
|
f"- bench_run version: `{report['runner']['bench_run_version']}`",
|
|
150
150
|
"",
|
|
151
151
|
]
|
|
@@ -17,7 +17,7 @@ Contract:
|
|
|
17
17
|
- Command bundles wrap a single ``.agent-src/commands/<path>.md`` file
|
|
18
18
|
as ``SKILL.md`` inside the ZIP. Nested commands flatten to
|
|
19
19
|
``<cluster>-<leaf>`` slugs (e.g. ``council/default.md`` →
|
|
20
|
-
``council-default.zip``) to mirror ``
|
|
20
|
+
``council-default.zip``) to mirror ``condense.py``.
|
|
21
21
|
- Exclusions: ``.git*``, ``__pycache__``, ``*.pyc`` — matched on the
|
|
22
22
|
basename of any path component.
|
|
23
23
|
- A skill folder without a ``SKILL.md`` is skipped (defensive: avoids
|
|
@@ -26,7 +26,7 @@ Contract:
|
|
|
26
26
|
- Command files named ``AGENTS.md`` are skipped (cluster authoring docs,
|
|
27
27
|
not invocable commands).
|
|
28
28
|
- A command slug that collides with an existing skill name is skipped —
|
|
29
|
-
the real skill bundle wins, matching ``
|
|
29
|
+
the real skill bundle wins, matching ``condense.generate_claude_commands``.
|
|
30
30
|
- Writes are atomic via tempfile → ``os.replace``.
|
|
31
31
|
- Idempotent: each ZIP gets a sibling ``<slug>.sha256`` recording
|
|
32
32
|
the manifest digest. If the recomputed digest matches the recorded
|
|
@@ -165,7 +165,7 @@ def build_skill_bundles(
|
|
|
165
165
|
def _command_slug(source_file: Path, commands_root: Path) -> str:
|
|
166
166
|
"""Return the flat slug for a command source file.
|
|
167
167
|
|
|
168
|
-
Mirrors ``scripts/
|
|
168
|
+
Mirrors ``scripts/condense.py::_command_slug``: top-level commands
|
|
169
169
|
keep their stem (``commit.md`` → ``commit``); nested commands flatten
|
|
170
170
|
the relative path with ``-`` (``council/default.md`` →
|
|
171
171
|
``council-default``).
|
|
@@ -178,7 +178,7 @@ def _iter_command_files(commands_root: Path) -> Iterable[Path]:
|
|
|
178
178
|
"""Yield every command ``.md`` file under ``commands_root`` (recursive).
|
|
179
179
|
|
|
180
180
|
Skips ``AGENTS.md`` cluster authoring docs, matching
|
|
181
|
-
``scripts/
|
|
181
|
+
``scripts/condense.py::_iter_commands``.
|
|
182
182
|
"""
|
|
183
183
|
for source_file in sorted(commands_root.rglob("*.md")):
|
|
184
184
|
if source_file.name == "AGENTS.md":
|
|
@@ -196,7 +196,7 @@ def build_command_bundles(
|
|
|
196
196
|
|
|
197
197
|
Each ZIP contains a single ``SKILL.md`` whose bytes are the source
|
|
198
198
|
command ``.md`` file — same wrapping pattern that
|
|
199
|
-
``
|
|
199
|
+
``condense.generate_claude_commands`` uses for Claude Code via
|
|
200
200
|
``.claude/skills/<slug>/SKILL.md`` symlinks.
|
|
201
201
|
|
|
202
202
|
Slugs that collide with an existing skill folder under
|