@event4u/agent-config 6.1.0 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +35 -3
- package/AGENTS.md +8 -7
- package/CHANGELOG.md +408 -0
- package/CONTRIBUTING.md +1 -1
- package/README.md +17 -15
- package/dist/agent-src/commands/agent-status.md +2 -2
- package/dist/agent-src/commands/agents/audit.md +3 -3
- package/dist/agent-src/commands/agents/init.md +1 -1
- package/dist/agent-src/commands/agents/optimize.md +4 -4
- package/dist/agent-src/commands/analyze/decision.md +108 -0
- package/dist/agent-src/commands/analyze/incident.md +120 -0
- package/dist/agent-src/commands/analyze/near-miss.md +113 -0
- package/dist/agent-src/commands/analyze/postmortem.md +130 -0
- package/dist/agent-src/commands/analyze/premortem.md +104 -0
- package/dist/agent-src/commands/analyze.md +124 -0
- package/dist/agent-src/commands/brand/identity.md +27 -0
- package/dist/agent-src/commands/brand/review.md +27 -0
- package/dist/agent-src/commands/brand/strategy.md +27 -0
- package/dist/agent-src/commands/brand/tokens.md +28 -0
- package/dist/agent-src/commands/brand/voice.md +27 -0
- package/dist/agent-src/commands/brand.md +58 -0
- package/dist/agent-src/commands/check-current-md.md +3 -3
- package/dist/agent-src/commands/condense.md +2 -2
- package/dist/agent-src/commands/council/debate.md +2 -2
- package/dist/agent-src/commands/council/default.md +45 -18
- package/dist/agent-src/commands/fix/portability.md +3 -3
- package/dist/agent-src/commands/fix/refs.md +3 -3
- package/dist/agent-src/commands/implement-ticket.md +36 -6
- package/dist/agent-src/commands/knowledge/cross-repo.md +1 -1
- package/dist/agent-src/commands/memory/add.md +1 -1
- package/dist/agent-src/commands/mission/upgrade.md +182 -0
- package/dist/agent-src/commands/optimize/skills.md +2 -2
- package/dist/agent-src/commands/orchestrate.md +1 -1
- package/dist/agent-src/commands/pr/create.md +6 -4
- package/dist/agent-src/commands/review-changes.md +8 -0
- package/dist/agent-src/commands/roadmap/materialize.md +73 -0
- package/dist/agent-src/commands/skill/preview.md +1 -1
- package/dist/agent-src/commands/skills/discover.md +1 -1
- package/dist/agent-src/commands/threat-model.md +4 -4
- package/dist/agent-src/commands/upstream-contribute.md +3 -3
- package/dist/agent-src/commands/video/from-script.md +2 -2
- package/dist/agent-src/commands/video/from-song.md +3 -3
- package/dist/agent-src/commands/video/scene.md +1 -1
- package/dist/agent-src/commands/video/storyboard.md +1 -1
- package/dist/agent-src/commands/video.md +3 -3
- package/dist/agent-src/contexts/communication/rules-auto/source-of-truth-mechanics.md +3 -3
- package/dist/agent-src/contexts/communication/rules-auto/user-interaction-mechanics.md +1 -1
- package/dist/agent-src/contexts/execution/evidence-discipline.md +153 -0
- package/dist/agent-src/contexts/execution/project-intelligence.md +264 -0
- package/dist/agent-src/contexts/execution/roadmap-process-loop.md +2 -1
- package/dist/agent-src/personas/ai-video-technical-director.md +1 -1
- package/dist/agent-src/personas/brand-strategist.md +74 -0
- package/dist/agent-src/personas/design-director.md +74 -0
- package/dist/agent-src/rules/brand-consistency.md +77 -0
- package/dist/agent-src/rules/brand-source-of-truth.md +57 -0
- package/dist/agent-src/rules/direct-answers.md +2 -0
- package/dist/agent-src/rules/domain-safety-disclaimer.md +2 -0
- package/dist/agent-src/rules/git-history-discipline.md +1 -0
- package/dist/agent-src/rules/icon-consistency.md +53 -0
- package/dist/agent-src/rules/image-likeness-and-rights.md +67 -0
- package/dist/agent-src/rules/lethal-trifecta-guard.md +1 -1
- package/dist/agent-src/rules/persona-governance.md +2 -2
- package/dist/agent-src/rules/provider-lifecycle-discipline.md +3 -1
- package/dist/agent-src/rules/roadmap-progress-sync.md +10 -0
- package/dist/agent-src/rules/security-sensitive-stop.md +9 -3
- package/dist/agent-src/rules/size-enforcement.md +1 -1
- package/dist/agent-src/rules/source-confidentiality.md +3 -3
- package/dist/agent-src/rules/source-discovery-gate.md +98 -0
- package/dist/agent-src/rules/think-before-action.md +1 -0
- package/dist/agent-src/rules/ui-audit-gate.md +2 -0
- package/dist/agent-src/rules/untrusted-input-defense.md +1 -1
- package/dist/agent-src/rules/user-interaction.md +1 -1
- package/dist/agent-src/scripts/archive_completed_roadmaps.ts +392 -0
- package/dist/agent-src/scripts/update_roadmap_progress.ts +824 -0
- package/dist/agent-src/skills/adr-create/SKILL.md +5 -5
- package/dist/agent-src/skills/agent-security-review/evals/triggers.json +1 -0
- package/dist/agent-src/skills/agents-md-thin-root/SKILL.md +1 -1
- package/dist/agent-src/skills/ai-council/SKILL.md +1 -1
- package/dist/agent-src/skills/analysis-autonomous-mode/SKILL.md +9 -13
- package/dist/agent-src/skills/blade-ui/SKILL.md +12 -5
- package/dist/agent-src/skills/blameless-post-mortem/SKILL.md +199 -0
- package/dist/agent-src/skills/brand/ATTRIBUTION.md +38 -0
- package/dist/agent-src/skills/brand/SKILL.md +115 -0
- package/dist/agent-src/skills/brand/data/archetypes.csv +13 -0
- package/dist/agent-src/skills/brand/data/color-psychology.csv +14 -0
- package/dist/agent-src/skills/brand/data/logo-style-fit.csv +13 -0
- package/dist/agent-src/skills/brand/data/manifest.json +226 -0
- package/dist/agent-src/skills/brand/data/messaging-frameworks.csv +13 -0
- package/dist/agent-src/skills/brand/data/naming-patterns.csv +13 -0
- package/dist/agent-src/skills/brand/data/typography-principles.csv +13 -0
- package/dist/agent-src/skills/brand/data/voice-tone.csv +13 -0
- package/dist/agent-src/skills/brand/evals/triggers.json +17 -0
- package/dist/agent-src/skills/brand-asset-generation/SKILL.md +89 -0
- package/dist/agent-src/skills/brand-asset-generation/evals/triggers.json +17 -0
- package/dist/agent-src/skills/brand-audit/SKILL.md +67 -0
- package/dist/agent-src/skills/brand-audit/evals/triggers.json +17 -0
- package/dist/agent-src/skills/brand-identity/SKILL.md +101 -0
- package/dist/agent-src/skills/brand-identity/evals/triggers.json +17 -0
- package/dist/agent-src/skills/brand-strategy/SKILL.md +83 -0
- package/dist/agent-src/skills/brand-strategy/evals/triggers.json +17 -0
- package/dist/agent-src/skills/brand-to-tokens/SKILL.md +102 -0
- package/dist/agent-src/skills/brand-to-tokens/evals/triggers.json +17 -0
- package/dist/agent-src/skills/brand-to-tokens/templates/marp-brand-deck.md.example +46 -0
- package/dist/agent-src/skills/brand-to-tokens/templates/reveal-brand-deck.yaml +32 -0
- package/dist/agent-src/skills/canvas-design/evals/triggers.json +1 -0
- package/dist/agent-src/skills/check-refs/SKILL.md +5 -5
- package/dist/agent-src/skills/code-review/SKILL.md +6 -15
- package/dist/agent-src/skills/command-writing/SKILL.md +2 -2
- package/dist/agent-src/skills/complexity-first-planning/evals/triggers.json +1 -0
- package/dist/agent-src/skills/context-authoring/SKILL.md +2 -2
- package/dist/agent-src/skills/context-document/SKILL.md +35 -2
- package/dist/agent-src/skills/corpus-grounding/evals/triggers.json +1 -0
- package/dist/agent-src/skills/corpus-grounding/scripts/bm25_search.ts +482 -0
- package/dist/agent-src/skills/corpus-grounding/scripts/decision_engine.ts +803 -0
- package/dist/agent-src/skills/corpus-grounding/scripts/ground.ts +541 -0
- package/dist/agent-src/skills/corpus-grounding/scripts/schema_validator.ts +309 -0
- package/dist/agent-src/skills/database/SKILL.md +26 -4
- package/dist/agent-src/skills/decision-record/SKILL.md +1 -1
- package/dist/agent-src/skills/decision-record/evals/triggers.json +17 -0
- package/dist/agent-src/skills/decision-review/SKILL.md +179 -0
- package/dist/agent-src/skills/description-assist/SKILL.md +1 -1
- package/dist/agent-src/skills/design-intelligence/SKILL.md +1 -1
- package/dist/agent-src/skills/design-intelligence/data/manifest.json +23 -6
- package/dist/agent-src/skills/design-intelligence/evals/triggers.json +1 -0
- package/dist/agent-src/skills/design-tokens/evals/triggers.json +1 -0
- package/dist/agent-src/skills/design-tokens/scripts/tokens.ts +888 -0
- package/dist/agent-src/skills/doc-coauthoring/evals/triggers.json +1 -0
- package/dist/agent-src/skills/eloquent/evals/triggers.json +1 -0
- package/dist/agent-src/skills/emit-tickets/SKILL.md +198 -0
- package/dist/agent-src/skills/estimate-ticket/evals/triggers.json +1 -0
- package/dist/agent-src/skills/git-workflow/SKILL.md +33 -0
- package/dist/agent-src/skills/guideline-writing/SKILL.md +2 -2
- package/dist/agent-src/skills/iconography/SKILL.md +88 -0
- package/dist/agent-src/skills/iconography/evals/triggers.json +17 -0
- package/dist/agent-src/skills/image-analyser/evals/triggers.json +1 -0
- package/dist/agent-src/skills/image-creator/evals/triggers.json +1 -0
- package/dist/agent-src/skills/image-editing/SKILL.md +100 -0
- package/dist/agent-src/skills/image-editing/evals/triggers.json +17 -0
- package/dist/agent-src/skills/image-generation/SKILL.md +95 -0
- package/dist/agent-src/skills/image-generation/evals/triggers.json +17 -0
- package/dist/agent-src/skills/image-provider-routing/SKILL.md +96 -0
- package/dist/agent-src/skills/image-provider-routing/evals/triggers.json +17 -0
- package/dist/agent-src/skills/launch-readiness/SKILL.md +21 -0
- package/dist/agent-src/skills/learning-to-rule-or-skill/SKILL.md +12 -8
- package/dist/agent-src/skills/lint-skills/SKILL.md +5 -5
- package/dist/agent-src/skills/logo-generation/SKILL.md +98 -0
- package/dist/agent-src/skills/logo-generation/evals/triggers.json +17 -0
- package/dist/agent-src/skills/markitdown/SKILL.md +1 -1
- package/dist/agent-src/skills/md-language-check/SKILL.md +1 -1
- package/dist/agent-src/skills/motion-choreographer/SKILL.md +1 -1
- package/dist/agent-src/skills/php-coder/evals/triggers.json +1 -0
- package/dist/agent-src/skills/prediction-pool-optimizer/evals/triggers.json +1 -0
- package/dist/agent-src/skills/premortem/SKILL.md +137 -0
- package/dist/agent-src/skills/prompt-engineering-image/SKILL.md +115 -0
- package/dist/agent-src/skills/prompt-engineering-image/evals/triggers.json +17 -0
- package/dist/agent-src/skills/prompt-validator/evals/triggers.json +1 -0
- package/dist/agent-src/skills/react-shadcn-ui/SKILL.md +12 -5
- package/dist/agent-src/skills/react-shadcn-ui/scripts/shadcn_add.ts +388 -0
- package/dist/agent-src/skills/reasoning-orchestrator/SKILL.md +1 -1
- package/dist/agent-src/skills/reasoning-orchestrator/evals/triggers.json +1 -0
- package/dist/agent-src/skills/refine-ticket/evals/triggers.json +1 -0
- package/dist/agent-src/skills/roadmap-management/SKILL.md +16 -3
- package/dist/agent-src/skills/roadmap-writing/SKILL.md +76 -0
- package/dist/agent-src/skills/root-cause-frameworks/SKILL.md +146 -0
- package/dist/agent-src/skills/rule-refactor/SKILL.md +9 -9
- package/dist/agent-src/skills/rule-writing/SKILL.md +7 -7
- package/dist/agent-src/skills/script-writing/SKILL.md +2 -2
- package/dist/agent-src/skills/security-audit/SKILL.md +5 -0
- package/dist/agent-src/skills/skill-improvement-pipeline/SKILL.md +19 -3
- package/dist/agent-src/skills/skill-management/SKILL.md +3 -3
- package/dist/agent-src/skills/skill-reviewer/SKILL.md +1 -1
- package/dist/agent-src/skills/skill-writing/SKILL.md +5 -5
- package/dist/agent-src/skills/skill-writing/evals/triggers.json +1 -0
- package/dist/agent-src/skills/source-discovery/SKILL.md +182 -0
- package/dist/agent-src/skills/standards-from-config/SKILL.md +93 -0
- package/dist/agent-src/skills/systematic-debugging/SKILL.md +7 -0
- package/dist/agent-src/skills/tailwind-engineer/scripts/tailwind_config_gen.ts +561 -0
- package/dist/agent-src/skills/threat-modeling/SKILL.md +1 -0
- package/dist/agent-src/skills/typography-system/SKILL.md +138 -0
- package/dist/agent-src/skills/typography-system/evals/triggers.json +17 -0
- package/dist/agent-src/skills/upstream-contribute/SKILL.md +3 -3
- package/dist/agent-src/skills/verify-repair-loop/SKILL.md +209 -0
- package/dist/agent-src/skills/verify-repair-loop/evals/output-schema.yml +20 -0
- package/dist/agent-src/skills/verify-repair-loop/evals/triggers.json +17 -0
- package/dist/agent-src/templates/agent-settings.md +7 -0
- package/dist/agent-src/templates/contexts/knowledge-card.md +69 -0
- package/dist/agent-src/templates/contexts/lesson-card.md +73 -0
- package/dist/agent-src/templates/roadmaps.md +29 -1
- package/dist/agent-src/templates/scripts/README.md +6 -6
- package/dist/agent-src/templates/scripts/check_memory.ts +640 -0
- package/dist/agent-src/templates/scripts/check_memory_proposal.ts +351 -0
- package/dist/agent-src/templates/scripts/implement_ticket/__main__.ts +27 -0
- package/dist/agent-src/templates/scripts/memory_hash.ts +333 -0
- package/dist/agent-src/templates/scripts/memory_lookup.ts +1067 -0
- package/dist/agent-src/templates/scripts/memory_report.ts +846 -0
- package/dist/agent-src/templates/scripts/memory_signal.ts +422 -0
- package/dist/agent-src/templates/scripts/memory_status.ts +191 -0
- package/dist/agent-src/templates/scripts/pr_review_routing.ts +523 -0
- package/dist/agent-src/templates/scripts/pr_risk_review.ts +0 -0
- package/dist/agent-src/templates/scripts/telemetry/aggregator.ts +0 -0
- package/dist/agent-src/templates/scripts/telemetry/boundary.ts +164 -0
- package/dist/agent-src/templates/scripts/telemetry/engagement.ts +479 -0
- package/dist/agent-src/templates/scripts/telemetry/report_renderer.ts +394 -0
- package/dist/agent-src/templates/scripts/telemetry/settings.ts +210 -0
- package/dist/agent-src/templates/scripts/telemetry_record.ts +255 -0
- package/dist/agent-src/templates/scripts/telemetry_report.ts +189 -0
- package/dist/agent-src/templates/scripts/telemetry_status.ts +312 -0
- package/dist/agent-src/templates/scripts/tier_usage_report.ts +597 -0
- package/dist/agent-src/templates/scripts/work_engine/__main__.ts +14 -0
- package/dist/agent-src/templates/scripts/work_engine/_lib/agent_settings.ts +1118 -0
- package/dist/agent-src/templates/scripts/work_engine/_lib/user_global_paths.ts +329 -0
- package/dist/agent-src/templates/scripts/work_engine/cli.ts +206 -0
- package/dist/agent-src/templates/scripts/work_engine/cli_args.ts +249 -0
- package/dist/agent-src/templates/scripts/work_engine/delivery_state.ts +225 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/analyze.ts +125 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/implement.ts +189 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/index.ts +94 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/memory.ts +193 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/plan.ts +267 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/refine.ts +518 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/report.ts +379 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/test.ts +268 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/verify.ts +258 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/index.ts +32 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/mixed/contract.ts +243 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/mixed/index.ts +108 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/mixed/stitch.ts +259 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/mixed/ui.ts +216 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/ui/_passthrough.ts +40 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/ui/app_spec.ts +241 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/ui/apply.ts +216 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/ui/audit.ts +506 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/ui/design.ts +325 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/ui/index.ts +102 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/ui/polish.ts +462 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/ui/review.ts +474 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/ui/scaffold.ts +352 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/ui_trivial/_skipped.ts +33 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/ui_trivial/apply.ts +213 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/ui_trivial/index.ts +111 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/ui_trivial/refine.ts +126 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/ui_trivial/report.ts +112 -0
- package/dist/agent-src/templates/scripts/work_engine/directives/ui_trivial/test.ts +164 -0
- package/dist/agent-src/templates/scripts/work_engine/dispatcher.ts +515 -0
- package/dist/agent-src/templates/scripts/work_engine/emitters.ts +119 -0
- package/dist/agent-src/templates/scripts/work_engine/errors.ts +24 -0
- package/dist/agent-src/templates/scripts/work_engine/hook_bootstrap.ts +104 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/_chat_history_base.ts +176 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_append.ts +41 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_halt_append.ts +89 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/decision_gate.ts +193 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/decision_trace.ts +304 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/directive_set_guard.ts +110 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/halt_surface_audit.ts +118 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/index.ts +17 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.ts +161 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/state_shape_validation.ts +45 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/trace.ts +134 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/context.ts +94 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/events.ts +58 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/exceptions.ts +85 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/index.ts +27 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/registry.ts +66 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/runner.ts +90 -0
- package/dist/agent-src/templates/scripts/work_engine/hooks/settings.ts +260 -0
- package/dist/agent-src/templates/scripts/work_engine/input_builders.ts +260 -0
- package/dist/agent-src/templates/scripts/work_engine/intent/classify.ts +466 -0
- package/dist/agent-src/templates/scripts/work_engine/migration/v0_to_v1.ts +531 -0
- package/dist/agent-src/templates/scripts/work_engine/orchestration.ts +366 -0
- package/dist/agent-src/templates/scripts/work_engine/persona_policy.ts +97 -0
- package/dist/agent-src/templates/scripts/work_engine/resolvers/diff.ts +135 -0
- package/dist/agent-src/templates/scripts/work_engine/resolvers/file.ts +175 -0
- package/dist/agent-src/templates/scripts/work_engine/resolvers/prompt.ts +115 -0
- package/dist/agent-src/templates/scripts/work_engine/scoring/confidence.ts +415 -0
- package/dist/agent-src/templates/scripts/work_engine/scoring/decision_engine.ts +466 -0
- package/dist/agent-src/templates/scripts/work_engine/scoring/decision_trace.ts +298 -0
- package/dist/agent-src/templates/scripts/work_engine/scoring/memory_visibility.ts +444 -0
- package/dist/agent-src/templates/scripts/work_engine/stack/detect.ts +252 -0
- package/dist/agent-src/templates/scripts/work_engine/stack/runner.ts +745 -0
- package/dist/agent-src/templates/scripts/work_engine/state.ts +1151 -0
- package/dist/agent-src/templates/scripts/work_engine/state_io.ts +413 -0
- package/dist/agent-src/templates/tickets.md +120 -0
- package/dist/cli/commands/commands.js +2 -2
- package/dist/cli/commands/commands.js.map +1 -1
- package/dist/cli/commands/doctorShell.js +4 -22
- package/dist/cli/commands/doctorShell.js.map +1 -1
- package/dist/cli/commands/packs.js +1 -1
- package/dist/cli/commands/packs.js.map +1 -1
- package/dist/cli/commands/recordTriggerEval.js +179 -0
- package/dist/cli/commands/recordTriggerEval.js.map +1 -0
- package/dist/cli/commands/recordTriggerEval.test.js +113 -0
- package/dist/cli/commands/recordTriggerEval.test.js.map +1 -0
- package/dist/cli/commands/workspaces.js +1 -1
- package/dist/cli/commands/workspaces.js.map +1 -1
- package/dist/cli/main.js +22 -1
- package/dist/cli/main.js.map +1 -1
- package/dist/cli/python/knowledge_ingest.js +1048 -0
- package/dist/cli/python/knowledge_ingest.js.map +1 -0
- package/dist/cli/python/workspace_analytics.js +1085 -0
- package/dist/cli/python/workspace_analytics.js.map +1 -0
- package/dist/cli/python/workspace_crypto.js +544 -0
- package/dist/cli/python/workspace_crypto.js.map +1 -0
- package/dist/cli/python/workspace_documents.js +1216 -0
- package/dist/cli/python/workspace_documents.js.map +1 -0
- package/dist/cli/python/workspace_drive.js +574 -0
- package/dist/cli/python/workspace_drive.js.map +1 -0
- package/dist/cli/python/workspace_drive_health.js +628 -0
- package/dist/cli/python/workspace_drive_health.js.map +1 -0
- package/dist/cli/python/workspace_explain.js +765 -0
- package/dist/cli/python/workspace_explain.js.map +1 -0
- package/dist/cli/python/workspace_hosts.js +349 -0
- package/dist/cli/python/workspace_hosts.js.map +1 -0
- package/dist/cli/python/workspace_inbox.js +692 -0
- package/dist/cli/python/workspace_inbox.js.map +1 -0
- package/dist/cli/python/workspace_render.js +816 -0
- package/dist/cli/python/workspace_render.js.map +1 -0
- package/dist/cli/python/workspace_roles.js +487 -0
- package/dist/cli/python/workspace_roles.js.map +1 -0
- package/dist/cli/python/workspace_secrets.js +180 -0
- package/dist/cli/python/workspace_secrets.js.map +1 -0
- package/dist/cli/python/workspace_sessions.js +1079 -0
- package/dist/cli/python/workspace_sessions.js.map +1 -0
- package/dist/cli/python/workspace_skills.js +417 -0
- package/dist/cli/python/workspace_skills.js.map +1 -0
- package/dist/cli/registry.js +2 -0
- package/dist/cli/registry.js.map +1 -1
- package/dist/discovery/deprecation-report.md +1 -1
- package/dist/discovery/discovery-manifest.json +1174 -123
- package/dist/discovery/discovery-manifest.json.sha256 +1 -1
- package/dist/discovery/discovery-manifest.summary.md +9 -6
- package/dist/discovery/orphan-report.md +1 -1
- package/dist/discovery/packs.json +163 -15
- package/dist/discovery/trust-report.md +4 -4
- package/dist/discovery/workspaces.json +73 -12
- package/dist/install/install.mjs +13934 -0
- package/dist/mcp/registry-manifest.json +4 -4
- package/dist/router.json +1 -1
- package/dist/server/routes/wizard.js +50 -21
- package/dist/server/routes/wizard.js.map +1 -1
- package/dist/server/routes/workspace.js +44 -25
- package/dist/server/routes/workspace.js.map +1 -1
- package/dist/server/schemas/settings.js +15 -0
- package/dist/server/schemas/settings.js.map +1 -1
- package/docs/SKILL_CENSUS.md +344 -0
- package/docs/architecture/augment-projection.md +1 -1
- package/docs/architecture/multi-tool-projection.md +3 -3
- package/docs/architecture.md +37 -6
- package/docs/benchmark.md +24 -27
- package/docs/capability-matrix.md +32 -0
- package/docs/catalog.md +50 -9
- package/docs/command-naming-audit.md +60 -0
- package/docs/contracts/STABILITY.md +32 -0
- package/docs/contracts/agents-md-tech-stack.md +1 -1
- package/docs/contracts/ai-council-config.md +22 -22
- package/docs/contracts/analysis-memory-loop.md +149 -0
- package/docs/contracts/benchmark-ab-contract.md +3 -3
- package/docs/contracts/branch-protection-policy.md +27 -0
- package/docs/contracts/brand-token-consumption.md +69 -0
- package/docs/contracts/command-clusters.md +2 -1
- package/docs/contracts/command-surface-tiers.md +13 -0
- package/docs/contracts/discovery-manifest.schema.json +24 -5
- package/docs/contracts/implement-ticket-flow.md +9 -9
- package/docs/contracts/install-layout.md +249 -0
- package/docs/contracts/kernel-membership.md +1 -1
- package/docs/contracts/linear-ai-rules-inclusion.md +2 -2
- package/docs/contracts/linter-structural-model.md +1 -1
- package/docs/contracts/mcp-discovery-phase-notice.md +1 -1
- package/docs/contracts/multi-tool-projection-fidelity.md +1 -1
- package/docs/contracts/namespace.md +2 -2
- package/docs/contracts/no-runtime-boundary.md +56 -0
- package/docs/contracts/package-self-orientation.md +24 -0
- package/docs/contracts/provider-lifecycle.md +3 -3
- package/docs/contracts/reasoning-discipline-protocol.md +83 -0
- package/docs/contracts/rule-classification.md +3 -3
- package/docs/contracts/skill-domains.md +1 -1
- package/docs/contracts/smoke-contracts.md +1 -1
- package/docs/contracts/surface-tiers.md +81 -0
- package/docs/contracts/ticket-bundle-format.md +228 -0
- package/docs/cookbook.md +152 -0
- package/docs/customization.md +12 -1
- package/docs/decisions/ADR-013-discovery-frontmatter-contract.md +16 -0
- package/docs/decisions/ADR-056-unvalidated-video-adapters-disposition.md +1 -1
- package/docs/decisions/ADR-059-render-resume-filesystem-as-state.md +1 -1
- package/docs/decisions/ADR-060-comfyui-sandbox-model.md +1 -1
- package/docs/decisions/ADR-061-corpus-grounding-layer.md +48 -1
- package/docs/decisions/ADR-096-analysis-workbench.md +110 -0
- package/docs/decisions/ADR-097-mission-recipe-privilege-boundary.md +121 -0
- package/docs/decisions/ADR-098-evidence-first-structure-discovery.md +154 -0
- package/docs/decisions/ADR-099-file-first-pattern-library.md +87 -0
- package/docs/decisions/ADR-100-global-knowledge-card-sharing.md +133 -0
- package/docs/decisions/ADR-101-ticket-bundle-emission.md +109 -0
- package/docs/decisions/ADR-102-ticket-handoff-paste-and-mcp.md +72 -0
- package/docs/decisions/ADR-103-global-knowledge-default-off-until-measured.md +92 -0
- package/docs/decisions/ADR-200-python-to-typescript-migration.md +193 -0
- package/docs/decisions/INDEX.md +9 -0
- package/docs/distribution/mcp-submission-checklist.md +3 -3
- package/docs/featured-commands.md +1 -1
- package/docs/featured-skills.md +1 -1
- package/docs/getting-started-by-role.md +2 -0
- package/docs/getting-started.md +2 -2
- package/docs/guidelines/agent-infra/failure-signatures.md +35 -0
- package/docs/guidelines/agent-infra/frontier-reasoning-operating-profile.md +5 -0
- package/docs/guidelines/agent-infra/size-and-scope.md +17 -0
- package/docs/guidelines/agent-infra/skill-quality-checklist.md +2 -2
- package/docs/guides/frontend-design-corpus-refresh.md +83 -0
- package/docs/guides/skill-preview.md +1 -1
- package/docs/hook-payload-capture.md +4 -4
- package/docs/mcp.md +1 -1
- package/docs/migration/consumer-template-consumption-model.md +145 -0
- package/docs/migration/divergences/README.md +55 -0
- package/docs/migration/divergences/_template.md +50 -0
- package/docs/migration/divergences/bench-stats-float-precision.md +72 -0
- package/docs/migration/divergences/mcp-telemetry-node-sqlite.md +61 -0
- package/docs/migration/divergences/pack-mcp-content-gzip-body.md +53 -0
- package/docs/migration/divergences/src-scripts-build_cloud_bundle.md +63 -0
- package/docs/migration/divergences/src-scripts-check_memory.md +91 -0
- package/docs/migration/divergences/src-scripts-inventory_abstraction_budget.md +65 -0
- package/docs/migration/divergences/src-scripts-lint_marketplace.md +57 -0
- package/docs/migration/divergences/src-scripts-lint_mcp_registry_manifest.md +70 -0
- package/docs/migration/divergences/src-scripts-spotcheck_thin_root.md +60 -0
- package/docs/migration/divergences/src-scripts-validate_agent_settings.md +58 -0
- package/docs/migration/node-floor.md +86 -0
- package/docs/migration/yaml-roundtrip-spike.md +163 -0
- package/docs/personas.md +6 -1
- package/docs/role-experiences.md +19 -0
- package/docs/setup/per-ide/windsurf.md +1 -1
- package/docs/skills-catalog.md +24 -3
- package/docs/threat-model.md +28 -0
- package/llms.txt +23 -2
- package/package.json +10 -15
- package/src/config/agent-settings.template.yml +64 -1
- package/src/config/discovery/packs.yml +31 -0
- package/src/config/discovery/unassigned-artefacts.yml +6 -0
- package/src/config/discovery/workspaces.yml +2 -2
- package/src/config/gitignore-block.txt +7 -0
- package/src/scripts/_cli/cmd_doctor.ts +2306 -0
- package/src/scripts/_cli/cmd_explain.ts +748 -0
- package/src/scripts/_cli/cmd_export.ts +375 -0
- package/src/scripts/_cli/cmd_migrate.ts +951 -0
- package/src/scripts/_cli/cmd_prune.ts +610 -0
- package/src/scripts/_cli/cmd_refresh.ts +530 -0
- package/src/scripts/_cli/cmd_settings_check.ts +407 -0
- package/src/scripts/_cli/cmd_settings_migrate.ts +344 -0
- package/src/scripts/_cli/cmd_sync.ts +381 -0
- package/src/scripts/_cli/cmd_uninstall.ts +833 -0
- package/src/scripts/_cli/cmd_update.ts +585 -0
- package/src/scripts/_cli/cmd_upgrade.ts +390 -0
- package/src/scripts/_cli/cmd_validate.ts +394 -0
- package/src/scripts/_cli/cmd_versions.ts +492 -0
- package/src/scripts/_cli/explain_last/assumptions.ts +114 -0
- package/src/scripts/_cli/explain_last/council.ts +197 -0
- package/src/scripts/_cli/explain_last/halt.ts +73 -0
- package/src/scripts/_cli/explain_last/index.ts +155 -0
- package/src/scripts/_cli/explain_last/inputs.ts +211 -0
- package/src/scripts/_cli/explain_last/memory.ts +231 -0
- package/src/scripts/_cli/explain_last/provider.ts +82 -0
- package/src/scripts/_cli/explain_last/render.ts +54 -0
- package/src/scripts/_cli/explain_last/route.ts +70 -0
- package/src/scripts/_cli/explain_last/scrubber.ts +138 -0
- package/src/scripts/_cli/explain_last/sections/assumptions.ts +51 -0
- package/src/scripts/_cli/explain_last/sections/council.ts +56 -0
- package/src/scripts/_cli/explain_last/sections/halt.ts +60 -0
- package/src/scripts/_cli/explain_last/sections/header.ts +50 -0
- package/src/scripts/_cli/explain_last/sections/index.ts +21 -0
- package/src/scripts/_cli/explain_last/sections/inputs.ts +63 -0
- package/src/scripts/_cli/explain_last/sections/memory.ts +124 -0
- package/src/scripts/_cli/explain_last/sections/pack.ts +42 -0
- package/src/scripts/_cli/explain_last/sections/provider.ts +51 -0
- package/src/scripts/_cli/explain_last/sections/route.ts +48 -0
- package/src/scripts/_cli/explain_last/state_loader.ts +119 -0
- package/src/scripts/_dispatch.bash +179 -163
- package/src/scripts/_lib/agent_settings.ts +1123 -0
- package/src/scripts/_lib/agent_src.ts +654 -0
- package/src/scripts/_lib/agents_overlay.ts +183 -0
- package/src/scripts/_lib/bench_ab_cache.ts +399 -0
- package/src/scripts/_lib/bench_ab_scoring.ts +352 -0
- package/src/scripts/_lib/bench_ab_scoring_v2.ts +751 -0
- package/src/scripts/_lib/bench_cost.ts +396 -0
- package/src/scripts/_lib/bench_quality.ts +237 -0
- package/src/scripts/_lib/bench_report.ts +255 -0
- package/src/scripts/_lib/bench_telegraph.ts +516 -0
- package/src/scripts/_lib/bench_telegraph_report.ts +272 -0
- package/src/scripts/_lib/changelog_eras.ts +398 -0
- package/src/scripts/_lib/claude_desktop_bundler.ts +324 -0
- package/src/scripts/_lib/cli_wrapper.ts +89 -0
- package/src/scripts/_lib/fs_atomic.ts +172 -0
- package/src/scripts/_lib/global_deploy_inventory.ts +639 -0
- package/src/scripts/_lib/install_layout.ts +87 -0
- package/src/scripts/_lib/install_regenerator.ts +157 -0
- package/src/scripts/_lib/installed_lock.ts +451 -0
- package/src/scripts/_lib/installed_tools.ts +518 -0
- package/src/scripts/_lib/json_pointers.ts +388 -0
- package/src/scripts/_lib/knowledge_global.ts +770 -0
- package/src/scripts/_lib/knowledge_global_promote.ts +453 -0
- package/src/scripts/_lib/knowledge_global_redaction.ts +448 -0
- package/src/scripts/_lib/link_crypto.ts +325 -0
- package/src/scripts/_lib/linked_projects.ts +613 -0
- package/src/scripts/_lib/model_tier.ts +65 -0
- package/src/scripts/_lib/module_detection.ts +275 -0
- package/src/scripts/_lib/node_sqlite.d.ts +32 -0
- package/src/scripts/_lib/pin_resolver.ts +264 -0
- package/src/scripts/_lib/py_random.ts +212 -0
- package/src/scripts/_lib/script_output.ts +147 -0
- package/src/scripts/_lib/security_lint.ts +623 -0
- package/src/scripts/_lib/surface_tiers.ts +127 -0
- package/src/scripts/_lib/token_count.ts +126 -0
- package/src/scripts/_lib/update_check.ts +297 -0
- package/src/scripts/_lib/user_global_paths.ts +329 -0
- package/src/scripts/_lib/value_ladder.ts +882 -0
- package/src/scripts/_lib/value_report.ts +617 -0
- package/src/scripts/_lib/zip_min.ts +175 -0
- package/src/scripts/adoption_report.ts +357 -0
- package/src/scripts/adoption_snapshot.ts +392 -0
- package/src/scripts/adoption_status.ts +424 -0
- package/src/scripts/adr/regenerate_index.ts +257 -0
- package/src/scripts/ai-image/adapters/flux.sh +45 -0
- package/src/scripts/ai-image/adapters/gemini-image.sh +45 -0
- package/src/scripts/ai-image/adapters/ideogram.sh +45 -0
- package/src/scripts/ai-image/adapters/recraft.sh +47 -0
- package/src/scripts/ai-video/adapters/comfyui.sh +3 -3
- package/src/scripts/ai-video/adapters/fal.sh +3 -3
- package/src/scripts/ai-video/adapters/gemini-veo.sh +3 -3
- package/src/scripts/ai-video/adapters/higgsfield.sh +3 -3
- package/src/scripts/ai-video/adapters/kling.sh +3 -3
- package/src/scripts/ai-video/adapters/musetalk.sh +2 -2
- package/src/scripts/ai-video/adapters/openai-images.sh +3 -3
- package/src/scripts/ai-video/adapters/replicate.sh +3 -3
- package/src/scripts/ai-video/adapters/sora.sh +3 -3
- package/src/scripts/ai-video/adapters/syncso.sh +3 -3
- package/src/scripts/ai-video/audio-adapters/allin1.sh +2 -2
- package/src/scripts/ai-video/audio-adapters/whisperx.sh +2 -2
- package/src/scripts/ai-video/lib/audio-adapter-contract.md +1 -1
- package/src/scripts/ai-video/lib/embed-provenance.sh +2 -2
- package/src/scripts/ai-video/lib/ingest-song.sh +2 -2
- package/src/scripts/ai-video/lib/parse-blueprint.sh +1 -1
- package/src/scripts/ai-video/lib/resume-scan.sh +2 -2
- package/src/scripts/ai-video/smoke-trace.sh +16 -7
- package/src/scripts/ai-video/stitch.sh +2 -2
- package/src/scripts/ai_council/_default_prices.ts +73 -0
- package/src/scripts/ai_council/advisors.ts +244 -0
- package/src/scripts/ai_council/airgap.ts +249 -0
- package/src/scripts/ai_council/budget_guard.ts +492 -0
- package/src/scripts/ai_council/bundler.ts +376 -0
- package/src/scripts/ai_council/cli_hints.ts +120 -0
- package/src/scripts/ai_council/clients.ts +2214 -0
- package/src/scripts/ai_council/compile_corpus.ts +681 -0
- package/src/scripts/ai_council/confidence_gate.ts +230 -0
- package/src/scripts/ai_council/config.ts +1729 -0
- package/src/scripts/ai_council/consensus.ts +551 -0
- package/src/scripts/ai_council/events_log.ts +327 -0
- package/src/scripts/ai_council/learn_low_impact_preview.ts +317 -0
- package/src/scripts/ai_council/low_impact.ts +1069 -0
- package/src/scripts/ai_council/low_impact_corpus.ts +662 -0
- package/src/scripts/ai_council/low_impact_intake.ts +222 -0
- package/src/scripts/ai_council/modes.ts +169 -0
- package/src/scripts/ai_council/necessity.ts +933 -0
- package/src/scripts/ai_council/orchestrator.ts +1689 -0
- package/src/scripts/ai_council/pricing.ts +267 -0
- package/src/scripts/ai_council/probation_gate.ts +282 -0
- package/src/scripts/ai_council/project_context.ts +308 -0
- package/src/scripts/ai_council/prompts.ts +600 -0
- package/src/scripts/ai_council/redact_low_impact_entry.ts +291 -0
- package/src/scripts/ai_council/replay.ts +314 -0
- package/src/scripts/ai_council/session.ts +558 -0
- package/src/scripts/ai_council/shadow_dispatch.ts +509 -0
- package/src/scripts/ai_council/solo_dispatch.ts +281 -0
- package/src/scripts/analysis_freshness.ts +343 -0
- package/src/scripts/annotate_discovery.ts +288 -0
- package/src/scripts/apply_modules_config.ts +537 -0
- package/src/scripts/audit_adr_coverage.ts +357 -0
- package/src/scripts/audit_auto_rules.ts +415 -0
- package/src/scripts/audit_cloud_compatibility.ts +608 -0
- package/src/scripts/audit_command_surface.ts +1227 -0
- package/src/scripts/audit_initial_context.ts +694 -0
- package/src/scripts/audit_likelihood.ts +434 -0
- package/src/scripts/audit_mcp_tools.ts +252 -0
- package/src/scripts/audit_overlap.ts +421 -0
- package/src/scripts/audit_skill_descriptions.ts +402 -0
- package/src/scripts/audit_skill_overlap.ts +576 -0
- package/src/scripts/audit_user_type_axis.ts +264 -0
- package/src/scripts/backfill_model_tier.ts +349 -0
- package/src/scripts/bench_ab_cache_dispatch.ts +126 -0
- package/src/scripts/bench_ab_clone.ts +610 -0
- package/src/scripts/bench_ab_diff.ts +609 -0
- package/src/scripts/bench_ab_integrity.ts +261 -0
- package/src/scripts/bench_ab_run.ts +417 -0
- package/src/scripts/bench_ab_task_runner.ts +1382 -0
- package/src/scripts/bench_ab_tracka_run.ts +436 -0
- package/src/scripts/bench_ab_v2_run.ts +585 -0
- package/src/scripts/bench_ab_v2_stats.ts +1018 -0
- package/src/scripts/bench_baseline_ready.ts +326 -0
- package/src/scripts/bench_condense_memory.ts +479 -0
- package/src/scripts/bench_drift_check.ts +503 -0
- package/src/scripts/bench_per_tool.ts +591 -0
- package/src/scripts/bench_rtk_savings.ts +710 -0
- package/src/scripts/bench_run.ts +509 -0
- package/src/scripts/bench_runner.ts +519 -0
- package/src/scripts/build_cloud_bundle.ts +692 -0
- package/src/scripts/build_discovery_manifest.ts +1371 -0
- package/src/scripts/build_linear_digest.ts +368 -0
- package/src/scripts/build_mcp_registry_manifest.ts +351 -0
- package/src/scripts/build_rule_trigger_matrix.ts +469 -0
- package/src/scripts/capture_showcase_session.ts +735 -0
- package/src/scripts/chat_history.ts +2301 -0
- package/src/scripts/check_always_budget.ts +694 -0
- package/src/scripts/check_artefact_checksums.ts +281 -0
- package/src/scripts/check_augment_description_cap.ts +133 -0
- package/src/scripts/check_augmentignore.ts +108 -0
- package/src/scripts/check_beta_review_markers.ts +234 -0
- package/src/scripts/check_bite_sized_granularity.ts +116 -0
- package/src/scripts/check_cluster_patterns.ts +285 -0
- package/src/scripts/check_command_count_messaging.ts +224 -0
- package/src/scripts/check_condensation.ts +900 -0
- package/src/scripts/check_condensed_paths.ts +414 -0
- package/src/scripts/check_context_paths.ts +388 -0
- package/src/scripts/check_council_config_location.ts +260 -0
- package/src/scripts/check_council_layout.ts +180 -0
- package/src/scripts/check_council_references.ts +345 -0
- package/src/scripts/check_discovery_determinism.ts +124 -0
- package/src/scripts/check_gate_paths.ts +230 -0
- package/src/scripts/check_iron_law_prominence.ts +298 -0
- package/src/scripts/check_kernel_rule_bundle.ts +242 -0
- package/src/scripts/check_knowledge_cards.ts +759 -0
- package/src/scripts/check_md_language.ts +291 -0
- package/src/scripts/check_memory.ts +845 -0
- package/src/scripts/check_memory_proposal.ts +351 -0
- package/src/scripts/check_module_management_neutral.ts +238 -0
- package/src/scripts/check_no_conflict_markers.ts +298 -0
- package/src/scripts/check_no_conflict_markers_allowlist.json +4 -0
- package/src/scripts/check_no_external_sources.ts +351 -0
- package/src/scripts/check_no_local_settings_committed.ts +69 -0
- package/src/scripts/check_no_new_legacy_path.ts +188 -0
- package/src/scripts/check_no_roadmap_refs.ts +304 -0
- package/src/scripts/check_one_off_location.ts +165 -0
- package/src/scripts/check_overlay_cascade_subdirs.ts +188 -0
- package/src/scripts/check_portability.ts +860 -0
- package/src/scripts/check_proposal.ts +0 -0
- package/src/scripts/check_public_catalog_links.ts +204 -0
- package/src/scripts/check_public_links.ts +357 -0
- package/src/scripts/check_references.ts +963 -0
- package/src/scripts/check_release_includes_discovery.ts +94 -0
- package/src/scripts/check_release_pr_shape.ts +222 -0
- package/src/scripts/check_release_published.ts +235 -0
- package/src/scripts/check_release_trunk_sync.ts +203 -0
- package/src/scripts/check_reply_consistency.ts +359 -0
- package/src/scripts/check_roadmap_trackable.ts +268 -0
- package/src/scripts/check_role_doc_links.ts +187 -0
- package/src/scripts/check_safety_floor_untouched.ts +160 -0
- package/src/scripts/check_skill_requires.ts +205 -0
- package/src/scripts/check_structural_breaking.ts +170 -0
- package/src/scripts/check_surface_tiers.ts +567 -0
- package/src/scripts/check_template_pin_drift.ts +222 -0
- package/src/scripts/check_test_coverage_diff.ts +235 -0
- package/src/scripts/check_token_optimizer_freshness.ts +183 -0
- package/src/scripts/check_trigger_evals.ts +375 -0
- package/src/scripts/check_update_banner.ts +143 -0
- package/src/scripts/ci_status.ts +0 -0
- package/src/scripts/ci_summary.ts +235 -0
- package/src/scripts/ci_time_ratio.ts +526 -0
- package/src/scripts/command_suggester/cooldown.ts +176 -0
- package/src/scripts/command_suggester/index.ts +41 -0
- package/src/scripts/command_suggester/loader.ts +205 -0
- package/src/scripts/command_suggester/match.ts +294 -0
- package/src/scripts/command_suggester/rank.ts +201 -0
- package/src/scripts/command_suggester/render.ts +122 -0
- package/src/scripts/command_suggester/sanitize.ts +114 -0
- package/src/scripts/command_suggester/settings.ts +186 -0
- package/src/scripts/command_suggester/types.ts +0 -0
- package/src/scripts/compile_router.ts +297 -0
- package/src/scripts/condense.sh +7 -1
- package/src/scripts/condense.ts +2035 -0
- package/src/scripts/condense_memory.ts +334 -0
- package/src/scripts/config/index.ts +15 -0
- package/src/scripts/config/packs.ts +310 -0
- package/src/scripts/config/presets.ts +369 -0
- package/src/scripts/config/profile_explain.ts +114 -0
- package/src/scripts/config/profiles.ts +277 -0
- package/src/scripts/config/session_profiles.ts +1064 -0
- package/src/scripts/context_hygiene_hook.ts +272 -0
- package/src/scripts/cost_by_conversation.ts +444 -0
- package/src/scripts/cost_summary.ts +407 -0
- package/src/scripts/council_cli.ts +2827 -0
- package/src/scripts/council_prune.ts +153 -0
- package/src/scripts/cross_repo_retrieve.ts +694 -0
- package/src/scripts/discovery_stats.ts +218 -0
- package/src/scripts/evidence_report.ts +580 -0
- package/src/scripts/external_sources_denylist.json +1 -0
- package/src/scripts/extract_audit_patterns.ts +394 -0
- package/src/scripts/first_run_gate_hook.ts +246 -0
- package/src/scripts/gen_discovery_baseline.ts +297 -0
- package/src/scripts/generate_capabilities_index.ts +496 -0
- package/src/scripts/generate_capability_matrix.ts +430 -0
- package/src/scripts/generate_catalog.ts +178 -0
- package/src/scripts/generate_command_flows.ts +316 -0
- package/src/scripts/generate_cookbook.ts +302 -0
- package/src/scripts/generate_index.ts +500 -0
- package/src/scripts/generate_ownership_matrix.ts +646 -0
- package/src/scripts/generate_pack_manifests.ts +1025 -0
- package/src/scripts/generate_role_experiences_catalog.ts +265 -0
- package/src/scripts/hermetic-install.sh +22 -11
- package/src/scripts/hook_manifest.yaml +24 -10
- package/src/scripts/hooks/augment-chat-history.sh +3 -10
- package/src/scripts/hooks/augment-context-hygiene.sh +3 -10
- package/src/scripts/hooks/augment-dispatcher.sh +3 -10
- package/src/scripts/hooks/augment-onboarding-gate.sh +3 -10
- package/src/scripts/hooks/augment-roadmap-progress.sh +3 -10
- package/src/scripts/hooks/block_no_verify.ts +413 -0
- package/src/scripts/hooks/cline-dispatcher.sh +3 -10
- package/src/scripts/hooks/cowork-dispatcher.sh +3 -14
- package/src/scripts/hooks/cursor-dispatcher.sh +3 -10
- package/src/scripts/hooks/dispatch_hook.ts +851 -0
- package/src/scripts/hooks/dispatch_issues.ts +226 -0
- package/src/scripts/hooks/envelope.ts +140 -0
- package/src/scripts/hooks/gemini-dispatcher.sh +3 -8
- package/src/scripts/hooks/replay_hook.ts +364 -0
- package/src/scripts/hooks/state_io.ts +293 -0
- package/src/scripts/hooks/windsurf-dispatcher.sh +3 -9
- package/src/scripts/hooks_doctor.ts +418 -0
- package/src/scripts/hooks_status.ts +292 -0
- package/src/scripts/injection_scan_hook.ts +285 -0
- package/src/scripts/install +36 -22
- package/src/scripts/install-hooks.sh +20 -14
- package/src/scripts/install.sh +38 -14
- package/src/scripts/install.ts +4515 -0
- package/src/scripts/inventory_abstraction_budget.ts +1104 -0
- package/src/scripts/inventory_frontmatter.ts +320 -0
- package/src/scripts/inventory_meta_layers.ts +516 -0
- package/src/scripts/iron_law_sha.ts +233 -0
- package/src/scripts/knowledge_global_cli.ts +1105 -0
- package/src/scripts/linked_projects_list.ts +310 -0
- package/src/scripts/lint_agent_security.ts +224 -0
- package/src/scripts/lint_agent_skill_names.ts +241 -0
- package/src/scripts/lint_agents_layout.ts +205 -0
- package/src/scripts/lint_agents_md.ts +294 -0
- package/src/scripts/lint_archived_skills.ts +309 -0
- package/src/scripts/lint_artefact_frontmatter.ts +359 -0
- package/src/scripts/lint_bench_ab.ts +319 -0
- package/src/scripts/lint_bench_corpus.ts +421 -0
- package/src/scripts/lint_command_flow_coverage.ts +231 -0
- package/src/scripts/lint_command_routing.ts +377 -0
- package/src/scripts/lint_command_tiers.ts +345 -0
- package/src/scripts/lint_command_verbs.ts +379 -0
- package/src/scripts/lint_commit_subjects.ts +243 -0
- package/src/scripts/lint_context_spine_usage.ts +198 -0
- package/src/scripts/lint_discovery_manifest.ts +540 -0
- package/src/scripts/lint_discovery_vocabulary.ts +393 -0
- package/src/scripts/lint_empty_roadmaps.ts +147 -0
- package/src/scripts/lint_eval_freshness.ts +335 -0
- package/src/scripts/lint_examples.ts +183 -0
- package/src/scripts/lint_explain_trace.ts +381 -0
- package/src/scripts/lint_featured_skills.ts +0 -0
- package/src/scripts/lint_flows.ts +701 -0
- package/src/scripts/lint_framework_leakage.ts +497 -0
- package/src/scripts/lint_framework_leakage_allowlist.json +8 -1
- package/src/scripts/lint_frontmatter_boilerplate.ts +356 -0
- package/src/scripts/lint_ghostwriter_source.ts +389 -0
- package/src/scripts/lint_global_paths.ts +420 -0
- package/src/scripts/lint_handoffs.ts +362 -0
- package/src/scripts/lint_hidden_unicode.ts +350 -0
- package/src/scripts/lint_hook_concern_budget.ts +319 -0
- package/src/scripts/lint_hook_manifest.ts +354 -0
- package/src/scripts/lint_instruction_smuggling.ts +173 -0
- package/src/scripts/lint_load_context.ts +371 -0
- package/src/scripts/lint_marketplace.ts +286 -0
- package/src/scripts/lint_marketplace_install_completeness.ts +309 -0
- package/src/scripts/lint_mcp_config_security.ts +225 -0
- package/src/scripts/lint_mcp_registry_manifest.ts +350 -0
- package/src/scripts/lint_media_policy_linkage.ts +224 -0
- package/src/scripts/lint_missions.ts +774 -0
- package/src/scripts/lint_model_tier_coverage.ts +151 -0
- package/src/scripts/lint_namespace.ts +295 -0
- package/src/scripts/lint_namespace_collisions.ts +203 -0
- package/src/scripts/lint_new_skill_gate.ts +462 -0
- package/src/scripts/lint_no_new_atomic_commands.ts +342 -0
- package/src/scripts/lint_one_off_age.ts +348 -0
- package/src/scripts/lint_orchestration_dsl.ts +377 -0
- package/src/scripts/lint_orchestrator_auto_detect.ts +177 -0
- package/src/scripts/lint_pack_boundaries.ts +366 -0
- package/src/scripts/lint_pack_dependencies.ts +541 -0
- package/src/scripts/lint_pack_first_win.ts +202 -0
- package/src/scripts/lint_persona_governance.ts +292 -0
- package/src/scripts/lint_positioning.ts +257 -0
- package/src/scripts/lint_profile_overlay_set_only.ts +324 -0
- package/src/scripts/lint_readme_jargon.ts +189 -0
- package/src/scripts/lint_readme_size.ts +73 -0
- package/src/scripts/lint_regression.ts +497 -0
- package/src/scripts/lint_roadmap_ci_steps.ts +252 -0
- package/src/scripts/lint_roadmap_complexity.ts +295 -0
- package/src/scripts/lint_roadmap_later_disposition.ts +357 -0
- package/src/scripts/lint_role_experiences.ts +410 -0
- package/src/scripts/lint_rule_interactions.ts +281 -0
- package/src/scripts/lint_rule_tiers.ts +169 -0
- package/src/scripts/lint_showcase_sessions.ts +254 -0
- package/src/scripts/lint_skill_frontmatter_safety.ts +279 -0
- package/src/scripts/lint_skill_originality.ts +586 -0
- package/src/scripts/lint_skill_originality_allowlist.json +20 -0
- package/src/scripts/lint_skill_tools.ts +320 -0
- package/src/scripts/lint_ticket_buildable.ts +1027 -0
- package/src/scripts/lint_topics_yaml.ts +203 -0
- package/src/scripts/lint_trust_coherence.ts +377 -0
- package/src/scripts/lint_value_dashboard.ts +314 -0
- package/src/scripts/lint_workflow_security.ts +637 -0
- package/src/scripts/lint_workflow_security_allowlist.json +20 -0
- package/src/scripts/lint_workspace_boundary.ts +248 -0
- package/src/scripts/mcp_parity_smoke.ts +638 -0
- package/src/scripts/mcp_render.ts +346 -0
- package/src/scripts/mcp_server/__main__.ts +28 -0
- package/src/scripts/mcp_server/catalog.ts +154 -0
- package/src/scripts/mcp_server/index.ts +24 -0
- package/src/scripts/mcp_server/metadata.ts +83 -0
- package/src/scripts/mcp_server/prompts.ts +711 -0
- package/src/scripts/mcp_server/resources.ts +343 -0
- package/src/scripts/mcp_server/server.ts +439 -0
- package/src/scripts/mcp_server/telemetry.ts +154 -0
- package/src/scripts/mcp_server/tools.ts +1031 -0
- package/src/scripts/mcp_setup.sh +25 -52
- package/src/scripts/mcp_telemetry_health.ts +362 -0
- package/src/scripts/mcp_telemetry_query.ts +371 -0
- package/src/scripts/mcp_telemetry_store.ts +422 -0
- package/src/scripts/measure_augment_budget.ts +453 -0
- package/src/scripts/measure_density.ts +618 -0
- package/src/scripts/measure_frugality_savings.ts +353 -0
- package/src/scripts/measure_markitdown_lift.ts +299 -0
- package/src/scripts/measure_patterns.ts +682 -0
- package/src/scripts/measure_projection_bytes.ts +425 -0
- package/src/scripts/measure_rule_budget.ts +627 -0
- package/src/scripts/measure_skill_reduction.ts +442 -0
- package/src/scripts/media/lib/adapter-common.sh +247 -0
- package/src/scripts/media/lib/adapter-contract.md +329 -0
- package/src/scripts/media/lib/fixtures/comfyui/result.json +1 -0
- package/src/scripts/media/lib/fixtures/fal/result.json +1 -0
- package/src/scripts/media/lib/fixtures/flux/asset-0001.png +0 -0
- package/src/scripts/media/lib/fixtures/flux/result.json +1 -0
- package/src/scripts/media/lib/fixtures/gemini-image/asset-0001.png +0 -0
- package/src/scripts/media/lib/fixtures/gemini-image/result.json +1 -0
- package/src/scripts/media/lib/fixtures/gemini-veo/result.json +1 -0
- package/src/scripts/media/lib/fixtures/higgsfield/result.json +1 -0
- package/src/scripts/media/lib/fixtures/ideogram/asset-0001.png +0 -0
- package/src/scripts/media/lib/fixtures/ideogram/result.json +1 -0
- package/src/scripts/media/lib/fixtures/kling/result.json +1 -0
- package/src/scripts/media/lib/fixtures/musetalk/result.json +1 -0
- package/src/scripts/media/lib/fixtures/openai-images/result.json +1 -0
- package/src/scripts/media/lib/fixtures/recraft/asset-0001.svg +1 -0
- package/src/scripts/media/lib/fixtures/recraft/result.json +1 -0
- package/src/scripts/media/lib/fixtures/replicate/result.json +1 -0
- package/src/scripts/media/lib/fixtures/sora/result.json +1 -0
- package/src/scripts/media/lib/fixtures/syncso/result.json +1 -0
- package/src/scripts/media/lib/load-config.sh +180 -0
- package/src/scripts/media/lib/redact.sh +85 -0
- package/src/scripts/memory_hash.ts +331 -0
- package/src/scripts/memory_lookup.ts +1278 -0
- package/src/scripts/memory_report.ts +845 -0
- package/src/scripts/memory_signal.ts +417 -0
- package/src/scripts/memory_status.ts +189 -0
- package/src/scripts/migrate_command_suggestions.ts +341 -0
- package/src/scripts/migrate_frontmatter_defaults.ts +539 -0
- package/src/scripts/migration_status.ts +301 -0
- package/src/scripts/mine_session.ts +645 -0
- package/src/scripts/minimal_safe_diff_hook.ts +355 -0
- package/src/scripts/move_artefact.ts +869 -0
- package/src/scripts/new_skill.ts +404 -0
- package/src/scripts/onboarding_gate_hook.ts +224 -0
- package/src/scripts/pack_dependency_allowlist.json +1 -1
- package/src/scripts/pack_mcp_content.ts +552 -0
- package/src/scripts/parity/README.md +140 -0
- package/src/scripts/parity/compare.ts +189 -0
- package/src/scripts/parity/coverage_diff.ts +199 -0
- package/src/scripts/parity/phase-manifest.json +93 -0
- package/src/scripts/parity/phase_gate.ts +270 -0
- package/src/scripts/parity/replay.ts +320 -0
- package/src/scripts/pattern_share.ts +363 -0
- package/src/scripts/plan_physical_move.ts +605 -0
- package/src/scripts/prediction-pool/poisson_sim.ts +537 -0
- package/src/scripts/prediction-pool/pool_winsim.ts +677 -0
- package/src/scripts/prediction-pool/score_ev.ts +546 -0
- package/src/scripts/print_required_checks.ts +249 -0
- package/src/scripts/probe_projection_fidelity.ts +468 -0
- package/src/scripts/probe_skill_registration.ts +787 -0
- package/src/scripts/profile_staleness_hook.ts +169 -0
- package/src/scripts/profile_use.ts +227 -0
- package/src/scripts/project_thin_rules.ts +387 -0
- package/src/scripts/propose_modules_config.ts +311 -0
- package/src/scripts/prototype_lint_contradictions.ts +414 -0
- package/src/scripts/prove_pack_extractable.ts +388 -0
- package/src/scripts/readme_linter.ts +913 -0
- package/src/scripts/redact_hook_capture.ts +325 -0
- package/src/scripts/refine_ticket_detect.ts +703 -0
- package/src/scripts/release.ts +1697 -0
- package/src/scripts/render_benchmark_md.ts +664 -0
- package/src/scripts/render_value_md.ts +506 -0
- package/src/scripts/repro/repro_marketplace_install_gap.sh +1 -1
- package/src/scripts/roadmap_progress_hook.ts +410 -0
- package/src/scripts/router_telemetry.ts +972 -0
- package/src/scripts/run.ts +98 -0
- package/src/scripts/run_skill_evals.ts +477 -0
- package/src/scripts/runtime_dispatcher.ts +586 -0
- package/src/scripts/runtime_handler.ts +231 -0
- package/src/scripts/runtime_registry.ts +394 -0
- package/src/scripts/schemas/command.schema.json +3 -2
- package/src/scripts/schemas/mission-catalog.schema.json +112 -0
- package/src/scripts/schemas/mission.schema.json +87 -0
- package/src/scripts/schemas/pack.schema.json +6 -0
- package/src/scripts/schemas/rule.schema.json +1 -0
- package/src/scripts/schemas/skill.schema.json +1 -0
- package/src/scripts/schemas/ticket-manifest.schema.json +35 -0
- package/src/scripts/schemas/ticket.schema.json +60 -0
- package/src/scripts/score_skill_selection.ts +570 -0
- package/src/scripts/security_audit_config.ts +423 -0
- package/src/scripts/skill_collision_clusters.ts +448 -0
- package/src/scripts/skill_discovery.ts +690 -0
- package/src/scripts/skill_linter.ts +4276 -0
- package/src/scripts/skill_overlap.ts +414 -0
- package/src/scripts/skill_preview.ts +548 -0
- package/src/scripts/skill_tools/audit_persona_coverage.ts +427 -0
- package/src/scripts/skill_tools/audit_user_type_coverage.ts +507 -0
- package/src/scripts/skill_tools/index.ts +28 -0
- package/src/scripts/skill_tools/run_block_d_eval.ts +373 -0
- package/src/scripts/skill_tools/score_skill_relevance.ts +475 -0
- package/src/scripts/skill_tools/suggest_skill_for_task.ts +288 -0
- package/src/scripts/skill_trigger_eval.ts +1046 -0
- package/src/scripts/skill_usage_collect.ts +465 -0
- package/src/scripts/skill_usage_report.ts +364 -0
- package/src/scripts/smoke/kernel.sh +4 -5
- package/src/scripts/smoke/router.sh +76 -76
- package/src/scripts/smoke/schema.sh +2 -2
- package/src/scripts/smoke/skills.sh +73 -52
- package/src/scripts/smoke_path_resolution.ts +194 -0
- package/src/scripts/smoke_quickstart.ts +224 -0
- package/src/scripts/snapshot_agent_outputs.ts +375 -0
- package/src/scripts/spotcheck_thin_root.ts +247 -0
- package/src/scripts/surface-tiers.yml +68 -0
- package/src/scripts/sync_agent_settings.ts +763 -0
- package/src/scripts/sync_github_metadata.ts +550 -0
- package/src/scripts/sync_gitignore.ts +630 -0
- package/src/scripts/sync_yaml_rt.ts +910 -0
- package/src/scripts/telegraph_stats.ts +447 -0
- package/src/scripts/tool_registry.ts +330 -0
- package/src/scripts/tools/adapter_errors.ts +93 -0
- package/src/scripts/tools/base_adapter.ts +147 -0
- package/src/scripts/tools/github_adapter.ts +229 -0
- package/src/scripts/tools/jira_adapter.ts +196 -0
- package/src/scripts/trigger_coverage.ts +251 -0
- package/src/scripts/update_counts.ts +284 -0
- package/src/scripts/update_prices.ts +219 -0
- package/src/scripts/validate_agent_settings.ts +265 -0
- package/src/scripts/validate_decision_engine.ts +366 -0
- package/src/scripts/validate_discovery_manifest.ts +160 -0
- package/src/scripts/validate_frontmatter.ts +1030 -0
- package/src/scripts/validate_pack_yaml.ts +0 -0
- package/src/scripts/validate_safe_paths.ts +164 -0
- package/src/scripts/validate_telegraph_carveouts.ts +485 -0
- package/src/scripts/verify_before_complete_hook.ts +306 -0
- package/src/scripts/verify_physical_move.ts +411 -0
- package/src/scripts/wrapper_freshness_hook.ts +179 -0
- package/dist/agent-src/scripts/archive_completed_roadmaps.py +0 -171
- package/dist/agent-src/scripts/update_roadmap_progress.py +0 -537
- package/dist/agent-src/skills/corpus-grounding/scripts/bm25_search.py +0 -212
- package/dist/agent-src/skills/corpus-grounding/scripts/decision_engine.py +0 -438
- package/dist/agent-src/skills/corpus-grounding/scripts/ground.py +0 -166
- package/dist/agent-src/skills/corpus-grounding/scripts/schema_validator.py +0 -160
- package/dist/agent-src/skills/design-tokens/scripts/tokens.py +0 -296
- package/dist/agent-src/skills/react-shadcn-ui/scripts/shadcn_add.py +0 -299
- package/dist/agent-src/skills/tailwind-engineer/scripts/tailwind_config_gen.py +0 -463
- package/dist/agent-src/templates/scripts/check_memory.py +0 -282
- package/dist/agent-src/templates/scripts/check_memory_proposal.py +0 -180
- package/dist/agent-src/templates/scripts/implement_ticket/__init__.py +0 -94
- package/dist/agent-src/templates/scripts/implement_ticket/__main__.py +0 -15
- package/dist/agent-src/templates/scripts/memory_hash.py +0 -75
- package/dist/agent-src/templates/scripts/memory_lookup.py +0 -436
- package/dist/agent-src/templates/scripts/memory_report.py +0 -314
- package/dist/agent-src/templates/scripts/memory_signal.py +0 -165
- package/dist/agent-src/templates/scripts/memory_status.py +0 -76
- package/dist/agent-src/templates/scripts/pr_review_routing.py +0 -340
- package/dist/agent-src/templates/scripts/pr_risk_review.py +0 -211
- package/dist/agent-src/templates/scripts/telemetry/__init__.py +0 -42
- package/dist/agent-src/templates/scripts/telemetry/aggregator.py +0 -169
- package/dist/agent-src/templates/scripts/telemetry/boundary.py +0 -171
- package/dist/agent-src/templates/scripts/telemetry/engagement.py +0 -297
- package/dist/agent-src/templates/scripts/telemetry/report_renderer.py +0 -197
- package/dist/agent-src/templates/scripts/telemetry/settings.py +0 -177
- package/dist/agent-src/templates/scripts/telemetry_record.py +0 -179
- package/dist/agent-src/templates/scripts/telemetry_report.py +0 -161
- package/dist/agent-src/templates/scripts/telemetry_status.py +0 -142
- package/dist/agent-src/templates/scripts/tier_usage_report.py +0 -183
- package/dist/agent-src/templates/scripts/work_engine/__init__.py +0 -58
- package/dist/agent-src/templates/scripts/work_engine/__main__.py +0 -9
- package/dist/agent-src/templates/scripts/work_engine/_lib/__init__.py +0 -7
- package/dist/agent-src/templates/scripts/work_engine/_lib/agent_settings.py +0 -840
- package/dist/agent-src/templates/scripts/work_engine/_lib/user_global_paths.py +0 -249
- package/dist/agent-src/templates/scripts/work_engine/cli.py +0 -195
- package/dist/agent-src/templates/scripts/work_engine/cli_args.py +0 -116
- package/dist/agent-src/templates/scripts/work_engine/delivery_state.py +0 -137
- package/dist/agent-src/templates/scripts/work_engine/directives/__init__.py +0 -33
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/__init__.py +0 -98
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/analyze.py +0 -98
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/implement.py +0 -145
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/memory.py +0 -136
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/plan.py +0 -175
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/refine.py +0 -396
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/report.py +0 -227
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/test.py +0 -180
- package/dist/agent-src/templates/scripts/work_engine/directives/backend/verify.py +0 -170
- package/dist/agent-src/templates/scripts/work_engine/directives/mixed/__init__.py +0 -116
- package/dist/agent-src/templates/scripts/work_engine/directives/mixed/contract.py +0 -254
- package/dist/agent-src/templates/scripts/work_engine/directives/mixed/stitch.py +0 -229
- package/dist/agent-src/templates/scripts/work_engine/directives/mixed/ui.py +0 -231
- package/dist/agent-src/templates/scripts/work_engine/directives/ui/__init__.py +0 -113
- package/dist/agent-src/templates/scripts/work_engine/directives/ui/_passthrough.py +0 -44
- package/dist/agent-src/templates/scripts/work_engine/directives/ui/apply.py +0 -241
- package/dist/agent-src/templates/scripts/work_engine/directives/ui/audit.py +0 -414
- package/dist/agent-src/templates/scripts/work_engine/directives/ui/design.py +0 -335
- package/dist/agent-src/templates/scripts/work_engine/directives/ui/polish.py +0 -513
- package/dist/agent-src/templates/scripts/work_engine/directives/ui/review.py +0 -471
- package/dist/agent-src/templates/scripts/work_engine/directives/ui_trivial/__init__.py +0 -119
- package/dist/agent-src/templates/scripts/work_engine/directives/ui_trivial/_skipped.py +0 -37
- package/dist/agent-src/templates/scripts/work_engine/directives/ui_trivial/apply.py +0 -165
- package/dist/agent-src/templates/scripts/work_engine/directives/ui_trivial/refine.py +0 -66
- package/dist/agent-src/templates/scripts/work_engine/directives/ui_trivial/report.py +0 -62
- package/dist/agent-src/templates/scripts/work_engine/directives/ui_trivial/test.py +0 -115
- package/dist/agent-src/templates/scripts/work_engine/dispatcher.py +0 -331
- package/dist/agent-src/templates/scripts/work_engine/emitters.py +0 -68
- package/dist/agent-src/templates/scripts/work_engine/errors.py +0 -19
- package/dist/agent-src/templates/scripts/work_engine/hook_bootstrap.py +0 -91
- package/dist/agent-src/templates/scripts/work_engine/hooks/__init__.py +0 -54
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/__init__.py +0 -35
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/_chat_history_base.py +0 -59
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_append.py +0 -43
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_halt_append.py +0 -41
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/decision_gate.py +0 -162
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/decision_trace.py +0 -163
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/directive_set_guard.py +0 -53
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/halt_surface_audit.py +0 -50
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py +0 -141
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/state_shape_validation.py +0 -52
- package/dist/agent-src/templates/scripts/work_engine/hooks/builtin/trace.py +0 -84
- package/dist/agent-src/templates/scripts/work_engine/hooks/context.py +0 -66
- package/dist/agent-src/templates/scripts/work_engine/hooks/events.py +0 -44
- package/dist/agent-src/templates/scripts/work_engine/hooks/exceptions.py +0 -79
- package/dist/agent-src/templates/scripts/work_engine/hooks/registry.py +0 -60
- package/dist/agent-src/templates/scripts/work_engine/hooks/runner.py +0 -73
- package/dist/agent-src/templates/scripts/work_engine/hooks/settings.py +0 -196
- package/dist/agent-src/templates/scripts/work_engine/input_builders.py +0 -163
- package/dist/agent-src/templates/scripts/work_engine/intent/__init__.py +0 -47
- package/dist/agent-src/templates/scripts/work_engine/intent/classify.py +0 -280
- package/dist/agent-src/templates/scripts/work_engine/migration/__init__.py +0 -8
- package/dist/agent-src/templates/scripts/work_engine/migration/v0_to_v1.py +0 -231
- package/dist/agent-src/templates/scripts/work_engine/orchestration.py +0 -193
- package/dist/agent-src/templates/scripts/work_engine/persona_policy.py +0 -85
- package/dist/agent-src/templates/scripts/work_engine/resolvers/__init__.py +0 -22
- package/dist/agent-src/templates/scripts/work_engine/resolvers/diff.py +0 -106
- package/dist/agent-src/templates/scripts/work_engine/resolvers/file.py +0 -113
- package/dist/agent-src/templates/scripts/work_engine/resolvers/prompt.py +0 -90
- package/dist/agent-src/templates/scripts/work_engine/scoring/__init__.py +0 -14
- package/dist/agent-src/templates/scripts/work_engine/scoring/confidence.py +0 -300
- package/dist/agent-src/templates/scripts/work_engine/scoring/decision_engine.py +0 -351
- package/dist/agent-src/templates/scripts/work_engine/scoring/decision_trace.py +0 -141
- package/dist/agent-src/templates/scripts/work_engine/scoring/memory_visibility.py +0 -283
- package/dist/agent-src/templates/scripts/work_engine/stack/__init__.py +0 -31
- package/dist/agent-src/templates/scripts/work_engine/stack/detect.py +0 -187
- package/dist/agent-src/templates/scripts/work_engine/stack/runner.py +0 -481
- package/dist/agent-src/templates/scripts/work_engine/state.py +0 -694
- package/dist/agent-src/templates/scripts/work_engine/state_io.py +0 -202
- package/dist/cli/python/resolvePython.js +0 -38
- package/dist/cli/python/resolvePython.js.map +0 -1
- package/src/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
- package/src/scripts/_archive/_backfill_skill_domains.py +0 -140
- package/src/scripts/_archive/_bootstrap_tier_frontmatter.py +0 -151
- package/src/scripts/_archive/_p43_bodies.py +0 -235
- package/src/scripts/_archive/_p43_condense.py +0 -118
- package/src/scripts/_archive/_p4_migrate.py +0 -199
- package/src/scripts/_archive/_phase2_shim_helper.py +0 -109
- package/src/scripts/_archive/_pilot_council_question.py +0 -57
- package/src/scripts/_cli/__init__.py +0 -0
- package/src/scripts/_cli/cmd_doctor.py +0 -1669
- package/src/scripts/_cli/cmd_explain.py +0 -355
- package/src/scripts/_cli/cmd_export.py +0 -157
- package/src/scripts/_cli/cmd_migrate.py +0 -524
- package/src/scripts/_cli/cmd_prune.py +0 -322
- package/src/scripts/_cli/cmd_refresh.py +0 -179
- package/src/scripts/_cli/cmd_settings_check.py +0 -171
- package/src/scripts/_cli/cmd_settings_migrate.py +0 -147
- package/src/scripts/_cli/cmd_sync.py +0 -166
- package/src/scripts/_cli/cmd_uninstall.py +0 -476
- package/src/scripts/_cli/cmd_update.py +0 -279
- package/src/scripts/_cli/cmd_upgrade.py +0 -172
- package/src/scripts/_cli/cmd_validate.py +0 -177
- package/src/scripts/_cli/cmd_versions.py +0 -160
- package/src/scripts/_cli/explain_last/__init__.py +0 -122
- package/src/scripts/_cli/explain_last/assumptions.py +0 -59
- package/src/scripts/_cli/explain_last/council.py +0 -105
- package/src/scripts/_cli/explain_last/halt.py +0 -44
- package/src/scripts/_cli/explain_last/inputs.py +0 -128
- package/src/scripts/_cli/explain_last/memory.py +0 -94
- package/src/scripts/_cli/explain_last/provider.py +0 -52
- package/src/scripts/_cli/explain_last/render.py +0 -52
- package/src/scripts/_cli/explain_last/route.py +0 -59
- package/src/scripts/_cli/explain_last/scrubber.py +0 -105
- package/src/scripts/_cli/explain_last/sections/__init__.py +0 -35
- package/src/scripts/_cli/explain_last/sections/assumptions.py +0 -21
- package/src/scripts/_cli/explain_last/sections/council.py +0 -27
- package/src/scripts/_cli/explain_last/sections/halt.py +0 -31
- package/src/scripts/_cli/explain_last/sections/header.py +0 -24
- package/src/scripts/_cli/explain_last/sections/inputs.py +0 -27
- package/src/scripts/_cli/explain_last/sections/memory.py +0 -21
- package/src/scripts/_cli/explain_last/sections/pack.py +0 -16
- package/src/scripts/_cli/explain_last/sections/provider.py +0 -26
- package/src/scripts/_cli/explain_last/sections/route.py +0 -22
- package/src/scripts/_cli/explain_last/state_loader.py +0 -76
- package/src/scripts/_emit_domain_table.py +0 -35
- package/src/scripts/_lib/__init__.py +0 -5
- package/src/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
- package/src/scripts/_lib/agent_settings.py +0 -840
- package/src/scripts/_lib/agent_src.py +0 -491
- package/src/scripts/_lib/agents_overlay.py +0 -120
- package/src/scripts/_lib/bench_ab_cache.py +0 -162
- package/src/scripts/_lib/bench_ab_scoring.py +0 -209
- package/src/scripts/_lib/bench_ab_scoring_v2.py +0 -227
- package/src/scripts/_lib/bench_cost.py +0 -138
- package/src/scripts/_lib/bench_quality.py +0 -118
- package/src/scripts/_lib/bench_report.py +0 -149
- package/src/scripts/_lib/bench_telegraph.py +0 -273
- package/src/scripts/_lib/bench_telegraph_report.py +0 -151
- package/src/scripts/_lib/changelog_eras.py +0 -330
- package/src/scripts/_lib/claude_desktop_bundler.py +0 -238
- package/src/scripts/_lib/cli_wrapper.py +0 -64
- package/src/scripts/_lib/fs_atomic.py +0 -116
- package/src/scripts/_lib/global_deploy_inventory.py +0 -312
- package/src/scripts/_lib/install_regenerator.py +0 -134
- package/src/scripts/_lib/installed_lock.py +0 -256
- package/src/scripts/_lib/installed_tools.py +0 -381
- package/src/scripts/_lib/json_pointers.py +0 -260
- package/src/scripts/_lib/link_crypto.py +0 -206
- package/src/scripts/_lib/linked_projects.py +0 -238
- package/src/scripts/_lib/model_tier.py +0 -52
- package/src/scripts/_lib/module_detection.py +0 -223
- package/src/scripts/_lib/pin_resolver.py +0 -152
- package/src/scripts/_lib/script_output.py +0 -144
- package/src/scripts/_lib/security_lint.py +0 -228
- package/src/scripts/_lib/token_count.py +0 -95
- package/src/scripts/_lib/update_check.py +0 -207
- package/src/scripts/_lib/user_global_paths.py +0 -249
- package/src/scripts/_lib/value_ladder.py +0 -696
- package/src/scripts/_lib/value_report.py +0 -455
- package/src/scripts/_phase4_bucket.py +0 -210
- package/src/scripts/_pilot_measure.py +0 -53
- package/src/scripts/_tmp_scan_framework_leakage.py +0 -119
- package/src/scripts/adoption_report.py +0 -195
- package/src/scripts/adoption_snapshot.py +0 -219
- package/src/scripts/adoption_status.py +0 -166
- package/src/scripts/adr/regenerate_index.py +0 -79
- package/src/scripts/ai-video/lib/adapter-common.sh +0 -231
- package/src/scripts/ai-video/lib/adapter-contract.md +0 -329
- package/src/scripts/ai-video/lib/fixtures/comfyui/result.json +0 -1
- package/src/scripts/ai-video/lib/fixtures/fal/result.json +0 -1
- package/src/scripts/ai-video/lib/fixtures/gemini-veo/result.json +0 -1
- package/src/scripts/ai-video/lib/fixtures/higgsfield/result.json +0 -1
- package/src/scripts/ai-video/lib/fixtures/kling/result.json +0 -1
- package/src/scripts/ai-video/lib/fixtures/musetalk/result.json +0 -1
- package/src/scripts/ai-video/lib/fixtures/openai-images/result.json +0 -1
- package/src/scripts/ai-video/lib/fixtures/replicate/result.json +0 -1
- package/src/scripts/ai-video/lib/fixtures/sora/result.json +0 -1
- package/src/scripts/ai-video/lib/fixtures/syncso/result.json +0 -1
- package/src/scripts/ai-video/lib/load-config.sh +0 -180
- package/src/scripts/ai-video/lib/redact.sh +0 -85
- package/src/scripts/ai_council/__init__.py +0 -40
- package/src/scripts/ai_council/_default_prices.py +0 -50
- package/src/scripts/ai_council/advisors.py +0 -148
- package/src/scripts/ai_council/airgap.py +0 -165
- package/src/scripts/ai_council/budget_guard.py +0 -202
- package/src/scripts/ai_council/bundler.py +0 -263
- package/src/scripts/ai_council/cli_hints.py +0 -123
- package/src/scripts/ai_council/clients.py +0 -1385
- package/src/scripts/ai_council/compile_corpus.py +0 -179
- package/src/scripts/ai_council/confidence_gate.py +0 -156
- package/src/scripts/ai_council/config.py +0 -1419
- package/src/scripts/ai_council/consensus.py +0 -329
- package/src/scripts/ai_council/events_log.py +0 -141
- package/src/scripts/ai_council/learn_low_impact_preview.py +0 -252
- package/src/scripts/ai_council/low_impact.py +0 -714
- package/src/scripts/ai_council/low_impact_corpus.py +0 -466
- package/src/scripts/ai_council/low_impact_intake.py +0 -163
- package/src/scripts/ai_council/modes.py +0 -131
- package/src/scripts/ai_council/necessity.py +0 -782
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_2a4_acceptance.py +0 -208
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_add_quiet.py +0 -149
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_budget_v2_audit.py +0 -206
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_context_layer_v1_estimate.py +0 -67
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_context_layer_v1_review.py +0 -292
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_followups_review.py +0 -259
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_inject_quiet_flag.py +0 -33
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_measure_v2.sh +0 -36
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_measure_verbosity.sh +0 -26
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_nondestructive_inline_audit.py +0 -209
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_per_task.sh +0 -41
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_phase4_dispatch_latency.py +0 -108
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_phase6_trigger_jaccard.py +0 -92
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_phase_2a_budget_rebalance.py +0 -257
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_phase_2a_post_revert.py +0 -197
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_rebalancing_audit.py +0 -149
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_roundtrip.py +0 -111
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_rule_hardening_v1.py +0 -251
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_silent_taskfiles.py +0 -98
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_structural_open_questions.py +0 -232
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_structural_optimization.py +0 -144
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_structural_v3_gaps.py +0 -252
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_structural_v3_review.py +0 -240
- package/src/scripts/ai_council/one_off_archive/2026-05/_one_off_tier_retrofit.py +0 -180
- package/src/scripts/ai_council/orchestrator.py +0 -1206
- package/src/scripts/ai_council/pricing.py +0 -215
- package/src/scripts/ai_council/probation_gate.py +0 -152
- package/src/scripts/ai_council/project_context.py +0 -159
- package/src/scripts/ai_council/prompts.py +0 -567
- package/src/scripts/ai_council/redact_low_impact_entry.py +0 -155
- package/src/scripts/ai_council/replay.py +0 -155
- package/src/scripts/ai_council/session.py +0 -366
- package/src/scripts/ai_council/shadow_dispatch.py +0 -235
- package/src/scripts/ai_council/solo_dispatch.py +0 -226
- package/src/scripts/annotate_discovery.py +0 -149
- package/src/scripts/apply_modules_config.py +0 -290
- package/src/scripts/audit_adr_coverage.py +0 -173
- package/src/scripts/audit_auto_rules.py +0 -175
- package/src/scripts/audit_cloud_compatibility.py +0 -362
- package/src/scripts/audit_command_surface.py +0 -682
- package/src/scripts/audit_initial_context.py +0 -237
- package/src/scripts/audit_likelihood.py +0 -148
- package/src/scripts/audit_mcp_tools.py +0 -146
- package/src/scripts/audit_overlap.py +0 -145
- package/src/scripts/audit_skill_descriptions.py +0 -180
- package/src/scripts/audit_skill_overlap.py +0 -207
- package/src/scripts/audit_user_type_axis.py +0 -140
- package/src/scripts/backfill_model_tier.py +0 -184
- package/src/scripts/bench_ab_cache_dispatch.py +0 -68
- package/src/scripts/bench_ab_clone.py +0 -220
- package/src/scripts/bench_ab_diff.py +0 -220
- package/src/scripts/bench_ab_integrity.py +0 -143
- package/src/scripts/bench_ab_run.py +0 -235
- package/src/scripts/bench_ab_task_runner.py +0 -814
- package/src/scripts/bench_ab_tracka_run.py +0 -202
- package/src/scripts/bench_ab_v2_run.py +0 -247
- package/src/scripts/bench_ab_v2_stats.py +0 -347
- package/src/scripts/bench_baseline_ready.py +0 -108
- package/src/scripts/bench_condense_memory.py +0 -168
- package/src/scripts/bench_drift_check.py +0 -151
- package/src/scripts/bench_per_tool.py +0 -216
- package/src/scripts/bench_rtk_savings.py +0 -320
- package/src/scripts/bench_run.py +0 -272
- package/src/scripts/bench_runner.py +0 -158
- package/src/scripts/build_cloud_bundle.py +0 -458
- package/src/scripts/build_discovery_manifest.py +0 -757
- package/src/scripts/build_linear_digest.py +0 -260
- package/src/scripts/build_mcp_registry_manifest.py +0 -181
- package/src/scripts/build_rule_trigger_matrix.py +0 -350
- package/src/scripts/capture_showcase_session.py +0 -361
- package/src/scripts/chat_history.py +0 -1799
- package/src/scripts/check_always_budget.py +0 -532
- package/src/scripts/check_artefact_checksums.py +0 -111
- package/src/scripts/check_augment_description_cap.py +0 -79
- package/src/scripts/check_augmentignore.py +0 -72
- package/src/scripts/check_beta_review_markers.py +0 -127
- package/src/scripts/check_bite_sized_granularity.py +0 -98
- package/src/scripts/check_cluster_patterns.py +0 -206
- package/src/scripts/check_command_count_messaging.py +0 -152
- package/src/scripts/check_condensation.py +0 -375
- package/src/scripts/check_condensed_paths.py +0 -231
- package/src/scripts/check_context_paths.py +0 -202
- package/src/scripts/check_council_layout.py +0 -125
- package/src/scripts/check_council_references.py +0 -228
- package/src/scripts/check_discovery_determinism.py +0 -70
- package/src/scripts/check_gate_paths.py +0 -128
- package/src/scripts/check_iron_law_prominence.py +0 -145
- package/src/scripts/check_kernel_rule_bundle.py +0 -151
- package/src/scripts/check_md_language.py +0 -161
- package/src/scripts/check_memory.py +0 -429
- package/src/scripts/check_memory_proposal.py +0 -182
- package/src/scripts/check_module_management_neutral.py +0 -147
- package/src/scripts/check_no_external_sources.py +0 -101
- package/src/scripts/check_no_local_settings_committed.py +0 -51
- package/src/scripts/check_no_new_legacy_path.py +0 -100
- package/src/scripts/check_no_roadmap_refs.py +0 -155
- package/src/scripts/check_one_off_location.py +0 -81
- package/src/scripts/check_overlay_cascade_subdirs.py +0 -129
- package/src/scripts/check_portability.py +0 -574
- package/src/scripts/check_proposal.py +0 -269
- package/src/scripts/check_public_catalog_links.py +0 -125
- package/src/scripts/check_public_links.py +0 -185
- package/src/scripts/check_references.py +0 -559
- package/src/scripts/check_release_includes_discovery.py +0 -61
- package/src/scripts/check_release_pr_shape.py +0 -123
- package/src/scripts/check_release_published.py +0 -145
- package/src/scripts/check_release_trunk_sync.py +0 -152
- package/src/scripts/check_reply_consistency.py +0 -169
- package/src/scripts/check_roadmap_trackable.py +0 -114
- package/src/scripts/check_role_doc_links.py +0 -110
- package/src/scripts/check_safety_floor_untouched.py +0 -125
- package/src/scripts/check_skill_requires.py +0 -147
- package/src/scripts/check_template_pin_drift.py +0 -129
- package/src/scripts/check_test_coverage_diff.py +0 -180
- package/src/scripts/check_token_optimizer_freshness.py +0 -146
- package/src/scripts/check_update_banner.py +0 -86
- package/src/scripts/ci_status.py +0 -301
- package/src/scripts/ci_summary.py +0 -131
- package/src/scripts/ci_time_ratio.py +0 -168
- package/src/scripts/command_suggester/__init__.py +0 -51
- package/src/scripts/command_suggester/cooldown.py +0 -132
- package/src/scripts/command_suggester/loader.py +0 -73
- package/src/scripts/command_suggester/match.py +0 -180
- package/src/scripts/command_suggester/rank.py +0 -120
- package/src/scripts/command_suggester/render.py +0 -86
- package/src/scripts/command_suggester/sanitize.py +0 -113
- package/src/scripts/command_suggester/settings.py +0 -127
- package/src/scripts/command_suggester/types.py +0 -78
- package/src/scripts/compile_router.py +0 -232
- package/src/scripts/condense.py +0 -1919
- package/src/scripts/condense_memory.py +0 -178
- package/src/scripts/config/__init__.py +0 -9
- package/src/scripts/config/packs.py +0 -157
- package/src/scripts/config/presets.py +0 -224
- package/src/scripts/config/profile_explain.py +0 -89
- package/src/scripts/config/profiles.py +0 -191
- package/src/scripts/config/session_profiles.py +0 -542
- package/src/scripts/context_hygiene_hook.py +0 -181
- package/src/scripts/cost_by_conversation.py +0 -78
- package/src/scripts/cost_summary.py +0 -97
- package/src/scripts/council_cli.py +0 -2571
- package/src/scripts/council_prune.py +0 -81
- package/src/scripts/cross_repo_retrieve.py +0 -172
- package/src/scripts/discovery_stats.py +0 -70
- package/src/scripts/extract_audit_patterns.py +0 -202
- package/src/scripts/first_run_gate_hook.py +0 -179
- package/src/scripts/gen_discovery_baseline.py +0 -127
- package/src/scripts/generate_catalog.py +0 -116
- package/src/scripts/generate_command_flows.py +0 -191
- package/src/scripts/generate_index.py +0 -303
- package/src/scripts/generate_ownership_matrix.py +0 -378
- package/src/scripts/generate_pack_manifests.py +0 -340
- package/src/scripts/hooks/__init__.py +0 -1
- package/src/scripts/hooks/dispatch_hook.py +0 -461
- package/src/scripts/hooks/dispatch_issues.py +0 -136
- package/src/scripts/hooks/envelope.py +0 -98
- package/src/scripts/hooks/replay_hook.py +0 -144
- package/src/scripts/hooks/state_io.py +0 -145
- package/src/scripts/hooks_doctor.py +0 -223
- package/src/scripts/hooks_status.py +0 -157
- package/src/scripts/injection_scan_hook.py +0 -145
- package/src/scripts/install.py +0 -5258
- package/src/scripts/inventory_abstraction_budget.py +0 -622
- package/src/scripts/inventory_frontmatter.py +0 -164
- package/src/scripts/inventory_meta_layers.py +0 -288
- package/src/scripts/iron_law_sha.py +0 -107
- package/src/scripts/linked_projects_list.py +0 -91
- package/src/scripts/lint_agent_security.py +0 -112
- package/src/scripts/lint_agent_skill_names.py +0 -150
- package/src/scripts/lint_agents_layout.py +0 -197
- package/src/scripts/lint_agents_md.py +0 -210
- package/src/scripts/lint_archived_skills.py +0 -159
- package/src/scripts/lint_artefact_frontmatter.py +0 -188
- package/src/scripts/lint_bench_ab.py +0 -173
- package/src/scripts/lint_bench_corpus.py +0 -255
- package/src/scripts/lint_command_flow_coverage.py +0 -132
- package/src/scripts/lint_command_routing.py +0 -160
- package/src/scripts/lint_command_tiers.py +0 -216
- package/src/scripts/lint_command_verbs.py +0 -206
- package/src/scripts/lint_commit_subjects.py +0 -139
- package/src/scripts/lint_context_spine_usage.py +0 -137
- package/src/scripts/lint_discovery_manifest.py +0 -176
- package/src/scripts/lint_discovery_vocabulary.py +0 -222
- package/src/scripts/lint_empty_roadmaps.py +0 -80
- package/src/scripts/lint_examples.py +0 -102
- package/src/scripts/lint_explain_trace.py +0 -80
- package/src/scripts/lint_featured_skills.py +0 -144
- package/src/scripts/lint_flows.py +0 -215
- package/src/scripts/lint_framework_leakage.py +0 -375
- package/src/scripts/lint_frontmatter_boilerplate.py +0 -77
- package/src/scripts/lint_ghostwriter_source.py +0 -242
- package/src/scripts/lint_global_paths.py +0 -148
- package/src/scripts/lint_handoffs.py +0 -217
- package/src/scripts/lint_hidden_unicode.py +0 -132
- package/src/scripts/lint_hook_concern_budget.py +0 -207
- package/src/scripts/lint_hook_manifest.py +0 -217
- package/src/scripts/lint_instruction_smuggling.py +0 -107
- package/src/scripts/lint_load_context.py +0 -196
- package/src/scripts/lint_marketplace.py +0 -180
- package/src/scripts/lint_marketplace_install_completeness.py +0 -198
- package/src/scripts/lint_mcp_config_security.py +0 -124
- package/src/scripts/lint_mcp_registry_manifest.py +0 -69
- package/src/scripts/lint_media_policy_linkage.py +0 -140
- package/src/scripts/lint_model_tier_coverage.py +0 -73
- package/src/scripts/lint_namespace.py +0 -135
- package/src/scripts/lint_namespace_collisions.py +0 -103
- package/src/scripts/lint_new_skill_gate.py +0 -144
- package/src/scripts/lint_no_new_atomic_commands.py +0 -180
- package/src/scripts/lint_one_off_age.py +0 -184
- package/src/scripts/lint_orchestration_dsl.py +0 -217
- package/src/scripts/lint_orchestrator_auto_detect.py +0 -111
- package/src/scripts/lint_pack_boundaries.py +0 -147
- package/src/scripts/lint_pack_dependencies.py +0 -137
- package/src/scripts/lint_pack_first_win.py +0 -121
- package/src/scripts/lint_persona_governance.py +0 -164
- package/src/scripts/lint_positioning.py +0 -143
- package/src/scripts/lint_profile_overlay_set_only.py +0 -179
- package/src/scripts/lint_readme_jargon.py +0 -131
- package/src/scripts/lint_readme_size.py +0 -33
- package/src/scripts/lint_regression.py +0 -251
- package/src/scripts/lint_roadmap_ci_steps.py +0 -186
- package/src/scripts/lint_roadmap_complexity.py +0 -220
- package/src/scripts/lint_role_experiences.py +0 -255
- package/src/scripts/lint_rule_interactions.py +0 -170
- package/src/scripts/lint_rule_tiers.py +0 -90
- package/src/scripts/lint_showcase_sessions.py +0 -148
- package/src/scripts/lint_skill_frontmatter_safety.py +0 -144
- package/src/scripts/lint_skill_tools.py +0 -168
- package/src/scripts/lint_topics_yaml.py +0 -89
- package/src/scripts/lint_trust_coherence.py +0 -212
- package/src/scripts/lint_value_dashboard.py +0 -218
- package/src/scripts/lint_workspace_boundary.py +0 -122
- package/src/scripts/mcp_parity_smoke.py +0 -316
- package/src/scripts/mcp_render.py +0 -173
- package/src/scripts/mcp_server/__init__.py +0 -19
- package/src/scripts/mcp_server/__main__.py +0 -12
- package/src/scripts/mcp_server/catalog.py +0 -125
- package/src/scripts/mcp_server/metadata.py +0 -75
- package/src/scripts/mcp_server/prompts.py +0 -442
- package/src/scripts/mcp_server/requirements.txt +0 -4
- package/src/scripts/mcp_server/resources.py +0 -201
- package/src/scripts/mcp_server/server.py +0 -270
- package/src/scripts/mcp_server/telemetry.py +0 -128
- package/src/scripts/mcp_server/tools.py +0 -926
- package/src/scripts/mcp_telemetry_health.py +0 -214
- package/src/scripts/mcp_telemetry_query.py +0 -203
- package/src/scripts/mcp_telemetry_store.py +0 -211
- package/src/scripts/measure_augment_budget.py +0 -214
- package/src/scripts/measure_density.py +0 -232
- package/src/scripts/measure_frugality_savings.py +0 -164
- package/src/scripts/measure_markitdown_lift.py +0 -127
- package/src/scripts/measure_patterns.py +0 -376
- package/src/scripts/measure_projection_bytes.py +0 -159
- package/src/scripts/measure_rule_budget.py +0 -347
- package/src/scripts/measure_skill_reduction.py +0 -102
- package/src/scripts/memory_hash.py +0 -75
- package/src/scripts/memory_lookup.py +0 -436
- package/src/scripts/memory_report.py +0 -314
- package/src/scripts/memory_signal.py +0 -165
- package/src/scripts/memory_status.py +0 -76
- package/src/scripts/migrate_command_suggestions.py +0 -151
- package/src/scripts/migrate_frontmatter_defaults.py +0 -245
- package/src/scripts/mine_session.py +0 -356
- package/src/scripts/minimal_safe_diff_hook.py +0 -245
- package/src/scripts/move_artefact.py +0 -143
- package/src/scripts/new_skill.py +0 -148
- package/src/scripts/onboarding_gate_hook.py +0 -142
- package/src/scripts/pack_mcp_content.py +0 -293
- package/src/scripts/plan_physical_move.py +0 -353
- package/src/scripts/prediction-pool/poisson_sim.py +0 -167
- package/src/scripts/prediction-pool/pool_winsim.py +0 -236
- package/src/scripts/prediction-pool/score_ev.py +0 -188
- package/src/scripts/print_required_checks.py +0 -196
- package/src/scripts/probe_projection_fidelity.py +0 -202
- package/src/scripts/probe_skill_registration.py +0 -413
- package/src/scripts/profile_staleness_hook.py +0 -69
- package/src/scripts/profile_use.py +0 -164
- package/src/scripts/project_thin_rules.py +0 -168
- package/src/scripts/propose_modules_config.py +0 -145
- package/src/scripts/prototype_lint_contradictions.py +0 -150
- package/src/scripts/prove_pack_extractable.py +0 -187
- package/src/scripts/readme_linter.py +0 -589
- package/src/scripts/redact_hook_capture.py +0 -148
- package/src/scripts/refine_ticket_detect.py +0 -646
- package/src/scripts/release.py +0 -1091
- package/src/scripts/render_benchmark_md.py +0 -401
- package/src/scripts/render_value_md.py +0 -347
- package/src/scripts/requirements-evals.txt +0 -8
- package/src/scripts/roadmap_progress_hook.py +0 -274
- package/src/scripts/router_telemetry.py +0 -470
- package/src/scripts/run_skill_evals.py +0 -185
- package/src/scripts/runtime_dispatcher.py +0 -276
- package/src/scripts/runtime_handler.py +0 -148
- package/src/scripts/runtime_registry.py +0 -166
- package/src/scripts/score_skill_selection.py +0 -198
- package/src/scripts/security_audit_config.py +0 -153
- package/src/scripts/setup_eval_venv.sh +0 -58
- package/src/scripts/skill_collision_clusters.py +0 -162
- package/src/scripts/skill_discovery.py +0 -254
- package/src/scripts/skill_linter.py +0 -3694
- package/src/scripts/skill_overlap.py +0 -204
- package/src/scripts/skill_preview.py +0 -179
- package/src/scripts/skill_tools/__init__.py +0 -22
- package/src/scripts/skill_tools/audit_persona_coverage.py +0 -147
- package/src/scripts/skill_tools/audit_user_type_coverage.py +0 -148
- package/src/scripts/skill_tools/run_block_d_eval.py +0 -129
- package/src/scripts/skill_tools/score_skill_relevance.py +0 -169
- package/src/scripts/skill_tools/suggest_skill_for_task.py +0 -113
- package/src/scripts/skill_trigger_eval.py +0 -682
- package/src/scripts/skill_usage_collect.py +0 -191
- package/src/scripts/skill_usage_report.py +0 -162
- package/src/scripts/smoke_path_resolution.py +0 -93
- package/src/scripts/smoke_quickstart.py +0 -144
- package/src/scripts/snapshot_agent_outputs.py +0 -144
- package/src/scripts/spotcheck_thin_root.py +0 -134
- package/src/scripts/sync_agent_settings.py +0 -180
- package/src/scripts/sync_github_metadata.py +0 -147
- package/src/scripts/sync_gitignore.py +0 -291
- package/src/scripts/sync_yaml_rt.py +0 -734
- package/src/scripts/telegraph_stats.py +0 -119
- package/src/scripts/tool_registry.py +0 -146
- package/src/scripts/tools/__init__.py +0 -1
- package/src/scripts/tools/adapter_errors.py +0 -63
- package/src/scripts/tools/base_adapter.py +0 -91
- package/src/scripts/tools/github_adapter.py +0 -128
- package/src/scripts/tools/jira_adapter.py +0 -115
- package/src/scripts/trigger_coverage.py +0 -129
- package/src/scripts/update_counts.py +0 -199
- package/src/scripts/update_prices.py +0 -125
- package/src/scripts/validate_agent_settings.py +0 -124
- package/src/scripts/validate_decision_engine.py +0 -136
- package/src/scripts/validate_discovery_manifest.py +0 -94
- package/src/scripts/validate_frontmatter.py +0 -647
- package/src/scripts/validate_pack_yaml.py +0 -179
- package/src/scripts/validate_safe_paths.py +0 -118
- package/src/scripts/validate_telegraph_carveouts.py +0 -129
- package/src/scripts/verify_before_complete_hook.py +0 -216
- package/src/scripts/verify_physical_move.py +0 -185
- package/src/scripts/wrapper_freshness_hook.py +0 -86
- /package/dist/agent-src/skills/design-intelligence/data/{typography.csv → font-pairings-reference.csv} +0 -0
- /package/src/scripts/{ai-video → media}/lib/fixtures/allin1/analysis.json +0 -0
- /package/src/scripts/{ai-video → media}/lib/fixtures/comfyui/scene-0001.mp4 +0 -0
- /package/src/scripts/{ai-video → media}/lib/fixtures/fal/scene-0001.mp4 +0 -0
- /package/src/scripts/{ai-video → media}/lib/fixtures/gemini-veo/scene-0001.mp4 +0 -0
- /package/src/scripts/{ai-video → media}/lib/fixtures/higgsfield/scene-0001.mp4 +0 -0
- /package/src/scripts/{ai-video → media}/lib/fixtures/kling/scene-0001.mp4 +0 -0
- /package/src/scripts/{ai-video → media}/lib/fixtures/musetalk/lipsync-0001.mp4 +0 -0
- /package/src/scripts/{ai-video → media}/lib/fixtures/openai-images/scene-0001.png +0 -0
- /package/src/scripts/{ai-video → media}/lib/fixtures/replicate/scene-0001.mp4 +0 -0
- /package/src/scripts/{ai-video → media}/lib/fixtures/sora/scene-0001.mp4 +0 -0
- /package/src/scripts/{ai-video → media}/lib/fixtures/syncso/lipsync-0001.mp4 +0 -0
- /package/src/scripts/{ai-video → media}/lib/fixtures/whisperx/transcript.json +0 -0
- /package/src/scripts/{ai-video → media}/lib/telemetry.sh +0 -0
|
@@ -1,814 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Track B — task runner for the package-impact A/B bench.
|
|
3
|
-
|
|
4
|
-
Phase 4 Step 2 of `agents/roadmaps/road-to-package-impact-benchmark.md`.
|
|
5
|
-
|
|
6
|
-
For each task in `internal/bench/corpora/ab-trackb.yaml`, in each variant:
|
|
7
|
-
|
|
8
|
-
1. Snapshot the variant clone's file tree.
|
|
9
|
-
2. Invoke the `claude` CLI with the task prompt — OR dry-run, depending
|
|
10
|
-
on `--mode`.
|
|
11
|
-
3. Capture the transcript, tool-call events, wall-time, and (if available)
|
|
12
|
-
token + cost counts.
|
|
13
|
-
4. Snapshot the post-run tree.
|
|
14
|
-
5. Score the task via scripts/_lib/bench_ab_scoring.py.
|
|
15
|
-
|
|
16
|
-
Modes:
|
|
17
|
-
|
|
18
|
-
- `dry-run` (default) — record the would-run shell command, write a stub
|
|
19
|
-
transcript naming the variant, score against the unchanged tree. The
|
|
20
|
-
result is structural-zero for every check that requires a file write,
|
|
21
|
-
but the scoring + reporting pipeline runs end-to-end. This is what the
|
|
22
|
-
bench produces in CI by default — fast, free, repeatable.
|
|
23
|
-
- `live` — actually invoke the `claude` CLI with `--print` (one-shot
|
|
24
|
-
mode) and the task prompt. Reads `CLAUDE_CLI` from env if set, falls
|
|
25
|
-
back to `claude` on PATH. Captures stdout as the transcript. Honors
|
|
26
|
-
`--samples N` for repeated runs.
|
|
27
|
-
|
|
28
|
-
The runner ALWAYS resets the clone to a clean state before each task and
|
|
29
|
-
ALWAYS records the mode in the report header so a reader can never mistake
|
|
30
|
-
a dry-run report for a real measurement.
|
|
31
|
-
"""
|
|
32
|
-
from __future__ import annotations
|
|
33
|
-
|
|
34
|
-
import argparse
|
|
35
|
-
import hashlib
|
|
36
|
-
import json
|
|
37
|
-
import os
|
|
38
|
-
import shutil
|
|
39
|
-
import subprocess
|
|
40
|
-
import sys
|
|
41
|
-
import threading
|
|
42
|
-
import time
|
|
43
|
-
from datetime import datetime, timezone
|
|
44
|
-
from pathlib import Path
|
|
45
|
-
|
|
46
|
-
REPO_ROOT = Path(__file__).resolve().parent.parent.parent
|
|
47
|
-
sys.path.insert(0, str(REPO_ROOT / "src" / "scripts"))
|
|
48
|
-
|
|
49
|
-
from _lib import bench_ab_cache # type: ignore[import-not-found] # noqa: E402
|
|
50
|
-
from _lib import bench_ab_scoring # type: ignore[import-not-found] # noqa: E402
|
|
51
|
-
|
|
52
|
-
try:
|
|
53
|
-
import yaml
|
|
54
|
-
except ImportError:
|
|
55
|
-
sys.stderr.write("bench_ab_task_runner: PyYAML required (pip install pyyaml)\n")
|
|
56
|
-
raise SystemExit(2)
|
|
57
|
-
|
|
58
|
-
# Fixed locations of the corpus, the per-variant clones and the report output,
# all resolved relative to the repository root.
CORPUS_PATH = REPO_ROOT.joinpath("internal", "bench", "corpora", "ab-trackb.yaml")
CLONES_DIR = REPO_ROOT.joinpath("internal", "bench", "ab", "clones")
REPORTS_DIR = REPO_ROOT.joinpath("internal", "bench", "reports", "ab")

# Maximum number of path components a file may have (relative to the clone
# root) to be included in a snapshot. The fixture is shallow.
SNAPSHOT_MAX_DEPTH = 6

# --- Activation (proven mechanism) ---
# agent-config is installed as a GLOBAL Claude Code plugin (`enabledPlugins` in
# the ~/.claude user settings), so a plain `claude --print` already runs WITH
# the package. The clean control is `--setting-sources project,local`: it leaves
# out the user settings that carry `enabledPlugins`, which turns the plugin OFF
# while auth keeps working. Measured: plain --print ~35.5k input tokens versus
# ~11.9k with --setting-sources project,local, i.e. the ~24k delta is the
# package's always-on footprint. Resulting arms:
#   without  = `--setting-sources project,local` (plugin OFF, base model)
#   with     = plain `--print` (the real installed plugin = package)
#   with-rdp = plain `--print` + RDP rules injected (RDP is not in the released
#              plugin yet)
# (`--bare` is NOT used because it disables auth as well.)
RDP_EXTRA_FILES = (
    REPO_ROOT.joinpath("src", "rules", "notes-first-reasoning.md"),
    REPO_ROOT.joinpath("src", "agent-src", "contexts", "execution", "rdp-gate.md"),
)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def _concat_rules(paths) -> str:
    """Concatenate the UTF-8 text of every readable file in `paths`.

    Files are joined in the given order with a Markdown horizontal-rule
    separator. A file whose read raises `OSError` is skipped; if nothing
    could be read the result is the empty string.
    """
    separator = "\n\n---\n\n"

    def _readable_texts():
        for rule_path in paths:
            try:
                yield rule_path.read_text(encoding="utf-8")
            except OSError:
                # Best-effort: an unreadable rule file is simply left out.
                continue

    return separator.join(_readable_texts())
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
def system_prompt_for(variant: str) -> str | None:
    """Return the extra rule text to inject for `variant`, or None.

    Only the `with-rdp` variant injects anything: the concatenated contents of
    the `RDP_EXTRA_FILES` that exist on disk (the RDP artifacts are not in the
    released plugin yet). Every other variant gets None — `with` relies on the
    real plugin and `without` runs plugin-off.
    """
    if variant != "with-rdp":
        return None
    present = [rule_file for rule_file in RDP_EXTRA_FILES if rule_file.exists()]
    return _concat_rules(present)
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
def setting_sources_for(variant: str) -> str | None:
    """Return the `--setting-sources` value for `variant`, or None.

    The `without` arm uses "project,local", which excludes the user settings
    and thereby drops the global plugin while auth survives. All other
    variants return None.
    """
    if variant == "without":
        return "project,local"
    return None
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
def utc_stamp() -> str:
    """Return the current UTC time as `YYYY-MM-DDTHH-MM-SSZ`.

    The time fields are separated by dashes rather than colons; the stamp is
    used inside report file names.
    """
    now_utc = datetime.now(tz=timezone.utc)
    return now_utc.strftime("%Y-%m-%dT%H-%M-%SZ")
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
def snapshot_clone(clone_root: Path, *, max_depth: int = SNAPSHOT_MAX_DEPTH) -> dict[str, str]:
    """Map every fixture file under `clone_root` to a short content hash.

    Keys are POSIX-style paths relative to `clone_root`; values are the first
    16 hex characters of the file's SHA-256. Entries are inserted in sorted
    path order.

    The agent-config surface is left out because it is the variant axis, not
    the task surface: anything below a top-level `.claude` or `.augment`
    directory, and the root-level files `AGENTS.md`, `CLAUDE.md` and
    `.bench-ab-manifest.json`. Files whose relative path has more than
    `max_depth` components, and files that cannot be read, are skipped too.
    """
    ignored_top_dirs = (".claude", ".augment")
    ignored_root_files = ("AGENTS.md", "CLAUDE.md", ".bench-ab-manifest.json")
    digests: dict[str, str] = {}
    for candidate in sorted(clone_root.rglob("*")):
        if not candidate.is_file():
            continue
        relative = candidate.relative_to(clone_root)
        segments = relative.parts
        key = relative.as_posix()
        excluded = (
            (bool(segments) and segments[0] in ignored_top_dirs)
            or key in ignored_root_files
            or len(segments) > max_depth
        )
        if excluded:
            continue
        try:
            payload = candidate.read_bytes()
        except OSError:
            # Unreadable file: leave it out of the snapshot.
            continue
        digests[key] = hashlib.sha256(payload).hexdigest()[:16]
    return digests
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
def reset_clone(variant: str) -> Path:
    """Rebuild the clone for `variant` so each task starts from the same state.

    Loads `src/scripts/bench_ab_clone.py` from the repository by file path and
    returns the result of its `clone(variant, refresh=True, quiet=True)`.

    Raises:
        RuntimeError: if no import spec/loader can be built for the helper.
    """
    import importlib.util

    helper_path = REPO_ROOT / "src" / "scripts" / "bench_ab_clone.py"
    spec = importlib.util.spec_from_file_location("bench_ab_clone", helper_path)
    loader = None if spec is None else spec.loader
    if loader is None:
        raise RuntimeError("cannot load bench_ab_clone helper")
    helper = importlib.util.module_from_spec(spec)
    loader.exec_module(helper)
    return helper.clone(variant, refresh=True, quiet=True)  # type: ignore[attr-defined]
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
def claude_executable() -> str | None:
    """Locate the claude CLI: the `CLAUDE_CLI` env var wins, then PATH.

    A non-empty `CLAUDE_CLI` is returned verbatim. Otherwise the binary is
    looked up with `shutil.which`, so the caller gets a resolved path (or None
    when nothing is found) instead of a bare command name. The subprocess runs
    with cwd set to the clone, and an unresolved name once showed up as a
    spurious "claude CLI not found" on a later arm of the first full run.
    """
    return os.environ.get("CLAUDE_CLI") or shutil.which("claude")
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
def _captured_text(captured) -> str:
    """Return captured subprocess output as `str` (input may be None or bytes)."""
    if not captured:
        return ""
    if isinstance(captured, bytes):
        return captured.decode("utf-8", errors="replace")
    return captured


def _token_counts(envelope: dict) -> tuple[int, dict]:
    """Extract `(total, breakdown)` token counts from a parsed JSON envelope."""
    fields = (
        ("input_tokens", "inputTokens"),
        ("output_tokens", "outputTokens"),
        ("cache_read_input_tokens", "cacheReadInputTokens"),
        ("cache_creation_input_tokens", "cacheCreationInputTokens"),
    )
    usage = envelope.get("usage") or {}
    breakdown = {snake: int(usage.get(snake, 0) or 0) for snake, _ in fields}
    total = sum(breakdown.values())
    if total:
        return total, breakdown
    # The top-level `usage` block is zeroed on a budget-capped / errored run
    # (and unreliable even on some completions). `modelUsage` carries the
    # per-model counts, so sum it as the fallback; token deltas then survive
    # even when a task hits its cap mid-flight.
    per_model = {snake: 0 for snake, _ in fields}
    for stats in (envelope.get("modelUsage") or {}).values():
        for snake, camel in fields:
            per_model[snake] += int(stats.get(camel, 0) or 0)
    model_total = sum(per_model.values())
    if model_total > 0:
        return model_total, per_model
    return total, breakdown


def run_live(
    task: dict,
    clone_root: Path,
    *,
    timeout_s: int,
    sysprompt_file: "Path | None" = None,
    setting_sources: "str | None" = None,
    max_budget: "float | None" = None,
    model: "str | None" = None,
) -> dict:
    """Invoke claude in print/one-shot mode against the task prompt.

    Args:
        task: corpus entry; only its `prompt` is read here.
        clone_root: working directory the CLI is started in.
        timeout_s: wall-clock limit for the subprocess, in seconds.
        sysprompt_file: file passed via `--append-system-prompt-file` (the
            `with-rdp` arm); omitted when None.
        setting_sources: value for `--setting-sources` (e.g. "project,local"
            drops the global plugin for the `without` arm while keeping auth).
        max_budget: per-task spend cap forwarded as `--max-budget-usd`;
            omitted when falsy.
        model: model name forwarded as `--model`; omitted when falsy.

    Returns:
        A result dict with `mode`, `reason`, `transcript`, `exit_code`,
        `wall_time_seconds`, `tokens`, `tokens_breakdown` and `errored`; a
        run that actually finished also carries `num_turns` and `subtype`.
        A missing CLI, a launch failure and a timeout are all reported as
        `errored` results and never raised.
    """
    binary = claude_executable()
    if binary is None:
        return {
            "mode": "live-skipped",
            "reason": "claude CLI not found; set CLAUDE_CLI or install it",
            "transcript": "",
            "exit_code": None,
            "wall_time_seconds": 0.0,
            "tokens": 0,
            "tokens_breakdown": {},
            "errored": True,
        }
    prompt = task.get("prompt", "")
    # --output-format json yields a `usage` block for token counts. The global
    # plugin is dropped per-arm via --setting-sources (NOT --bare, which kills auth).
    # bypassPermissions on EVERY arm: the clone is a throwaway fixture, and this
    # equalizes file-edit capability across arms (else `without`, which excludes
    # user settings, would lack edit perms and fail tasks for the wrong reason).
    cmd = [binary, "--print", "--output-format", "json", "--permission-mode", "bypassPermissions"]
    if model:
        # Pin ONE model across every arm: the bench measures the package lift
        # on a fixed host, not model-vs-model, and an expensive session
        # default would otherwise trip the budget cap.
        cmd += ["--model", model]
    if max_budget:
        # Caps per-task API spend so one runaway agentic loop can't exhaust the
        # account quota and starve later arms.
        cmd += ["--max-budget-usd", str(max_budget)]
    if setting_sources:
        cmd += ["--setting-sources", setting_sources]
    if sysprompt_file is not None:
        cmd += ["--append-system-prompt-file", str(sysprompt_file)]
    cmd += ["--", prompt]
    started = time.monotonic()
    try:
        proc = subprocess.run(
            cmd,
            cwd=clone_root,
            capture_output=True,
            text=True,
            timeout=timeout_s,
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        return {
            "mode": "live",
            "reason": f"timeout after {timeout_s}s",
            # The partial output on a timeout may be bytes even with text=True,
            # so normalise it before concatenating.
            "transcript": _captured_text(exc.stdout) + "\n[TIMEOUT]",
            "exit_code": -1,
            "wall_time_seconds": round(time.monotonic() - started, 3),
            "tokens": 0,
            "tokens_breakdown": {},
            "errored": True,
        }
    except OSError as exc:
        # E.g. CLAUDE_CLI points at a missing / non-executable file, or the
        # working directory vanished: record an errored task instead of
        # aborting the whole benchmark run.
        return {
            "mode": "live-skipped",
            "reason": f"cannot launch claude CLI ({binary}): {exc}",
            "transcript": "",
            "exit_code": None,
            "wall_time_seconds": round(time.monotonic() - started, 3),
            "tokens": 0,
            "tokens_breakdown": {},
            "errored": True,
        }
    duration = time.monotonic() - started
    # Parse the JSON envelope: `result` is the model text; `usage` holds tokens.
    transcript = proc.stdout
    tokens = 0
    is_error = False
    err_reason = "ok"
    num_turns = 0
    subtype = ""
    breakdown = {
        "input_tokens": 0,
        "output_tokens": 0,
        "cache_read_input_tokens": 0,
        "cache_creation_input_tokens": 0,
    }
    try:
        obj = json.loads(proc.stdout)
        is_error = bool(obj.get("is_error"))
        transcript = obj.get("result") or obj.get("text") or proc.stdout
        tokens, breakdown = _token_counts(obj)
        num_turns = int(obj.get("num_turns", 0) or 0)
        subtype = str(obj.get("subtype") or "")
        # Surface WHY a task errored (budget cap vs. other) without leaking $.
        if is_error:
            err_reason = obj.get("subtype") or "error"
    except (json.JSONDecodeError, AttributeError, TypeError, ValueError):
        # Not a (well-formed) JSON envelope: keep raw stdout as the transcript
        # and whatever fields were parsed before the failure.
        transcript = proc.stdout
    if is_error:
        reason = err_reason
    elif proc.returncode == 0:
        reason = "ok"
    else:
        reason = f"exit {proc.returncode}"
    return {
        "mode": "live",
        "reason": reason,
        "transcript": str(transcript) + "\n" + proc.stderr,
        "exit_code": proc.returncode,
        "wall_time_seconds": round(duration, 3),
        "tokens": tokens,
        "tokens_breakdown": breakdown,
        "errored": is_error or proc.returncode != 0,
        "num_turns": num_turns,
        "subtype": subtype,
    }
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
def run_dry(task: dict, clone_root: Path, variant: str) -> dict:
    """Build the deterministic stub result for a task that is not executed.

    The stub transcript names the variant, the clone path and the task id but
    deliberately does NOT echo the task prompt: echoing it would let
    transcript-keyword criteria match the prompt text instead of an agent
    response. The stub is therefore inert for every `transcript_contains_*`
    criterion, which is the honest dry-run signal.

    Returns:
        A dict with `mode` "dry-run", `reason` "ok", the stub `transcript`,
        `exit_code` 0 and `wall_time_seconds` 0.0.
    """
    stub_pieces = [
        "[bench_ab_task_runner dry-run]\n",
        f"variant={variant}\n",
        f"clone={clone_root}\n",
        f"task_id={task.get('id')}\n",
        "[no claude invocation; --mode live to execute for real]\n",
    ]
    result = {"mode": "dry-run", "reason": "ok"}
    result["transcript"] = "".join(stub_pieces)
    result["exit_code"] = 0
    result["wall_time_seconds"] = 0.0
    return result
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
def count_ask_events(transcript: str) -> dict[str, int]:
    """Crude ask-vs-act heuristic: count marker phrases in the transcript.

    Matching is case-insensitive substring counting. `asked` counts
    permission-seeking phrases (English and German), `acted_with_commit`
    counts git/gh commit-style commands, and `ratio` is
    asked / (asked + acted) rounded to 3 places, or 0 when neither occurs.
    """
    ask_markers = ("should i", "do you want", "shall i", "soll ich", "möchtest du")
    commit_markers = ("git commit", "git push", "gh pr create", "gh pr merge")
    asked = acted = 0
    if transcript:
        lowered = transcript.lower()
        asked = sum(map(lowered.count, ask_markers))
        acted = sum(map(lowered.count, commit_markers))
    combined = asked + acted
    ratio = round(asked / combined, 3) if combined else 0
    return {"asked": asked, "acted_with_commit": acted, "ratio": ratio}
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
# Side-channel file that mirrors the runner's live state.
PROGRESS_PATH = REPORTS_DIR.joinpath(".progress.json")


def _write_progress(state: dict) -> None:
    """Mirror live state to .progress.json for `task bench:ab:watch`.

    Best-effort: the reports directory is created on demand and any `OSError`
    while creating it or writing the file is swallowed, so a progress-write
    failure does not interrupt the benchmark.
    """
    try:
        REPORTS_DIR.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(state, indent=2)
        PROGRESS_PATH.write_text(serialized + "\n")
    except OSError:
        return
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
class Progress:
    """Live per-task progress. stdlib-only, TTY-aware, log-safe.

    style: auto (bar if the stream is a TTY, else one plain line per task) |
    bar | plain | none. State is mirrored to .progress.json on every
    start_task / end_task regardless of style.
    """

    BAR_WIDTH = 24

    def __init__(self, total: int, *, mode: str, style: str = "auto", stream=sys.stderr) -> None:
        """Set up counters for `total` tasks and resolve the display kind."""
        self.total = max(total, 1)  # at least 1 so the bar fraction never divides by zero
        self.mode = mode
        self.stream = stream
        self.done = 0
        self.started = time.monotonic()
        if style in ("bar", "plain", "none"):
            self.kind = style
        else:  # auto: a stream without isatty() counts as "not a TTY"
            self.kind = "bar" if getattr(stream, "isatty", lambda: False)() else "plain"
        self._cur = ""
        self._task_started = 0.0
        self._hb_stop: "threading.Event | None" = None
        self._hb_thread: "threading.Thread | None" = None

    def _elapsed(self, since: float) -> str:
        """Format the whole seconds elapsed since `since` as `Ns` or `MmSSs`."""
        s = int(time.monotonic() - since)
        return f"{s // 60}m{s % 60:02d}s" if s >= 60 else f"{s}s"

    def _bar(self) -> str:
        """Render the fixed-width bar for the current done/total fraction."""
        filled = int(self.BAR_WIDTH * self.done / self.total)
        return "█" * filled + "░" * (self.BAR_WIDTH - filled)

    def _render_bar(self, suffix: str = "") -> None:
        """Redraw the bar line in place (carriage return, no newline)."""
        line = f"\r[{self._bar()}] {self.done}/{self.total} · {self._cur} · {self._elapsed(self.started)}{suffix}"
        # Pad to 90 columns to overwrite a previous render; cap at 160.
        self.stream.write(line.ljust(90)[:160])
        self.stream.flush()

    def _start_heartbeat(self) -> None:
        """Start a daemon thread that redraws the bar once per second.

        Only active for the bar display in live mode. A heartbeat that is
        still running is stopped first so it is never orphaned.
        """
        if self.kind != "bar" or self.mode != "live":
            return
        self._stop_heartbeat()
        stop = threading.Event()

        def _tick() -> None:
            # Wait on the captured event, not on self._hb_stop: the attribute
            # is reset to None by _stop_heartbeat after a bounded join and
            # may already be gone when a slow render returns.
            while not stop.wait(1.0):
                self._render_bar(suffix=f" · {self._elapsed(self._task_started)}…")

        self._hb_stop = stop
        self._hb_thread = threading.Thread(target=_tick, daemon=True)
        self._hb_thread.start()

    def _stop_heartbeat(self) -> None:
        """Signal the heartbeat thread to exit and wait up to 2s for it."""
        if self._hb_stop is not None:
            self._hb_stop.set()
        if self._hb_thread is not None:
            self._hb_thread.join(timeout=2.0)
        self._hb_stop = self._hb_thread = None

    def start_task(self, variant: str, idx: int, count: int, task_id: str) -> None:
        """Record and display that task `idx` of `count` is starting."""
        self._cur = f"{variant} {idx}/{count} · {task_id}"
        self._task_started = time.monotonic()
        _write_progress({
            "mode": self.mode, "variant": variant, "task_idx": idx, "task_count": count,
            "total_done": self.done, "total": self.total, "current_id": task_id,
            "started_at": utc_stamp(), "last_result": None,
        })
        if self.kind == "none":
            return
        if self.kind == "bar":
            self._render_bar(suffix=" · running…" if self.mode == "live" else "")
            self._start_heartbeat()
        elif self.mode == "live":  # plain: a start marker so a long task isn't mistaken for a hang
            self.stream.write(f"[{self.done + 1}/{self.total}] ▶ {self._cur}\n")
            self.stream.flush()

    def end_task(self, *, passed: bool, wall: float, variant: str, task_id: str) -> None:
        """Record and display the outcome of the task that just finished."""
        self._stop_heartbeat()
        self.done += 1
        mark = "✓" if passed else "✗"
        _write_progress({
            "mode": self.mode, "variant": variant, "total_done": self.done,
            "total": self.total, "current_id": task_id, "updated_at": utc_stamp(),
            "last_result": "pass" if passed else "fail",
        })
        if self.kind == "none":
            return
        if self.kind == "bar":
            self._render_bar(suffix=f" · {mark}")
        else:
            self.stream.write(f"[{self.done}/{self.total}] {mark} {variant} · {task_id} · {wall:.1f}s\n")
            self.stream.flush()

    def variant_done(self, line: str) -> None:
        """Print a per-variant summary line without corrupting an active bar."""
        if self.kind == "bar":
            # Move off the in-place bar line first.
            self.stream.write("\n")
        self.stream.write(line if line.endswith("\n") else line + "\n")
        self.stream.flush()

    def finish(self) -> None:
        """Terminate the bar line (if any) and print the overall summary."""
        if self.kind == "bar":
            self.stream.write("\n")
        if self.kind != "none":
            self.stream.write(
                f"bench progress: {self.done}/{self.total} tasks · total {self._elapsed(self.started)}\n"
            )
        self.stream.flush()
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
def per_category_aggregate(per_task: list[dict]) -> dict[str, dict]:
    """Aggregate per-task results by their `category`.

    Entries whose category is missing or None are grouped under "unknown".
    Pass rate, mean wall-time and mean tokens are computed over COMPLETED
    (non-errored) tasks only; `total` and `errored` count every entry.

    Returns:
        {category: {"passed", "total", "completed", "errored",
        "completion_rate", "mean_wall_time", "mean_tokens"}} in first-seen
        category order.
    """
    by_cat: dict[str, list[dict]] = {}
    for entry in per_task:
        # `or` rather than a .get default: the key can be present with value
        # None, which must not become its own bucket.
        by_cat.setdefault(entry.get("category") or "unknown", []).append(entry)
    out: dict[str, dict] = {}
    for cat, entries in by_cat.items():
        done = [e for e in entries if not e.get("errored")]
        passed = sum(1 for e in done if (e.get("score") or {}).get("passed"))
        total = len(entries)
        completed = len(done)
        if completed:
            completion_rate = round(passed / completed, 4)
            mean_wall_time = round(sum(e.get("wall_time_seconds", 0) for e in done) / completed, 3)
            mean_tokens = round(sum(e.get("tokens", 0) for e in done) / completed)
        else:
            completion_rate = mean_wall_time = mean_tokens = 0
        out[cat] = {
            "passed": passed,
            "total": total,
            "completed": completed,
            "errored": total - completed,
            "completion_rate": completion_rate,
            "mean_wall_time": mean_wall_time,
            "mean_tokens": mean_tokens,
        }
    return out
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
def per_cell_aggregate(per_task: list[dict]) -> dict[str, dict]:
    """Aggregate by the 2×2 (duration × cognitive) cell — the value-benchmark axis.

    Compared across conditions this answers "are short tasks more expensive?"
    (cell `short/mechanical`) and "do long tasks get cheaper / better?"
    (cell `long/reasoning-heavy`). Cell key is `"<duration>/<cognitive>"`;
    a tag that is missing or None is rendered as `untagged`.

    Pass rate, mean wall-time and mean tokens are computed over COMPLETED
    (non-errored) tasks only; `total` and `errored` count every entry.
    """
    by_cell: dict[str, list[dict]] = {}
    for entry in per_task:
        # `or` rather than a .get default: the keys can be present with value
        # None, which would otherwise produce the cell "None/None".
        duration_tag = entry.get("duration") or "untagged"
        cognitive_tag = entry.get("cognitive") or "untagged"
        by_cell.setdefault(f"{duration_tag}/{cognitive_tag}", []).append(entry)
    out: dict[str, dict] = {}
    for cell, entries in by_cell.items():
        done = [e for e in entries if not e.get("errored")]
        passed = sum(1 for e in done if (e.get("score") or {}).get("passed"))
        total = len(entries)
        completed = len(done)
        if completed:
            completion_rate = round(passed / completed, 4)
            mean_wall_time = round(sum(e.get("wall_time_seconds", 0) for e in done) / completed, 3)
            mean_tokens = round(sum(e.get("tokens", 0) for e in done) / completed)
        else:
            completion_rate = mean_wall_time = mean_tokens = 0
        out[cell] = {
            "passed": passed,
            "total": total,
            "completed": completed,
            "errored": total - completed,
            "completion_rate": completion_rate,
            "mean_wall_time": mean_wall_time,
            "mean_tokens": mean_tokens,
        }
    return out
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
def write_report(
    variant: str,
    *,
    mode: str,
    per_task: list[dict],
    duration: float,
) -> Path:
    """Write the JSON report and its Markdown summary for one variant run.

    Args:
        variant: arm name; part of the file name and the payload.
        mode: run mode, recorded in the results and the Markdown title.
        per_task: the per-task result entries collected by the runner.
        duration: wall-clock seconds of the whole variant run.

    Returns:
        Path of the JSON report (`<stamp>-ab-trackb-<variant>.json` in
        REPORTS_DIR); the Markdown file sits next to it with a `.md` suffix.
        Both files are written as UTF-8.
    """
    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
    cache_key = bench_ab_cache.CacheKey(
        corpus_hash=bench_ab_cache.hash_file(CORPUS_PATH),
        claude_cli_version=bench_ab_cache.claude_cli_version(),
        target_shape_hash=bench_ab_cache.target_shape_hash(),
    )
    total = len(per_task)
    done = [e for e in per_task if not e.get("errored")]
    completed = len(done)
    errored = total - completed
    passed = sum(1 for e in done if e.get("score", {}).get("passed"))
    token_sum = sum(e.get("tokens", 0) for e in done)
    results = {
        "mode": mode,
        # Hit-rate is over COMPLETED tasks only — errored (rate-limit / budget /
        # timeout / CLI-fail) tasks are excluded so a transient quota trip does
        # not read as a content failure of the package.
        "completion_rate": round(passed / completed, 4) if completed else 0,
        "passed": passed,
        "completed": completed,
        "errored": errored,
        "total": total,
        "per_category": per_category_aggregate(per_task),
        "per_cell": per_cell_aggregate(per_task),
        "mean_wall_time": round(
            sum(e.get("wall_time_seconds", 0) for e in done) / completed, 3
        )
        if completed
        else 0,
        "total_tokens": token_sum,
        "mean_tokens": round(token_sum / completed) if completed else 0,
        "ask_vs_act_ratio": round(
            sum(e.get("ask_events", {}).get("ratio", 0) for e in done) / completed, 3
        )
        if completed
        else 0,
        "per_task": per_task,
    }
    stamp = utc_stamp()
    payload = {
        "schema": "ab-bench/0.1",
        "stamp": stamp,
        "variant": variant,
        "corpus": "ab-trackb",
        "cache_key": cache_key.to_dict(),
        "duration_seconds": round(duration, 3),
        "results": results,
    }
    path = REPORTS_DIR / f"{stamp}-ab-trackb-{variant}.json"
    path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
    # The percentage is computed over completed tasks, so show the same
    # denominator next to it (mirrors the headline line) and list errored
    # tasks separately.
    category_lines = "\n".join(
        f"- `{cat}` — {info['passed']}/{info['completed']} "
        f"({info['completion_rate'] * 100:.1f}%; "
        f"{info['errored']} errored of {info['total']})"
        for cat, info in results["per_category"].items()
    )
    md = path.with_suffix(".md")
    # Explicit UTF-8: the text contains non-ASCII punctuation and must not
    # depend on the locale's default encoding.
    md.write_text(
        f"# Track B · {variant} · {mode}\n\n"
        f"- Stamp: `{stamp}`\n"
        f"- Completion rate: **{results['completion_rate'] * 100:.1f}%**"
        f" ({passed}/{completed} completed; {errored} errored of {total})\n"
        f"- Mean wall-time: {results['mean_wall_time']}s\n"
        f"- Ask vs. act ratio: {results['ask_vs_act_ratio']}\n"
        f"\n## Per-category\n\n"
        + category_lines
        + "\n",
        encoding="utf-8",
    )
    return path
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
def run_variant(
    variant: str,
    tasks: list[dict],
    *,
    mode: str,
    timeout_s: int,
    max_budget: "float | None" = None,
    model: "str | None" = None,
    progress: "Progress | None" = None,
) -> dict:
    """Run every task for one variant, score it, and write the variant report.

    For each task the clone is reset, snapshotted, the task is executed (live)
    or stubbed (dry-run), the clone is snapshotted again and the task is
    scored via `bench_ab_scoring.score_task`. When `progress` is given it is
    notified at task start/end and receives the summary line; otherwise the
    summary goes to stdout.

    Returns:
        {"path": report path, "per_task": result entries, "duration": seconds}.
    """
    t0 = time.monotonic()
    is_live = mode == "live"
    # Build the injected rule corpus once per variant (live only).
    sp_file: "Path | None" = None
    if is_live:
        injected = system_prompt_for(variant)
        if injected:
            REPORTS_DIR.mkdir(parents=True, exist_ok=True)
            sp_file = REPORTS_DIR / f".sysprompt-{variant}.txt"
            sp_file.write_text(injected, encoding="utf-8")
    task_count = len(tasks)
    records: list[dict] = []
    for position, task in enumerate(tasks, start=1):
        task_id = task.get("id")
        if progress is not None:
            progress.start_task(variant, position, task_count, str(task_id))
        # Every arm works in the same fixture-only clone: the "without" clone is
        # used regardless of `variant`, because the package is NOT in the clone
        # files; activation is the injected system prompt (sp_file).
        clone_root = reset_clone("without")
        before = snapshot_clone(clone_root)
        if is_live:
            outcome = run_live(
                task,
                clone_root,
                timeout_s=timeout_s,
                sysprompt_file=sp_file,
                setting_sources=setting_sources_for(variant),
                max_budget=max_budget,
                model=model,
            )
        else:
            outcome = run_dry(task, clone_root, variant)
        after = snapshot_clone(clone_root)
        score = bench_ab_scoring.score_task(
            task,
            pre_snapshot=before,
            post_snapshot=after,
            clone_root=clone_root,
            transcript=outcome.get("transcript", ""),
        )
        record = {
            "id": task_id,
            "category": task.get("category"),
            "duration": task.get("duration"),
            "cognitive": task.get("cognitive"),
            "score": score,
            # `errored` means the run did not complete on merit (rate-limit,
            # budget-cap, timeout, CLI failure) — not the same as a content
            # fail (`score.passed == False`). Errored tasks are kept out of
            # the hit-rate so a transient quota trip can't masquerade as the
            # package "not working".
            "errored": bool(outcome.get("errored", False)),
            "wall_time_seconds": outcome.get("wall_time_seconds", 0.0),
            "tokens": outcome.get("tokens", 0),
            "tokens_breakdown": outcome.get("tokens_breakdown", {}),
            "exit_code": outcome.get("exit_code"),
            "mode": outcome.get("mode", mode),
            "reason": outcome.get("reason", ""),
            "ask_events": count_ask_events(outcome.get("transcript", "")),
        }
        records.append(record)
        if progress is not None:
            progress.end_task(
                passed=bool(score.get("passed")),
                wall=float(outcome.get("wall_time_seconds", 0.0) or 0.0),
                variant=variant,
                task_id=str(task_id),
            )
    elapsed = time.monotonic() - t0
    report_path = write_report(variant, mode=mode, per_task=records, duration=elapsed)
    passed_count = sum(1 for rec in records if rec["score"]["passed"])
    summary = (
        f"bench_ab_task_runner: {variant} ({mode}) → "
        f"{passed_count}/{len(records)} "
        f"passed — {report_path.relative_to(REPO_ROOT)}"
    )
    if progress is None:
        sys.stdout.write(summary + "\n")
    else:
        progress.variant_done(summary)
    return {"path": report_path, "per_task": records, "duration": elapsed}
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the runner's command-line options from `argv`.

    Options: --variant, --mode, --timeout, --progress, --limit, --tasks,
    --model and --budget. All are optional; the defaults give a dry run of
    the `with` and `without` arms over the full corpus.
    """
    parser = argparse.ArgumentParser(description="Run Track B tasks per variant.")
    add = parser.add_argument

    variant_help = (
        "with | without | with-rdp | both (=with+without, back-compat "
        "default) | all (=the 3-condition value-benchmark set)."
    )
    add("--variant", choices=("with", "without", "with-rdp", "both", "all"), default="both", help=variant_help)

    mode_help = (
        "dry-run: stub transcript, no CLI invocation (fast, free). "
        "live: invoke `claude --print` per task (cost-bearing)."
    )
    add("--mode", choices=("dry-run", "live"), default="dry-run", help=mode_help)

    add(
        "--timeout",
        type=int,
        default=120,
        help="Live mode: per-task timeout in seconds (default 120).",
    )
    add(
        "--progress",
        choices=("auto", "bar", "plain", "none"),
        default="auto",
        help="Live display: auto (TTY→bar, else plain line-per-task) | bar | plain | none.",
    )
    add(
        "--limit",
        type=int,
        default=0,
        help="Run only the first N tasks per variant (0 = all). For cheap smoke tests.",
    )

    tasks_help = (
        "Comma-separated task IDs to run (e.g. trackb-bugfix-01,trackb-refactor-01). "
        "Overrides --limit. Use to span the 2×2 cells in a bounded run instead of "
        "taking the first-N in file order."
    )
    add("--tasks", default="", help=tasks_help)

    model_help = (
        "Pin ONE model across all arms (live mode). Default claude-sonnet-4-6 — "
        "capable enough to complete the coding tasks, ~2.3x cheaper per turn than "
        "the Opus-4.8-1M session default whose cache-creation blows the quota. "
        "Empty string = inherit the session default (expensive)."
    )
    add("--model", default="claude-sonnet-4-6", help=model_help)

    budget_help = (
        "Live mode: per-task API spend cap in USD (passed to "
        "`claude --max-budget-usd`). Stops a runaway agentic loop from "
        "exhausting the account quota and starving later arms. 0 = uncapped. "
        "Default 2.0."
    )
    add("--budget", type=float, default=2.0, help=budget_help)

    return parser.parse_args(argv)
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: load the corpus, pick tasks, run the variants.

    Args:
        argv: argument list; defaults to `sys.argv[1:]`.

    Returns:
        0 on success. 1 when the corpus file is missing, contains no tasks
        (including an empty or non-mapping YAML document), or `--tasks` names
        an unknown task id.
    """
    args = parse_args(argv if argv is not None else sys.argv[1:])
    if not CORPUS_PATH.exists():
        sys.stderr.write(f"bench_ab_task_runner: corpus missing at {CORPUS_PATH}\n")
        return 1
    # Read as UTF-8 explicitly so parsing does not depend on the locale.
    data = yaml.safe_load(CORPUS_PATH.read_text(encoding="utf-8"))
    # An empty document loads as None and a malformed corpus may load as a
    # non-mapping; treat both as "no tasks" instead of crashing on `.get`.
    tasks = (data.get("tasks") if isinstance(data, dict) else None) or []
    if not tasks:
        sys.stderr.write("bench_ab_task_runner: corpus has no tasks\n")
        return 1
    if args.tasks.strip():
        # Explicit ids win over --limit and are run in the order given.
        wanted = [s.strip() for s in args.tasks.split(",") if s.strip()]
        by_id = {t.get("id"): t for t in tasks}
        missing = [w for w in wanted if w not in by_id]
        if missing:
            sys.stderr.write(
                f"bench_ab_task_runner: unknown task id(s): {', '.join(missing)}\n"
            )
            return 1
        tasks = [by_id[w] for w in wanted]
    elif args.limit and args.limit > 0:
        tasks = tasks[: args.limit]
    if args.variant == "both":
        variants = ("with", "without")
    elif args.variant == "all":
        variants = ("with", "without", "with-rdp")
    else:
        variants = (args.variant,)
    # 0 (or a negative value) means "uncapped"; an empty model string means
    # "inherit the session default".
    max_budget = args.budget if args.budget and args.budget > 0 else None
    model = args.model or None
    progress = Progress(len(variants) * len(tasks), mode=args.mode, style=args.progress)
    for variant in variants:
        run_variant(
            variant,
            tasks,
            mode=args.mode,
            timeout_s=args.timeout,
            max_budget=max_budget,
            model=model,
            progress=progress,
        )
    progress.finish()
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|