mednotes-opencode 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.opencode/agents/med-chat-triager.md +204 -0
- package/.opencode/agents/med-flashcard-maker.md +63 -0
- package/.opencode/agents/med-knowledge-architect.md +230 -0
- package/.opencode/agents/med-link-graph-curator.md +177 -0
- package/.opencode/agents/med-publish-guard.md +62 -0
- package/.opencode/commands/flashcards.md +25 -0
- package/.opencode/commands/mednotes/create.md +25 -0
- package/.opencode/commands/mednotes/enrich.md +27 -0
- package/.opencode/commands/mednotes/fix-wiki.md +27 -0
- package/.opencode/commands/mednotes/history.md +22 -0
- package/.opencode/commands/mednotes/link-body.md +25 -0
- package/.opencode/commands/mednotes/link-related.md +27 -0
- package/.opencode/commands/mednotes/link.md +27 -0
- package/.opencode/commands/mednotes/pdf-library.md +27 -0
- package/.opencode/commands/mednotes/process-chats.md +23 -0
- package/.opencode/commands/mednotes/setup.md +21 -0
- package/.opencode/commands/mednotes/status.md +27 -0
- package/.opencode/commands/mednotes/telemetry.md +27 -0
- package/.opencode/commands/report.md +26 -0
- package/.opencode/mednotes/AGENTS.md +57 -0
- package/.opencode/mednotes/agents/med-chat-triager.md +197 -0
- package/.opencode/mednotes/agents/med-flashcard-maker.md +56 -0
- package/.opencode/mednotes/agents/med-knowledge-architect.md +224 -0
- package/.opencode/mednotes/agents/med-link-graph-curator.md +171 -0
- package/.opencode/mednotes/agents/med-publish-guard.md +55 -0
- package/.opencode/mednotes/contracts/.gitkeep +1 -0
- package/.opencode/mednotes/contracts/agents.json +116 -0
- package/.opencode/mednotes/contracts/opencode-plugin.json +70 -0
- package/.opencode/mednotes/docs/agent-prompt-hardening.md +567 -0
- package/.opencode/mednotes/docs/agent-role-contracts.md +94 -0
- package/.opencode/mednotes/docs/anki-mcp-twenty-rules.md +214 -0
- package/.opencode/mednotes/docs/anki-templates/README.md +39 -0
- package/.opencode/mednotes/docs/anki-templates/cloze.back.html +23 -0
- package/.opencode/mednotes/docs/anki-templates/cloze.front.html +14 -0
- package/.opencode/mednotes/docs/anki-templates/qa.back.html +24 -0
- package/.opencode/mednotes/docs/anki-templates/qa.front.html +14 -0
- package/.opencode/mednotes/docs/anki-templates/style.css +182 -0
- package/.opencode/mednotes/docs/atomicity-splitting-policy.md +113 -0
- package/.opencode/mednotes/docs/extension-docs.md +40 -0
- package/.opencode/mednotes/docs/flashcard-ingestion.md +278 -0
- package/.opencode/mednotes/docs/knowledge-architect.md +208 -0
- package/.opencode/mednotes/docs/merge-policy.md +110 -0
- package/.opencode/mednotes/docs/public-vocabulary.md +104 -0
- package/.opencode/mednotes/docs/semantic-linker.md +141 -0
- package/.opencode/mednotes/docs/taxonomy-policy.md +90 -0
- package/.opencode/mednotes/docs/triage-policy.md +187 -0
- package/.opencode/mednotes/docs/vault-version-control.md +758 -0
- package/.opencode/mednotes/docs/vocabulary-db-recovery.md +58 -0
- package/.opencode/mednotes/docs/workflow-output-contract.md +779 -0
- package/.opencode/mednotes/hooks/hooks.json +79 -0
- package/.opencode/mednotes/package-lock.json +6361 -0
- package/.opencode/mednotes/package.json +15 -0
- package/.opencode/mednotes/pyproject.toml +48 -0
- package/.opencode/mednotes/scripts/bootstrap_windows_python_uv.cmd +13 -0
- package/.opencode/mednotes/scripts/bootstrap_windows_python_uv.ps1 +172 -0
- package/.opencode/mednotes/scripts/enrich_notes.py +23 -0
- package/.opencode/mednotes/scripts/full_reset_windows_python_uv.cmd +13 -0
- package/.opencode/mednotes/scripts/hooks/antigravity_hook_status.mjs +212 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/antigravity.mjs +169 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/harness_payload.mjs +103 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/opencode_plugin.mjs +341 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/opencode_user_config_sync.mjs +177 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/anki_preflight.mjs +214 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/cli.mjs +143 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/diagnostics.mjs +11 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/domain/agent_directive_core.mjs +160 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/fsm_directive.mjs +1470 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/hook_errors.mjs +120 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/retention.mjs +114 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/runtime.mjs +174 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/telemetry_capture.mjs +511 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/vault_guard.mjs +624 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook.mjs +5 -0
- package/.opencode/mednotes/scripts/mednotes/_runtime_paths.py +24 -0
- package/.opencode/mednotes/scripts/mednotes/anki_model_validator.py +18 -0
- package/.opencode/mednotes/scripts/mednotes/capture_extension_diff.py +1562 -0
- package/.opencode/mednotes/scripts/mednotes/feedback_report.py +16 -0
- package/.opencode/mednotes/scripts/mednotes/flashcard_index.py +18 -0
- package/.opencode/mednotes/scripts/mednotes/flashcard_pipeline.py +18 -0
- package/.opencode/mednotes/scripts/mednotes/flashcard_report.py +18 -0
- package/.opencode/mednotes/scripts/mednotes/flashcard_sources.py +18 -0
- package/.opencode/mednotes/scripts/mednotes/obsidian/README.md +6 -0
- package/.opencode/mednotes/scripts/mednotes/obsidian_note_utils.py +20 -0
- package/.opencode/mednotes/scripts/mednotes/pdf_library/cli.py +16 -0
- package/.opencode/mednotes/scripts/mednotes/project_fsm.py +229 -0
- package/.opencode/mednotes/scripts/mednotes/setup_telemetry_email.py +404 -0
- package/.opencode/mednotes/scripts/mednotes/sync_anki_twenty_rules.py +18 -0
- package/.opencode/mednotes/scripts/mednotes/sync_opencode_user_config.py +36 -0
- package/.opencode/mednotes/scripts/mednotes/wiki/cli.py +20 -0
- package/.opencode/mednotes/scripts/mednotes/wiki_graph.py +18 -0
- package/.opencode/mednotes/scripts/mednotes/wiki_tree.py +134 -0
- package/.opencode/mednotes/scripts/reset_windows_python_uv.ps1 +625 -0
- package/.opencode/mednotes/scripts/run_python.mjs +109 -0
- package/.opencode/mednotes/scripts/vault/vault_commit.ps1 +19 -0
- package/.opencode/mednotes/scripts/vault/vault_commit.sh +18 -0
- package/.opencode/mednotes/scripts/vault/vault_git.ps1 +19 -0
- package/.opencode/mednotes/scripts/vault/vault_git.py +3107 -0
- package/.opencode/mednotes/scripts/vault/vault_git.sh +18 -0
- package/.opencode/mednotes/scripts/vault/vault_precommit.ps1 +19 -0
- package/.opencode/mednotes/scripts/vault/vault_precommit.sh +18 -0
- package/.opencode/mednotes/skills/THIRD_PARTY_NOTICES.md +45 -0
- package/.opencode/mednotes/skills/create-medical-flashcards/SKILL.md +113 -0
- package/.opencode/mednotes/skills/create-medical-note/SKILL.md +90 -0
- package/.opencode/mednotes/skills/enrich-medical-note/SKILL.md +120 -0
- package/.opencode/mednotes/skills/fix-medical-wiki/SKILL.md +559 -0
- package/.opencode/mednotes/skills/link-medical-wiki/SKILL.md +224 -0
- package/.opencode/mednotes/skills/obsidian-cli/SKILL.md +118 -0
- package/.opencode/mednotes/skills/obsidian-markdown/SKILL.md +207 -0
- package/.opencode/mednotes/skills/obsidian-markdown/references/CALLOUTS.md +58 -0
- package/.opencode/mednotes/skills/obsidian-markdown/references/EMBEDS.md +63 -0
- package/.opencode/mednotes/skills/obsidian-markdown/references/PROPERTIES.md +61 -0
- package/.opencode/mednotes/skills/obsidian-ops/SKILL.md +136 -0
- package/.opencode/mednotes/skills/pdf-library/SKILL.md +45 -0
- package/.opencode/mednotes/skills/process-medical-chats/SKILL.md +246 -0
- package/.opencode/mednotes/skills/workflow-report/SKILL.md +100 -0
- package/.opencode/mednotes/src/mednotes/__init__.py +5 -0
- package/.opencode/mednotes/src/mednotes/domains/__init__.py +5 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/README.md +26 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/__init__.py +2 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/build_demo_apkg.py +177 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/contracts.py +385 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/flashcards_machine.py +522 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/fsm.py +817 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/index.py +630 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/install_models.py +445 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/model.py +359 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/obsidian_links.py +135 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/obsidian_note_utils.py +546 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/pipeline.py +580 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/report.py +510 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/sources.py +682 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/sync_rules.py +184 -0
- package/.opencode/mednotes/src/mednotes/domains/history/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/history/history_fsm.py +852 -0
- package/.opencode/mednotes/src/mednotes/domains/history/history_machine.py +453 -0
- package/.opencode/mednotes/src/mednotes/domains/setup/__init__.py +7 -0
- package/.opencode/mednotes/src/mednotes/domains/setup/setup_fsm.py +808 -0
- package/.opencode/mednotes/src/mednotes/domains/setup/setup_machine.py +973 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/README.md +64 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/api.py +668 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/batch_state.py +102 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/atomicity/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/atomicity/atomicity.py +877 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/body_link/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/body_link/body_linker.py +1562 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/effects/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/effects/effect_adapters.py +949 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/effects/fix_wiki_runtime_adapters.py +433 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/coverage.py +413 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/graph.py +396 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/graph_fixes.py +161 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/hygiene/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/hygiene/hygiene.py +483 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/__init__.py +2 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/anchors.py +185 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/__init__.py +0 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/cache.py +223 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/config.py +131 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/download.py +224 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/frontmatter.py +59 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/insert.py +227 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/local_import.py +54 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/__init__.py +42 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/web_profiles.py +99 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/web_search.py +203 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/wikimedia.py +102 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/markdown_db_adapter.mjs +434 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/markdown_node_runtime.py +274 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/markdown_query.py +227 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/artifacts.py +605 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/canonical_merge.py +277 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/markdown_zones.py +85 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/meaning_planner.py +307 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_iter.py +67 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_merge.py +278 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_plan.py +409 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_policy.py +22 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/__init__.py +79 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/fixes.py +264 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/frontmatter.py +435 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/models.py +208 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/prompts.py +37 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/tables.py +236 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/validate.py +404 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/provenance.py +478 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/raw_chats.py +273 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/sources_backfill.py +235 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/__init__.py +10 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/anchors.py +16 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/captions.py +47 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/cli.py +179 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/cloud.py +52 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/config.py +196 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/context_packets.py +76 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/db.py +81 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/doctor.py +102 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/figure_ids.py +42 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/ingest.py +326 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/insert.py +316 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/mentions.py +57 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/ocr.py +71 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/paths.py +35 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/pdf_engine.py +77 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/schema.py +155 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/search.py +188 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/app.py +89 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/image_backend.py +29 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/state.py +65 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/publish.py +1139 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/publish_receipts.py +365 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/publish_recovery.py +240 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/agent_behavior_corpus.py +2069 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/agent_report_validation.py +4448 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/agent_run_audit.py +852 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/architect_prompt_eval.py +341 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/body_linker_eval.py +240 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/curator_output_validation.py +175 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/curator_prompt_eval.py +865 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/triager_prompt_eval.py +1295 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/related_notes/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/related_notes/related_notes.py +1920 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/related_notes/related_notes_headless.py +1186 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/plan_attestation.py +148 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/specialist_receipts.py +360 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/specialist_runtime.py +52 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/specialist_task_runner.py +2470 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/style/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/style/style.py +1952 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/subagents/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/subagents/agents.py +1767 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/alias_projection.py +331 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/link_terms.py +151 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/llm_disambiguation.py +182 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/__init__.py +116 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/audit.py +201 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/migration.py +314 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/normalize.py +72 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/policy.py +135 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/resolve.py +413 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/schema.py +157 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/status.py +137 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_bootstrap.py +509 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_curator_batch.py +1115 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_ingestion.py +632 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_map.py +930 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_recovery.py +1388 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/cli.py +6665 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/common.py +69 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/config.py +210 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/__init__.py +74 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/agent_report.py +242 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/agent_run_audit.py +196 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/agents.py +601 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/curator.py +256 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/effect_payloads.py +519 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/happy_path.py +190 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/link_git.py +110 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/link_runtime_artifact.py +52 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/note_plan.py +75 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/paths.py +114 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/public_report.py +53 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/publish.py +111 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/raw_coverage.py +217 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/related_notes.py +136 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/related_notes_headless.py +153 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/related_notes_runtime.py +395 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/schema_registry.py +637 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/specialist.py +432 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/status.py +62 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/style_rewrite.py +568 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/vocabulary_ingestion.py +223 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_blockers.py +510 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_guardrails.py +637 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_outcomes.py +121 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_receipts.py +100 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/__main__.py +4 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/cli.py +275 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/__init__.py +2 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/candidates.py +193 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/cli.py +189 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/gemini.py +220 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/inputs.py +120 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/models.py +34 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/parsing.py +48 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/prompts.py +216 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/quality.py +54 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/reporting.py +24 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/runner.py +433 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/utils.py +39 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/vault_guard_bridge.py +17 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_context_packets.py +454 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_decision_projection.py +133 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_effects.py +1260 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_fsm.py +2768 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_machine.py +1588 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_plan.py +306 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_primary_objective.py +316 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_problem.py +153 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_receipt_evidence.py +306 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_states.py +290 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_user_report.py +342 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/health.py +6332 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_fsm.py +1119 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_git.py +638 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_machine.py +1106 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_retry_governance.py +374 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_runtime_result.py +485 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_triggers.py +183 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/linking.py +2758 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/reference_repair.py +718 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/related_notes_fsm.py +1855 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link_related/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link_related/link_related_machine.py +834 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_fsm.py +1592 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_machine.py +3097 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_primary_objective.py +28 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_runtime_result.py +185 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/performance.py +97 -0
- package/.opencode/mednotes/src/mednotes/kernel/__init__.py +6 -0
- package/.opencode/mednotes/src/mednotes/kernel/agent_directive.py +336 -0
- package/.opencode/mednotes/src/mednotes/kernel/base.py +51 -0
- package/.opencode/mednotes/src/mednotes/kernel/blockers.py +39 -0
- package/.opencode/mednotes/src/mednotes/kernel/effect_executor.py +55 -0
- package/.opencode/mednotes/src/mednotes/kernel/effect_intent.py +69 -0
- package/.opencode/mednotes/src/mednotes/kernel/effects.py +160 -0
- package/.opencode/mednotes/src/mednotes/kernel/errors.py +38 -0
- package/.opencode/mednotes/src/mednotes/kernel/fsm_event.py +35 -0
- package/.opencode/mednotes/src/mednotes/kernel/fsm_model.py +55 -0
- package/.opencode/mednotes/src/mednotes/kernel/fsm_transition_result.py +75 -0
- package/.opencode/mednotes/src/mednotes/kernel/guardrails.py +188 -0
- package/.opencode/mednotes/src/mednotes/kernel/progress.py +319 -0
- package/.opencode/mednotes/src/mednotes/kernel/public_report.py +346 -0
- package/.opencode/mednotes/src/mednotes/kernel/state_machine.py +164 -0
- package/.opencode/mednotes/src/mednotes/kernel/workflow.py +619 -0
- package/.opencode/mednotes/src/mednotes/platform/__init__.py +5 -0
- package/.opencode/mednotes/src/mednotes/platform/backup_policy.py +382 -0
- package/.opencode/mednotes/src/mednotes/platform/feedback/__init__.py +62 -0
- package/.opencode/mednotes/src/mednotes/platform/feedback/cli.py +275 -0
- package/.opencode/mednotes/src/mednotes/platform/feedback/contracts.py +83 -0
- package/.opencode/mednotes/src/mednotes/platform/feedback/core.py +4168 -0
- package/.opencode/mednotes/src/mednotes/platform/feedback/integrity.py +989 -0
- package/.opencode/mednotes/src/mednotes/platform/feedback/operational_contract.py +2293 -0
- package/.opencode/mednotes/src/mednotes/platform/feedback/telemetry.py +875 -0
- package/.opencode/mednotes/src/mednotes/platform/feedback/telemetry_config.py +65 -0
- package/.opencode/mednotes/src/mednotes/platform/opencode_runtime_config.py +182 -0
- package/.opencode/mednotes/src/mednotes/platform/paths/__init__.py +1560 -0
- package/.opencode/mednotes/src/mednotes/platform/secrets.py +89 -0
- package/.opencode/mednotes/src/mednotes/platform/user_config.py +103 -0
- package/.opencode/mednotes/src/mednotes/platform/vault_guard.py +214 -0
- package/.opencode/mednotes/uv.lock +932 -0
- package/.opencode/mednotes.generated.json +395 -0
- package/.opencode/opencode.json +31 -0
- package/.opencode/plugins/mednotes-fsm.mjs +7 -0
- package/.opencode/plugins/mednotes_hook/adapters/antigravity.mjs +169 -0
- package/.opencode/plugins/mednotes_hook/adapters/harness_payload.mjs +103 -0
- package/.opencode/plugins/mednotes_hook/adapters/opencode_plugin.mjs +341 -0
- package/.opencode/plugins/mednotes_hook/adapters/opencode_user_config_sync.mjs +177 -0
- package/.opencode/plugins/mednotes_hook/anki_preflight.mjs +214 -0
- package/.opencode/plugins/mednotes_hook/cli.mjs +143 -0
- package/.opencode/plugins/mednotes_hook/diagnostics.mjs +11 -0
- package/.opencode/plugins/mednotes_hook/domain/agent_directive_core.mjs +160 -0
- package/.opencode/plugins/mednotes_hook/fsm_directive.mjs +1470 -0
- package/.opencode/plugins/mednotes_hook/hook_errors.mjs +120 -0
- package/.opencode/plugins/mednotes_hook/retention.mjs +114 -0
- package/.opencode/plugins/mednotes_hook/runtime.mjs +174 -0
- package/.opencode/plugins/mednotes_hook/telemetry_capture.mjs +511 -0
- package/.opencode/plugins/mednotes_hook/vault_guard.mjs +624 -0
- package/AGENTS.md +57 -0
- package/README.md +194 -0
- package/adapters/antigravity/agents.json +80 -0
- package/adapters/antigravity/templates/med-chat-triager.md +214 -0
- package/adapters/antigravity/templates/med-flashcard-maker.md +72 -0
- package/adapters/antigravity/templates/med-knowledge-architect.md +241 -0
- package/adapters/antigravity/templates/med-link-graph-curator.md +187 -0
- package/adapters/antigravity/templates/med-publish-guard.md +71 -0
- package/adapters/gemini-cli/gemini-extension.json +14 -0
- package/adapters/gemini-cli/package.json +15 -0
- package/adapters/gemini-cli/pyproject.toml +48 -0
- package/bin/mednotes-opencode.mjs +155 -0
- package/contracts/agents.json +116 -0
- package/core/agents/med-chat-triager.md +197 -0
- package/core/agents/med-flashcard-maker.md +56 -0
- package/core/agents/med-knowledge-architect.md +224 -0
- package/core/agents/med-link-graph-curator.md +171 -0
- package/core/agents/med-publish-guard.md +55 -0
- package/core/commands/flashcards.toml +22 -0
- package/core/commands/mednotes/create.toml +22 -0
- package/core/commands/mednotes/enrich.toml +24 -0
- package/core/commands/mednotes/fix-wiki.toml +24 -0
- package/core/commands/mednotes/history.toml +19 -0
- package/core/commands/mednotes/link-body.toml +22 -0
- package/core/commands/mednotes/link-related.toml +24 -0
- package/core/commands/mednotes/link.toml +24 -0
- package/core/commands/mednotes/pdf-library.toml +24 -0
- package/core/commands/mednotes/process-chats.toml +20 -0
- package/core/commands/mednotes/setup.toml +18 -0
- package/core/commands/mednotes/status.toml +24 -0
- package/core/commands/mednotes/telemetry.toml +24 -0
- package/core/commands/report.toml +23 -0
- package/core/skills/THIRD_PARTY_NOTICES.md +45 -0
- package/core/skills/create-medical-flashcards/SKILL.md +113 -0
- package/core/skills/create-medical-note/SKILL.md +90 -0
- package/core/skills/enrich-medical-note/SKILL.md +120 -0
- package/core/skills/fix-medical-wiki/SKILL.md +559 -0
- package/core/skills/link-medical-wiki/SKILL.md +224 -0
- package/core/skills/obsidian-cli/SKILL.md +118 -0
- package/core/skills/obsidian-markdown/SKILL.md +207 -0
- package/core/skills/obsidian-markdown/references/CALLOUTS.md +58 -0
- package/core/skills/obsidian-markdown/references/EMBEDS.md +63 -0
- package/core/skills/obsidian-markdown/references/PROPERTIES.md +61 -0
- package/core/skills/obsidian-ops/SKILL.md +136 -0
- package/core/skills/pdf-library/SKILL.md +45 -0
- package/core/skills/process-medical-chats/SKILL.md +246 -0
- package/core/skills/workflow-report/SKILL.md +100 -0
- package/package.json +45 -0
package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/triager_prompt_eval.py
ADDED
|
@@ -0,0 +1,1295 @@
|
|
|
1
|
+
"""Offline prompt-quality evaluation for med-chat-triager outputs."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import base64
|
|
5
|
+
import binascii
|
|
6
|
+
import hashlib
|
|
7
|
+
import hmac
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import secrets
|
|
11
|
+
import unicodedata
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from datetime import UTC, datetime
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any, Literal
|
|
16
|
+
|
|
17
|
+
from cryptography.exceptions import InvalidSignature
|
|
18
|
+
from cryptography.hazmat.primitives import serialization
|
|
19
|
+
from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey, Ed25519PublicKey
|
|
20
|
+
from pydantic import BaseModel, ConfigDict, Field, StrictStr
|
|
21
|
+
from pydantic import ValidationError as PydanticValidationError
|
|
22
|
+
|
|
23
|
+
from mednotes.domains.wiki.capabilities.notes.note_plan import (
|
|
24
|
+
NOT_A_NOTE_ACTION,
|
|
25
|
+
PLANNED_MEANING_ACTION,
|
|
26
|
+
TRIAGE_NOTE_PLAN_SCHEMA,
|
|
27
|
+
normalize_triage_note_plan,
|
|
28
|
+
note_plan_hash,
|
|
29
|
+
note_plan_summary,
|
|
30
|
+
)
|
|
31
|
+
from mednotes.domains.wiki.common import MissingPathError, ValidationError
|
|
32
|
+
from mednotes.domains.wiki.config import _user_state_dir
|
|
33
|
+
from mednotes.kernel.base import JsonObject, JsonObjectAdapter
|
|
34
|
+
from mednotes.platform.paths import extension_root as _resolve_extension_root
|
|
35
|
+
|
|
36
|
+
TRIAGER_PROMPT_EVAL_SCHEMA = "medical-notes-workbench.triager-prompt-eval.v1"
|
|
37
|
+
TRIAGER_PROMPT_EXPECTATIONS_SCHEMA = "medical-notes-workbench.triager-prompt-expectations.v1"
|
|
38
|
+
SUBAGENT_RUN_RECEIPT_SCHEMA = "medical-notes-workbench.subagent-run-receipt.v1"
|
|
39
|
+
SUBAGENT_RUN_RECEIPT_ATTESTATION_SCHEMA = "medical-notes-workbench.subagent-run-receipt-attestation.v1"
|
|
40
|
+
SUBAGENT_RUN_RECEIPT_ATTESTATION_KIND = "workbench_ed25519.v1"
|
|
41
|
+
SUBAGENT_RUN_RECEIPT_ATTESTATION_CREATED_BY = "mednotes-subagent-runner"
|
|
42
|
+
_SUBAGENT_PRIVATE_KEY_ENV = "MEDNOTES_SUBAGENT_RUN_RECEIPT_ATTESTATION_PRIVATE_KEY"
|
|
43
|
+
_SUBAGENT_PRIVATE_KEY_PATH_ENV = "MEDNOTES_SUBAGENT_RUN_RECEIPT_ATTESTATION_PRIVATE_KEY_PATH"
|
|
44
|
+
_SUBAGENT_PUBLIC_KEY_ENV = "MEDNOTES_SUBAGENT_RUN_RECEIPT_ATTESTATION_PUBLIC_KEY"
|
|
45
|
+
_SUBAGENT_PUBLIC_KEY_PATH_ENV = "MEDNOTES_SUBAGENT_RUN_RECEIPT_ATTESTATION_PUBLIC_KEY_PATH"
|
|
46
|
+
_SUBAGENT_PUBLIC_KEY_FILENAME = "subagent-run-receipt-attestation.ed25519.public.key"
|
|
47
|
+
# Repository root (parent of bundle/) — independent of this module's depth.
|
|
48
|
+
REPO_ROOT = _resolve_extension_root().parent
|
|
49
|
+
TRIAGER_EVAL_RETRY_NEXT_ACTION = (
|
|
50
|
+
"reenviar error_context ao med-chat-triager e gerar novo output/eval; "
|
|
51
|
+
"não remendar output JSON, note_plan ou agent_metrics manualmente antes de triage --note-plan"
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Contract for the Ed25519 attestation block embedded in a subagent run
# receipt. Unknown keys are rejected (extra="forbid"); the schema, kind,
# creator and receipt schema are pinned to single literal values.
class _SubagentRunReceiptAttestation(BaseModel):
    model_config = ConfigDict(extra="forbid")

    schema_: Literal["medical-notes-workbench.subagent-run-receipt-attestation.v1"] = Field(alias="schema")
    attestation_kind: Literal["workbench_ed25519.v1"]
    created_by: Literal["mednotes-subagent-runner"]
    receipt_schema: Literal["medical-notes-workbench.subagent-run-receipt.v1"]
    receipt_hash: StrictStr
    agent: StrictStr
    work_item_id: StrictStr
    raw_file_hash: StrictStr
    output_hash: StrictStr
    key_id: StrictStr
    nonce: StrictStr
    issued_at: StrictStr
    signature: StrictStr

    def normalized(self) -> dict[str, str]:
        """Return the attestation as a plain dict with free-text fields stripped.

        The literal-typed fields are copied as-is; every other field has its
        surrounding whitespace removed. Key order matches the field order.
        """
        result: dict[str, str] = {
            "schema": SUBAGENT_RUN_RECEIPT_ATTESTATION_SCHEMA,
            "attestation_kind": self.attestation_kind,
            "created_by": self.created_by,
            "receipt_schema": self.receipt_schema,
        }
        for name in (
            "receipt_hash",
            "agent",
            "work_item_id",
            "raw_file_hash",
            "output_hash",
            "key_id",
            "nonce",
            "issued_at",
            "signature",
        ):
            result[name] = getattr(self, name).strip()
        return result
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _json_object_from_model(model: BaseModel, **dump_options: Any) -> JsonObject:
    """Dump *model* in JSON mode (by alias) and validate it as a ``JsonObject``.

    Contract models are the source of truth; routing the dump through
    ``JsonObjectAdapter`` keeps public JSON payloads serializable without
    letting arbitrary Python objects leak back into workflow decisions.
    Extra keyword arguments are forwarded to ``model_dump``.
    """
    dumped = model.model_dump(mode="json", by_alias=True, **dump_options)
    return JsonObjectAdapter.validate_python(dumped)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# Contract for a subagent run receipt file. Unknown keys are rejected;
# ``output_path``, ``signature`` and ``receipt_attestation`` are optional.
class _SubagentRunReceipt(BaseModel):
    model_config = ConfigDict(extra="forbid")

    schema_: Literal["medical-notes-workbench.subagent-run-receipt.v1"] = Field(alias="schema")
    issuer: StrictStr
    agent: StrictStr
    work_item_id: StrictStr
    raw_file: StrictStr
    raw_file_hash: StrictStr
    output_path: StrictStr = ""
    output_hash: StrictStr
    signature: StrictStr = ""
    receipt_attestation: _SubagentRunReceiptAttestation | None = None

    def payload_without_attestation(self) -> JsonObject:
        """Return the receipt as JSON, minus ``receipt_attestation``.

        Receipt hashes intentionally use only fields that were present in the
        original receipt (``exclude_unset``) so existing runner signatures do
        not drift when the Pydantic contract supplies defaults.
        """
        return _json_object_from_model(
            self,
            exclude={"receipt_attestation"},
            exclude_unset=True,
        )

    def legacy_signature_payload(self) -> JsonObject:
        """Return the explicitly-set receipt fields as JSON, minus ``signature``."""
        return _json_object_from_model(self, exclude={"signature"}, exclude_unset=True)

    def attested_payload(self, attestation: _SubagentRunReceiptAttestation) -> JsonObject:
        """Return the attestation-free payload with *attestation* attached (normalized)."""
        combined = self.payload_without_attestation()
        combined["receipt_attestation"] = attestation.normalized()
        return JsonObjectAdapter.validate_python(combined)

    def normalized(self) -> dict[str, str | dict[str, str] | None]:
        """Return every field as a plain dict with string values stripped.

        ``receipt_attestation`` is the normalized attestation dict, or ``None``
        when the receipt carries no attestation.
        """
        result: dict[str, str | dict[str, str] | None] = {"schema": SUBAGENT_RUN_RECEIPT_SCHEMA}
        for name in (
            "issuer",
            "agent",
            "work_item_id",
            "raw_file",
            "raw_file_hash",
            "output_path",
            "output_hash",
            "signature",
        ):
            result[name] = getattr(self, name).strip()
        attestation = self.receipt_attestation
        result["receipt_attestation"] = None if attestation is None else attestation.normalized()
        return result
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# Summary of a receipt check: whether a receipt was present, valid and
# required, plus identifying fields and the signature verification status.
class _SubagentRunReceiptStatus(BaseModel):
    model_config = ConfigDict(extra="forbid")

    present: bool
    valid: bool
    required: bool
    issuer: StrictStr = ""
    agent: StrictStr = ""
    work_item_id: StrictStr = ""
    path: StrictStr = ""
    receipt_hash: StrictStr = ""
    signature_status: StrictStr = "not_present"

    def to_payload(self) -> JsonObject:
        """Return this status as a validated JSON object."""
        payload = _json_object_from_model(self)
        return payload
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# Paths and hashes identifying the inputs an evaluation report was computed
# from. Every field defaults to empty / False; unknown keys are rejected.
class _TriagerEvalInputFingerprints(BaseModel):
    model_config = ConfigDict(extra="forbid")

    # Raw chat input.
    raw_file: StrictStr = ""
    raw_file_hash: StrictStr = ""

    # Triager output.
    output_hash: StrictStr = ""
    output_file_hash: StrictStr = ""

    # Subagent run receipt.
    subagent_run_receipt_path: StrictStr = ""
    subagent_run_receipt_hash: StrictStr = ""

    # Note plan and optional evaluation expectations.
    note_plan_hash: StrictStr = ""
    evaluation_expectations_present: bool = False
    evaluation_expectations_hash: StrictStr = ""
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# Aggregate section of an evaluation report: score, issue counters, quality
# flags and per-area coverage objects. Unknown keys are rejected.
class _TriagerEvalAggregate(BaseModel):
    model_config = ConfigDict(extra="forbid")

    score: int = 0
    issue_count: int = 0
    error_count: int = 0
    redaction_issue_count: int = 0
    quality_flags: list[StrictStr] = Field(default_factory=list)
    metric_coverage: JsonObject = Field(default_factory=dict)
    # Defaults to a "no receipt, not required" status.
    subagent_run_receipt_coverage: _SubagentRunReceiptStatus = Field(
        default_factory=lambda: _SubagentRunReceiptStatus(present=False, valid=False, required=False)
    )
    expectation_coverage: JsonObject = Field(default_factory=dict)
    efficiency: JsonObject = Field(default_factory=dict)
    note_plan: JsonObject = Field(default_factory=dict)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
# Top-level evaluation report contract. The schema is pinned to a single
# literal value and unknown keys are rejected.
class _TriagerPromptEvalReport(BaseModel):
    model_config = ConfigDict(extra="forbid")

    schema_: Literal["medical-notes-workbench.triager-prompt-eval.v1"] = Field(alias="schema")
    phase: StrictStr = ""
    input_fingerprints: _TriagerEvalInputFingerprints
    status: StrictStr
    aggregate: _TriagerEvalAggregate
    issues: list[JsonObject] = Field(default_factory=list)
    agent_metrics: JsonObject = Field(default_factory=dict)
    # Defaults to a "no receipt, not required" status.
    subagent_run_receipt: _SubagentRunReceiptStatus = Field(
        default_factory=lambda: _SubagentRunReceiptStatus(present=False, valid=False, required=False)
    )
    next_action: StrictStr = ""
    comparison: JsonObject | None = None

    def to_payload(self) -> JsonObject:
        """Return the report as a validated JSON object, omitting ``None`` values."""
        payload = _json_object_from_model(self, exclude_none=True)
        return payload
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
@dataclass(frozen=True)
class _TriagerOutputParts:
    """Immutable pair of a triager decision and its optional note plan."""

    # Decision string taken from the triager output.
    decision: str
    # Note plan object, or None when there is none.
    note_plan: JsonObject | None
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def canonical_payload_hash(payload: Any) -> str:
    """Return ``sha256:<hex>`` over the canonical JSON encoding of *payload*.

    Canonical means sorted keys, compact separators, non-ASCII characters kept
    verbatim, and UTF-8 bytes.
    """
    canonical_text = json.dumps(
        payload,
        ensure_ascii=False,
        sort_keys=True,
        separators=(",", ":"),
    )
    digest = hashlib.sha256(canonical_text.encode("utf-8"))
    return f"sha256:{digest.hexdigest()}"
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def _file_sha256(path: Path) -> str:
|
|
226
|
+
return f"sha256:{hashlib.sha256(path.read_bytes()).hexdigest()}"
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _sha256_bytes(value: bytes) -> str:
    """Return ``sha256:<hex>`` for the given bytes."""
    hex_digest = hashlib.sha256(value).hexdigest()
    return "sha256:" + hex_digest
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _receipt_without_attestation(receipt: _SubagentRunReceipt) -> JsonObject:
    """Return *receipt*'s JSON payload without its ``receipt_attestation`` field."""
    stripped = receipt.payload_without_attestation()
    return stripped
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def subagent_run_receipt_hash(receipt: _SubagentRunReceipt) -> str:
    """Return ``sha256:<hex>`` over the canonical JSON of the attestation-free receipt.

    The payload is serialized with sorted keys, compact separators and
    non-ASCII characters preserved, then encoded as UTF-8 before hashing.
    """
    hashable = _receipt_without_attestation(receipt)
    canonical_text = json.dumps(hashable, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
    return _sha256_bytes(canonical_text.encode("utf-8"))
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _base64_decode_key(raw: str, *, label: str) -> bytes:
    """Decode a base64-encoded key string into bytes.

    Surrounding whitespace is ignored. *label* names the key in error
    messages.

    Raises:
        ValidationError: if the value is empty after stripping, or is not
            strictly valid standard base64.
    """
    text = raw.strip()
    if text == "":
        raise ValidationError(f"subagent run receipt attestation {label} required")
    try:
        decoded = base64.b64decode(text, validate=True)
    except (ValueError, binascii.Error) as exc:
        raise ValidationError(f"subagent run receipt attestation {label} must be base64") from exc
    return decoded
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def _key_bytes_from_env_or_path(*, env_name: str, path_env_name: str, label: str) -> bytes | None:
    """Load key bytes from an inline env var or from a file named by another env var.

    The inline variable (*env_name*) wins when non-blank. Otherwise, if
    *path_env_name* is non-blank, the file it points to (``~`` expanded) is
    read as UTF-8 text and base64-decoded. Returns ``None`` when neither
    variable is set.

    Raises:
        MissingPathError: if the configured key file does not exist.
        ValidationError: if the key material is empty or not base64.
    """
    inline_value = os.getenv(env_name, "").strip()
    if inline_value:
        return _base64_decode_key(inline_value, label=label)

    path_value = os.getenv(path_env_name, "").strip()
    if not path_value:
        return None

    key_path = Path(path_value).expanduser()
    if key_path.exists():
        return _base64_decode_key(key_path.read_text(encoding="utf-8"), label=label)
    raise MissingPathError(f"subagent run receipt attestation {label} not found: {key_path}")
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def _local_public_key_path() -> Path:
    """Return the public-key file path inside the user state directory."""
    state_dir = _user_state_dir()
    return state_dir / _SUBAGENT_PUBLIC_KEY_FILENAME
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _subagent_private_key() -> Ed25519PrivateKey:
    """Load the Ed25519 private signing key from the environment.

    Raises:
        MissingPathError: if neither private-key env var is configured, or
            the configured key file is missing.
        ValidationError: if the key material is not base64 or is rejected by
            ``Ed25519PrivateKey.from_private_bytes``.
    """
    raw_key = _key_bytes_from_env_or_path(
        env_name=_SUBAGENT_PRIVATE_KEY_ENV,
        path_env_name=_SUBAGENT_PRIVATE_KEY_PATH_ENV,
        label="private signing key",
    )
    if raw_key is not None:
        try:
            return Ed25519PrivateKey.from_private_bytes(raw_key)
        except ValueError as exc:
            raise ValidationError("subagent run receipt attestation private signing key invalid") from exc
    raise MissingPathError(
        "subagent run receipt attestation private signing key not configured; "
        f"set {_SUBAGENT_PRIVATE_KEY_ENV} or {_SUBAGENT_PRIVATE_KEY_PATH_ENV}"
    )
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _subagent_public_key() -> Ed25519PublicKey:
    """Load the trusted Ed25519 public key.

    Lookup order: the inline env var, the path env var, then the key file in
    the user state directory.

    Raises:
        MissingPathError: if no source provides a key.
        ValidationError: if the key material is not base64 or is rejected by
            ``Ed25519PublicKey.from_public_bytes``.
    """
    label = "trusted public key"
    raw_key = _key_bytes_from_env_or_path(
        env_name=_SUBAGENT_PUBLIC_KEY_ENV,
        path_env_name=_SUBAGENT_PUBLIC_KEY_PATH_ENV,
        label=label,
    )
    if raw_key is None:
        # Nothing configured through the environment: fall back to the local key file.
        fallback_path = _local_public_key_path()
        if not fallback_path.exists():
            raise MissingPathError(
                "subagent run receipt attestation trusted public key not configured; "
                f"set {_SUBAGENT_PUBLIC_KEY_ENV} or {_SUBAGENT_PUBLIC_KEY_PATH_ENV}"
            )
        raw_key = _base64_decode_key(fallback_path.read_text(encoding="utf-8"), label=label)
    try:
        return Ed25519PublicKey.from_public_bytes(raw_key)
    except ValueError as exc:
        raise ValidationError("subagent run receipt attestation trusted public key invalid") from exc
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _public_key_raw_bytes(public_key: Ed25519PublicKey) -> bytes:
    """Return *public_key* serialized with raw encoding and raw public format."""
    raw_encoding = serialization.Encoding.Raw
    raw_format = serialization.PublicFormat.Raw
    return public_key.public_bytes(encoding=raw_encoding, format=raw_format)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def _public_key_id(public_key: Ed25519PublicKey) -> str:
    """Return the key identifier: ``sha256:<hex>`` over the raw public key bytes."""
    raw_bytes = _public_key_raw_bytes(public_key)
    return _sha256_bytes(raw_bytes)
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def _attestation_signing_payload(payload: JsonObject) -> bytes:
    """Return the canonical UTF-8 JSON bytes of *payload* without its ``signature`` key.

    The input mapping is not modified. Keys are sorted, separators are
    compact, and non-ASCII characters are preserved.
    """
    unsigned = dict(payload)
    unsigned.pop("signature", None)
    canonical_text = json.dumps(unsigned, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
    return canonical_text.encode("utf-8")
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def _signature_bytes(signature: str) -> bytes:
    """Decode an ``ed25519:<urlsafe-base64>`` signature string into bytes.

    Raises:
        ValidationError: if the ``ed25519:`` prefix is missing, or the
            remainder cannot be decoded (the decoder's ``ValueError``
            subclasses are translated).
    """
    scheme = "ed25519:"
    if not signature.startswith(scheme):
        raise ValidationError("subagent run receipt attestation invalid: signature_kind")
    encoded = signature[len(scheme):]
    try:
        return base64.urlsafe_b64decode(encoded.encode("ascii"))
    except ValueError as exc:
        raise ValidationError("subagent run receipt attestation invalid: signature_encoding") from exc
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def attach_subagent_run_receipt_attestation(payload: JsonObject) -> JsonObject:
    """Validate a receipt payload and return it with a signed Ed25519 attestation attached.

    The attestation records the receipt hash, the stripped identifying fields
    of the receipt, the signing key's id, a random nonce and the current UTC
    time (second precision). The signature covers the canonical JSON of the
    attestation without its ``signature`` key.

    Raises:
        ValidationError: if *payload* violates the receipt contract or the
            private key is invalid.
        MissingPathError: if no private signing key is configured.
    """
    try:
        receipt = _SubagentRunReceipt.model_validate(payload)
    except PydanticValidationError as exc:
        raise ValidationError(f"subagent run receipt contract invalid: {exc}") from exc

    signer = _subagent_private_key()
    verifier = signer.public_key()

    claims = {
        "schema": SUBAGENT_RUN_RECEIPT_ATTESTATION_SCHEMA,
        "attestation_kind": SUBAGENT_RUN_RECEIPT_ATTESTATION_KIND,
        "created_by": SUBAGENT_RUN_RECEIPT_ATTESTATION_CREATED_BY,
        "receipt_schema": receipt.schema_,
        "receipt_hash": subagent_run_receipt_hash(receipt),
        "agent": receipt.agent.strip(),
        "work_item_id": receipt.work_item_id.strip(),
        "raw_file_hash": receipt.raw_file_hash.strip(),
        "output_hash": receipt.output_hash.strip(),
        "key_id": _public_key_id(verifier),
        "nonce": secrets.token_hex(16),
        "issued_at": datetime.now(UTC).replace(microsecond=0).isoformat(),
    }

    # Sign first, then store the signature alongside the claims it covers.
    raw_signature = signer.sign(_attestation_signing_payload(claims))
    encoded_signature = base64.urlsafe_b64encode(raw_signature).decode("ascii")
    claims["signature"] = "ed25519:" + encoded_signature

    attestation = _SubagentRunReceiptAttestation.model_validate(claims)
    return receipt.attested_payload(attestation)
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def _validate_subagent_run_receipt_attestation(receipt: _SubagentRunReceipt) -> None:
    """Verify the Ed25519 attestation embedded in *receipt*.

    Checks, in order: the attestation exists; its receipt hash matches the
    recomputed hash; its agent, work item and hash fields match the receipt;
    the trusted public key loads; the key id matches; the signature verifies.

    Raises:
        ValidationError: on the first failed check. Key-loading failures are
            re-raised with the ``attestation invalid:`` prefix.
    """
    attestation = receipt.receipt_attestation
    if attestation is None:
        raise ValidationError("subagent run receipt attestation required")

    claims = attestation.normalized()
    if claims["receipt_hash"] != subagent_run_receipt_hash(receipt):
        raise ValidationError("subagent run receipt attestation invalid: receipt_hash")

    bound_fields = ("agent", "work_item_id", "raw_file_hash", "output_hash")
    mismatched = next(
        (name for name in bound_fields if claims[name] != str(getattr(receipt, name)).strip()),
        None,
    )
    if mismatched is not None:
        raise ValidationError(f"subagent run receipt attestation invalid: {mismatched}")

    try:
        trusted_key = _subagent_public_key()
    except (MissingPathError, ValidationError) as exc:
        raise ValidationError(f"subagent run receipt attestation invalid: {exc}") from exc

    if claims["key_id"] != _public_key_id(trusted_key):
        raise ValidationError("subagent run receipt attestation invalid: key_id")

    # Decoding problems surface as ValidationError before verification is attempted.
    signature = _signature_bytes(claims["signature"])
    signed_bytes = _attestation_signing_payload(claims)
    try:
        trusted_key.verify(signature, signed_bytes)
    except InvalidSignature as exc:
        raise ValidationError("subagent run receipt attestation invalid: signature") from exc
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def _read_json_object(path: Path, *, label: str) -> dict[str, Any]:
    """Read *path* as UTF-8 JSON and return it as a validated JSON object.

    *label* names the file in error messages.

    Raises:
        ValidationError: if the file is missing, is not valid UTF-8, is not
            valid JSON, or its top-level value is not an object.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except FileNotFoundError as exc:
        raise ValidationError(f"{label} not found: {path}") from exc
    except UnicodeDecodeError as exc:
        # Undecodable bytes are reported like any other malformed input, so
        # callers that only handle ValidationError do not crash on them.
        raise ValidationError(f"{label} is not valid UTF-8: {path}: {exc}") from exc
    try:
        payload = json.loads(text)
    except json.JSONDecodeError as exc:
        raise ValidationError(f"{label} is invalid JSON: {path}: {exc}") from exc
    if not isinstance(payload, dict):
        raise ValidationError(f"{label} must be a JSON object: {path}")
    return JsonObjectAdapter.validate_python(payload)
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def _issue(*, code: str, severity: str, rubric_key: str, message: str) -> dict[str, str]:
    """Build an issue record with keys ``code``, ``severity``, ``rubric_key``, ``message``."""
    return dict(code=code, severity=severity, rubric_key=rubric_key, message=message)
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def _norm_text(value: Any) -> str:
    """Return *value* as stripped, case-folded text with combining marks removed.

    Falsy values become the empty string. The text is NFKD-normalized so that
    accents decompose into combining characters, which are then dropped.
    """
    folded = str(value or "").strip().casefold()
    decomposed = unicodedata.normalize("NFKD", folded)
    return "".join(filter(lambda char: not unicodedata.combining(char), decomposed))
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def _paths_match(left: str, right: Path) -> bool:
    """Return whether *left* and *right* refer to the same path.

    Both sides are compared after ``Path.resolve()``. If resolution fails, the
    comparison falls back to the plain string forms of the two paths.
    """
    try:
        return Path(left).resolve() == right.resolve()
    except (OSError, ValueError):
        # ValueError covers path strings the OS layer rejects outright, such
        # as ones with an embedded NUL byte. *left* comes from receipt
        # content, so a malformed value should mean "no match", not a crash.
        return str(Path(left)) == str(right)
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def _forbidden_key_hits(value: Any, forbidden: set[str], *, prefix: str = "$") -> list[str]:
    """Return the paths of every dict key in *value* whose name is in *forbidden*.

    Dicts and lists are walked recursively in depth-first order. Paths use
    ``$.key`` for dict members and ``[index]`` for list items. Keys are
    compared by their ``str()`` form; other value types yield no hits.
    """
    if isinstance(value, list):
        found: list[str] = []
        for position, item in enumerate(value):
            found += _forbidden_key_hits(item, forbidden, prefix=f"{prefix}[{position}]")
        return found
    if not isinstance(value, dict):
        return []

    found = []
    for raw_key, child in value.items():
        name = str(raw_key)
        location = f"{prefix}.{name}"
        if name in forbidden:
            found.append(location)
        # Keep descending even below a forbidden key so nested hits are reported too.
        found += _forbidden_key_hits(child, forbidden, prefix=location)
    return found
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def _score(issues: list[dict[str, str]]) -> int:
    """Return a 0-100 score: 100 minus 25 per error issue and 10 per other issue."""
    total_penalty = sum(25 if entry.get("severity") == "error" else 10 for entry in issues)
    remaining = 100 - total_penalty
    return remaining if remaining > 0 else 0
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def _agent_metrics(payload: dict[str, Any]) -> dict[str, Any] | None:
    """Return ``payload["agent_metrics"]`` when it is a dict, otherwise ``None``."""
    candidate = payload.get("agent_metrics")
    if isinstance(candidate, dict):
        return candidate
    return None
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def _is_repo_root_artifact(path: Path) -> bool:
    """Return whether *path* sits directly inside ``REPO_ROOT``.

    Resolved paths are compared first; if resolution raises ``OSError`` the
    unresolved parent is compared against ``REPO_ROOT`` instead.
    """
    try:
        containing_dir = path.resolve().parent
        return containing_dir == REPO_ROOT.resolve()
    except OSError:
        return path.parent == REPO_ROOT
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def subagent_run_receipt_signature_payload(receipt: _SubagentRunReceipt) -> JsonObject:
    """Return the receipt payload covered by the legacy signature (no ``signature`` field)."""
    signed_fields = receipt.legacy_signature_payload()
    return signed_fields
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def subagent_run_receipt_signature(receipt: _SubagentRunReceipt, *, signing_key: str) -> str:
    """Return the legacy ``hmac-sha256:<hex>`` signature for *receipt*.

    The HMAC-SHA256 is keyed with the UTF-8 bytes of *signing_key* and
    computed over the canonical JSON (sorted keys, compact separators,
    non-ASCII preserved) of the legacy signature payload.
    """
    canonical_text = json.dumps(
        subagent_run_receipt_signature_payload(receipt),
        ensure_ascii=False,
        sort_keys=True,
        separators=(",", ":"),
    )
    mac = hmac.digest(signing_key.encode("utf-8"), canonical_text.encode("utf-8"), "sha256")
    return "hmac-sha256:" + mac.hex()
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def _subagent_run_receipt_issues(
    *,
    receipt_path: Path | None,
    raw_file: Path,
    output_path: Path,
    require_subagent_run_receipt: bool,
    signing_key: str = "",
) -> tuple[list[dict[str, str]], _SubagentRunReceiptStatus]:
    """Check a subagent run receipt against the evaluated raw file and output.

    Args:
        receipt_path: Receipt JSON file, or ``None`` when no receipt was given.
        raw_file: The raw file assigned to the triager.
        output_path: The triager output file being evaluated.
        require_subagent_run_receipt: When true, a missing receipt is an error
            and the Ed25519 attestation is verified as well.
        signing_key: Key used to verify a legacy HMAC signature, if present.

    Returns:
        ``(issues, status)``. ``status.valid`` is true only when a receipt was
        read and no issues were found.
    """
    rubric = "agent_output_provenance"

    if receipt_path is None:
        absent = _SubagentRunReceiptStatus(present=False, valid=False, required=require_subagent_run_receipt)
        if not require_subagent_run_receipt:
            return [], absent
        missing = _issue(
            code="missing_subagent_run_receipt",
            severity="error" if require_subagent_run_receipt else "warning",
            rubric_key=rubric,
            message=(
                "subagent_run_receipt is required; rerun the packaged med-chat-triager "
                "through the official runner and do not fabricate or patch output in the parent."
            ),
        )
        return [missing], absent

    def unusable(message: str) -> tuple[list[dict[str, str]], _SubagentRunReceiptStatus]:
        # Result for a receipt that exists but cannot be read or parsed.
        problem = _issue(
            code="subagent_run_receipt_invalid",
            severity="error",
            rubric_key=rubric,
            message=message,
        )
        return [problem], _SubagentRunReceiptStatus(
            present=True, valid=False, required=require_subagent_run_receipt
        )

    try:
        raw_receipt = _read_json_object(receipt_path, label="subagent run receipt")
    except ValidationError as exc:
        return unusable(str(exc))
    try:
        receipt = _SubagentRunReceipt.model_validate(raw_receipt)
    except PydanticValidationError as exc:
        return unusable(f"subagent_run_receipt contract invalid: {exc}")

    issues: list[dict[str, str]] = []

    def report(code: str, message: str) -> None:
        # Every finding past this point is an error under the provenance rubric.
        issues.append(_issue(code=code, severity="error", rubric_key=rubric, message=message))

    normalized = receipt.normalized()
    issuer = str(normalized["issuer"])
    agent = str(normalized["agent"])
    work_item_id = str(normalized["work_item_id"])
    raw_file_value = str(normalized["raw_file"])
    raw_file_hash = str(normalized["raw_file_hash"])
    output_path_value = str(normalized["output_path"])
    output_hash = str(normalized["output_hash"])
    legacy_signature = str(normalized["signature"])

    # Identity of the issuer and agent.
    if issuer != "mednotes-subagent-runner":
        report(
            "subagent_run_receipt_wrong_issuer",
            "subagent_run_receipt.issuer must be mednotes-subagent-runner.",
        )
    if agent != "med-chat-triager":
        report(
            "subagent_run_receipt_wrong_agent",
            "subagent_run_receipt.agent must be med-chat-triager.",
        )

    # Mandatory fields must be non-blank after stripping.
    for field in ("work_item_id", "raw_file_hash", "output_hash"):
        if not normalized[field]:
            report(
                f"subagent_run_receipt_{field}_missing",
                f"subagent_run_receipt.{field} must be non-empty.",
            )

    # The receipt must point at the same files that are being evaluated.
    if not (raw_file_value and _paths_match(raw_file_value, raw_file)):
        report(
            "subagent_run_receipt_raw_file_mismatch",
            "subagent_run_receipt.raw_file does not match the assigned raw_file.",
        )
    if output_path_value and not _paths_match(output_path_value, output_path):
        report(
            "subagent_run_receipt_output_path_mismatch",
            "subagent_run_receipt.output_path does not match the evaluated output path.",
        )

    # Recorded hashes must match the files' current content. Both files are
    # hashed unconditionally, so an unreadable file raises here.
    actual_raw_hash = _file_sha256(raw_file)
    if raw_file_hash and raw_file_hash != actual_raw_hash:
        report(
            "subagent_run_receipt_raw_hash_mismatch",
            "subagent_run_receipt.raw_file_hash is stale for the assigned raw_file.",
        )
    actual_output_hash = _file_sha256(output_path)
    if output_hash and output_hash != actual_output_hash:
        report(
            "subagent_run_receipt_output_hash_mismatch",
            "subagent_run_receipt.output_hash is stale for the evaluated output.",
        )

    # Legacy HMAC signature: verified only when the receipt carries one.
    signature_status = "not_present"
    if legacy_signature:
        if not signing_key:
            report(
                "subagent_run_receipt_signature_unverifiable",
                "subagent_run_receipt has a signature but no runner signing key was provided for verification.",
            )
            signature_status = "unverifiable"
        elif hmac.compare_digest(
            legacy_signature, subagent_run_receipt_signature(receipt, signing_key=signing_key)
        ):
            signature_status = "valid"
        else:
            report(
                "subagent_run_receipt_signature_invalid",
                "subagent_run_receipt.signature does not match the runner-issued payload.",
            )
            signature_status = "invalid"

    # Ed25519 attestation: checked only when the receipt is required.
    # NOTE(review): the outcome here overwrites the legacy signature status, so
    # an invalid legacy signature can still end up reported as "valid" (the
    # issue list keeps the legacy error) — confirm this is intended.
    if require_subagent_run_receipt:
        try:
            _validate_subagent_run_receipt_attestation(receipt)
        except ValidationError as exc:
            message = str(exc)
            if "attestation required" in message:
                code = "subagent_run_receipt_signature_required"
                signature_status = "missing"
                message = (
                    "subagent_run_receipt requires runner Ed25519 attestation when it authorizes "
                    "mutating process-chats triage."
                )
            elif "trusted public key" in message:
                code = "subagent_run_receipt_signature_unverifiable"
                signature_status = "unverifiable"
            else:
                code = "subagent_run_receipt_signature_invalid"
                signature_status = "invalid"
            report(code, message)
        else:
            signature_status = "valid"

    status = _SubagentRunReceiptStatus(
        present=True,
        valid=not issues,
        required=require_subagent_run_receipt,
        issuer=issuer,
        agent=agent,
        work_item_id=work_item_id,
        path=str(receipt_path),
        receipt_hash=_file_sha256(receipt_path),
        signature_status=signature_status,
    )
    return issues, status
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
def _metrics_issues(
    payload: dict[str, Any],
    *,
    require_agent_metrics: bool,
) -> tuple[list[dict[str, str]], dict[str, Any]]:
    """Check the ``agent_metrics`` reported alongside a triager output.

    Args:
        payload: Triager output; the metrics are extracted with
            ``_agent_metrics``.
        require_agent_metrics: When true, metric problems are reported with
            severity ``"error"``; otherwise with ``"warning"``. The
            turn-budget issue is always a warning.

    Returns:
        ``(issues, summary)``. ``summary`` is
        ``{"present": False, "valid": False}`` when no metrics were found;
        otherwise it carries the parsed counters plus ``valid``, which is
        true only when no issue was recorded.
    """
    max_turns = 12  # turn budget quoted in the turn_budget_exceeded issue
    severity = "error" if require_agent_metrics else "warning"
    metrics = _agent_metrics(payload)
    if metrics is None:
        return [
            _issue(
                code="missing_agent_metrics",
                severity=severity,
                rubric_key="efficiency_routing",
                message=(
                    "agent_metrics is unavailable; note_plan can still be evaluated, "
                    "but runtime efficiency cannot be trusted."
                ),
            )
        ], {"present": False, "valid": False}

    token_accounting = str(metrics.get("token_accounting") or "")
    issues: list[dict[str, str]] = []

    def record(code: str, message: str, *, level: str = severity) -> None:
        # Every metric issue in this function belongs to the same rubric.
        issues.append(
            _issue(
                code=code,
                severity=level,
                rubric_key="efficiency_routing",
                message=message,
            )
        )

    def required_int(field: str, *, minimum: int, code: str) -> int:
        """Parse a mandatory counter, recording an issue when it is unusable."""
        value = metrics.get(field)
        # bool is an int subclass; reject it explicitly so True is not read as 1.
        if isinstance(value, bool) or value is None:
            record(code, f"agent_metrics.{field} is required and must be an integer >= {minimum}.")
            return 0
        try:
            parsed = int(value)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")); json.loads accepts Infinity.
            record(code, f"agent_metrics.{field} must be an integer >= {minimum}.")
            return 0
        if parsed < minimum:
            record(code, f"agent_metrics.{field} must be >= {minimum}.")
        return parsed

    def optional_int(field: str) -> int:
        """Best-effort parse of a counter that is not required; 0 when unusable."""
        try:
            return int(metrics.get(field) or 0)
        except (TypeError, ValueError, OverflowError):
            # Token counts are not mandatory on this path, so a malformed
            # value must not abort the whole evaluation.
            return 0

    if token_accounting not in {"exact", "estimated", "unavailable"}:
        record(
            "agent_metrics_token_accounting_missing",
            "agent_metrics.token_accounting must be exact, estimated, or unavailable.",
        )
    turns_used = required_int("turns_used", minimum=1, code="agent_metrics_turns_used_missing")
    retries = required_int("retries", minimum=0, code="agent_metrics_retries_missing")
    if token_accounting in {"exact", "estimated"}:
        # Token counts are only mandatory when the agent claims to have them.
        prompt_tokens = required_int("prompt_tokens", minimum=1, code="agent_metrics_prompt_tokens_missing")
        completion_tokens = required_int(
            "completion_tokens",
            minimum=1,
            code="agent_metrics_completion_tokens_missing",
        )
    else:
        prompt_tokens = optional_int("prompt_tokens")
        completion_tokens = optional_int("completion_tokens")
    if turns_used > max_turns:
        record(
            "turn_budget_exceeded",
            f"turns_used={turns_used} exceeds triager max_turns={max_turns}.",
            level="warning",
        )
    return issues, {
        "present": True,
        "valid": not issues,
        "token_accounting": token_accounting,
        "turns_used": turns_used,
        "max_turns": max_turns,
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "retries": retries,
    }
|
|
749
|
+
|
|
750
|
+
|
|
751
|
+
def _output_parts(payload: JsonObject) -> _TriagerOutputParts:
    """Split a triager output into its decision and its note plan.

    A payload whose ``schema`` equals ``TRIAGE_NOTE_PLAN_SCHEMA`` is treated as
    a bare note plan with decision ``"triage"``. Otherwise the decision and the
    plan are read from the ``decision`` and ``note_plan`` keys; a ``note_plan``
    that is not a dict yields ``None``.
    """
    if "schema" in payload and payload["schema"] == TRIAGE_NOTE_PLAN_SCHEMA:
        return _TriagerOutputParts(decision="triage", note_plan=payload)

    raw_decision = payload["decision"] if "decision" in payload else ""
    raw_plan = payload["note_plan"] if "note_plan" in payload else None
    if isinstance(raw_plan, dict):
        plan = JsonObjectAdapter.validate_python(raw_plan)
    else:
        plan = None
    decision_text = str(raw_decision or "").strip()
    return _TriagerOutputParts(decision=decision_text, note_plan=plan)
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
def _titles_for_action(plan: dict[str, Any], action: str, *, field: str = "title") -> set[str]:
    """Collect the normalized titles of plan items that carry ``action``.

    For each matching dict item the value under ``field`` is used when that key
    exists; otherwise ``staged_title`` (falling back to ``title``) is used.
    Blank values are skipped and the rest are passed through ``_norm_text``.
    """

    def title_of(item: dict[str, Any]) -> Any:
        if field in item:
            return item.get(field)
        return item.get("staged_title") or item.get("title")

    matching = (
        title_of(entry)
        for entry in plan.get("items", [])
        if isinstance(entry, dict) and entry.get("action") == action
    )
    return {_norm_text(candidate) for candidate in matching if str(candidate or "").strip()}
|
|
769
|
+
|
|
770
|
+
|
|
771
|
+
def _expectation_issues(
    *,
    expectations: dict[str, Any],
    decision: str,
    normalized_plan: dict[str, Any] | None,
) -> list[dict[str, str]]:
    """Compare a triager result against golden expectations.

    Args:
        expectations: Expectation mapping; an empty mapping yields no issues.
        decision: Decision extracted from the triager output.
        normalized_plan: Normalized note plan, or ``None`` when none is
            available.

    Returns:
        Issues with severity ``"error"`` and rubric ``golden_expectations``.
    """
    if not expectations:
        return []

    found: list[dict[str, str]] = []

    def fail(code: str, message: str) -> None:
        found.append(
            _issue(
                code=code,
                severity="error",
                rubric_key="golden_expectations",
                message=message,
            )
        )

    # These keys are rejected explicitly instead of being silently ignored.
    rejected = ("required_create_titles", "forbidden_create_titles", "required_covered_existing_titles")
    unsupported_keys = [name for name in rejected if name in expectations]
    if unsupported_keys:
        fail(
            "unsupported_triager_expectation_key",
            "unsupported triager expectation keys: " + ", ".join(unsupported_keys),
        )

    expected_decision = str(expectations.get("expected_decision") or "").strip()
    if expected_decision and decision != expected_decision:
        fail("expected_decision_mismatch", "decision does not match triager expectations.")

    if normalized_plan is None:
        # Title expectations cannot be checked without a plan; report once.
        title_keys = (
            "required_planned_meaning_titles",
            "forbidden_planned_meaning_titles",
            "required_not_a_note_titles",
        )
        if any(isinstance(expectations.get(name), list) for name in title_keys):
            fail(
                "expected_note_plan_absent",
                "triager expectations require note_plan, but no valid plan was available.",
            )
        return found

    planned = _titles_for_action(normalized_plan, PLANNED_MEANING_ACTION)
    not_a_note = _titles_for_action(normalized_plan, NOT_A_NOTE_ACTION)

    must_plan = list(expectations.get("required_planned_meaning_titles") or [])
    must_not_plan = list(expectations.get("forbidden_planned_meaning_titles") or [])

    for wanted in must_plan:
        if _norm_text(wanted) not in planned:
            fail(
                "missing_required_planned_meaning_title",
                f"required planned_meaning title absent: {wanted}",
            )
    for banned in must_not_plan:
        if _norm_text(banned) in planned:
            fail(
                "forbidden_planned_meaning_title",
                f"forbidden planned_meaning title present: {banned}",
            )
    for wanted in expectations.get("required_not_a_note_titles") or []:
        if _norm_text(wanted) not in not_a_note:
            fail(
                "missing_required_not_a_note_title",
                f"required not_a_note title absent: {wanted}",
            )
    return found
|
|
866
|
+
|
|
867
|
+
|
|
868
|
+
def _aggregate_efficiency(report: dict[str, Any]) -> dict[str, Any]:
|
|
869
|
+
aggregate = report.get("aggregate") if isinstance(report.get("aggregate"), dict) else {}
|
|
870
|
+
return aggregate.get("efficiency") if isinstance(aggregate.get("efficiency"), dict) else {}
|
|
871
|
+
|
|
872
|
+
|
|
873
|
+
def _input_fingerprints(report: dict[str, Any]) -> dict[str, Any]:
|
|
874
|
+
fingerprints = report.get("input_fingerprints")
|
|
875
|
+
return fingerprints if isinstance(fingerprints, dict) else {}
|
|
876
|
+
|
|
877
|
+
|
|
878
|
+
def _compare_to_baseline(*, current: dict[str, Any], baseline_path: Path) -> dict[str, Any]:
    """Compare the current eval report with a baseline report stored on disk.

    Args:
        current: Report produced for the current run.
        baseline_path: JSON file holding a previous report.

    Returns:
        A mapping with both statuses, the score/issue/token/retry deltas,
        ``comparability_flags``, ``regression_flags`` and an overall ``status``
        of ``not_comparable``, ``regressed`` or ``improved_or_equal``.

    Raises:
        ValidationError: If the baseline does not use
            ``TRIAGER_PROMPT_EVAL_SCHEMA``.
    """
    baseline = _read_json_object(baseline_path, label="triager prompt eval baseline")
    if baseline.get("schema") != TRIAGER_PROMPT_EVAL_SCHEMA:
        raise ValidationError(f"triager prompt eval baseline must use schema {TRIAGER_PROMPT_EVAL_SCHEMA}")

    def as_dict(value: Any) -> dict[str, Any]:
        return value if isinstance(value, dict) else {}

    def delta(now: dict[str, Any], before: dict[str, Any], key: str) -> int:
        # Missing or falsy counters count as 0 on both sides.
        return int(now.get(key) or 0) - int(before.get(key) or 0)

    aggregate_now = as_dict(current.get("aggregate"))
    aggregate_before = as_dict(baseline.get("aggregate"))
    efficiency_now = _aggregate_efficiency(current)
    efficiency_before = _aggregate_efficiency(baseline)
    prints_now = _input_fingerprints(current)
    prints_before = _input_fingerprints(baseline)

    comparison: dict[str, Any] = {
        "baseline_status": str(baseline.get("status") or ""),
        "current_status": str(current.get("status") or ""),
        "score_delta": delta(aggregate_now, aggregate_before, "score"),
        "issue_count_delta": delta(aggregate_now, aggregate_before, "issue_count"),
        "total_prompt_tokens_delta": delta(efficiency_now, efficiency_before, "total_prompt_tokens"),
        "total_completion_tokens_delta": delta(efficiency_now, efficiency_before, "total_completion_tokens"),
        "total_retries_delta": delta(efficiency_now, efficiency_before, "total_retries"),
    }

    # Reports scored against different expectations cannot be ranked.
    comparability_flags: list[str] = []
    either_has_expectations = bool(prints_now.get("evaluation_expectations_present")) or bool(
        prints_before.get("evaluation_expectations_present")
    )
    if either_has_expectations:
        hash_now = str(prints_now.get("evaluation_expectations_hash") or "")
        hash_before = str(prints_before.get("evaluation_expectations_hash") or "")
        if hash_now != hash_before:
            comparability_flags.append("evaluation_expectations_changed")

    regression_flags: list[str] = []
    if comparison["baseline_status"] == "pass" and comparison["current_status"] != "pass":
        regression_flags.append("status_regression")
    if int(comparison["score_delta"]) < 0:
        regression_flags.append("score_regression")
    growth_checks = (
        ("issue_count_delta", "issue_count_regression"),
        ("total_prompt_tokens_delta", "prompt_token_regression"),
        ("total_completion_tokens_delta", "completion_token_regression"),
        ("total_retries_delta", "retry_regression"),
    )
    for delta_key, flag in growth_checks:
        if int(comparison[delta_key]) > 0:
            regression_flags.append(flag)

    comparison["comparability_flags"] = comparability_flags
    comparison["regression_flags"] = regression_flags
    if comparability_flags:
        comparison["status"] = "not_comparable"
    elif regression_flags:
        comparison["status"] = "regressed"
    else:
        comparison["status"] = "improved_or_equal"
    return comparison
|
|
929
|
+
|
|
930
|
+
|
|
931
|
+
def load_triager_prompt_expectations(path: Path) -> dict[str, Any]:
    """Load the ``expectations`` object from a triager expectations file.

    Raises:
        ValidationError: If the file does not use
            ``TRIAGER_PROMPT_EXPECTATIONS_SCHEMA`` or its ``expectations``
            entry is not a dict.
    """
    document = _read_json_object(path, label="triager prompt expectations")
    if document.get("schema") != TRIAGER_PROMPT_EXPECTATIONS_SCHEMA:
        raise ValidationError(f"triager prompt expectations must use schema {TRIAGER_PROMPT_EXPECTATIONS_SCHEMA}")
    loaded = document.get("expectations")
    if isinstance(loaded, dict):
        return loaded
    raise ValidationError("triager prompt expectations require expectations object")
|
|
939
|
+
|
|
940
|
+
|
|
941
|
+
def evaluate_triager_prompt_output(
    *,
    raw_file: Path,
    output_path: Path,
    expectations: dict[str, Any] | None = None,
    baseline_eval_path: Path | None = None,
    require_agent_metrics: bool = False,
    subagent_run_receipt_path: Path | None = None,
    require_subagent_run_receipt: bool = False,
    subagent_runner_signing_key: str = "",
) -> dict[str, Any]:
    """Evaluate one triager output file and build its eval report.

    Checks run in this order: artifact location, forbidden keys, ``raw_file``
    agreement, decision/note_plan contract, agent metrics, subagent run
    receipt, and golden expectations. When ``baseline_eval_path`` is given, a
    comparison is attached, and a ``not_comparable`` or ``regressed`` outcome
    forces the report status to ``needs_review``.

    Returns:
        The report mapping (schema ``TRIAGER_PROMPT_EVAL_SCHEMA``). Its status
        is ``pass`` only when no issue has severity ``error``.
    """
    output = _read_json_object(output_path, label="triager output")
    expectations = expectations or {}
    issues: list[dict[str, str]] = []

    def add_error(code: str, rubric_key: str, message: str) -> None:
        issues.append(_issue(code=code, severity="error", rubric_key=rubric_key, message=message))

    # --- artifact hygiene -------------------------------------------------
    if _is_repo_root_artifact(output_path):
        add_error(
            "agent_artifact_written_to_repo_root",
            "artifact_hygiene",
            (
                "triager output was written directly in the workbench repo root; "
                "use the work_item artifact path from plan-subagents."
            ),
        )

    # --- redaction --------------------------------------------------------
    for key_path in _forbidden_key_hits(
        output,
        {"raw_markdown", "clinical_body", "html", "images", "embeddings", "api_keys"},
    ):
        add_error("forbidden_output_key", "evidence_redaction", f"output contains forbidden key {key_path}")

    # --- scope: the output must point at the assigned raw file --------------
    declared_raw_file = str(output.get("raw_file") or "")
    if not declared_raw_file:
        add_error("missing_raw_file", "scope_control", "triager output must include raw_file.")
    elif not _paths_match(declared_raw_file, raw_file):
        add_error("raw_file_mismatch", "scope_control", "triager output raw_file differs from assigned raw_file.")

    # --- output contract --------------------------------------------------
    parts = _output_parts(output)
    decision = parts.decision
    note_plan = parts.note_plan
    normalized_plan: dict[str, Any] | None = None
    if decision == "triage":
        if note_plan is None:
            add_error("missing_note_plan", "output_contract", "triage decision requires note_plan.")
        else:
            try:
                normalized_plan = normalize_triage_note_plan(note_plan, raw_file)
            except ValidationError as exc:
                add_error("note_plan_invalid", "output_contract", str(exc))
    elif decision == "discard":
        if not str(output.get("reason") or "").strip():
            add_error("missing_discard_reason", "output_contract", "discard decision requires reason.")
    else:
        add_error("invalid_decision", "output_contract", "triager decision must be triage or discard.")

    # --- metrics, provenance, expectations ----------------------------------
    metric_issues, metrics = _metrics_issues(output, require_agent_metrics=require_agent_metrics)
    issues.extend(metric_issues)
    receipt_issues, run_receipt = _subagent_run_receipt_issues(
        receipt_path=subagent_run_receipt_path,
        raw_file=raw_file,
        output_path=output_path,
        require_subagent_run_receipt=require_subagent_run_receipt,
        signing_key=subagent_runner_signing_key,
    )
    issues.extend(receipt_issues)
    expectation_issues = _expectation_issues(
        expectations=expectations,
        decision=decision,
        normalized_plan=normalized_plan,
    )
    issues.extend(expectation_issues)

    # --- aggregate counters and quality flags -------------------------------
    issue_count = len(issues)
    error_count = sum(1 for entry in issues if entry.get("severity") == "error")
    failed_expectation_count = len(expectation_issues)
    metrics_ok = bool(metrics.get("present") and metrics.get("valid"))

    quality_flags = []
    if not metrics_ok:
        quality_flags.append("metric_coverage_incomplete")
    if receipt_issues or (require_subagent_run_receipt and not run_receipt.valid):
        quality_flags.append("agent_output_provenance_incomplete")
    if any(entry.get("rubric_key") == "artifact_hygiene" for entry in issues):
        quality_flags.append("agent_artifact_path_invalid")
    if failed_expectation_count:
        quality_flags.append("golden_expectation_failed")

    report = {
        "schema": TRIAGER_PROMPT_EVAL_SCHEMA,
        "phase": "triage",
        "input_fingerprints": {
            "raw_file": str(raw_file),
            "raw_file_hash": _file_sha256(raw_file),
            "output_hash": canonical_payload_hash(output),
            "output_file_hash": _file_sha256(output_path),
            "subagent_run_receipt_path": str(subagent_run_receipt_path) if subagent_run_receipt_path else "",
            "subagent_run_receipt_hash": run_receipt.receipt_hash,
            "note_plan_hash": note_plan_hash(normalized_plan) if normalized_plan else "",
            "evaluation_expectations_present": bool(expectations),
            "evaluation_expectations_hash": canonical_payload_hash(expectations) if expectations else "",
        },
        "status": "pass" if error_count == 0 else "needs_review",
        "aggregate": {
            "score": _score(issues),
            "issue_count": issue_count,
            "error_count": error_count,
            "redaction_issue_count": sum(1 for entry in issues if entry.get("rubric_key") == "evidence_redaction"),
            "quality_flags": quality_flags,
            "metric_coverage": {
                "items_with_agent_metrics": 1 if metrics_ok else 0,
                "items_total": 1,
                "status": "complete" if metrics_ok else "incomplete",
            },
            "subagent_run_receipt_coverage": {
                "present": run_receipt.present,
                "valid": run_receipt.valid,
                "required": run_receipt.required,
                "signature_status": run_receipt.signature_status,
            },
            "expectation_coverage": {
                "items_with_expectations": 1 if expectations else 0,
                "items_total": 1,
                "failed_expectation_count": failed_expectation_count,
            },
            "efficiency": {
                "total_prompt_tokens": int(metrics.get("prompt_tokens") or 0),
                "total_completion_tokens": int(metrics.get("completion_tokens") or 0),
                "total_retries": int(metrics.get("retries") or 0),
                "turns_used": int(metrics.get("turns_used") or 0),
                "turn_budget_exceeded_count": sum(1 for entry in issues if entry.get("code") == "turn_budget_exceeded"),
            },
            "note_plan": note_plan_summary(normalized_plan)
            if normalized_plan
            else {
                "note_plan_item_count": 0,
                "note_plan_planned_meaning_count": 0,
                "note_plan_attach_count": 0,
                "note_plan_not_a_note_count": 0,
                "note_plan_needs_context_count": 0,
            },
        },
        "issues": issues,
        "agent_metrics": metrics,
        "subagent_run_receipt": run_receipt.to_payload(),
        "next_action": "" if error_count == 0 else TRIAGER_EVAL_RETRY_NEXT_ACTION,
    }

    if baseline_eval_path is None:
        return report

    # --- optional baseline comparison -------------------------------------
    comparison = _compare_to_baseline(current=report, baseline_path=baseline_eval_path)
    report["comparison"] = comparison
    outcome = comparison.get("status")
    if outcome == "not_comparable":
        report["aggregate"]["quality_flags"].append("baseline_not_comparable")
        report["status"] = "needs_review"
        report["next_action"] = "usar o mesmo corpus de ouro antes de comparar triager prompt baselines"
    elif outcome == "regressed":
        report["aggregate"]["quality_flags"].append("baseline_regression")
        report["status"] = "needs_review"
        report["next_action"] = "revisar regressao contra baseline antes de triage --note-plan"
    return report
|
|
1148
|
+
|
|
1149
|
+
|
|
1150
|
+
def validate_triager_prompt_eval_for_note_plan(
    *,
    eval_path: Path,
    raw_file: Path,
    note_plan: dict[str, Any],
    require_subagent_run_receipt: bool = True,
) -> dict[str, Any]:
    """Confirm that an eval report approves exactly this raw file and note plan.

    The report must parse, use ``TRIAGER_PROMPT_EVAL_SCHEMA``, have status
    ``pass`` and carry fingerprints matching ``raw_file`` and the normalized
    ``note_plan``. With ``require_subagent_run_receipt`` the receipt and the
    triager output it names are re-checked against the hashes in the report.

    Returns:
        The validated report as a payload mapping.

    Raises:
        ValidationError: With a ``triager_eval_*`` prefixed message naming the
            first check that failed.
    """

    raw_report = _read_json_object(eval_path, label="triager prompt eval")
    try:
        report_model = _TriagerPromptEvalReport.model_validate(raw_report)
    except PydanticValidationError as exc:
        raise ValidationError(
            "triager_eval_invalid: triager eval report contract invalid; regenerate with eval-triager-output"
        ) from exc
    if report_model.schema_ != TRIAGER_PROMPT_EVAL_SCHEMA:
        raise ValidationError(
            f"triager_eval_invalid: triager eval report must use schema {TRIAGER_PROMPT_EVAL_SCHEMA}"
        )
    if report_model.status != "pass":
        raise ValidationError(
            f"triager_eval_failed: eval-triager-output did not pass; {TRIAGER_EVAL_RETRY_NEXT_ACTION}"
        )

    # --- the report must describe this raw file, unchanged ------------------
    prints = report_model.input_fingerprints
    recorded_raw_file = prints.raw_file
    if not recorded_raw_file or not _paths_match(recorded_raw_file, raw_file):
        raise ValidationError(
            "triager_eval_stale: triager eval raw_file does not match --raw-file; regenerate eval-triager-output"
        )
    recorded_raw_hash = prints.raw_file_hash
    # The hash is only compared when the report recorded one.
    if recorded_raw_hash and recorded_raw_hash != _file_sha256(raw_file):
        raise ValidationError(
            "triager_eval_stale: raw chat changed after eval-triager-output; regenerate triager output/eval"
        )

    # --- the report must describe this note plan ----------------------------
    candidate_plan = normalize_triage_note_plan(note_plan, raw_file)
    expected_hash = note_plan_hash(candidate_plan)
    recorded_plan_hash = prints.note_plan_hash
    if not recorded_plan_hash:
        raise ValidationError(
            "triager_eval_stale: triager eval report missing note_plan_hash; regenerate eval-triager-output"
        )
    if recorded_plan_hash != expected_hash:
        raise ValidationError(
            "triager_eval_stale: triager eval note_plan_hash does not match --note-plan; "
            "regenerar eval-triager-output para o note_plan atual antes de triage"
        )

    if not require_subagent_run_receipt:
        return report_model.to_payload()

    # --- receipt coverage claimed by the report -----------------------------
    coverage = report_model.aggregate.subagent_run_receipt_coverage
    if coverage.valid is not True:
        raise ValidationError(
            "triager_eval_missing_subagent_run_receipt: triage mutante exige "
            "subagent_run_receipt válido emitido pelo runner oficial; rerun med-chat-triager "
            "pela rota oficial e repita eval-triager-output com --subagent-run-receipt."
        )
    recorded_receipt_path = prints.subagent_run_receipt_path.strip()
    if not recorded_receipt_path:
        raise ValidationError(
            "triager_eval_invalid: triager eval claims subagent_run_receipt coverage but "
            "does not point to subagent_run_receipt_path; regenerate eval-triager-output with "
            "--subagent-run-receipt."
        )
    receipt_path = Path(recorded_receipt_path)
    recorded_receipt_hash = prints.subagent_run_receipt_hash.strip()
    if not recorded_receipt_hash:
        raise ValidationError(
            "triager_eval_invalid: triager eval missing subagent_run_receipt_hash; "
            "regenerate eval-triager-output with --subagent-run-receipt."
        )

    # --- the receipt on disk must be the one that was evaluated -------------
    try:
        current_receipt_hash = _file_sha256(receipt_path)
    except FileNotFoundError as exc:
        raise ValidationError(
            "triager_eval_invalid: subagent_run_receipt_path not found; "
            "regenerate eval-triager-output with the official signed receipt."
        ) from exc
    if recorded_receipt_hash != current_receipt_hash:
        raise ValidationError(
            "triager_eval_stale: subagent_run_receipt changed after eval-triager-output; "
            "rerun med-chat-triager/eval through the official runner."
        )
    try:
        receipt_payload = _read_json_object(receipt_path, label="subagent run receipt")
        receipt = _SubagentRunReceipt.model_validate(receipt_payload)
    except (ValidationError, PydanticValidationError) as exc:
        raise ValidationError(
            "triager_eval_invalid: subagent_run_receipt contract invalid; "
            "regenerate eval-triager-output with an official signed subagent-run-receipt."
        ) from exc

    # --- the triager output named by the receipt ----------------------------
    recorded_output_path = receipt.output_path.strip()
    if not recorded_output_path:
        raise ValidationError(
            "triager_eval_invalid: subagent_run_receipt missing output_path; "
            "regenerate the triager output through the official runner."
        )
    output_path = Path(recorded_output_path)
    if not output_path.exists():
        raise ValidationError(
            "triager_eval_invalid: signed subagent_run_receipt points to missing triager output; "
            "regenerate the triager output through the official runner."
        )
    receipt_issues, receipt_status = _subagent_run_receipt_issues(
        receipt_path=receipt_path,
        raw_file=raw_file,
        output_path=output_path,
        require_subagent_run_receipt=True,
    )
    if receipt_issues or receipt_status.valid is not True:
        issue_codes = ", ".join(entry.get("code", "unknown") for entry in receipt_issues)
        code_suffix = f" ({issue_codes})" if issue_codes else ""
        raise ValidationError(
            "triager_eval_invalid: subagent_run_receipt failed signed-chain validation"
            + code_suffix
            + "; regenerate via the official med-chat-triager runner."
        )

    # --- the output file and payload must match what was evaluated ----------
    recorded_output_file_hash = prints.output_file_hash.strip()
    if not recorded_output_file_hash:
        raise ValidationError(
            "triager_eval_invalid: triager eval missing output_file_hash; regenerate eval-triager-output."
        )
    current_output_file_hash = _file_sha256(output_path)
    if recorded_output_file_hash != current_output_file_hash:
        raise ValidationError(
            "triager_eval_stale: triager output changed after eval-triager-output; regenerate eval."
        )
    output_payload = _read_json_object(output_path, label="triager output")
    recorded_output_hash = prints.output_hash.strip()
    if not recorded_output_hash:
        raise ValidationError(
            "triager_eval_invalid: triager eval missing output_hash; regenerate eval-triager-output."
        )
    if recorded_output_hash != canonical_payload_hash(output_payload):
        raise ValidationError(
            "triager_eval_stale: triager output payload changed after eval-triager-output; regenerate eval."
        )

    # --- the supplied plan must be the plan inside that output --------------
    signed_parts = _output_parts(output_payload)
    if signed_parts.decision != "triage" or signed_parts.note_plan is None:
        raise ValidationError(
            "triager_eval_invalid: signed triager output does not contain a triage note_plan."
        )
    signed_plan = normalize_triage_note_plan(signed_parts.note_plan, raw_file)
    if note_plan_hash(signed_plan) != expected_hash:
        raise ValidationError(
            "triager_eval_stale: --note-plan does not match the signed med-chat-triager output; "
            "do not patch note_plan manually."
        )
    return report_model.to_payload()
|