mednotes-opencode 0.1.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (430)
  1. package/.opencode/agents/med-chat-triager.md +204 -0
  2. package/.opencode/agents/med-flashcard-maker.md +63 -0
  3. package/.opencode/agents/med-knowledge-architect.md +230 -0
  4. package/.opencode/agents/med-link-graph-curator.md +177 -0
  5. package/.opencode/agents/med-publish-guard.md +62 -0
  6. package/.opencode/commands/flashcards.md +25 -0
  7. package/.opencode/commands/mednotes/create.md +25 -0
  8. package/.opencode/commands/mednotes/enrich.md +27 -0
  9. package/.opencode/commands/mednotes/fix-wiki.md +27 -0
  10. package/.opencode/commands/mednotes/history.md +22 -0
  11. package/.opencode/commands/mednotes/link-body.md +25 -0
  12. package/.opencode/commands/mednotes/link-related.md +27 -0
  13. package/.opencode/commands/mednotes/link.md +27 -0
  14. package/.opencode/commands/mednotes/pdf-library.md +27 -0
  15. package/.opencode/commands/mednotes/process-chats.md +23 -0
  16. package/.opencode/commands/mednotes/setup.md +21 -0
  17. package/.opencode/commands/mednotes/status.md +27 -0
  18. package/.opencode/commands/mednotes/telemetry.md +27 -0
  19. package/.opencode/commands/report.md +26 -0
  20. package/.opencode/mednotes/AGENTS.md +57 -0
  21. package/.opencode/mednotes/agents/med-chat-triager.md +197 -0
  22. package/.opencode/mednotes/agents/med-flashcard-maker.md +56 -0
  23. package/.opencode/mednotes/agents/med-knowledge-architect.md +224 -0
  24. package/.opencode/mednotes/agents/med-link-graph-curator.md +171 -0
  25. package/.opencode/mednotes/agents/med-publish-guard.md +55 -0
  26. package/.opencode/mednotes/contracts/.gitkeep +1 -0
  27. package/.opencode/mednotes/contracts/agents.json +116 -0
  28. package/.opencode/mednotes/contracts/opencode-plugin.json +70 -0
  29. package/.opencode/mednotes/docs/agent-prompt-hardening.md +567 -0
  30. package/.opencode/mednotes/docs/agent-role-contracts.md +94 -0
  31. package/.opencode/mednotes/docs/anki-mcp-twenty-rules.md +214 -0
  32. package/.opencode/mednotes/docs/anki-templates/README.md +39 -0
  33. package/.opencode/mednotes/docs/anki-templates/cloze.back.html +23 -0
  34. package/.opencode/mednotes/docs/anki-templates/cloze.front.html +14 -0
  35. package/.opencode/mednotes/docs/anki-templates/qa.back.html +24 -0
  36. package/.opencode/mednotes/docs/anki-templates/qa.front.html +14 -0
  37. package/.opencode/mednotes/docs/anki-templates/style.css +182 -0
  38. package/.opencode/mednotes/docs/atomicity-splitting-policy.md +113 -0
  39. package/.opencode/mednotes/docs/extension-docs.md +40 -0
  40. package/.opencode/mednotes/docs/flashcard-ingestion.md +278 -0
  41. package/.opencode/mednotes/docs/knowledge-architect.md +208 -0
  42. package/.opencode/mednotes/docs/merge-policy.md +110 -0
  43. package/.opencode/mednotes/docs/public-vocabulary.md +104 -0
  44. package/.opencode/mednotes/docs/semantic-linker.md +141 -0
  45. package/.opencode/mednotes/docs/taxonomy-policy.md +90 -0
  46. package/.opencode/mednotes/docs/triage-policy.md +187 -0
  47. package/.opencode/mednotes/docs/vault-version-control.md +758 -0
  48. package/.opencode/mednotes/docs/vocabulary-db-recovery.md +58 -0
  49. package/.opencode/mednotes/docs/workflow-output-contract.md +779 -0
  50. package/.opencode/mednotes/hooks/hooks.json +79 -0
  51. package/.opencode/mednotes/package-lock.json +6361 -0
  52. package/.opencode/mednotes/package.json +15 -0
  53. package/.opencode/mednotes/pyproject.toml +48 -0
  54. package/.opencode/mednotes/scripts/bootstrap_windows_python_uv.cmd +13 -0
  55. package/.opencode/mednotes/scripts/bootstrap_windows_python_uv.ps1 +172 -0
  56. package/.opencode/mednotes/scripts/enrich_notes.py +23 -0
  57. package/.opencode/mednotes/scripts/full_reset_windows_python_uv.cmd +13 -0
  58. package/.opencode/mednotes/scripts/hooks/antigravity_hook_status.mjs +212 -0
  59. package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/antigravity.mjs +169 -0
  60. package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/harness_payload.mjs +103 -0
  61. package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/opencode_plugin.mjs +341 -0
  62. package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/opencode_user_config_sync.mjs +177 -0
  63. package/.opencode/mednotes/scripts/hooks/mednotes_hook/anki_preflight.mjs +214 -0
  64. package/.opencode/mednotes/scripts/hooks/mednotes_hook/cli.mjs +143 -0
  65. package/.opencode/mednotes/scripts/hooks/mednotes_hook/diagnostics.mjs +11 -0
  66. package/.opencode/mednotes/scripts/hooks/mednotes_hook/domain/agent_directive_core.mjs +160 -0
  67. package/.opencode/mednotes/scripts/hooks/mednotes_hook/fsm_directive.mjs +1470 -0
  68. package/.opencode/mednotes/scripts/hooks/mednotes_hook/hook_errors.mjs +120 -0
  69. package/.opencode/mednotes/scripts/hooks/mednotes_hook/retention.mjs +114 -0
  70. package/.opencode/mednotes/scripts/hooks/mednotes_hook/runtime.mjs +174 -0
  71. package/.opencode/mednotes/scripts/hooks/mednotes_hook/telemetry_capture.mjs +511 -0
  72. package/.opencode/mednotes/scripts/hooks/mednotes_hook/vault_guard.mjs +624 -0
  73. package/.opencode/mednotes/scripts/hooks/mednotes_hook.mjs +5 -0
  74. package/.opencode/mednotes/scripts/mednotes/_runtime_paths.py +24 -0
  75. package/.opencode/mednotes/scripts/mednotes/anki_model_validator.py +18 -0
  76. package/.opencode/mednotes/scripts/mednotes/capture_extension_diff.py +1562 -0
  77. package/.opencode/mednotes/scripts/mednotes/feedback_report.py +16 -0
  78. package/.opencode/mednotes/scripts/mednotes/flashcard_index.py +18 -0
  79. package/.opencode/mednotes/scripts/mednotes/flashcard_pipeline.py +18 -0
  80. package/.opencode/mednotes/scripts/mednotes/flashcard_report.py +18 -0
  81. package/.opencode/mednotes/scripts/mednotes/flashcard_sources.py +18 -0
  82. package/.opencode/mednotes/scripts/mednotes/obsidian/README.md +6 -0
  83. package/.opencode/mednotes/scripts/mednotes/obsidian_note_utils.py +20 -0
  84. package/.opencode/mednotes/scripts/mednotes/pdf_library/cli.py +16 -0
  85. package/.opencode/mednotes/scripts/mednotes/project_fsm.py +229 -0
  86. package/.opencode/mednotes/scripts/mednotes/setup_telemetry_email.py +404 -0
  87. package/.opencode/mednotes/scripts/mednotes/sync_anki_twenty_rules.py +18 -0
  88. package/.opencode/mednotes/scripts/mednotes/sync_opencode_user_config.py +36 -0
  89. package/.opencode/mednotes/scripts/mednotes/wiki/cli.py +20 -0
  90. package/.opencode/mednotes/scripts/mednotes/wiki_graph.py +18 -0
  91. package/.opencode/mednotes/scripts/mednotes/wiki_tree.py +134 -0
  92. package/.opencode/mednotes/scripts/reset_windows_python_uv.ps1 +625 -0
  93. package/.opencode/mednotes/scripts/run_python.mjs +109 -0
  94. package/.opencode/mednotes/scripts/vault/vault_commit.ps1 +19 -0
  95. package/.opencode/mednotes/scripts/vault/vault_commit.sh +18 -0
  96. package/.opencode/mednotes/scripts/vault/vault_git.ps1 +19 -0
  97. package/.opencode/mednotes/scripts/vault/vault_git.py +3107 -0
  98. package/.opencode/mednotes/scripts/vault/vault_git.sh +18 -0
  99. package/.opencode/mednotes/scripts/vault/vault_precommit.ps1 +19 -0
  100. package/.opencode/mednotes/scripts/vault/vault_precommit.sh +18 -0
  101. package/.opencode/mednotes/skills/THIRD_PARTY_NOTICES.md +45 -0
  102. package/.opencode/mednotes/skills/create-medical-flashcards/SKILL.md +113 -0
  103. package/.opencode/mednotes/skills/create-medical-note/SKILL.md +90 -0
  104. package/.opencode/mednotes/skills/enrich-medical-note/SKILL.md +120 -0
  105. package/.opencode/mednotes/skills/fix-medical-wiki/SKILL.md +559 -0
  106. package/.opencode/mednotes/skills/link-medical-wiki/SKILL.md +224 -0
  107. package/.opencode/mednotes/skills/obsidian-cli/SKILL.md +118 -0
  108. package/.opencode/mednotes/skills/obsidian-markdown/SKILL.md +207 -0
  109. package/.opencode/mednotes/skills/obsidian-markdown/references/CALLOUTS.md +58 -0
  110. package/.opencode/mednotes/skills/obsidian-markdown/references/EMBEDS.md +63 -0
  111. package/.opencode/mednotes/skills/obsidian-markdown/references/PROPERTIES.md +61 -0
  112. package/.opencode/mednotes/skills/obsidian-ops/SKILL.md +136 -0
  113. package/.opencode/mednotes/skills/pdf-library/SKILL.md +45 -0
  114. package/.opencode/mednotes/skills/process-medical-chats/SKILL.md +246 -0
  115. package/.opencode/mednotes/skills/workflow-report/SKILL.md +100 -0
  116. package/.opencode/mednotes/src/mednotes/__init__.py +5 -0
  117. package/.opencode/mednotes/src/mednotes/domains/__init__.py +5 -0
  118. package/.opencode/mednotes/src/mednotes/domains/flashcards/README.md +26 -0
  119. package/.opencode/mednotes/src/mednotes/domains/flashcards/__init__.py +2 -0
  120. package/.opencode/mednotes/src/mednotes/domains/flashcards/build_demo_apkg.py +177 -0
  121. package/.opencode/mednotes/src/mednotes/domains/flashcards/contracts.py +385 -0
  122. package/.opencode/mednotes/src/mednotes/domains/flashcards/flashcards_machine.py +522 -0
  123. package/.opencode/mednotes/src/mednotes/domains/flashcards/fsm.py +817 -0
  124. package/.opencode/mednotes/src/mednotes/domains/flashcards/index.py +630 -0
  125. package/.opencode/mednotes/src/mednotes/domains/flashcards/install_models.py +445 -0
  126. package/.opencode/mednotes/src/mednotes/domains/flashcards/model.py +359 -0
  127. package/.opencode/mednotes/src/mednotes/domains/flashcards/obsidian_links.py +135 -0
  128. package/.opencode/mednotes/src/mednotes/domains/flashcards/obsidian_note_utils.py +546 -0
  129. package/.opencode/mednotes/src/mednotes/domains/flashcards/pipeline.py +580 -0
  130. package/.opencode/mednotes/src/mednotes/domains/flashcards/report.py +510 -0
  131. package/.opencode/mednotes/src/mednotes/domains/flashcards/sources.py +682 -0
  132. package/.opencode/mednotes/src/mednotes/domains/flashcards/sync_rules.py +184 -0
  133. package/.opencode/mednotes/src/mednotes/domains/history/__init__.py +1 -0
  134. package/.opencode/mednotes/src/mednotes/domains/history/history_fsm.py +852 -0
  135. package/.opencode/mednotes/src/mednotes/domains/history/history_machine.py +453 -0
  136. package/.opencode/mednotes/src/mednotes/domains/setup/__init__.py +7 -0
  137. package/.opencode/mednotes/src/mednotes/domains/setup/setup_fsm.py +808 -0
  138. package/.opencode/mednotes/src/mednotes/domains/setup/setup_machine.py +973 -0
  139. package/.opencode/mednotes/src/mednotes/domains/wiki/README.md +64 -0
  140. package/.opencode/mednotes/src/mednotes/domains/wiki/__init__.py +1 -0
  141. package/.opencode/mednotes/src/mednotes/domains/wiki/api.py +668 -0
  142. package/.opencode/mednotes/src/mednotes/domains/wiki/batch_state.py +102 -0
  143. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/__init__.py +1 -0
  144. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/atomicity/__init__.py +1 -0
  145. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/atomicity/atomicity.py +877 -0
  146. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/body_link/__init__.py +1 -0
  147. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/body_link/body_linker.py +1562 -0
  148. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/effects/__init__.py +1 -0
  149. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/effects/effect_adapters.py +949 -0
  150. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/effects/fix_wiki_runtime_adapters.py +433 -0
  151. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/__init__.py +1 -0
  152. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/coverage.py +413 -0
  153. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/graph.py +396 -0
  154. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/graph_fixes.py +161 -0
  155. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/hygiene/__init__.py +1 -0
  156. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/hygiene/hygiene.py +483 -0
  157. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/__init__.py +2 -0
  158. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/anchors.py +185 -0
  159. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/__init__.py +0 -0
  160. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/cache.py +223 -0
  161. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/config.py +131 -0
  162. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/download.py +224 -0
  163. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/frontmatter.py +59 -0
  164. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/insert.py +227 -0
  165. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/local_import.py +54 -0
  166. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/__init__.py +42 -0
  167. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/web_profiles.py +99 -0
  168. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/web_search.py +203 -0
  169. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/wikimedia.py +102 -0
  170. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/__init__.py +1 -0
  171. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/markdown_db_adapter.mjs +434 -0
  172. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/markdown_node_runtime.py +274 -0
  173. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/markdown_query.py +227 -0
  174. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/__init__.py +1 -0
  175. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/artifacts.py +605 -0
  176. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/canonical_merge.py +277 -0
  177. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/markdown_zones.py +85 -0
  178. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/meaning_planner.py +307 -0
  179. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_iter.py +67 -0
  180. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_merge.py +278 -0
  181. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_plan.py +409 -0
  182. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_policy.py +22 -0
  183. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/__init__.py +79 -0
  184. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/fixes.py +264 -0
  185. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/frontmatter.py +435 -0
  186. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/models.py +208 -0
  187. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/prompts.py +37 -0
  188. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/tables.py +236 -0
  189. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/validate.py +404 -0
  190. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/provenance.py +478 -0
  191. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/raw_chats.py +273 -0
  192. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/sources_backfill.py +235 -0
  193. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/__init__.py +10 -0
  194. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/anchors.py +16 -0
  195. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/captions.py +47 -0
  196. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/cli.py +179 -0
  197. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/cloud.py +52 -0
  198. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/config.py +196 -0
  199. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/context_packets.py +76 -0
  200. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/db.py +81 -0
  201. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/doctor.py +102 -0
  202. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/figure_ids.py +42 -0
  203. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/ingest.py +326 -0
  204. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/insert.py +316 -0
  205. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/mentions.py +57 -0
  206. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/ocr.py +71 -0
  207. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/paths.py +35 -0
  208. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/pdf_engine.py +77 -0
  209. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/schema.py +155 -0
  210. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/search.py +188 -0
  211. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/__init__.py +1 -0
  212. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/app.py +89 -0
  213. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/image_backend.py +29 -0
  214. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/state.py +65 -0
  215. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/__init__.py +1 -0
  216. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/publish.py +1139 -0
  217. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/publish_receipts.py +365 -0
  218. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/publish_recovery.py +240 -0
  219. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/__init__.py +1 -0
  220. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/agent_behavior_corpus.py +2069 -0
  221. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/agent_report_validation.py +4448 -0
  222. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/agent_run_audit.py +852 -0
  223. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/architect_prompt_eval.py +341 -0
  224. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/body_linker_eval.py +240 -0
  225. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/curator_output_validation.py +175 -0
  226. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/curator_prompt_eval.py +865 -0
  227. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/triager_prompt_eval.py +1295 -0
  228. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/related_notes/__init__.py +1 -0
  229. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/related_notes/related_notes.py +1920 -0
  230. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/related_notes/related_notes_headless.py +1186 -0
  231. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/__init__.py +1 -0
  232. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/plan_attestation.py +148 -0
  233. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/specialist_receipts.py +360 -0
  234. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/specialist_runtime.py +52 -0
  235. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/specialist_task_runner.py +2470 -0
  236. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/style/__init__.py +1 -0
  237. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/style/style.py +1952 -0
  238. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/subagents/__init__.py +1 -0
  239. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/subagents/agents.py +1767 -0
  240. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/__init__.py +1 -0
  241. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/alias_projection.py +331 -0
  242. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/link_terms.py +151 -0
  243. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/llm_disambiguation.py +182 -0
  244. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/__init__.py +116 -0
  245. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/audit.py +201 -0
  246. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/migration.py +314 -0
  247. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/normalize.py +72 -0
  248. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/policy.py +135 -0
  249. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/resolve.py +413 -0
  250. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/schema.py +157 -0
  251. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/status.py +137 -0
  252. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_bootstrap.py +509 -0
  253. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_curator_batch.py +1115 -0
  254. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_ingestion.py +632 -0
  255. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_map.py +930 -0
  256. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_recovery.py +1388 -0
  257. package/.opencode/mednotes/src/mednotes/domains/wiki/cli.py +6665 -0
  258. package/.opencode/mednotes/src/mednotes/domains/wiki/common.py +69 -0
  259. package/.opencode/mednotes/src/mednotes/domains/wiki/config.py +210 -0
  260. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/__init__.py +74 -0
  261. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/agent_report.py +242 -0
  262. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/agent_run_audit.py +196 -0
  263. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/agents.py +601 -0
  264. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/curator.py +256 -0
  265. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/effect_payloads.py +519 -0
  266. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/happy_path.py +190 -0
  267. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/link_git.py +110 -0
  268. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/link_runtime_artifact.py +52 -0
  269. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/note_plan.py +75 -0
  270. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/paths.py +114 -0
  271. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/public_report.py +53 -0
  272. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/publish.py +111 -0
  273. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/raw_coverage.py +217 -0
  274. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/related_notes.py +136 -0
  275. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/related_notes_headless.py +153 -0
  276. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/related_notes_runtime.py +395 -0
  277. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/schema_registry.py +637 -0
  278. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/specialist.py +432 -0
  279. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/status.py +62 -0
  280. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/style_rewrite.py +568 -0
  281. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/vocabulary_ingestion.py +223 -0
  282. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_blockers.py +510 -0
  283. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_guardrails.py +637 -0
  284. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_outcomes.py +121 -0
  285. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_receipts.py +100 -0
  286. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/__init__.py +1 -0
  287. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/__init__.py +1 -0
  288. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/__main__.py +4 -0
  289. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/cli.py +275 -0
  290. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/__init__.py +2 -0
  291. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/candidates.py +193 -0
  292. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/cli.py +189 -0
  293. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/gemini.py +220 -0
  294. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/inputs.py +120 -0
  295. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/models.py +34 -0
  296. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/parsing.py +48 -0
  297. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/prompts.py +216 -0
  298. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/quality.py +54 -0
  299. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/reporting.py +24 -0
  300. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/runner.py +433 -0
  301. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/utils.py +39 -0
  302. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/vault_guard_bridge.py +17 -0
  303. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/__init__.py +1 -0
  304. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_context_packets.py +454 -0
  305. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_decision_projection.py +133 -0
  306. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_effects.py +1260 -0
  307. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_fsm.py +2768 -0
  308. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_machine.py +1588 -0
  309. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_plan.py +306 -0
  310. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_primary_objective.py +316 -0
  311. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_problem.py +153 -0
  312. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_receipt_evidence.py +306 -0
  313. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_states.py +290 -0
  314. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_user_report.py +342 -0
  315. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/health.py +6332 -0
  316. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/__init__.py +1 -0
  317. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_fsm.py +1119 -0
  318. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_git.py +638 -0
  319. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_machine.py +1106 -0
  320. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_retry_governance.py +374 -0
  321. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_runtime_result.py +485 -0
  322. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_triggers.py +183 -0
  323. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/linking.py +2758 -0
  324. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/reference_repair.py +718 -0
  325. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/related_notes_fsm.py +1855 -0
  326. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link_related/__init__.py +1 -0
  327. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link_related/link_related_machine.py +834 -0
  328. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/__init__.py +1 -0
  329. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_fsm.py +1592 -0
  330. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_machine.py +3097 -0
  331. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_primary_objective.py +28 -0
  332. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_runtime_result.py +185 -0
  333. package/.opencode/mednotes/src/mednotes/domains/wiki/performance.py +97 -0
  334. package/.opencode/mednotes/src/mednotes/kernel/__init__.py +6 -0
  335. package/.opencode/mednotes/src/mednotes/kernel/agent_directive.py +336 -0
  336. package/.opencode/mednotes/src/mednotes/kernel/base.py +51 -0
  337. package/.opencode/mednotes/src/mednotes/kernel/blockers.py +39 -0
  338. package/.opencode/mednotes/src/mednotes/kernel/effect_executor.py +55 -0
  339. package/.opencode/mednotes/src/mednotes/kernel/effect_intent.py +69 -0
  340. package/.opencode/mednotes/src/mednotes/kernel/effects.py +160 -0
  341. package/.opencode/mednotes/src/mednotes/kernel/errors.py +38 -0
  342. package/.opencode/mednotes/src/mednotes/kernel/fsm_event.py +35 -0
  343. package/.opencode/mednotes/src/mednotes/kernel/fsm_model.py +55 -0
  344. package/.opencode/mednotes/src/mednotes/kernel/fsm_transition_result.py +75 -0
  345. package/.opencode/mednotes/src/mednotes/kernel/guardrails.py +188 -0
  346. package/.opencode/mednotes/src/mednotes/kernel/progress.py +319 -0
  347. package/.opencode/mednotes/src/mednotes/kernel/public_report.py +346 -0
  348. package/.opencode/mednotes/src/mednotes/kernel/state_machine.py +164 -0
  349. package/.opencode/mednotes/src/mednotes/kernel/workflow.py +619 -0
  350. package/.opencode/mednotes/src/mednotes/platform/__init__.py +5 -0
  351. package/.opencode/mednotes/src/mednotes/platform/backup_policy.py +382 -0
  352. package/.opencode/mednotes/src/mednotes/platform/feedback/__init__.py +62 -0
  353. package/.opencode/mednotes/src/mednotes/platform/feedback/cli.py +275 -0
  354. package/.opencode/mednotes/src/mednotes/platform/feedback/contracts.py +83 -0
  355. package/.opencode/mednotes/src/mednotes/platform/feedback/core.py +4168 -0
  356. package/.opencode/mednotes/src/mednotes/platform/feedback/integrity.py +989 -0
  357. package/.opencode/mednotes/src/mednotes/platform/feedback/operational_contract.py +2293 -0
  358. package/.opencode/mednotes/src/mednotes/platform/feedback/telemetry.py +875 -0
  359. package/.opencode/mednotes/src/mednotes/platform/feedback/telemetry_config.py +65 -0
  360. package/.opencode/mednotes/src/mednotes/platform/opencode_runtime_config.py +182 -0
  361. package/.opencode/mednotes/src/mednotes/platform/paths/__init__.py +1560 -0
  362. package/.opencode/mednotes/src/mednotes/platform/secrets.py +89 -0
  363. package/.opencode/mednotes/src/mednotes/platform/user_config.py +103 -0
  364. package/.opencode/mednotes/src/mednotes/platform/vault_guard.py +214 -0
  365. package/.opencode/mednotes/uv.lock +932 -0
  366. package/.opencode/mednotes.generated.json +395 -0
  367. package/.opencode/opencode.json +31 -0
  368. package/.opencode/plugins/mednotes-fsm.mjs +7 -0
  369. package/.opencode/plugins/mednotes_hook/adapters/antigravity.mjs +169 -0
  370. package/.opencode/plugins/mednotes_hook/adapters/harness_payload.mjs +103 -0
  371. package/.opencode/plugins/mednotes_hook/adapters/opencode_plugin.mjs +341 -0
  372. package/.opencode/plugins/mednotes_hook/adapters/opencode_user_config_sync.mjs +177 -0
  373. package/.opencode/plugins/mednotes_hook/anki_preflight.mjs +214 -0
  374. package/.opencode/plugins/mednotes_hook/cli.mjs +143 -0
  375. package/.opencode/plugins/mednotes_hook/diagnostics.mjs +11 -0
  376. package/.opencode/plugins/mednotes_hook/domain/agent_directive_core.mjs +160 -0
  377. package/.opencode/plugins/mednotes_hook/fsm_directive.mjs +1470 -0
  378. package/.opencode/plugins/mednotes_hook/hook_errors.mjs +120 -0
  379. package/.opencode/plugins/mednotes_hook/retention.mjs +114 -0
  380. package/.opencode/plugins/mednotes_hook/runtime.mjs +174 -0
  381. package/.opencode/plugins/mednotes_hook/telemetry_capture.mjs +511 -0
  382. package/.opencode/plugins/mednotes_hook/vault_guard.mjs +624 -0
  383. package/AGENTS.md +57 -0
  384. package/README.md +194 -0
  385. package/adapters/antigravity/agents.json +80 -0
  386. package/adapters/antigravity/templates/med-chat-triager.md +214 -0
  387. package/adapters/antigravity/templates/med-flashcard-maker.md +72 -0
  388. package/adapters/antigravity/templates/med-knowledge-architect.md +241 -0
  389. package/adapters/antigravity/templates/med-link-graph-curator.md +187 -0
  390. package/adapters/antigravity/templates/med-publish-guard.md +71 -0
  391. package/adapters/gemini-cli/gemini-extension.json +14 -0
  392. package/adapters/gemini-cli/package.json +15 -0
  393. package/adapters/gemini-cli/pyproject.toml +48 -0
  394. package/bin/mednotes-opencode.mjs +155 -0
  395. package/contracts/agents.json +116 -0
  396. package/core/agents/med-chat-triager.md +197 -0
  397. package/core/agents/med-flashcard-maker.md +56 -0
  398. package/core/agents/med-knowledge-architect.md +224 -0
  399. package/core/agents/med-link-graph-curator.md +171 -0
  400. package/core/agents/med-publish-guard.md +55 -0
  401. package/core/commands/flashcards.toml +22 -0
  402. package/core/commands/mednotes/create.toml +22 -0
  403. package/core/commands/mednotes/enrich.toml +24 -0
  404. package/core/commands/mednotes/fix-wiki.toml +24 -0
  405. package/core/commands/mednotes/history.toml +19 -0
  406. package/core/commands/mednotes/link-body.toml +22 -0
  407. package/core/commands/mednotes/link-related.toml +24 -0
  408. package/core/commands/mednotes/link.toml +24 -0
  409. package/core/commands/mednotes/pdf-library.toml +24 -0
  410. package/core/commands/mednotes/process-chats.toml +20 -0
  411. package/core/commands/mednotes/setup.toml +18 -0
  412. package/core/commands/mednotes/status.toml +24 -0
  413. package/core/commands/mednotes/telemetry.toml +24 -0
  414. package/core/commands/report.toml +23 -0
  415. package/core/skills/THIRD_PARTY_NOTICES.md +45 -0
  416. package/core/skills/create-medical-flashcards/SKILL.md +113 -0
  417. package/core/skills/create-medical-note/SKILL.md +90 -0
  418. package/core/skills/enrich-medical-note/SKILL.md +120 -0
  419. package/core/skills/fix-medical-wiki/SKILL.md +559 -0
  420. package/core/skills/link-medical-wiki/SKILL.md +224 -0
  421. package/core/skills/obsidian-cli/SKILL.md +118 -0
  422. package/core/skills/obsidian-markdown/SKILL.md +207 -0
  423. package/core/skills/obsidian-markdown/references/CALLOUTS.md +58 -0
  424. package/core/skills/obsidian-markdown/references/EMBEDS.md +63 -0
  425. package/core/skills/obsidian-markdown/references/PROPERTIES.md +61 -0
  426. package/core/skills/obsidian-ops/SKILL.md +136 -0
  427. package/core/skills/pdf-library/SKILL.md +45 -0
  428. package/core/skills/process-medical-chats/SKILL.md +246 -0
  429. package/core/skills/workflow-report/SKILL.md +100 -0
  430. package/package.json +45 -0
@@ -0,0 +1,865 @@
1
+ """Offline prompt-quality evaluation for med-link-graph-curator outputs."""
2
+ from __future__ import annotations
3
+
4
+ import hashlib
5
+ import json
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from typing import Any, Literal
9
+
10
+ from pydantic import BaseModel, ConfigDict, Field, StrictStr
11
+ from pydantic import ValidationError as PydanticValidationError
12
+
13
+ from mednotes.domains.wiki.capabilities.vocabulary.vocabulary_curator_batch import (
14
+ VOCABULARY_CURATOR_BATCH_OUTPUT_MANIFEST_SCHEMA,
15
+ VOCABULARY_CURATOR_BATCH_PLAN_SCHEMA,
16
+ curator_plan_hash,
17
+ )
18
+ from mednotes.domains.wiki.common import ValidationError
19
+ from mednotes.domains.wiki.contracts.curator import LinkPolicy
20
+ from mednotes.kernel.base import JsonObject, JsonObjectAdapter, JsonValue
21
+
22
# Schema identifier required of curator prompt eval reports and baselines.
CURATOR_PROMPT_EVAL_SCHEMA = "medical-notes-workbench.curator-prompt-eval.v1"

# Schema identifier required of golden expectation files.
CURATOR_PROMPT_GOLDEN_EXPECTATIONS_SCHEMA = "medical-notes-workbench.curator-prompt-golden-expectations.v1"

# Second name bound to the same golden expectations schema identifier.
CURATOR_PROMPT_EXPECTATIONS_SCHEMA = CURATOR_PROMPT_GOLDEN_EXPECTATIONS_SCHEMA
27
+
28
+
29
def _json_object_from_model(model: BaseModel, **dump_options: Any) -> JsonObject:
    """Dump *model* in JSON mode (by alias) and validate the result as a JSON object.

    Extra keyword arguments are forwarded unchanged to ``model.model_dump``.
    """
    # Prompt eval reports cross a JSON boundary before they gate DB mutation;
    # every field used for promotion is parsed into this local contract first.
    dumped = model.model_dump(mode="json", by_alias=True, **dump_options)
    return JsonObjectAdapter.validate_python(dumped)
33
+
34
+
35
class _CuratorPromotionInputFingerprints(BaseModel):
    """Input fingerprints section of an eval report; unknown keys are rejected."""

    model_config = ConfigDict(extra="forbid")

    # Required: both hashes must be present for the report to validate.
    plan_hash: StrictStr
    manifest_hash: StrictStr

    # Optional: default to empty / False when the report omits them.
    prompt_identity_hash: StrictStr = ""
    evaluation_expectations_present: bool = False
    evaluation_expectations_hash: StrictStr = ""
43
+
44
+
45
class _CuratorPromotionExpectationCoverage(BaseModel):
    """Golden expectation coverage counters of an eval report; unknown keys are rejected."""

    model_config = ConfigDict(extra="forbid")

    # All counters default to zero when absent.
    items_with_expectations: int = 0
    items_total: int = 0
    failed_expectation_count: int = 0
    unused_expectation_count: int = 0

    # Baseline promotion requires this to equal "complete".
    status: StrictStr = ""
53
+
54
+
55
class _CuratorPromotionAggregate(BaseModel):
    """Aggregate section of an eval report; unknown keys are rejected."""

    model_config = ConfigDict(extra="forbid")

    # Scalar totals, all defaulting to zero.
    score: int = 0
    item_count: int = 0
    issue_count: int = 0
    redaction_issue_count: int = 0

    quality_flags: list[StrictStr] = Field(default_factory=list)

    # Free-form JSON objects; their inner shape is not validated here.
    route_counts: JsonObject = Field(default_factory=dict)
    metric_coverage: JsonObject = Field(default_factory=dict)
    efficiency: JsonObject = Field(default_factory=dict)

    # Typed coverage block, defaulting to an all-default coverage model.
    expectation_coverage: _CuratorPromotionExpectationCoverage = Field(
        default_factory=_CuratorPromotionExpectationCoverage,
    )
    unused_expectation_work_ids: list[StrictStr] = Field(default_factory=list)
70
+
71
+
72
class _CuratorPromotionEvalReport(BaseModel):
    """Eval report shape accepted for baseline promotion; unknown keys are rejected."""

    model_config = ConfigDict(extra="forbid")

    # Read from / written to the "schema" key; only the v1 eval schema validates.
    schema_: Literal["medical-notes-workbench.curator-prompt-eval.v1"] = Field(alias="schema")
    phase: StrictStr = ""
    prompt_identity: JsonObject = Field(default_factory=dict)
    # Required typed sections.
    input_fingerprints: _CuratorPromotionInputFingerprints
    prompt_eval_context: JsonObject = Field(default_factory=dict)
    status: StrictStr
    aggregate: _CuratorPromotionAggregate
    # Per-item and aggregate issue records are kept as free-form JSON objects.
    items: list[JsonObject] = Field(default_factory=list)
    aggregate_issues: list[JsonObject] = Field(default_factory=list)
    next_action: StrictStr = ""
    # Optional sections; dropped from the payload when None.
    comparison: JsonObject | None = None
    baseline_metadata: JsonObject | None = None

    def to_payload(self) -> JsonObject:
        """Return the report as a JSON object, keyed by alias and without None fields."""
        payload = _json_object_from_model(self, exclude_none=True)
        return payload
90
+
91
+
92
+ @dataclass(frozen=True)
93
+ class _AliasLinkPolicyProjection:
94
+ link_policy: str
95
+
96
+ @classmethod
97
+ def from_payload(cls, payload: JsonObject) -> _AliasLinkPolicyProjection:
98
+ # This projection deliberately reads only the field needed for direct
99
+ # alias counting; shape validation for full curator outputs remains in
100
+ # the vocabulary curator contracts.
101
+ value = payload["link_policy"] if "link_policy" in payload else ""
102
+ return cls(link_policy=value.strip() if isinstance(value, str) else "")
103
+
104
+
105
+ def _canonical_payload_hash(payload: Any) -> str:
106
+ encoded = json.dumps(payload, ensure_ascii=False, sort_keys=True, separators=(",", ":")).encode("utf-8")
107
+ return f"sha256:{hashlib.sha256(encoded).hexdigest()}"
108
+
109
+
110
def canonical_payload_hash(payload: Any) -> str:
    """Public entry point for the module's canonical payload hash."""
    digest = _canonical_payload_hash(payload)
    return digest
112
+
113
+
114
def _read_json_object(path: Path, *, label: str) -> dict[str, Any]:
    """Read *path* as UTF-8 JSON and return it as a validated JSON object.

    Args:
        path: File to read.
        label: Human-readable artifact name used as the prefix of error messages.

    Returns:
        The parsed top-level object after validation by ``JsonObjectAdapter``.

    Raises:
        ValidationError: If the file does not exist, is not valid UTF-8, is
            not valid JSON, or its top-level value is not an object.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except FileNotFoundError as exc:
        raise ValidationError(f"{label} not found: {path}") from exc
    except UnicodeDecodeError as exc:
        # Undecodable bytes used to escape as a raw UnicodeDecodeError; report
        # them as the same domain error as every other malformed input.
        raise ValidationError(f"{label} is invalid JSON: {path}: {exc}") from exc

    try:
        payload = json.loads(text)
    except json.JSONDecodeError as exc:
        raise ValidationError(f"{label} is invalid JSON: {path}: {exc}") from exc

    if not isinstance(payload, dict):
        raise ValidationError(f"{label} must be a JSON object: {path}")
    return JsonObjectAdapter.validate_python(payload)
124
+
125
+
126
def load_curator_prompt_expectations(path: Path, *, expected_plan_hash: str | None = None) -> dict[str, Any]:
    """Load a golden expectations file and return its expectations keyed by work id.

    Args:
        path: Golden expectations JSON file.
        expected_plan_hash: When given, compared with the file's
            ``source_plan_hash``; the comparison is skipped if either is empty.

    Returns:
        Mapping of stringified work id to that work item's expectation object.

    Raises:
        ValidationError: On a wrong schema, a plan hash mismatch, a missing
            ``expectations_by_work_id`` object, or a non-object expectation.
    """
    document = _read_json_object(path, label="curator prompt golden expectations")
    if document.get("schema") != CURATOR_PROMPT_GOLDEN_EXPECTATIONS_SCHEMA:
        raise ValidationError(
            f"curator prompt golden expectations must use schema {CURATOR_PROMPT_GOLDEN_EXPECTATIONS_SCHEMA}"
        )

    declared_hash = str(document.get("source_plan_hash") or "")
    hashes_comparable = bool(expected_plan_hash) and bool(declared_hash)
    if hashes_comparable and declared_hash != expected_plan_hash:
        raise ValidationError("curator prompt golden expectations source_plan_hash mismatch")

    raw_expectations = document.get("expectations_by_work_id")
    if not isinstance(raw_expectations, dict):
        raise ValidationError("curator prompt golden expectations require expectations_by_work_id")

    by_work_id: dict[str, Any] = {}
    for key, entry in raw_expectations.items():
        if not isinstance(entry, dict):
            raise ValidationError(f"expectation for work_id {key} must be a JSON object")
        by_work_id[str(key)] = entry
    return by_work_id
144
+
145
+
146
def promote_curator_prompt_baseline(eval_path: Path) -> dict[str, Any]:
    """Validate the eval report at *eval_path* and return it as an active baseline.

    Returns:
        The normalized report payload with ``baseline_metadata`` set to an
        ``active`` status, the source path, and the canonical hash of the
        normalized report.

    Raises:
        ValidationError: If the report does not validate, does not have
            ``status == "pass"``, lacks plan/manifest hashes, was produced
            without golden expectations, has incomplete expectation coverage,
            or has unused golden expectations.
    """
    raw_report = _read_json_object(eval_path, label="curator prompt eval")
    try:
        report = _CuratorPromotionEvalReport.model_validate(raw_report)
    except PydanticValidationError as exc:
        raise ValidationError("curator prompt eval baseline promotion requires a valid eval report") from exc

    if report.schema_ != CURATOR_PROMPT_EVAL_SCHEMA:
        raise ValidationError(f"curator prompt eval must use schema {CURATOR_PROMPT_EVAL_SCHEMA}")
    if report.status != "pass":
        raise ValidationError("curator prompt baseline promotion requires status=pass")

    prints = report.input_fingerprints
    if not (prints.plan_hash and prints.manifest_hash):
        raise ValidationError("curator prompt baseline promotion requires input_fingerprints plan_hash and manifest_hash")
    if not prints.evaluation_expectations_present:
        raise ValidationError("curator prompt baseline promotion requires golden expectations")

    coverage = report.aggregate.expectation_coverage
    if coverage.status != "complete":
        raise ValidationError("curator prompt baseline promotion requires complete golden expectation coverage")
    if coverage.unused_expectation_count != 0:
        raise ValidationError("curator prompt baseline promotion rejects unused golden expectations")

    promoted = report.to_payload()
    # Hash the normalized report before the promotion metadata is attached.
    source_hash = _canonical_payload_hash(promoted)
    promoted["baseline_metadata"] = {
        "status": "active",
        "source_eval_path": str(eval_path),
        "source_eval_hash": source_hash,
    }
    return promoted
172
+
173
+
174
def build_curator_prompt_expectations_template(plan: dict[str, Any]) -> dict[str, Any]:
    """Build a blank golden expectations document for every work item in *plan*.

    Each work item gets a summary row (work id, note path, title, route) and
    an expectation entry whose assertions are all empty, ready to be filled
    in. The document records the plan hash it was generated from.

    Raises:
        ValidationError: If the plan fails the checks in ``_plan_items``.
    """
    summaries: list[dict[str, str]] = []
    blank_expectations: dict[str, dict[str, Any]] = {}

    for work_id, item in _plan_items(plan).items():
        route = item.get("difficulty_route")
        route_name = str(route.get("route") or "") if isinstance(route, dict) else ""
        summary = {
            "work_id": work_id,
            "note_path": str(item.get("note_path") or ""),
            "title": str(item.get("title") or ""),
            "route": route_name,
        }
        summaries.append(summary)
        blank_expectations[work_id] = {
            "primary_label": "",
            "required_aliases": [],
            "expected_alias_policies": {},
            "forbidden_direct_aliases": [],
            "expected_deferred_work_codes": [],
        }

    template: dict[str, Any] = {
        "schema": CURATOR_PROMPT_GOLDEN_EXPECTATIONS_SCHEMA,
        "source_plan_hash": curator_plan_hash(plan),
        "item_count": len(summaries),
        "items": summaries,
        "expectations_by_work_id": blank_expectations,
    }
    return template
202
+
203
+
204
def _plan_items(plan: dict[str, Any]) -> dict[str, dict[str, Any]]:
    """Return the plan's work items keyed by stringified ``work_id``, in plan order.

    Raises:
        ValidationError: If the plan has the wrong schema, ``work_items`` is
            not a list, an item is not an object with a truthy ``work_id``, or
            a ``work_id`` occurs more than once.
    """
    if plan.get("schema") != VOCABULARY_CURATOR_BATCH_PLAN_SCHEMA:
        raise ValidationError(f"curator batch plan must use schema {VOCABULARY_CURATOR_BATCH_PLAN_SCHEMA}")

    work_items = plan.get("work_items")
    if not isinstance(work_items, list):
        raise ValidationError("curator batch plan requires work_items[]")

    by_work_id: dict[str, dict[str, Any]] = {}
    for entry in work_items:
        has_work_id = isinstance(entry, dict) and bool(entry.get("work_id"))
        if not has_work_id:
            raise ValidationError("curator batch work_items require work_id")
        key = str(entry["work_id"])
        if key in by_work_id:
            raise ValidationError(f"duplicate work_id in curator batch plan: {key}")
        by_work_id[key] = entry
    return by_work_id
219
+
220
+
221
def _manifest_items(manifest_path: Path) -> list[dict[str, str]]:
    """Return the validated ``{work_id, output_path}`` items of a batch output manifest.

    Delegates to ``_manifest_payload_and_items`` so the manifest validation
    rules (schema, ``items[]`` shape, required keys, unique work ids) are
    defined once instead of being duplicated here.

    Raises:
        ValidationError: If the manifest is unreadable or fails validation.
    """
    _manifest, items = _manifest_payload_and_items(manifest_path)
    return items
241
+
242
+
243
def _manifest_payload_and_items(manifest_path: Path) -> tuple[dict[str, Any], list[dict[str, str]]]:
    """Load a batch output manifest and return it with its normalized items.

    Returns:
        A pair of the full manifest object and a list of
        ``{"work_id", "output_path"}`` string records in manifest order.

    Raises:
        ValidationError: If the manifest is unreadable, has the wrong schema,
            ``items`` is not a list, an item lacks a truthy ``work_id`` or
            ``output_path``, or a ``work_id`` occurs more than once.
    """
    manifest = _read_json_object(manifest_path, label="curator batch output manifest")
    if manifest.get("schema") != VOCABULARY_CURATOR_BATCH_OUTPUT_MANIFEST_SCHEMA:
        raise ValidationError(
            f"curator batch manifest must use schema {VOCABULARY_CURATOR_BATCH_OUTPUT_MANIFEST_SCHEMA}"
        )

    entries = manifest.get("items")
    if not isinstance(entries, list):
        raise ValidationError("curator batch manifest requires items[]")

    known_ids: set[str] = set()
    normalized: list[dict[str, str]] = []
    for entry in entries:
        complete = isinstance(entry, dict) and bool(entry.get("work_id")) and bool(entry.get("output_path"))
        if not complete:
            raise ValidationError("each curator batch manifest item requires work_id and output_path")
        key = str(entry["work_id"])
        if key in known_ids:
            raise ValidationError(f"duplicate work_id in curator batch manifest: {key}")
        known_ids.add(key)
        normalized.append({"work_id": key, "output_path": str(entry["output_path"])})
    return manifest, normalized
263
+
264
+
265
+ def _issue(*, code: str, severity: str, rubric_key: str, message: str) -> dict[str, str]:
266
+ return {"code": code, "severity": severity, "rubric_key": rubric_key, "message": message}
267
+
268
+
269
+ def _forbidden_key_hits(value: Any, forbidden: set[str], *, prefix: str = "$") -> list[str]:
270
+ hits: list[str] = []
271
+ if isinstance(value, dict):
272
+ for key, nested in value.items():
273
+ key_text = str(key)
274
+ path = f"{prefix}.{key_text}"
275
+ if key_text in forbidden:
276
+ hits.append(path)
277
+ hits.extend(_forbidden_key_hits(nested, forbidden, prefix=path))
278
+ elif isinstance(value, list):
279
+ for index, nested in enumerate(value):
280
+ hits.extend(_forbidden_key_hits(nested, forbidden, prefix=f"{prefix}[{index}]"))
281
+ return hits
282
+
283
+
284
def _direct_alias_count(payload: JsonObject) -> int:
    """Count alias objects in ``payload["aliases"]`` whose link policy is direct.

    Returns 0 when ``aliases`` is not a list; non-object entries are ignored.
    """
    entries = _json_field(payload, "aliases")
    if not isinstance(entries, list):
        return 0
    return sum(
        1
        for entry in entries
        if isinstance(entry, dict)
        and _AliasLinkPolicyProjection.from_payload(entry).link_policy == LinkPolicy.DIRECT
    )
293
+
294
+
295
+ def _norm_text(value: Any) -> str:
296
+ return " ".join(str(value or "").casefold().split())
297
+
298
+
299
def _alias_entries(payload: JsonObject) -> list[JsonObject]:
    """Return the object entries of ``payload["aliases"]``, or ``[]`` if it is not a list."""
    candidates = _json_field(payload, "aliases")
    if isinstance(candidates, list):
        return [entry for entry in candidates if isinstance(entry, dict)]
    return []
304
+
305
+
306
def _golden_assertion_count(expectations: JsonObject) -> int:
    """Count the non-blank assertions in one work item's golden expectations.

    A non-blank ``primary_label`` counts once. Each non-blank entry of the
    list-valued fields counts once, as does each non-blank key of
    ``expected_alias_policies``. A field of the wrong container type
    contributes nothing.
    """
    total = 1 if _norm_text(_json_field(expectations, "primary_label")) else 0

    # Field name -> container type it must have to be counted. Iterating the
    # dict-valued field yields its keys (the alias texts).
    countable_fields = (
        ("required_aliases", list),
        ("expected_alias_policies", dict),
        ("forbidden_direct_aliases", list),
        ("expected_deferred_work_codes", list),
    )
    for field_name, container_type in countable_fields:
        container = _json_field(expectations, field_name)
        if isinstance(container, container_type):
            total += sum(1 for entry in container if _norm_text(entry))
    return total
323
+
324
+
325
+ def _json_field(source: JsonObject, key: str, default: JsonValue = None) -> JsonValue:
326
+ return source.get(key, default)
327
+
328
+
329
def _expectation_issues(*, expected: dict[str, Any], payload: dict[str, Any]) -> list[dict[str, str]]:
    """Compare a curator output against the golden expectations of its work item.

    Args:
        expected: Work item description; golden expectations are read from its
            ``evaluation_expectations`` object.
        payload: Curator output to check.

    Returns:
        Error-severity issues under the ``golden_expectations`` rubric, in
        check order: assertion strength, primary label, required aliases,
        expected alias policies, forbidden direct aliases, expected deferred
        work. Empty when there is no expectations object.
    """
    expectations = expected.get("evaluation_expectations")
    if not isinstance(expectations, dict):
        return []

    def golden_error(code: str, message: str) -> dict[str, str]:
        # Every issue raised here shares the same severity and rubric key.
        return _issue(code=code, severity="error", rubric_key="golden_expectations", message=message)

    found: list[dict[str, str]] = []

    # 1. The expectations themselves must carry at least two real assertions.
    assertion_total = _golden_assertion_count(expectations)
    if assertion_total == 0:
        found.append(
            golden_error(
                "empty_golden_expectations",
                "golden expectations must contain at least one actionable assertion",
            )
        )
    elif assertion_total < 2:
        found.append(
            golden_error(
                "weak_golden_expectations",
                "golden expectations must contain at least two actionable assertions",
            )
        )

    # 2. Primary label, compared after text normalization.
    wanted_label = expectations.get("primary_label")
    primary = payload.get("primary_meaning")
    actual_label = primary.get("label") if isinstance(primary, dict) else None
    if wanted_label and _norm_text(actual_label) != _norm_text(wanted_label):
        found.append(
            golden_error(
                "expected_primary_label_mismatch",
                "primary_meaning.label does not match evaluation_expectations.primary_label",
            )
        )

    entries = _alias_entries(payload)
    present_texts = {_norm_text(entry.get("text")) for entry in entries}

    # 3. Required aliases must appear among the output's alias texts.
    required = expectations.get("required_aliases")
    if isinstance(required, list):
        found.extend(
            golden_error("missing_required_alias", f"required alias absent: {wanted}")
            for wanted in required
            if _norm_text(wanted) not in present_texts
        )

    # 4. Each alias with an expected policy must exist and carry that policy
    #    on at least one matching entry.
    policies = expectations.get("expected_alias_policies")
    if isinstance(policies, dict):
        for alias_text, wanted_policy in policies.items():
            target = _norm_text(alias_text)
            observed = [
                str(entry.get("link_policy") or "")
                for entry in entries
                if _norm_text(entry.get("text")) == target
            ]
            if not observed:
                found.append(
                    golden_error(
                        "missing_expected_alias_policy",
                        f"alias with expected policy absent: {alias_text}",
                    )
                )
            elif str(wanted_policy) not in observed:
                found.append(golden_error("alias_policy_mismatch", f"alias policy mismatch for {alias_text}"))

    # 5. Listed aliases must not be emitted with the "direct" policy.
    banned = expectations.get("forbidden_direct_aliases")
    if isinstance(banned, list):
        banned_texts = {_norm_text(alias) for alias in banned}
        for entry in entries:
            is_direct = str(entry.get("link_policy") or "") == "direct"
            if _norm_text(entry.get("text")) in banned_texts and is_direct:
                found.append(
                    golden_error("forbidden_direct_alias", f"alias must not be direct: {entry.get('text')}")
                )

    # 6. Expected deferred work codes, matched against the first truthy of
    #    code / reason / type on each deferred item.
    wanted_codes = expectations.get("expected_deferred_work_codes")
    if isinstance(wanted_codes, list):
        deferred = payload.get("deferred_work_items")
        seen_codes: set[str] = set()
        if isinstance(deferred, list):
            for item in deferred:
                if isinstance(item, dict):
                    seen_codes.add(_norm_text(item.get("code") or item.get("reason") or item.get("type")))
        for code in wanted_codes:
            if _norm_text(code) not in seen_codes:
                found.append(
                    golden_error("missing_expected_deferred_work", f"expected deferred work absent: {code}")
                )

    return found
440
+
441
+
442
+ def _has_complex_signal(payload: dict[str, Any]) -> bool:
443
+ deferred = payload.get("deferred_work_items")
444
+ duplicates = payload.get("duplicate_candidates")
445
+ split_warning = payload.get("split_warning")
446
+ primary = payload.get("primary_meaning")
447
+ atomic_status = str(primary.get("atomic_status") or "") if isinstance(primary, dict) else ""
448
+ return (
449
+ (isinstance(deferred, list) and len(deferred) > 0)
450
+ or (isinstance(duplicates, list) and len(duplicates) > 0)
451
+ or bool(split_warning)
452
+ or atomic_status in {"non_atomic", "split_candidate", "uncertain"}
453
+ )
454
+
455
+
456
+ def _agent_metrics(payload: dict[str, Any]) -> dict[str, Any] | None:
457
+ metrics = payload.get("agent_metrics")
458
+ return metrics if isinstance(metrics, dict) else None
459
+
460
+
461
def _evaluate_payload(*, expected: dict[str, Any], payload: dict[str, Any]) -> tuple[list[dict[str, str]], dict[str, Any]]:
    """Evaluate one curator output against its work item and return issues plus a metrics summary.

    Checks run in this order: output contract fields, forbidden keys, golden
    expectations, route-specific rules, then agent efficiency metrics.

    Args:
        expected: Work item description (``output_contract``,
            ``difficulty_route``, optional ``evaluation_expectations``).
        payload: Curator output to evaluate.

    Returns:
        A pair of the issue list and a metrics summary. The summary always has
        ``present``; the numeric fields are added only when ``agent_metrics``
        is an object.
    """
    found: list[dict[str, str]] = []

    # --- Output contract: required top-level keys and forbidden keys anywhere.
    contract = expected.get("output_contract")
    if not isinstance(contract, dict):
        contract = {}
    must_include = contract.get("must_include")
    must_not_include = contract.get("must_not_include")
    required_keys = must_include if isinstance(must_include, list) else []
    forbidden_keys = must_not_include if isinstance(must_not_include, list) else []

    absent = [name for name in map(str, required_keys) if name not in payload]
    if absent:
        found.append(
            _issue(
                code="missing_output_contract_fields",
                severity="error",
                rubric_key="output_contract",
                message=f"missing required fields: {', '.join(absent)}",
            )
        )

    for hit_path in _forbidden_key_hits(payload, set(map(str, forbidden_keys))):
        found.append(
            _issue(
                code="forbidden_output_key",
                severity="error",
                rubric_key="evidence_redaction",
                message=f"forbidden evidence key present at {hit_path}",
            )
        )

    # --- Golden expectations.
    found.extend(_expectation_issues(expected=expected, payload=payload))

    # --- Route-specific rules.
    route = expected.get("difficulty_route")
    if not isinstance(route, dict):
        route = {}
    route_name = str(route.get("route") or "unknown")

    if route_name == "simple_atomic" and _direct_alias_count(payload) > 3:
        found.append(
            _issue(
                code="too_many_direct_aliases_for_simple_route",
                severity="warning",
                rubric_key="alias_precision",
                message="simple_atomic output has more than three direct aliases; review for over-broad surfaces",
            )
        )
    if route_name == "complex_semantic_review" and not _has_complex_signal(payload):
        found.append(
            _issue(
                code="complex_route_without_defer_or_split_signal",
                severity="error",
                rubric_key="defer_when_uncertain",
                message="complex route output did not include deferred work, duplicate candidates, split warning, or uncertain atomic status",
            )
        )

    # --- Agent efficiency metrics.
    metrics = _agent_metrics(payload)
    summary: dict[str, Any] = {"present": metrics is not None}
    if metrics is None:
        found.append(
            _issue(
                code="missing_agent_metrics",
                severity="error",
                rubric_key="efficiency_routing",
                message="agent_metrics is required so prompt quality can be evaluated for efficiency",
            )
        )
        return found, summary

    # NOTE(review): int() raises ValueError/TypeError on non-numeric values
    # (e.g. "n/a" or a list), so malformed agent metrics abort the evaluation
    # instead of producing an issue - confirm whether that is intended.
    max_turns = int(route.get("max_turns") or 0)
    turns_used = int(metrics.get("turns_used") or 0)
    prompt_tokens = int(metrics.get("prompt_tokens") or 0)
    completion_tokens = int(metrics.get("completion_tokens") or 0)
    retries = int(metrics.get("retries") or 0)
    token_accounting = str(metrics.get("token_accounting") or "")

    summary["token_accounting"] = token_accounting
    summary["turns_used"] = turns_used
    summary["max_turns"] = max_turns
    summary["prompt_tokens"] = prompt_tokens
    summary["completion_tokens"] = completion_tokens
    summary["retries"] = retries

    # A max_turns of 0 means no turn budget is enforced.
    if max_turns and turns_used > max_turns:
        found.append(
            _issue(
                code="turn_budget_exceeded",
                severity="warning",
                rubric_key="efficiency_routing",
                message=f"turns_used={turns_used} exceeds route max_turns={max_turns}",
            )
        )
    if token_accounting not in {"exact", "estimated", "unavailable"}:
        found.append(
            _issue(
                code="agent_metrics_token_accounting_missing",
                severity="warning",
                rubric_key="efficiency_routing",
                message="agent_metrics.token_accounting must be exact, estimated, or unavailable",
            )
        )
    if retries > 1:
        found.append(
            _issue(
                code="retry_count_high",
                severity="warning",
                rubric_key="efficiency_routing",
                message=f"retries={retries}; inspect prompt clarity or packet completeness",
            )
        )
    return found, summary
567
+
568
+
569
+ def _score(issues: list[dict[str, str]]) -> int:
570
+ penalty = 0
571
+ for issue in issues:
572
+ penalty += 25 if issue.get("severity") == "error" else 10
573
+ return max(0, 100 - penalty)
574
+
575
+
576
+ def _aggregate_efficiency(report: dict[str, Any]) -> dict[str, Any]:
577
+ aggregate = report.get("aggregate") if isinstance(report.get("aggregate"), dict) else {}
578
+ return aggregate.get("efficiency") if isinstance(aggregate.get("efficiency"), dict) else {}
579
+
580
+
581
+ def _input_fingerprints(report: dict[str, Any]) -> dict[str, Any]:
582
+ fingerprints = report.get("input_fingerprints")
583
+ return fingerprints if isinstance(fingerprints, dict) else {}
584
+
585
+
586
+ def _compare_to_baseline(*, current: dict[str, Any], baseline_path: Path) -> dict[str, Any]:
587
+ baseline = _read_json_object(baseline_path, label="curator prompt eval baseline")
588
+ if baseline.get("schema") != CURATOR_PROMPT_EVAL_SCHEMA:
589
+ raise ValidationError(f"curator prompt eval baseline must use schema {CURATOR_PROMPT_EVAL_SCHEMA}")
590
+ current_aggregate = current.get("aggregate") if isinstance(current.get("aggregate"), dict) else {}
591
+ baseline_aggregate = baseline.get("aggregate") if isinstance(baseline.get("aggregate"), dict) else {}
592
+ current_efficiency = _aggregate_efficiency(current)
593
+ baseline_efficiency = _aggregate_efficiency(baseline)
594
+ current_prompt = current.get("prompt_identity") if isinstance(current.get("prompt_identity"), dict) else {}
595
+ baseline_prompt = baseline.get("prompt_identity") if isinstance(baseline.get("prompt_identity"), dict) else {}
596
+ current_fingerprints = _input_fingerprints(current)
597
+ baseline_fingerprints = _input_fingerprints(baseline)
598
+ comparison: dict[str, Any] = {
599
+ "baseline_status": str(baseline.get("status") or ""),
600
+ "current_status": str(current.get("status") or ""),
601
+ "score_delta": int(current_aggregate.get("score") or 0) - int(baseline_aggregate.get("score") or 0),
602
+ "issue_count_delta": int(current_aggregate.get("issue_count") or 0)
603
+ - int(baseline_aggregate.get("issue_count") or 0),
604
+ "total_prompt_tokens_delta": int(current_efficiency.get("total_prompt_tokens") or 0)
605
+ - int(baseline_efficiency.get("total_prompt_tokens") or 0),
606
+ "total_completion_tokens_delta": int(current_efficiency.get("total_completion_tokens") or 0)
607
+ - int(baseline_efficiency.get("total_completion_tokens") or 0),
608
+ "total_retries_delta": int(current_efficiency.get("total_retries") or 0)
609
+ - int(baseline_efficiency.get("total_retries") or 0),
610
+ "turn_budget_exceeded_count_delta": int(current_efficiency.get("turn_budget_exceeded_count") or 0)
611
+ - int(baseline_efficiency.get("turn_budget_exceeded_count") or 0),
612
+ "prompt_identity_changed": str(current_prompt.get("aggregate_hash") or "")
613
+ != str(baseline_prompt.get("aggregate_hash") or ""),
614
+ }
615
+ comparability_flags: list[str] = []
616
+ baseline_metadata = baseline.get("baseline_metadata") if isinstance(baseline.get("baseline_metadata"), dict) else {}
617
+ if baseline_metadata.get("status") != "active":
618
+ comparability_flags.append("baseline_not_promoted")
619
+ current_metadata = current.get("baseline_metadata") if isinstance(current.get("baseline_metadata"), dict) else {}
620
+ if current_metadata and current_metadata.get("status") != "active":
621
+ comparability_flags.append("current_baseline_metadata_invalid")
622
+ current_expectations_present = bool(current_fingerprints.get("evaluation_expectations_present"))
623
+ baseline_expectations_present = bool(baseline_fingerprints.get("evaluation_expectations_present"))
624
+ if not baseline_expectations_present:
625
+ comparability_flags.append("baseline_missing_golden_expectations")
626
+ if not current_expectations_present:
627
+ comparability_flags.append("current_missing_golden_expectations")
628
+ if current_expectations_present or baseline_expectations_present:
629
+ current_expectations_hash = str(current_fingerprints.get("evaluation_expectations_hash") or "")
630
+ baseline_expectations_hash = str(baseline_fingerprints.get("evaluation_expectations_hash") or "")
631
+ if current_expectations_hash != baseline_expectations_hash:
632
+ comparability_flags.append("evaluation_expectations_changed")
633
+ regression_flags: list[str] = []
634
+ if comparison["baseline_status"] == "pass" and comparison["current_status"] != "pass":
635
+ regression_flags.append("status_regression")
636
+ if int(comparison["score_delta"]) < 0:
637
+ regression_flags.append("score_regression")
638
+ if int(comparison["issue_count_delta"]) > 0:
639
+ regression_flags.append("issue_count_regression")
640
+ if int(comparison["total_prompt_tokens_delta"]) > 0:
641
+ regression_flags.append("prompt_token_regression")
642
+ if int(comparison["total_completion_tokens_delta"]) > 0:
643
+ regression_flags.append("completion_token_regression")
644
+ if int(comparison["total_retries_delta"]) > 0:
645
+ regression_flags.append("retry_regression")
646
+ if int(comparison["turn_budget_exceeded_count_delta"]) > 0:
647
+ regression_flags.append("turn_budget_regression")
648
+ comparison["comparability_flags"] = comparability_flags
649
+ comparison["regression_flags"] = regression_flags
650
+ if comparability_flags:
651
+ comparison["status"] = "not_comparable"
652
+ else:
653
+ comparison["status"] = "regressed" if regression_flags else "improved_or_equal"
654
+ return comparison
655
+
656
+
657
def evaluate_curator_prompt_outputs(
    *,
    plan: dict[str, Any],
    manifest_path: Path,
    baseline_eval_path: Path | None = None,
) -> dict[str, Any]:
    """Evaluate each curator batch output named in a manifest against the plan.

    NOTE(review): the listing this was taken from had no indentation; the
    nesting of the per-item bookkeeping below was reconstructed from syntax
    and should be confirmed against the packaged module.

    Args:
        plan: Curation plan. Read for its work items (through
            ``_plan_items``), the optional
            ``evaluation_expectations_by_work_id`` mapping and the optional
            ``prompt_identity`` mapping.
        manifest_path: Manifest location, loaded through
            ``_manifest_payload_and_items``. Every manifest item provides a
            ``work_id`` and an ``output_path``.
        baseline_eval_path: Optional earlier evaluation. When supplied, the
            result of ``_compare_to_baseline`` is stored under
            ``comparison`` and may downgrade the report status.

    Returns:
        A report dict holding ``schema``, ``phase``, ``prompt_identity``,
        ``input_fingerprints``, ``status``, ``aggregate``, ``items``,
        ``aggregate_issues`` and ``next_action``; ``comparison`` is added
        only when a baseline path is given.
    """
    plan_items = _plan_items(plan)
    raw_golden = plan.get("evaluation_expectations_by_work_id")
    golden_by_work_id = raw_golden if isinstance(raw_golden, dict) else {}
    manifest, manifest_items = _manifest_payload_and_items(manifest_path)

    golden_active = bool(golden_by_work_id)
    seen_work_ids = {str(entry["work_id"]) for entry in manifest_items}
    # Expectations keyed by a work_id that the manifest never mentions.
    orphan_golden_ids = sorted(str(key) for key in set(golden_by_work_id) - seen_work_ids)

    evaluated: list[dict[str, Any]] = []
    per_route: dict[str, int] = {}
    report_level_issues: list[dict[str, str]] = []
    # Insertion order fixes the order in which metric fields are read below.
    metric_totals: dict[str, int] = dict.fromkeys(
        ("prompt_tokens", "completion_tokens", "retries", "turns_used"), 0
    )
    with_metrics = 0
    redaction_hits = 0
    over_budget = 0
    with_golden = 0
    golden_failures = 0

    def tally(found: list[dict[str, Any]], field: str, wanted: str) -> int:
        """Count the issues whose ``field`` equals ``wanted``."""
        return sum(1 for found_issue in found if found_issue.get(field) == wanted)

    for entry in manifest_items:
        work_id = entry["work_id"]
        output_path = Path(entry["output_path"])
        planned = plan_items.get(work_id)

        # Plan-level expectations are overlaid on a copy of the plan item so
        # the plan itself is never mutated.
        target: dict[str, Any] | None = planned
        if isinstance(planned, dict) and isinstance(golden_by_work_id.get(work_id), dict):
            target = {**planned, "evaluation_expectations": golden_by_work_id[work_id]}
        golden = target.get("evaluation_expectations") if isinstance(target, dict) else None
        has_golden = isinstance(golden, dict)
        assertion_count = _golden_assertion_count(golden) if has_golden else 0

        if planned is None:
            issues = [
                _issue(
                    code="unknown_work_id",
                    severity="error",
                    rubric_key="output_contract",
                    message="manifest work_id is absent from plan",
                )
            ]
            route_name = "unknown"
            metrics_summary = {"present": False}
        else:
            target = target or {}
            route_info = planned.get("difficulty_route")
            if not isinstance(route_info, dict):
                route_info = {}
            route_name = str(route_info.get("route") or "unknown")
            try:
                payload = _read_json_object(output_path, label="curator batch output")
            except ValidationError as exc:
                # An unreadable output is reported as an issue, not raised.
                issues = [
                    _issue(
                        code="invalid_output_json",
                        severity="error",
                        rubric_key="output_contract",
                        message=str(exc),
                    )
                ]
                metrics_summary = {"present": False}
            else:
                issues, metrics_summary = _evaluate_payload(expected=target, payload=payload)

        if golden_active and not has_golden:
            issues.append(
                _issue(
                    code="missing_golden_expectations",
                    severity="error",
                    rubric_key="golden_expectations",
                    message="golden expectations missing for work_id",
                )
            )

        per_route[route_name] = per_route.get(route_name, 0) + 1
        golden_issue_count = tally(issues, "rubric_key", "golden_expectations")
        if has_golden:
            with_golden += 1
        if golden_active:
            golden_failures += golden_issue_count
        if metrics_summary.get("present"):
            with_metrics += 1
            for field in metric_totals:
                metric_totals[field] += int(metrics_summary.get(field) or 0)
        redaction_hits += tally(issues, "rubric_key", "evidence_redaction")
        over_budget += tally(issues, "code", "turn_budget_exceeded")

        evaluated.append(
            {
                "work_id": work_id,
                "output_path": str(output_path),
                "route": route_name,
                "status": "needs_review" if issues else "pass",
                "score": _score(issues),
                "issues": issues,
                "agent_metrics": metrics_summary,
                "evaluation_expectations": {
                    "present": has_golden,
                    "failed_count": golden_issue_count,
                    "assertion_count": assertion_count,
                },
            }
        )

    total = len(evaluated)
    if total:
        aggregate_score = round(sum(int(row["score"]) for row in evaluated) / total)
    else:
        # An empty manifest scores as a perfect run.
        aggregate_score = 100
    if with_metrics:
        avg_turns_used = round(metric_totals["turns_used"] / with_metrics, 2)
    else:
        avg_turns_used = 0.0
    metric_coverage_status = "complete" if with_metrics == total else "incomplete"

    quality_flags: list[str] = []
    if with_metrics < total:
        quality_flags.append("metric_coverage_incomplete")
    if orphan_golden_ids:
        golden_failures += len(orphan_golden_ids)
        report_level_issues.append(
            _issue(
                code="unused_golden_expectations",
                severity="error",
                rubric_key="golden_expectations",
                message="golden expectations include work_id values absent from the evaluated manifest",
            )
        )

    if with_golden != total:
        golden_coverage_status = "incomplete"
    elif orphan_golden_ids:
        golden_coverage_status = "stale"
    else:
        golden_coverage_status = "complete"

    if golden_failures or (golden_active and golden_coverage_status != "complete"):
        quality_flags.append("golden_expectation_failed")
    if orphan_golden_ids:
        quality_flags.append("unused_golden_expectations")

    issue_count = sum(len(row["issues"]) for row in evaluated) + len(report_level_issues)
    clean = issue_count == 0

    prompt_identity = plan.get("prompt_identity")
    if not isinstance(prompt_identity, dict):
        prompt_identity = {}
    fingerprints = {
        "plan_hash": curator_plan_hash(plan),
        "manifest_hash": _canonical_payload_hash(manifest),
        "prompt_identity_hash": str(prompt_identity.get("aggregate_hash") or ""),
        "evaluation_expectations_present": golden_active,
        "evaluation_expectations_hash": _canonical_payload_hash(golden_by_work_id),
    }

    aggregate: dict[str, Any] = {
        "score": aggregate_score,
        "item_count": total,
        "issue_count": issue_count,
        "redaction_issue_count": redaction_hits,
        "quality_flags": quality_flags,
        "route_counts": per_route,
        "metric_coverage": {
            "items_with_agent_metrics": with_metrics,
            "items_total": total,
            "status": metric_coverage_status,
        },
        "efficiency": {
            "total_prompt_tokens": metric_totals["prompt_tokens"],
            "total_completion_tokens": metric_totals["completion_tokens"],
            "total_retries": metric_totals["retries"],
            "avg_turns_used": avg_turns_used,
            "turn_budget_exceeded_count": over_budget,
        },
    }
    # Coverage details are reported only when the plan carried expectations.
    if golden_active:
        aggregate["expectation_coverage"] = {
            "items_with_expectations": with_golden,
            "items_total": total,
            "failed_expectation_count": golden_failures,
            "unused_expectation_count": len(orphan_golden_ids),
            "status": golden_coverage_status,
        }
    if orphan_golden_ids:
        aggregate["unused_expectation_work_ids"] = orphan_golden_ids

    report: dict[str, Any] = {
        "schema": CURATOR_PROMPT_EVAL_SCHEMA,
        "phase": "vocabulary_curation",
        "prompt_identity": prompt_identity,
        "input_fingerprints": fingerprints,
        "status": "pass" if clean else "needs_review",
        "aggregate": aggregate,
        "items": evaluated,
        "aggregate_issues": report_level_issues,
        "next_action": "" if clean else "revisar outputs e prompt/rubrica antes de apply-curator-batch",
    }

    if baseline_eval_path is not None:
        comparison = _compare_to_baseline(current=report, baseline_path=baseline_eval_path)
        report["comparison"] = comparison
        verdict = comparison.get("status")
        # Each listed verdict forces a review: (verdict, quality flag, next action).
        review_outcomes = (
            (
                "regressed",
                "baseline_regression",
                "revisar regressao contra baseline antes de apply-curator-batch",
            ),
            (
                "not_comparable",
                "baseline_not_comparable",
                "revisar baseline/corpus de ouro antes de comparar engenharia de prompt",
            ),
        )
        for trigger, flag, follow_up in review_outcomes:
            if verdict != trigger:
                continue
            flags = report["aggregate"]["quality_flags"]
            if flag not in flags:
                flags.append(flag)
            report["status"] = "needs_review"
            report["next_action"] = follow_up
            break
    return report