mednotes-opencode 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (430):
  1. package/.opencode/agents/med-chat-triager.md +204 -0
  2. package/.opencode/agents/med-flashcard-maker.md +63 -0
  3. package/.opencode/agents/med-knowledge-architect.md +230 -0
  4. package/.opencode/agents/med-link-graph-curator.md +177 -0
  5. package/.opencode/agents/med-publish-guard.md +62 -0
  6. package/.opencode/commands/flashcards.md +25 -0
  7. package/.opencode/commands/mednotes/create.md +25 -0
  8. package/.opencode/commands/mednotes/enrich.md +27 -0
  9. package/.opencode/commands/mednotes/fix-wiki.md +27 -0
  10. package/.opencode/commands/mednotes/history.md +22 -0
  11. package/.opencode/commands/mednotes/link-body.md +25 -0
  12. package/.opencode/commands/mednotes/link-related.md +27 -0
  13. package/.opencode/commands/mednotes/link.md +27 -0
  14. package/.opencode/commands/mednotes/pdf-library.md +27 -0
  15. package/.opencode/commands/mednotes/process-chats.md +23 -0
  16. package/.opencode/commands/mednotes/setup.md +21 -0
  17. package/.opencode/commands/mednotes/status.md +27 -0
  18. package/.opencode/commands/mednotes/telemetry.md +27 -0
  19. package/.opencode/commands/report.md +26 -0
  20. package/.opencode/mednotes/AGENTS.md +57 -0
  21. package/.opencode/mednotes/agents/med-chat-triager.md +197 -0
  22. package/.opencode/mednotes/agents/med-flashcard-maker.md +56 -0
  23. package/.opencode/mednotes/agents/med-knowledge-architect.md +224 -0
  24. package/.opencode/mednotes/agents/med-link-graph-curator.md +171 -0
  25. package/.opencode/mednotes/agents/med-publish-guard.md +55 -0
  26. package/.opencode/mednotes/contracts/.gitkeep +1 -0
  27. package/.opencode/mednotes/contracts/agents.json +116 -0
  28. package/.opencode/mednotes/contracts/opencode-plugin.json +70 -0
  29. package/.opencode/mednotes/docs/agent-prompt-hardening.md +567 -0
  30. package/.opencode/mednotes/docs/agent-role-contracts.md +94 -0
  31. package/.opencode/mednotes/docs/anki-mcp-twenty-rules.md +214 -0
  32. package/.opencode/mednotes/docs/anki-templates/README.md +39 -0
  33. package/.opencode/mednotes/docs/anki-templates/cloze.back.html +23 -0
  34. package/.opencode/mednotes/docs/anki-templates/cloze.front.html +14 -0
  35. package/.opencode/mednotes/docs/anki-templates/qa.back.html +24 -0
  36. package/.opencode/mednotes/docs/anki-templates/qa.front.html +14 -0
  37. package/.opencode/mednotes/docs/anki-templates/style.css +182 -0
  38. package/.opencode/mednotes/docs/atomicity-splitting-policy.md +113 -0
  39. package/.opencode/mednotes/docs/extension-docs.md +40 -0
  40. package/.opencode/mednotes/docs/flashcard-ingestion.md +278 -0
  41. package/.opencode/mednotes/docs/knowledge-architect.md +208 -0
  42. package/.opencode/mednotes/docs/merge-policy.md +110 -0
  43. package/.opencode/mednotes/docs/public-vocabulary.md +104 -0
  44. package/.opencode/mednotes/docs/semantic-linker.md +141 -0
  45. package/.opencode/mednotes/docs/taxonomy-policy.md +90 -0
  46. package/.opencode/mednotes/docs/triage-policy.md +187 -0
  47. package/.opencode/mednotes/docs/vault-version-control.md +758 -0
  48. package/.opencode/mednotes/docs/vocabulary-db-recovery.md +58 -0
  49. package/.opencode/mednotes/docs/workflow-output-contract.md +779 -0
  50. package/.opencode/mednotes/hooks/hooks.json +79 -0
  51. package/.opencode/mednotes/package-lock.json +6361 -0
  52. package/.opencode/mednotes/package.json +15 -0
  53. package/.opencode/mednotes/pyproject.toml +48 -0
  54. package/.opencode/mednotes/scripts/bootstrap_windows_python_uv.cmd +13 -0
  55. package/.opencode/mednotes/scripts/bootstrap_windows_python_uv.ps1 +172 -0
  56. package/.opencode/mednotes/scripts/enrich_notes.py +23 -0
  57. package/.opencode/mednotes/scripts/full_reset_windows_python_uv.cmd +13 -0
  58. package/.opencode/mednotes/scripts/hooks/antigravity_hook_status.mjs +212 -0
  59. package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/antigravity.mjs +169 -0
  60. package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/harness_payload.mjs +103 -0
  61. package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/opencode_plugin.mjs +341 -0
  62. package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/opencode_user_config_sync.mjs +177 -0
  63. package/.opencode/mednotes/scripts/hooks/mednotes_hook/anki_preflight.mjs +214 -0
  64. package/.opencode/mednotes/scripts/hooks/mednotes_hook/cli.mjs +143 -0
  65. package/.opencode/mednotes/scripts/hooks/mednotes_hook/diagnostics.mjs +11 -0
  66. package/.opencode/mednotes/scripts/hooks/mednotes_hook/domain/agent_directive_core.mjs +160 -0
  67. package/.opencode/mednotes/scripts/hooks/mednotes_hook/fsm_directive.mjs +1470 -0
  68. package/.opencode/mednotes/scripts/hooks/mednotes_hook/hook_errors.mjs +120 -0
  69. package/.opencode/mednotes/scripts/hooks/mednotes_hook/retention.mjs +114 -0
  70. package/.opencode/mednotes/scripts/hooks/mednotes_hook/runtime.mjs +174 -0
  71. package/.opencode/mednotes/scripts/hooks/mednotes_hook/telemetry_capture.mjs +511 -0
  72. package/.opencode/mednotes/scripts/hooks/mednotes_hook/vault_guard.mjs +624 -0
  73. package/.opencode/mednotes/scripts/hooks/mednotes_hook.mjs +5 -0
  74. package/.opencode/mednotes/scripts/mednotes/_runtime_paths.py +24 -0
  75. package/.opencode/mednotes/scripts/mednotes/anki_model_validator.py +18 -0
  76. package/.opencode/mednotes/scripts/mednotes/capture_extension_diff.py +1562 -0
  77. package/.opencode/mednotes/scripts/mednotes/feedback_report.py +16 -0
  78. package/.opencode/mednotes/scripts/mednotes/flashcard_index.py +18 -0
  79. package/.opencode/mednotes/scripts/mednotes/flashcard_pipeline.py +18 -0
  80. package/.opencode/mednotes/scripts/mednotes/flashcard_report.py +18 -0
  81. package/.opencode/mednotes/scripts/mednotes/flashcard_sources.py +18 -0
  82. package/.opencode/mednotes/scripts/mednotes/obsidian/README.md +6 -0
  83. package/.opencode/mednotes/scripts/mednotes/obsidian_note_utils.py +20 -0
  84. package/.opencode/mednotes/scripts/mednotes/pdf_library/cli.py +16 -0
  85. package/.opencode/mednotes/scripts/mednotes/project_fsm.py +229 -0
  86. package/.opencode/mednotes/scripts/mednotes/setup_telemetry_email.py +404 -0
  87. package/.opencode/mednotes/scripts/mednotes/sync_anki_twenty_rules.py +18 -0
  88. package/.opencode/mednotes/scripts/mednotes/sync_opencode_user_config.py +36 -0
  89. package/.opencode/mednotes/scripts/mednotes/wiki/cli.py +20 -0
  90. package/.opencode/mednotes/scripts/mednotes/wiki_graph.py +18 -0
  91. package/.opencode/mednotes/scripts/mednotes/wiki_tree.py +134 -0
  92. package/.opencode/mednotes/scripts/reset_windows_python_uv.ps1 +625 -0
  93. package/.opencode/mednotes/scripts/run_python.mjs +109 -0
  94. package/.opencode/mednotes/scripts/vault/vault_commit.ps1 +19 -0
  95. package/.opencode/mednotes/scripts/vault/vault_commit.sh +18 -0
  96. package/.opencode/mednotes/scripts/vault/vault_git.ps1 +19 -0
  97. package/.opencode/mednotes/scripts/vault/vault_git.py +3107 -0
  98. package/.opencode/mednotes/scripts/vault/vault_git.sh +18 -0
  99. package/.opencode/mednotes/scripts/vault/vault_precommit.ps1 +19 -0
  100. package/.opencode/mednotes/scripts/vault/vault_precommit.sh +18 -0
  101. package/.opencode/mednotes/skills/THIRD_PARTY_NOTICES.md +45 -0
  102. package/.opencode/mednotes/skills/create-medical-flashcards/SKILL.md +113 -0
  103. package/.opencode/mednotes/skills/create-medical-note/SKILL.md +90 -0
  104. package/.opencode/mednotes/skills/enrich-medical-note/SKILL.md +120 -0
  105. package/.opencode/mednotes/skills/fix-medical-wiki/SKILL.md +559 -0
  106. package/.opencode/mednotes/skills/link-medical-wiki/SKILL.md +224 -0
  107. package/.opencode/mednotes/skills/obsidian-cli/SKILL.md +118 -0
  108. package/.opencode/mednotes/skills/obsidian-markdown/SKILL.md +207 -0
  109. package/.opencode/mednotes/skills/obsidian-markdown/references/CALLOUTS.md +58 -0
  110. package/.opencode/mednotes/skills/obsidian-markdown/references/EMBEDS.md +63 -0
  111. package/.opencode/mednotes/skills/obsidian-markdown/references/PROPERTIES.md +61 -0
  112. package/.opencode/mednotes/skills/obsidian-ops/SKILL.md +136 -0
  113. package/.opencode/mednotes/skills/pdf-library/SKILL.md +45 -0
  114. package/.opencode/mednotes/skills/process-medical-chats/SKILL.md +246 -0
  115. package/.opencode/mednotes/skills/workflow-report/SKILL.md +100 -0
  116. package/.opencode/mednotes/src/mednotes/__init__.py +5 -0
  117. package/.opencode/mednotes/src/mednotes/domains/__init__.py +5 -0
  118. package/.opencode/mednotes/src/mednotes/domains/flashcards/README.md +26 -0
  119. package/.opencode/mednotes/src/mednotes/domains/flashcards/__init__.py +2 -0
  120. package/.opencode/mednotes/src/mednotes/domains/flashcards/build_demo_apkg.py +177 -0
  121. package/.opencode/mednotes/src/mednotes/domains/flashcards/contracts.py +385 -0
  122. package/.opencode/mednotes/src/mednotes/domains/flashcards/flashcards_machine.py +522 -0
  123. package/.opencode/mednotes/src/mednotes/domains/flashcards/fsm.py +817 -0
  124. package/.opencode/mednotes/src/mednotes/domains/flashcards/index.py +630 -0
  125. package/.opencode/mednotes/src/mednotes/domains/flashcards/install_models.py +445 -0
  126. package/.opencode/mednotes/src/mednotes/domains/flashcards/model.py +359 -0
  127. package/.opencode/mednotes/src/mednotes/domains/flashcards/obsidian_links.py +135 -0
  128. package/.opencode/mednotes/src/mednotes/domains/flashcards/obsidian_note_utils.py +546 -0
  129. package/.opencode/mednotes/src/mednotes/domains/flashcards/pipeline.py +580 -0
  130. package/.opencode/mednotes/src/mednotes/domains/flashcards/report.py +510 -0
  131. package/.opencode/mednotes/src/mednotes/domains/flashcards/sources.py +682 -0
  132. package/.opencode/mednotes/src/mednotes/domains/flashcards/sync_rules.py +184 -0
  133. package/.opencode/mednotes/src/mednotes/domains/history/__init__.py +1 -0
  134. package/.opencode/mednotes/src/mednotes/domains/history/history_fsm.py +852 -0
  135. package/.opencode/mednotes/src/mednotes/domains/history/history_machine.py +453 -0
  136. package/.opencode/mednotes/src/mednotes/domains/setup/__init__.py +7 -0
  137. package/.opencode/mednotes/src/mednotes/domains/setup/setup_fsm.py +808 -0
  138. package/.opencode/mednotes/src/mednotes/domains/setup/setup_machine.py +973 -0
  139. package/.opencode/mednotes/src/mednotes/domains/wiki/README.md +64 -0
  140. package/.opencode/mednotes/src/mednotes/domains/wiki/__init__.py +1 -0
  141. package/.opencode/mednotes/src/mednotes/domains/wiki/api.py +668 -0
  142. package/.opencode/mednotes/src/mednotes/domains/wiki/batch_state.py +102 -0
  143. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/__init__.py +1 -0
  144. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/atomicity/__init__.py +1 -0
  145. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/atomicity/atomicity.py +877 -0
  146. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/body_link/__init__.py +1 -0
  147. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/body_link/body_linker.py +1562 -0
  148. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/effects/__init__.py +1 -0
  149. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/effects/effect_adapters.py +949 -0
  150. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/effects/fix_wiki_runtime_adapters.py +433 -0
  151. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/__init__.py +1 -0
  152. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/coverage.py +413 -0
  153. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/graph.py +396 -0
  154. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/graph_fixes.py +161 -0
  155. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/hygiene/__init__.py +1 -0
  156. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/hygiene/hygiene.py +483 -0
  157. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/__init__.py +2 -0
  158. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/anchors.py +185 -0
  159. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/__init__.py +0 -0
  160. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/cache.py +223 -0
  161. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/config.py +131 -0
  162. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/download.py +224 -0
  163. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/frontmatter.py +59 -0
  164. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/insert.py +227 -0
  165. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/local_import.py +54 -0
  166. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/__init__.py +42 -0
  167. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/web_profiles.py +99 -0
  168. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/web_search.py +203 -0
  169. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/wikimedia.py +102 -0
  170. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/__init__.py +1 -0
  171. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/markdown_db_adapter.mjs +434 -0
  172. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/markdown_node_runtime.py +274 -0
  173. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/markdown_query.py +227 -0
  174. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/__init__.py +1 -0
  175. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/artifacts.py +605 -0
  176. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/canonical_merge.py +277 -0
  177. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/markdown_zones.py +85 -0
  178. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/meaning_planner.py +307 -0
  179. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_iter.py +67 -0
  180. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_merge.py +278 -0
  181. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_plan.py +409 -0
  182. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_policy.py +22 -0
  183. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/__init__.py +79 -0
  184. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/fixes.py +264 -0
  185. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/frontmatter.py +435 -0
  186. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/models.py +208 -0
  187. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/prompts.py +37 -0
  188. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/tables.py +236 -0
  189. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/validate.py +404 -0
  190. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/provenance.py +478 -0
  191. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/raw_chats.py +273 -0
  192. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/sources_backfill.py +235 -0
  193. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/__init__.py +10 -0
  194. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/anchors.py +16 -0
  195. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/captions.py +47 -0
  196. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/cli.py +179 -0
  197. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/cloud.py +52 -0
  198. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/config.py +196 -0
  199. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/context_packets.py +76 -0
  200. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/db.py +81 -0
  201. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/doctor.py +102 -0
  202. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/figure_ids.py +42 -0
  203. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/ingest.py +326 -0
  204. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/insert.py +316 -0
  205. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/mentions.py +57 -0
  206. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/ocr.py +71 -0
  207. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/paths.py +35 -0
  208. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/pdf_engine.py +77 -0
  209. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/schema.py +155 -0
  210. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/search.py +188 -0
  211. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/__init__.py +1 -0
  212. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/app.py +89 -0
  213. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/image_backend.py +29 -0
  214. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/state.py +65 -0
  215. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/__init__.py +1 -0
  216. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/publish.py +1139 -0
  217. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/publish_receipts.py +365 -0
  218. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/publish_recovery.py +240 -0
  219. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/__init__.py +1 -0
  220. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/agent_behavior_corpus.py +2069 -0
  221. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/agent_report_validation.py +4448 -0
  222. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/agent_run_audit.py +852 -0
  223. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/architect_prompt_eval.py +341 -0
  224. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/body_linker_eval.py +240 -0
  225. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/curator_output_validation.py +175 -0
  226. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/curator_prompt_eval.py +865 -0
  227. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/triager_prompt_eval.py +1295 -0
  228. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/related_notes/__init__.py +1 -0
  229. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/related_notes/related_notes.py +1920 -0
  230. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/related_notes/related_notes_headless.py +1186 -0
  231. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/__init__.py +1 -0
  232. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/plan_attestation.py +148 -0
  233. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/specialist_receipts.py +360 -0
  234. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/specialist_runtime.py +52 -0
  235. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/specialist_task_runner.py +2470 -0
  236. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/style/__init__.py +1 -0
  237. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/style/style.py +1952 -0
  238. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/subagents/__init__.py +1 -0
  239. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/subagents/agents.py +1767 -0
  240. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/__init__.py +1 -0
  241. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/alias_projection.py +331 -0
  242. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/link_terms.py +151 -0
  243. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/llm_disambiguation.py +182 -0
  244. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/__init__.py +116 -0
  245. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/audit.py +201 -0
  246. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/migration.py +314 -0
  247. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/normalize.py +72 -0
  248. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/policy.py +135 -0
  249. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/resolve.py +413 -0
  250. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/schema.py +157 -0
  251. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/status.py +137 -0
  252. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_bootstrap.py +509 -0
  253. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_curator_batch.py +1115 -0
  254. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_ingestion.py +632 -0
  255. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_map.py +930 -0
  256. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_recovery.py +1388 -0
  257. package/.opencode/mednotes/src/mednotes/domains/wiki/cli.py +6665 -0
  258. package/.opencode/mednotes/src/mednotes/domains/wiki/common.py +69 -0
  259. package/.opencode/mednotes/src/mednotes/domains/wiki/config.py +210 -0
  260. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/__init__.py +74 -0
  261. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/agent_report.py +242 -0
  262. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/agent_run_audit.py +196 -0
  263. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/agents.py +601 -0
  264. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/curator.py +256 -0
  265. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/effect_payloads.py +519 -0
  266. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/happy_path.py +190 -0
  267. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/link_git.py +110 -0
  268. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/link_runtime_artifact.py +52 -0
  269. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/note_plan.py +75 -0
  270. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/paths.py +114 -0
  271. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/public_report.py +53 -0
  272. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/publish.py +111 -0
  273. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/raw_coverage.py +217 -0
  274. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/related_notes.py +136 -0
  275. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/related_notes_headless.py +153 -0
  276. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/related_notes_runtime.py +395 -0
  277. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/schema_registry.py +637 -0
  278. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/specialist.py +432 -0
  279. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/status.py +62 -0
  280. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/style_rewrite.py +568 -0
  281. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/vocabulary_ingestion.py +223 -0
  282. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_blockers.py +510 -0
  283. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_guardrails.py +637 -0
  284. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_outcomes.py +121 -0
  285. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_receipts.py +100 -0
  286. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/__init__.py +1 -0
  287. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/__init__.py +1 -0
  288. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/__main__.py +4 -0
  289. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/cli.py +275 -0
  290. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/__init__.py +2 -0
  291. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/candidates.py +193 -0
  292. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/cli.py +189 -0
  293. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/gemini.py +220 -0
  294. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/inputs.py +120 -0
  295. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/models.py +34 -0
  296. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/parsing.py +48 -0
  297. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/prompts.py +216 -0
  298. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/quality.py +54 -0
  299. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/reporting.py +24 -0
  300. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/runner.py +433 -0
  301. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/utils.py +39 -0
  302. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/vault_guard_bridge.py +17 -0
  303. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/__init__.py +1 -0
  304. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_context_packets.py +454 -0
  305. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_decision_projection.py +133 -0
  306. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_effects.py +1260 -0
  307. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_fsm.py +2768 -0
  308. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_machine.py +1588 -0
  309. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_plan.py +306 -0
  310. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_primary_objective.py +316 -0
  311. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_problem.py +153 -0
  312. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_receipt_evidence.py +306 -0
  313. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_states.py +290 -0
  314. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_user_report.py +342 -0
  315. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/health.py +6332 -0
  316. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/__init__.py +1 -0
  317. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_fsm.py +1119 -0
  318. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_git.py +638 -0
  319. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_machine.py +1106 -0
  320. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_retry_governance.py +374 -0
  321. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_runtime_result.py +485 -0
  322. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_triggers.py +183 -0
  323. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/linking.py +2758 -0
  324. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/reference_repair.py +718 -0
  325. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/related_notes_fsm.py +1855 -0
  326. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link_related/__init__.py +1 -0
  327. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link_related/link_related_machine.py +834 -0
  328. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/__init__.py +1 -0
  329. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_fsm.py +1592 -0
  330. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_machine.py +3097 -0
  331. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_primary_objective.py +28 -0
  332. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_runtime_result.py +185 -0
  333. package/.opencode/mednotes/src/mednotes/domains/wiki/performance.py +97 -0
  334. package/.opencode/mednotes/src/mednotes/kernel/__init__.py +6 -0
  335. package/.opencode/mednotes/src/mednotes/kernel/agent_directive.py +336 -0
  336. package/.opencode/mednotes/src/mednotes/kernel/base.py +51 -0
  337. package/.opencode/mednotes/src/mednotes/kernel/blockers.py +39 -0
  338. package/.opencode/mednotes/src/mednotes/kernel/effect_executor.py +55 -0
  339. package/.opencode/mednotes/src/mednotes/kernel/effect_intent.py +69 -0
  340. package/.opencode/mednotes/src/mednotes/kernel/effects.py +160 -0
  341. package/.opencode/mednotes/src/mednotes/kernel/errors.py +38 -0
  342. package/.opencode/mednotes/src/mednotes/kernel/fsm_event.py +35 -0
  343. package/.opencode/mednotes/src/mednotes/kernel/fsm_model.py +55 -0
  344. package/.opencode/mednotes/src/mednotes/kernel/fsm_transition_result.py +75 -0
  345. package/.opencode/mednotes/src/mednotes/kernel/guardrails.py +188 -0
  346. package/.opencode/mednotes/src/mednotes/kernel/progress.py +319 -0
  347. package/.opencode/mednotes/src/mednotes/kernel/public_report.py +346 -0
  348. package/.opencode/mednotes/src/mednotes/kernel/state_machine.py +164 -0
  349. package/.opencode/mednotes/src/mednotes/kernel/workflow.py +619 -0
  350. package/.opencode/mednotes/src/mednotes/platform/__init__.py +5 -0
  351. package/.opencode/mednotes/src/mednotes/platform/backup_policy.py +382 -0
  352. package/.opencode/mednotes/src/mednotes/platform/feedback/__init__.py +62 -0
  353. package/.opencode/mednotes/src/mednotes/platform/feedback/cli.py +275 -0
  354. package/.opencode/mednotes/src/mednotes/platform/feedback/contracts.py +83 -0
  355. package/.opencode/mednotes/src/mednotes/platform/feedback/core.py +4168 -0
  356. package/.opencode/mednotes/src/mednotes/platform/feedback/integrity.py +989 -0
  357. package/.opencode/mednotes/src/mednotes/platform/feedback/operational_contract.py +2293 -0
  358. package/.opencode/mednotes/src/mednotes/platform/feedback/telemetry.py +875 -0
  359. package/.opencode/mednotes/src/mednotes/platform/feedback/telemetry_config.py +65 -0
  360. package/.opencode/mednotes/src/mednotes/platform/opencode_runtime_config.py +182 -0
  361. package/.opencode/mednotes/src/mednotes/platform/paths/__init__.py +1560 -0
  362. package/.opencode/mednotes/src/mednotes/platform/secrets.py +89 -0
  363. package/.opencode/mednotes/src/mednotes/platform/user_config.py +103 -0
  364. package/.opencode/mednotes/src/mednotes/platform/vault_guard.py +214 -0
  365. package/.opencode/mednotes/uv.lock +932 -0
  366. package/.opencode/mednotes.generated.json +395 -0
  367. package/.opencode/opencode.json +31 -0
  368. package/.opencode/plugins/mednotes-fsm.mjs +7 -0
  369. package/.opencode/plugins/mednotes_hook/adapters/antigravity.mjs +169 -0
  370. package/.opencode/plugins/mednotes_hook/adapters/harness_payload.mjs +103 -0
  371. package/.opencode/plugins/mednotes_hook/adapters/opencode_plugin.mjs +341 -0
  372. package/.opencode/plugins/mednotes_hook/adapters/opencode_user_config_sync.mjs +177 -0
  373. package/.opencode/plugins/mednotes_hook/anki_preflight.mjs +214 -0
  374. package/.opencode/plugins/mednotes_hook/cli.mjs +143 -0
  375. package/.opencode/plugins/mednotes_hook/diagnostics.mjs +11 -0
  376. package/.opencode/plugins/mednotes_hook/domain/agent_directive_core.mjs +160 -0
  377. package/.opencode/plugins/mednotes_hook/fsm_directive.mjs +1470 -0
  378. package/.opencode/plugins/mednotes_hook/hook_errors.mjs +120 -0
  379. package/.opencode/plugins/mednotes_hook/retention.mjs +114 -0
  380. package/.opencode/plugins/mednotes_hook/runtime.mjs +174 -0
  381. package/.opencode/plugins/mednotes_hook/telemetry_capture.mjs +511 -0
  382. package/.opencode/plugins/mednotes_hook/vault_guard.mjs +624 -0
  383. package/AGENTS.md +57 -0
  384. package/README.md +194 -0
  385. package/adapters/antigravity/agents.json +80 -0
  386. package/adapters/antigravity/templates/med-chat-triager.md +214 -0
  387. package/adapters/antigravity/templates/med-flashcard-maker.md +72 -0
  388. package/adapters/antigravity/templates/med-knowledge-architect.md +241 -0
  389. package/adapters/antigravity/templates/med-link-graph-curator.md +187 -0
  390. package/adapters/antigravity/templates/med-publish-guard.md +71 -0
  391. package/adapters/gemini-cli/gemini-extension.json +14 -0
  392. package/adapters/gemini-cli/package.json +15 -0
  393. package/adapters/gemini-cli/pyproject.toml +48 -0
  394. package/bin/mednotes-opencode.mjs +155 -0
  395. package/contracts/agents.json +116 -0
  396. package/core/agents/med-chat-triager.md +197 -0
  397. package/core/agents/med-flashcard-maker.md +56 -0
  398. package/core/agents/med-knowledge-architect.md +224 -0
  399. package/core/agents/med-link-graph-curator.md +171 -0
  400. package/core/agents/med-publish-guard.md +55 -0
  401. package/core/commands/flashcards.toml +22 -0
  402. package/core/commands/mednotes/create.toml +22 -0
  403. package/core/commands/mednotes/enrich.toml +24 -0
  404. package/core/commands/mednotes/fix-wiki.toml +24 -0
  405. package/core/commands/mednotes/history.toml +19 -0
  406. package/core/commands/mednotes/link-body.toml +22 -0
  407. package/core/commands/mednotes/link-related.toml +24 -0
  408. package/core/commands/mednotes/link.toml +24 -0
  409. package/core/commands/mednotes/pdf-library.toml +24 -0
  410. package/core/commands/mednotes/process-chats.toml +20 -0
  411. package/core/commands/mednotes/setup.toml +18 -0
  412. package/core/commands/mednotes/status.toml +24 -0
  413. package/core/commands/mednotes/telemetry.toml +24 -0
  414. package/core/commands/report.toml +23 -0
  415. package/core/skills/THIRD_PARTY_NOTICES.md +45 -0
  416. package/core/skills/create-medical-flashcards/SKILL.md +113 -0
  417. package/core/skills/create-medical-note/SKILL.md +90 -0
  418. package/core/skills/enrich-medical-note/SKILL.md +120 -0
  419. package/core/skills/fix-medical-wiki/SKILL.md +559 -0
  420. package/core/skills/link-medical-wiki/SKILL.md +224 -0
  421. package/core/skills/obsidian-cli/SKILL.md +118 -0
  422. package/core/skills/obsidian-markdown/SKILL.md +207 -0
  423. package/core/skills/obsidian-markdown/references/CALLOUTS.md +58 -0
  424. package/core/skills/obsidian-markdown/references/EMBEDS.md +63 -0
  425. package/core/skills/obsidian-markdown/references/PROPERTIES.md +61 -0
  426. package/core/skills/obsidian-ops/SKILL.md +136 -0
  427. package/core/skills/pdf-library/SKILL.md +45 -0
  428. package/core/skills/process-medical-chats/SKILL.md +246 -0
  429. package/core/skills/workflow-report/SKILL.md +100 -0
  430. package/package.json +45 -0
@@ -0,0 +1,2069 @@
1
+ """Versioned offline behavior corpus gates for agent prompt changes."""
2
+ from __future__ import annotations
3
+
4
+ import hashlib
5
+ import json
6
+ import re
7
+ import tempfile
8
+ from datetime import UTC, datetime
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from pydantic import ConfigDict, Field, NonNegativeInt, StrictStr
13
+ from pydantic import ValidationError as PydanticValidationError
14
+
15
+ from mednotes.domains.wiki.capabilities.quality.curator_prompt_eval import (
16
+ evaluate_curator_prompt_outputs,
17
+ load_curator_prompt_expectations,
18
+ )
19
+ from mednotes.domains.wiki.capabilities.vocabulary.vocabulary_curator_batch import (
20
+ VOCABULARY_CURATOR_BATCH_OUTPUT_MANIFEST_SCHEMA,
21
+ build_curator_prompt_identity,
22
+ curator_plan_hash,
23
+ )
24
+ from mednotes.domains.wiki.common import ValidationError
25
+ from mednotes.kernel.base import ContractModel, JsonObject, JsonObjectAdapter, JsonValue, contract_error
26
+
27
# Schema identifiers stamped on (and required from) the JSON documents this module reads or writes.
AGENT_BEHAVIOR_CORPUS_SCHEMA = "medical-notes-workbench.agent-behavior-corpus.v1"
AGENT_BEHAVIOR_CORPUS_REPORT_SCHEMA = "medical-notes-workbench.agent-behavior-corpus-report.v1"
AGENT_BEHAVIOR_CONTRACT_EVAL_SCHEMA = "medical-notes-workbench.agent-behavior-contract-eval.v1"
AGENT_BEHAVIOR_CASE_DRAFT_SCHEMA = "medical-notes-workbench.agent-behavior-case-draft.v1"
AGENT_BEHAVIOR_CASE_DRAFT_REPORT_SCHEMA = "medical-notes-workbench.agent-behavior-case-draft-report.v1"

DEFAULT_TELEMETRY_APP = "medical-notes-workbench"

# Ordinal ranking of severity labels; a larger number is more severe.
SEVERITY_RANK = {"low": 1, "medium": 2, "high": 3, "critical": 4}

# Default severity label per telemetry signal code.
DEFAULT_SIGNAL_SEVERITY = {
    # agent.* signals
    "agent.retry_loop": "high",
    "agent.retry_without_input_change": "high",
    "agent.ignored_next_action": "high",
    "agent.wrong_phase": "high",
    "agent.generated_script_workaround": "high",
    "agent.unsafe_generated_script_recovery_bypass": "high",
    "agent.missing_error_context": "high",
    "agent.script_or_prompt_drift": "high",
    "agent.unexpected_mutation": "high",
    "agent.command_failed": "medium",
    "agent.workflow_blocked": "medium",
    "agent.dry_run_without_apply": "medium",
    # un-namespaced codes
    "dry_run_without_apply": "medium",
    "extension_prompt_or_script_drift": "high",
    # resource.* signals
    "resource.version_control_policy_bypassed": "critical",
    "resource.guard_missing": "critical",
    "resource.run_finish_missing": "high",
    "resource.restore_point_after_mutation": "critical",
    "resource.direct_mutation_attempt": "high",
}

# Generated-script risk codes that, per the constant's name, lead to a draft case.
RISK_CODES_THAT_CREATE_DRAFTS = {
    "mass_markdown_mutation",
    "hardcoded_user_path",
    "reads_obsidian_plugin_data",
    "writes_related_notes_section",
    "external_api_or_embedding_call",
    "no_dry_run",
    "encoding_corruption",
    "extension_prompt_or_script_drift",
    "direct_sql_mutation",
    "queue_truth_bypass",
    "unsafe_mass_wikilink_rewrite",
}

# Slash command -> relative path of the command prompt file.
COMMAND_PROMPT_SOURCES = {
    "/flashcards": "commands/flashcards.toml",
    "/report": "commands/report.toml",
    "/mednotes:create": "commands/mednotes/create.toml",
    "/mednotes:enrich": "commands/mednotes/enrich.toml",
    "/mednotes:fix-wiki": "commands/mednotes/fix-wiki.toml",
    "/mednotes:history": "commands/mednotes/history.toml",
    "/mednotes:link": "commands/mednotes/link.toml",
    "/mednotes:link-body": "commands/mednotes/link-body.toml",
    "/mednotes:link-related": "commands/mednotes/link-related.toml",
    "/mednotes:pdf-library": "commands/mednotes/pdf-library.toml",
    "/mednotes:process-chats": "commands/mednotes/process-chats.toml",
    "/mednotes:setup": "commands/mednotes/setup.toml",
    "/mednotes:status": "commands/mednotes/status.toml",
    "/mednotes:telemetry": "commands/mednotes/telemetry.toml",
}

# Workflow name -> relative path of the skill prompt file (several workflows share one skill).
WORKFLOW_SKILL_PROMPT_SOURCES = {
    "flashcards": "skills/create-medical-flashcards/SKILL.md",
    "create": "skills/create-medical-note/SKILL.md",
    "enrich": "skills/enrich-medical-note/SKILL.md",
    "fix-wiki": "skills/fix-medical-wiki/SKILL.md",
    "link": "skills/link-medical-wiki/SKILL.md",
    "link-body": "skills/link-medical-wiki/SKILL.md",
    "link-related": "skills/link-medical-wiki/SKILL.md",
    "pdf-library": "skills/pdf-library/SKILL.md",
    "process-chats": "skills/process-medical-chats/SKILL.md",
    "report": "skills/workflow-report/SKILL.md",
    "setup": "skills/obsidian-ops/SKILL.md",
    "status": "skills/obsidian-ops/SKILL.md",
    "telemetry": "skills/obsidian-ops/SKILL.md",
}
100
+
101
+
102
class _AgentBehaviorCorpusFields(ContractModel):
    """Typed view of one behavior-corpus document.

    The JSON key ``schema`` is required and exposed as ``schema_id`` (aliased
    on both validation and serialization). Every other field has an empty
    default, so callers must check for blank values themselves.
    """

    schema_id: StrictStr = Field(alias="schema", serialization_alias="schema")
    suite_id: StrictStr = ""
    agent: StrictStr = ""
    surface_type: StrictStr = ""
    evaluator: StrictStr = ""
    prompt_sources: list[StrictStr] = Field(default_factory=list)
    prompt_identity_hash: StrictStr = ""
    # Path-like references; relative values are resolved by callers against the corpus directory.
    cases_path: StrictStr = ""
    plan_path: StrictStr = ""
    manifest_path: StrictStr = ""
    expectations_path: StrictStr = ""
    baseline_eval_path: StrictStr = ""
    case_count: NonNegativeInt = 0
    cases: list[JsonObject] = Field(default_factory=list)
117
+
118
+
119
class _AgentBehaviorAssertionFields(ContractModel):
    """One fixture assertion: an operator, a dotted path, and an expected value.

    Unknown keys in the raw assertion are ignored rather than rejected.
    """

    model_config = ConfigDict(extra="ignore")

    op: StrictStr = ""
    path: StrictStr = ""
    value: JsonValue = None
127
+
128
+
129
class _AgentBehaviorCaseFields(ContractModel):
    """A single behavior case as read from the cases fixture file.

    Raw JSON is validated into this shape before it is used for scoring;
    keys not declared here are ignored.
    """

    model_config = ConfigDict(extra="ignore")

    case_id: StrictStr = ""
    behavior: StrictStr = ""
    output_path: StrictStr = ""
    assertions: list[_AgentBehaviorAssertionFields] = Field(default_factory=list)
138
+
139
+
140
class _AgentBehaviorCasesPayloadFields(ContractModel):
    """Top-level shape of the cases file: a required ``schema`` plus a case list."""

    model_config = ConfigDict(extra="ignore")

    schema_id: StrictStr = Field(alias="schema")
    cases: list[_AgentBehaviorCaseFields] = Field(default_factory=list)
147
+
148
+
149
class _CuratorOutputManifestItemFields(ContractModel):
    """Typed view of a manifest item, limited to its ``output_path``."""

    model_config = ConfigDict(extra="ignore")

    output_path: StrictStr = ""
155
+
156
+
157
class _CuratorOutputManifestFields(ContractModel):
    """Typed view of a curator output manifest: required ``schema`` and raw items.

    Items stay as plain JSON objects here; per-item fields are validated
    separately where they are consumed.
    """

    model_config = ConfigDict(extra="ignore")

    schema_id: StrictStr = Field(alias="schema")
    items: list[JsonObject] = Field(default_factory=list)
164
+
165
+
166
class _TelemetryAgentEventFields(ContractModel):
    """String fields read from a telemetry agent event; all default to empty.

    Extra keys on the event are ignored.
    """

    model_config = ConfigDict(extra="ignore")

    code: StrictStr = ""
    type: StrictStr = ""
    severity: StrictStr = ""
    phase: StrictStr = ""
    expected_phase: StrictStr = ""
    next_action_expected: StrictStr = ""
    recovery_command: StrictStr = ""
    command_family: StrictStr = ""
    path: StrictStr = ""
180
+
181
+
182
class _TelemetryClientLens(ContractModel):
    """App name and version as carried in a nested ``client`` object."""

    model_config = ConfigDict(extra="ignore")

    app: StrictStr = ""
    app_version: StrictStr = ""
189
+
190
+
191
class _TelemetryPayloadLens(ContractModel):
    """Typed view of a telemetry envelope.

    Every field is optional: ``schema`` defaults to an empty string, ``client``
    to ``None`` and ``records`` to an empty list. Undeclared keys are ignored.
    """

    model_config = ConfigDict(extra="ignore")

    schema_id: StrictStr = Field(default="", alias="schema", serialization_alias="schema")
    app: StrictStr = ""
    app_version: StrictStr = ""
    client: _TelemetryClientLens | None = None
    records: list[JsonObject] = Field(default_factory=list)
201
+
202
+
203
class _GeneratedScriptEvidenceLens(ContractModel):
    """Typed view of generated-script evidence: path, risk codes, and entry point."""

    model_config = ConfigDict(extra="ignore")

    path: StrictStr = ""
    risk_codes: list[StrictStr] = Field(default_factory=list)
    function_or_command: StrictStr = ""
211
+
212
+
213
class _CommandEventEvidenceLens(ContractModel):
    """Typed view of command-event evidence: command, family, path, and status."""

    model_config = ConfigDict(extra="ignore")

    command: StrictStr = ""
    command_family: StrictStr = ""
    path: StrictStr = ""
    status: StrictStr = ""
222
+
223
+
224
class _TelemetryEnvironmentIntegrityLens(ContractModel):
    """Typed view of environment integrity, limited to ``app_version``."""

    model_config = ConfigDict(extra="ignore")

    app_version: StrictStr = ""
230
+
231
+
232
class _TelemetryEnvironmentContextLens(ContractModel):
    """Typed view of a record's environment context (optional ``extension_integrity``)."""

    model_config = ConfigDict(extra="ignore")

    extension_integrity: _TelemetryEnvironmentIntegrityLens | None = None
238
+
239
+
240
class _TelemetryRecordMetadataLens(ContractModel):
    """Typed view of per-record telemetry metadata.

    All string fields default to empty and the nested ``client`` and
    ``environment_context`` objects default to ``None``; other keys are ignored.
    """

    model_config = ConfigDict(extra="ignore")

    workflow: StrictStr = ""
    agent: StrictStr = ""
    phase: StrictStr = ""
    recorded_at: StrictStr = ""
    app: StrictStr = ""
    app_version: StrictStr = ""
    client: _TelemetryClientLens | None = None
    environment_context: _TelemetryEnvironmentContextLens | None = None
253
+
254
+
255
class _BehaviorCandidatePayloadLens(ContractModel):
    """Typed view of a candidate envelope: two candidate lists plus nested messages.

    Each list holds raw JSON objects and defaults to empty.
    """

    model_config = ConfigDict(extra="ignore")

    behavior_case_candidates: list[JsonObject] = Field(default_factory=list)
    first_pass_prevention_candidates: list[JsonObject] = Field(default_factory=list)
    messages: list[JsonObject] = Field(default_factory=list)
263
+
264
+
265
class _BehaviorCandidateMessageLens(ContractModel):
    """Typed view of one message record carrying its own candidate lists."""

    model_config = ConfigDict(extra="ignore")

    id: StrictStr = ""
    source_kind: StrictStr = ""
    behavior_case_candidates: list[JsonObject] = Field(default_factory=list)
    first_pass_prevention_candidates: list[JsonObject] = Field(default_factory=list)
274
+
275
+
276
def _telemetry_payload_lens(payload: object) -> _TelemetryPayloadLens:
    """Validate a telemetry envelope into its typed lens.

    Anything that is not a dict yields a default (empty) lens. Validation
    errors raised for dict input propagate unchanged.
    """
    if isinstance(payload, dict):
        return _TelemetryPayloadLens.model_validate(payload)
    return _TelemetryPayloadLens()
280
+
281
+
282
def _telemetry_record_lens(record: object) -> _TelemetryRecordMetadataLens:
    """Validate a telemetry record into its metadata lens.

    Anything that is not a dict yields a default (empty) lens. Validation
    errors raised for dict input propagate unchanged.
    """
    if isinstance(record, dict):
        return _TelemetryRecordMetadataLens.model_validate(record)
    return _TelemetryRecordMetadataLens()
286
+
287
+
288
def _agent_behavior_corpus_fields(corpus: JsonObject) -> _AgentBehaviorCorpusFields:
    """Validate a raw corpus object, re-raising pydantic failures via ``contract_error``."""
    try:
        fields = _AgentBehaviorCorpusFields.model_validate(corpus)
    except PydanticValidationError as exc:
        raise contract_error(exc, prefix="agent behavior corpus") from exc
    return fields
293
+
294
+
295
def _agent_behavior_cases_payload_fields(payload: JsonObject) -> _AgentBehaviorCasesPayloadFields:
    """Validate a raw cases file, re-raising pydantic failures via ``contract_error``."""
    try:
        fields = _AgentBehaviorCasesPayloadFields.model_validate(payload)
    except PydanticValidationError as exc:
        raise contract_error(exc, prefix="agent behavior cases") from exc
    return fields
300
+
301
+
302
def _curator_output_manifest_fields(payload: JsonObject) -> _CuratorOutputManifestFields:
    """Validate a raw curator manifest, re-raising pydantic failures via ``contract_error``."""
    try:
        fields = _CuratorOutputManifestFields.model_validate(payload)
    except PydanticValidationError as exc:
        raise contract_error(exc, prefix="agent behavior corpus manifest") from exc
    return fields
307
+
308
+
309
def _telemetry_agent_event_fields(payload: JsonObject) -> _TelemetryAgentEventFields:
    """Validate a raw telemetry event, re-raising pydantic failures via ``contract_error``."""
    try:
        fields = _TelemetryAgentEventFields.model_validate(payload)
    except PydanticValidationError as exc:
        raise contract_error(exc, prefix="agent behavior telemetry event") from exc
    return fields
314
+
315
+
316
def _read_json_object(path: Path, *, label: str) -> JsonObject:
    """Read ``path`` as UTF-8 JSON and require a top-level object.

    Args:
        path: File to read.
        label: Human-readable name of the document, used as the prefix of
            every error message.

    Returns:
        The parsed object, passed through ``JsonObjectAdapter``.

    Raises:
        ValidationError: If the file is missing, cannot be read (for example
            ``path`` is a directory or is not accessible), is not valid UTF-8,
            is not valid JSON, or its top-level value is not an object.
    """
    try:
        raw_text = path.read_text(encoding="utf-8")
    except FileNotFoundError as exc:
        raise ValidationError(f"{label} not found: {path}") from exc
    except UnicodeDecodeError as exc:
        raise ValidationError(f"{label} is not valid UTF-8: {path}: {exc}") from exc
    except OSError as exc:
        # An empty path reference resolves to its base directory, which would
        # otherwise surface as a raw IsADirectoryError/PermissionError.
        raise ValidationError(f"{label} could not be read: {path}: {exc}") from exc
    try:
        payload = json.loads(raw_text)
    except json.JSONDecodeError as exc:
        raise ValidationError(f"{label} is invalid JSON: {path}: {exc}") from exc
    if not isinstance(payload, dict):
        raise ValidationError(f"{label} must be a JSON object: {path}")
    return JsonObjectAdapter.validate_python(payload)
326
+
327
+
328
def _corpus_files(path: Path) -> list[Path]:
    """Expand a corpus path into the list of ``corpus.json`` files to load.

    A non-directory path is returned as-is. For a directory, a direct
    ``corpus.json`` child wins; otherwise every nested ``corpus.json`` is
    returned in sorted order. When none exist, the (missing) direct path is
    returned.
    """
    if not path.is_dir():
        return [path]
    direct = path / "corpus.json"
    if direct.is_file():
        return [direct]
    nested = sorted(candidate for candidate in path.rglob("corpus.json") if candidate.is_file())
    return nested or [direct]
338
+
339
+
340
def _resolve(base: Path, value: Any) -> Path:
    """Interpret ``value`` as a path; relative (or falsy) values are anchored at ``base``."""
    candidate = Path(str(value or ""))
    if candidate.is_absolute():
        return candidate
    return base / candidate
343
+
344
+
345
def _serialized_output_path(base: Path, value: object) -> str:
    """Return ``value`` as a POSIX path relative to the suite directory ``base``.

    Relative values are anchored at ``base`` first. Both sides are resolved
    before comparison, and a path that ends up outside ``base`` is rejected
    with ``ValidationError``.
    """
    candidate = Path(str(value or ""))
    if not candidate.is_absolute():
        candidate = base / candidate
    try:
        relative = candidate.resolve().relative_to(base.resolve())
    except ValueError as exc:
        raise ValidationError(f"agent behavior output_path must stay under corpus suite root: {value}") from exc
    return relative.as_posix()
354
+
355
+
356
def _serialized_evidence_source_path(source_path: Path) -> str:
    """Reduce an absolute path to its file name; keep relative paths in POSIX form.

    This keeps private local directory prefixes out of serialized evidence.
    """
    if source_path.is_absolute():
        return source_path.name
    return source_path.as_posix()
360
+
361
+
362
def _relativize_output_paths(value: Any, *, base: Path) -> Any:
    """Return a copy of ``value`` with every ``output_path`` entry made suite-relative.

    Lists and dicts are walked recursively; any other value is returned
    unchanged. Only dict entries whose key is exactly ``"output_path"`` are
    rewritten.
    """
    if isinstance(value, dict):
        return {
            key: (
                _serialized_output_path(base, item)
                if key == "output_path"
                else _relativize_output_paths(item, base=base)
            )
            for key, item in value.items()
        }
    if isinstance(value, list):
        return [_relativize_output_paths(element, base=base) for element in value]
    return value
374
+
375
+
376
def agent_behavior_baseline_paths(corpus_path: Path) -> list[Path]:
    """Collect the resolved baseline files declared by a corpus file or bank.

    Every discovered corpus file must carry the corpus schema id. Files with an
    empty ``baseline_eval_path`` contribute nothing. The result is de-duplicated
    and sorted.

    Raises:
        ValidationError: If a corpus file uses a different schema id.
    """
    resolved: set[Path] = set()
    for corpus_file in _corpus_files(corpus_path):
        raw_corpus = _read_json_object(corpus_file, label="agent behavior corpus")
        fields = _agent_behavior_corpus_fields(raw_corpus)
        if fields.schema_id != AGENT_BEHAVIOR_CORPUS_SCHEMA:
            raise ValidationError(f"agent behavior corpus must use schema {AGENT_BEHAVIOR_CORPUS_SCHEMA}")
        if not fields.baseline_eval_path:
            continue
        # Baseline references are relative to the directory holding the corpus file.
        declared = _resolve(corpus_file.parent, fields.baseline_eval_path)
        resolved.add(declared.expanduser().resolve())
    return sorted(resolved)
389
+
390
+
391
def validate_agent_behavior_report_path(*, corpus_path: Path, report_path: Path) -> None:
    """Reject a report path that coincides with a declared baseline file.

    Raises:
        ValidationError: If the resolved ``report_path`` equals any baseline
            path declared under ``corpus_path``.
    """
    candidate = report_path.expanduser().resolve()
    if candidate in agent_behavior_baseline_paths(corpus_path):
        raise ValidationError(
            "agent_behavior_corpus.report_would_overwrite_baseline: "
            "--report writes agent-behavior-corpus-report.v1, but this path is baseline_eval_path. "
            "Write the corpus report to a separate file and promote the nested suite eval as the baseline."
        )
402
+
403
+
404
def _with_current_prompt_identity(plan: dict[str, Any], prompt_identity: dict[str, Any]) -> dict[str, Any]:
    """Return a shallow copy of ``plan`` stamped with ``prompt_identity``.

    The identity is copied onto the plan itself and onto every dict entry of
    ``work_items``; non-dict entries are kept untouched. A missing or non-list
    ``work_items`` becomes an empty list. The input plan is not mutated.
    """
    stamped = dict(plan)
    stamped["prompt_identity"] = dict(prompt_identity)
    raw_items = stamped.get("work_items")
    if not isinstance(raw_items, list):
        raw_items = []
    stamped["work_items"] = [
        {**entry, "prompt_identity": dict(prompt_identity)} if isinstance(entry, dict) else entry
        for entry in raw_items
    ]
    return stamped
417
+
418
+
419
def _manifest_with_absolute_outputs(*, base: Path, manifest_path: Path, output_dir: Path) -> tuple[Path, dict[str, Any]]:
    """Write a copy of the manifest whose item ``output_path`` values are resolved against ``base``.

    Returns:
        A pair of (path of the written ``manifest.absolute.json`` inside
        ``output_dir``, the original manifest as read from disk).

    Raises:
        ValidationError: If the manifest does not use the curator batch output
            manifest schema.
    """
    manifest = _read_json_object(manifest_path, label="agent behavior corpus manifest")
    manifest_fields = _curator_output_manifest_fields(manifest)
    if manifest_fields.schema_id != VOCABULARY_CURATOR_BATCH_OUTPUT_MANIFEST_SCHEMA:
        raise ValidationError(
            f"agent behavior corpus manifest must use schema {VOCABULARY_CURATOR_BATCH_OUTPUT_MANIFEST_SCHEMA}"
        )
    absolute_items: list[dict[str, Any]] = []
    for raw_item in manifest_fields.items:
        declared = _CuratorOutputManifestItemFields.model_validate(raw_item)
        # Keep every original key; only the output location is rewritten.
        absolute_items.append({**raw_item, "output_path": str(_resolve(base, declared.output_path))})
    rewritten = {**manifest, "items": absolute_items}
    target = output_dir / "manifest.absolute.json"
    target.write_text(json.dumps(rewritten, ensure_ascii=False, indent=2), encoding="utf-8")
    return target, manifest
437
+
438
+
439
def _issue(*, code: str, message: str) -> JsonObject:
    """Build an issue record with ``code`` and ``message`` keys."""
    record = {"code": code}
    record["message"] = message
    return record
441
+
442
+
443
def _canonical_payload_hash(payload: Any) -> str:
    """Hash ``payload`` as compact, key-sorted JSON and return ``sha256:<hex>``."""
    canonical = json.dumps(payload, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
    digest = hashlib.sha256(canonical.encode("utf-8")).hexdigest()
    return "sha256:" + digest
446
+
447
+
448
def _sha256_bytes(content: bytes) -> str:
    """Return the SHA-256 of ``content`` formatted as ``sha256:<hex>``."""
    digest = hashlib.sha256(content).hexdigest()
    return "sha256:" + digest
450
+
451
+
452
def _extension_root() -> Path:
    """Return the extension root reported by ``mednotes.platform.paths``.

    The import happens at call time rather than at module import.
    """
    from mednotes.platform.paths import extension_root as resolve_extension_root

    return resolve_extension_root()
456
+
457
+
458
def _source_fingerprint(relative_path: str) -> JsonObject:
    """Fingerprint a prompt source file located under the extension root.

    Returns a record with ``path``, ``exists``, ``sha256``, ``byte_count`` and
    ``word_count``. A path that is not a regular file yields ``exists=False``
    with an empty hash and zero counts. Words are counted on the UTF-8 decoded
    text, with undecodable bytes replaced.
    """
    source_file = _extension_root() / relative_path
    if source_file.is_file():
        raw = source_file.read_bytes()
        decoded = raw.decode("utf-8", errors="replace")
        return {
            "path": relative_path,
            "exists": True,
            "sha256": _sha256_bytes(raw),
            "byte_count": len(raw),
            "word_count": len(decoded.split()),
        }
    return {"path": relative_path, "exists": False, "sha256": "", "byte_count": 0, "word_count": 0}
471
+
472
+
473
def _prompt_identity_for_corpus(corpus: _AgentBehaviorCorpusFields) -> JsonObject:
    """Build the prompt identity record for a corpus.

    With no declared ``prompt_sources`` the curator prompt identity is used.
    Otherwise each non-empty source is fingerprinted, and the aggregate hash
    covers only each source's path, existence flag, and content hash.
    """
    if not corpus.prompt_sources:
        return JsonObjectAdapter.validate_python(build_curator_prompt_identity())
    fingerprints = [_source_fingerprint(source) for source in corpus.prompt_sources if source]
    # Byte and word counts are reported but deliberately left out of the hashed material.
    hashed_material = [
        {key: fingerprint[key] for key in ("path", "exists", "sha256")}
        for fingerprint in fingerprints
    ]
    identity = {
        "schema": "medical-notes-workbench.agent-prompt-identity.v1",
        "agent": corpus.agent,
        "aggregate_hash": _canonical_payload_hash(hashed_material),
        "sources": fingerprints,
    }
    return JsonObjectAdapter.validate_python(identity)
487
+
488
+
489
def _get_path(payload: JsonValue, path: str) -> tuple[bool, JsonValue]:
    """Look up a dotted ``path`` inside a JSON value.

    Each dot-separated segment is either a dict key or, when the current value
    is a list, a non-negative decimal index.

    Args:
        payload: JSON value to traverse.
        path: Dotted path; the empty string addresses ``payload`` itself.

    Returns:
        ``(True, value)`` when every segment resolves (``value`` may be
        ``None``), otherwise ``(False, None)``.
    """
    current = payload
    if not path:
        return True, current
    for part in path.split("."):
        if isinstance(current, dict):
            if part not in current:
                return False, None
            current = current[part]
        # isdecimal() matches exactly what int() can parse; isdigit() also accepts
        # characters such as "²" that make int() raise ValueError.
        elif isinstance(current, list) and part.isdecimal():
            index = int(part)
            if index >= len(current):
                return False, None
            current = current[index]
        else:
            return False, None
    return True, current
506
+
507
+
508
def _assertion_issue(case_id: str, assertion: _AgentBehaviorAssertionFields, message: str) -> dict[str, str]:
    """Build a ``behavior_contract_failed`` issue for a failed assertion.

    The record carries the case id, the assertion's operator and path, and the
    supplied message.
    """
    issue = {"code": "behavior_contract_failed", "case_id": case_id}
    issue["assertion"] = assertion.op
    issue["path"] = assertion.path
    issue["message"] = message
    return issue
516
+
517
+
518
def _expected_array_length(case_id: str, assertion: _AgentBehaviorAssertionFields) -> tuple[bool, int, list[dict[str, str]]]:
    """Check that an array-length assertion carries a real integer.

    Booleans and numeric strings are rejected rather than coerced.

    Returns:
        ``(True, length, [])`` for an integer value, otherwise
        ``(False, 0, [issue])``.
    """
    expected = assertion.value
    if isinstance(expected, bool) or not isinstance(expected, int):
        return False, 0, [_assertion_issue(case_id, assertion, f"expected integer length, got {expected!r}")]
    return True, expected, []
525
+
526
+
527
def _evaluate_assertion(*, case_id: str, payload: JsonObject, assertion: _AgentBehaviorAssertionFields) -> list[dict[str, str]]:
    """Evaluate one assertion against ``payload``.

    Supported operators: ``path_present``, ``path_absent``, ``path_equals``,
    ``path_in``, ``array_len_equals``, ``array_len_at_least``,
    ``array_len_at_most`` and ``json_not_contains``. Any other operator fails.

    Returns:
        An empty list when the assertion holds, otherwise a single-issue list.
    """
    op = assertion.op
    expected = assertion.value
    exists, value = _get_path(payload, assertion.path)

    def failure(message: str) -> list[dict[str, str]]:
        return [_assertion_issue(case_id, assertion, message)]

    if op == "path_present":
        # A path that resolves to null does not count as present.
        if exists and value is not None:
            return []
        return failure("expected path to be present")
    if op == "path_absent":
        if exists:
            return failure("expected path to be absent")
        return []
    if op == "path_equals":
        if exists and value == expected:
            return []
        return failure(f"expected {expected!r}, got {value!r}")
    if op == "path_in":
        choices = expected if isinstance(expected, list) else []
        if exists and value in choices:
            return []
        return failure(f"expected value in {choices!r}")

    # The three length operators differ only in comparison and message prefix.
    length_checks = {
        "array_len_equals": ("", lambda actual, wanted: actual == wanted),
        "array_len_at_least": (">= ", lambda actual, wanted: actual >= wanted),
        "array_len_at_most": ("<= ", lambda actual, wanted: actual <= wanted),
    }
    if op in length_checks:
        prefix, satisfied = length_checks[op]
        valid, expected_len, issues = _expected_array_length(case_id, assertion)
        if not valid:
            return issues
        if isinstance(value, list) and satisfied(len(value), expected_len):
            return []
        return failure(f"expected list length {prefix}{expected!r}")

    if op == "json_not_contains":
        if not isinstance(expected, str):
            return failure(f"expected forbidden text string, got {expected!r}")
        # The search covers the serialized whole payload, not just the value at ``path``.
        serialized = json.dumps(payload, ensure_ascii=False)
        if expected in serialized:
            return failure(f"forbidden text was present: {expected!r}")
        return []
    return failure(f"unknown assertion op: {op}")
570
+
571
+
572
def _score(issue_count: int) -> int:
    """Score a case out of 100, losing 25 points per issue and never going below 0."""
    remaining = 100 - 25 * issue_count
    return remaining if remaining > 0 else 0
574
+
575
+
576
class _PromptIdentityFields(ContractModel):
    """Typed view of a prompt identity, limited to its ``aggregate_hash``."""

    model_config = ConfigDict(extra="ignore")

    aggregate_hash: StrictStr = ""
582
+
583
+
584
class _BaselineMetadataFields(ContractModel):
    """Typed view of baseline metadata, limited to its ``status``."""

    model_config = ConfigDict(extra="ignore")

    status: StrictStr = ""
590
+
591
+
592
class _ContractEvalAggregateFields(ContractModel):
    """Aggregate counters of a contract eval; each is a strict, non-negative integer."""

    model_config = ConfigDict(extra="ignore")

    case_count: int = Field(default=0, ge=0, strict=True)
    item_count: int = Field(default=0, ge=0, strict=True)
    issue_count: int = Field(default=0, ge=0, strict=True)
    score: int = Field(default=0, ge=0, strict=True)
601
+
602
+
603
class _ContractEvalReportFields(ContractModel):
    """Typed view of a contract eval report.

    Covers the schema id, overall status, aggregate counters, prompt identity,
    and baseline metadata. Every part defaults to an empty instance when absent.
    """

    model_config = ConfigDict(extra="ignore")

    schema_id: StrictStr = Field(default="", alias="schema")
    status: StrictStr = ""
    aggregate: _ContractEvalAggregateFields = Field(default_factory=_ContractEvalAggregateFields)
    prompt_identity: _PromptIdentityFields = Field(default_factory=_PromptIdentityFields)
    baseline_metadata: _BaselineMetadataFields = Field(default_factory=_BaselineMetadataFields)
613
+
614
+
615
class _BaselineComparisonFields(ContractModel):
    """Typed view of a baseline comparison result, limited to its ``status``."""

    model_config = ConfigDict(extra="ignore")

    status: StrictStr = ""
621
+
622
+
623
def _compare_contract_baseline(*, current: JsonObject, baseline_path: Path) -> JsonObject:
    """Compare the current contract eval with the baseline stored at ``baseline_path``.

    The result status is ``not_comparable`` when the baseline is not marked
    active or the prompt identity hash differs, ``regressed`` when status,
    score, or issue count got worse, and ``improved_or_equal`` otherwise.

    Raises:
        ValidationError: If the baseline does not use the contract eval schema.
    """
    baseline = _read_json_object(baseline_path, label="agent behavior contract baseline")
    current_fields = _ContractEvalReportFields.model_validate(current)
    baseline_fields = _ContractEvalReportFields.model_validate(baseline)
    if baseline_fields.schema_id != AGENT_BEHAVIOR_CONTRACT_EVAL_SCHEMA:
        raise ValidationError(f"agent behavior contract baseline must use schema {AGENT_BEHAVIOR_CONTRACT_EVAL_SCHEMA}")

    comparability_checks = (
        ("baseline_not_promoted", baseline_fields.baseline_metadata.status != "active"),
        (
            "prompt_identity_changed",
            current_fields.prompt_identity.aggregate_hash != baseline_fields.prompt_identity.aggregate_hash,
        ),
    )
    comparability_flags: list[str] = [flag for flag, triggered in comparability_checks if triggered]

    score_delta = current_fields.aggregate.score - baseline_fields.aggregate.score
    issue_count_delta = current_fields.aggregate.issue_count - baseline_fields.aggregate.issue_count
    regression_checks = (
        ("status_regression", baseline_fields.status == "pass" and current_fields.status != "pass"),
        ("score_regression", score_delta < 0),
        ("issue_count_regression", issue_count_delta > 0),
    )
    regression_flags: list[str] = [flag for flag, triggered in regression_checks if triggered]

    # Comparability problems take precedence over any detected regression.
    if comparability_flags:
        comparison_status = "not_comparable"
    elif regression_flags:
        comparison_status = "regressed"
    else:
        comparison_status = "improved_or_equal"

    return JsonObjectAdapter.validate_python(
        {
            "baseline_status": baseline_fields.status,
            "current_status": current_fields.status,
            "score_delta": score_delta,
            "issue_count_delta": issue_count_delta,
            "comparability_flags": comparability_flags,
            "regression_flags": regression_flags,
            "status": comparison_status,
        }
    )
657
+
658
+
659
def _promote_contract_baseline(report: dict[str, Any], *, source_path: Path) -> dict[str, Any]:
    """Return a shallow copy of *report* marked as the active baseline.

    The copy carries a ``baseline_metadata`` entry recording the source eval
    path and the canonical hash of the unmodified *report*; *report* itself is
    not mutated.
    """
    metadata = {
        "status": "active",
        "source_eval_path": str(source_path),
        "source_eval_hash": _canonical_payload_hash(report),
    }
    return {**report, "baseline_metadata": metadata}
667
+
668
+
669
def evaluate_json_contract_corpus(
    *,
    corpus: _AgentBehaviorCorpusFields,
    base: Path,
    prompt_identity: JsonObject,
    baseline_path: Path | None = None,
) -> dict[str, Any]:
    """Evaluate every case of a JSON-contract corpus and build the eval report.

    Each case's output file is loaded and checked against the case assertions.
    When *baseline_path* points to an existing file the result is also compared
    with that baseline, and any outcome other than ``improved_or_equal`` forces
    the report status to ``needs_review``.

    Args:
        corpus: Corpus descriptor; provides the cases path, suite id and agent.
        base: Directory used to resolve relative paths.
        prompt_identity: Current prompt fingerprint, embedded in the report.
        baseline_path: Optional baseline eval file to compare against.

    Returns:
        The eval report, including a ``comparison`` entry when a baseline was used.

    Raises:
        ValidationError: If the cases payload uses an unexpected schema.
    """
    cases_fields = _agent_behavior_cases_payload_fields(
        _read_json_object(_resolve(base, corpus.cases_path), label="agent behavior cases")
    )
    if cases_fields.schema_id != "medical-notes-workbench.agent-behavior-cases.v1":
        raise ValidationError("agent behavior cases must use schema medical-notes-workbench.agent-behavior-cases.v1")

    case_reports: list[JsonObject] = []
    scores: list[int] = []
    assertion_total = 0
    all_issues: list[dict[str, str]] = []
    for case in cases_fields.cases:
        identifier = case.case_id
        resolved_output = _resolve(base, case.output_path)
        output_payload = _read_json_object(resolved_output, label=f"agent behavior output {identifier}")
        found: list[dict[str, str]] = [
            issue
            for assertion in case.assertions
            for issue in _evaluate_assertion(case_id=identifier, payload=output_payload, assertion=assertion)
        ]
        all_issues.extend(found)
        score = _score(len(found))
        checked = len(case.assertions)
        scores.append(score)
        assertion_total += checked
        case_reports.append(
            JsonObjectAdapter.validate_python(
                {
                    "case_id": identifier,
                    "behavior": case.behavior,
                    "output_path": _serialized_output_path(base, resolved_output),
                    "status": "needs_review" if found else "pass",
                    "score": score,
                    "issues": found,
                    "assertion_count": checked,
                }
            )
        )

    issue_count = len(all_issues)
    # An empty corpus scores 100 rather than dividing by zero.
    mean_score = round(sum(scores) / len(scores)) if scores else 100
    if issue_count == 0:
        report_status = "pass"
        report_next_action = ""
    else:
        report_status = "needs_review"
        report_next_action = "review behavior contract failures before accepting prompt changes"

    comparison: JsonObject | None = None
    if baseline_path is not None and baseline_path.is_file():
        current_summary = JsonObjectAdapter.validate_python(
            {
                "schema": AGENT_BEHAVIOR_CONTRACT_EVAL_SCHEMA,
                "status": report_status,
                "aggregate": {
                    "case_count": len(case_reports),
                    "issue_count": issue_count,
                    "score": mean_score,
                },
                "prompt_identity": prompt_identity,
            }
        )
        comparison = _compare_contract_baseline(current=current_summary, baseline_path=baseline_path)
        if _BaselineComparisonFields.model_validate(comparison).status != "improved_or_equal":
            report_status = "needs_review"
            report_next_action = "review behavior corpus baseline before accepting prompt changes"

    report = {
        "schema": AGENT_BEHAVIOR_CONTRACT_EVAL_SCHEMA,
        "suite_id": corpus.suite_id,
        "agent": corpus.agent,
        "evaluator": "json_contract",
        "prompt_identity": prompt_identity,
        "status": report_status,
        "aggregate": {
            "case_count": len(case_reports),
            "issue_count": issue_count,
            "score": mean_score,
            "assertion_count": assertion_total,
        },
        "cases": case_reports,
        "issues": all_issues,
        "next_action": report_next_action,
    }
    if comparison is not None:
        report["comparison"] = comparison
    return report
755
+
756
+
757
def _blocked_report(
    *,
    corpus: _AgentBehaviorCorpusFields,
    prompt_identity_hash: str,
    issues: list[JsonObject],
) -> JsonObject:
    """Build the ``needs_review`` corpus report returned when evaluation is blocked.

    The report carries no suites; it lists *issues*, mirrors their codes in the
    aggregate, and tells the operator to rerun the corpus and promote a fresh
    baseline.
    """
    aggregate = {
        "suite_count": 1,
        "case_count": corpus.case_count,
        "prompt_identity_hash": prompt_identity_hash,
        "issue_codes": [entry["code"] for entry in issues],
    }
    payload = {
        "schema": AGENT_BEHAVIOR_CORPUS_REPORT_SCHEMA,
        "status": "needs_review",
        "suite_id": corpus.suite_id,
        "agent": corpus.agent,
        "aggregate": aggregate,
        "suites": [],
        "issues": issues,
        "next_action": "rerun the agent behavior corpus with the current prompt and promote a fresh baseline",
    }
    return JsonObjectAdapter.validate_python(payload)
778
+
779
+
780
def _evaluate_single_agent_behavior_corpus(corpus_file: Path) -> dict[str, Any]:
    """Evaluate one agent behavior corpus file and return its corpus report.

    The corpus must use ``AGENT_BEHAVIOR_CORPUS_SCHEMA`` and one of the two
    supported evaluators. Evaluation is blocked (a ``needs_review`` report from
    ``_blocked_report`` is returned) when the corpus' locked prompt hash is
    stale, when the baseline eval file is missing, or - for the curator
    evaluator - when the manifest or baseline were produced for a different
    prompt fingerprint.

    Args:
        corpus_file: Path of the corpus JSON; relative paths inside the corpus
            are resolved against its parent directory.

    Returns:
        A corpus report dict with a single suite entry, or a blocked report.

    Raises:
        ValidationError: If the corpus schema or evaluator is not supported.
    """
    base = corpus_file.parent
    corpus = _read_json_object(corpus_file, label="agent behavior corpus")
    corpus_fields = _agent_behavior_corpus_fields(corpus)
    if corpus_fields.schema_id != AGENT_BEHAVIOR_CORPUS_SCHEMA:
        raise ValidationError(f"agent behavior corpus must use schema {AGENT_BEHAVIOR_CORPUS_SCHEMA}")
    evaluator = corpus_fields.evaluator
    if evaluator not in {"curator_prompt_eval", "json_contract"}:
        raise ValidationError("agent behavior corpus supports evaluator=curator_prompt_eval or json_contract")

    # Gate 1: the hash locked in the corpus must match the current fingerprint.
    prompt_identity = _prompt_identity_for_corpus(corpus_fields)
    prompt_identity_hash = str(prompt_identity.get("aggregate_hash") or "")
    locked_prompt_hash = corpus_fields.prompt_identity_hash
    issues: list[JsonObject] = []
    if locked_prompt_hash != prompt_identity_hash:
        issues.append(
            _issue(
                code="stale_prompt_identity",
                message="corpus prompt_identity_hash does not match the current prompt/runbook fingerprint",
            )
        )
        return _blocked_report(corpus=corpus_fields, prompt_identity_hash=prompt_identity_hash, issues=issues)

    # Gate 2: a baseline eval must exist before anything is evaluated.
    baseline_path = _resolve(base, corpus_fields.baseline_eval_path)
    if not baseline_path.is_file():
        issues.append(_issue(code="missing_behavior_baseline", message=f"baseline eval not found: {baseline_path}"))
        return _blocked_report(corpus=corpus_fields, prompt_identity_hash=prompt_identity_hash, issues=issues)

    if evaluator == "json_contract":
        eval_report = evaluate_json_contract_corpus(
            corpus=corpus_fields,
            base=base,
            prompt_identity=prompt_identity,
            baseline_path=baseline_path,
        )
        eval_fields = _ContractEvalReportFields.model_validate(eval_report)
        suite_status = "pass" if eval_fields.status == "pass" else "needs_review"
        # `issues` is empty here: every path that appended to it returned above.
        report_issues = list(issues)
        if suite_status != "pass":
            report_issues.append(
                _issue(
                    code="behavior_contract_failed",
                    message="agent behavior contract returned needs_review",
                )
            )
        return {
            "schema": AGENT_BEHAVIOR_CORPUS_REPORT_SCHEMA,
            "status": "pass" if not report_issues else "needs_review",
            "suite_id": corpus_fields.suite_id,
            "agent": corpus_fields.agent,
            "aggregate": {
                "suite_count": 1,
                # Falls back to the corpus' declared count when the eval reports 0/None.
                "case_count": eval_fields.aggregate.case_count or corpus_fields.case_count,
                "prompt_identity_hash": prompt_identity_hash,
                "issue_codes": [issue["code"] for issue in report_issues],
            },
            "suites": [
                {
                    "suite_id": corpus_fields.suite_id,
                    "agent": corpus_fields.agent,
                    "evaluator": evaluator,
                    "status": suite_status,
                    "prompt_identity_hash": prompt_identity_hash,
                    "eval": eval_report,
                }
            ],
            "issues": report_issues,
            "next_action": ""
            if not report_issues
            else "review agent behavior corpus failures before accepting prompt changes",
        }

    # Remaining evaluator: curator_prompt_eval.
    plan_path = _resolve(base, corpus_fields.plan_path)
    manifest_path = _resolve(base, corpus_fields.manifest_path)
    expectations_path = _resolve(base, corpus_fields.expectations_path)
    plan = _with_current_prompt_identity(_read_json_object(plan_path, label="agent behavior corpus plan"), prompt_identity)
    # NOTE(review): the expected hash is computed before the expectations key is
    # stored on `plan`, while the reported "plan_hash" below is computed after it.
    # Presumably curator_plan_hash ignores that key - confirm.
    plan["evaluation_expectations_by_work_id"] = load_curator_prompt_expectations(
        expectations_path,
        expected_plan_hash=curator_plan_hash(plan),
    )

    # The normalized manifest is tied to the temporary directory, so the eval
    # runs while that directory still exists.
    with tempfile.TemporaryDirectory(prefix="agent-behavior-corpus-") as temp_dir:
        normalized_manifest_path, manifest = _manifest_with_absolute_outputs(
            base=base,
            manifest_path=manifest_path,
            output_dir=Path(temp_dir),
        )
        # Gate 3: captured outputs must belong to the current prompt fingerprint.
        manifest_prompt_hash = str(manifest.get("prompt_identity_hash") or "")
        if manifest_prompt_hash != prompt_identity_hash:
            issues.append(
                _issue(
                    code="stale_behavior_outputs",
                    message="manifest prompt_identity_hash does not match current prompt/runbook fingerprint",
                )
            )
            return _blocked_report(corpus=corpus_fields, prompt_identity_hash=prompt_identity_hash, issues=issues)
        # Gate 4: the baseline must have been produced for the same fingerprint.
        baseline = _read_json_object(baseline_path, label="agent behavior corpus baseline")
        baseline_prompt = baseline.get("prompt_identity") if isinstance(baseline.get("prompt_identity"), dict) else {}
        if str(baseline_prompt.get("aggregate_hash") or "") != prompt_identity_hash:
            issues.append(
                _issue(
                    code="stale_behavior_baseline",
                    message="baseline prompt_identity does not match current prompt/runbook fingerprint",
                )
            )
            return _blocked_report(corpus=corpus_fields, prompt_identity_hash=prompt_identity_hash, issues=issues)
        eval_report = evaluate_curator_prompt_outputs(
            plan=plan,
            manifest_path=normalized_manifest_path,
            baseline_eval_path=baseline_path,
        )
        eval_report = _relativize_output_paths(eval_report, base=base)

    eval_fields = _ContractEvalReportFields.model_validate(eval_report)
    suite_status = "pass" if eval_fields.status == "pass" else "needs_review"
    # The curator eval reports `item_count`; fall back to the declared case count.
    case_count = eval_fields.aggregate.item_count or corpus_fields.case_count
    report_issues = list(issues)
    if suite_status != "pass":
        report_issues.append(
            _issue(
                code="behavior_corpus_eval_needs_review",
                message="curator behavior corpus eval returned needs_review",
            )
        )
    return {
        "schema": AGENT_BEHAVIOR_CORPUS_REPORT_SCHEMA,
        "status": "pass" if not report_issues else "needs_review",
        "suite_id": corpus_fields.suite_id,
        "agent": corpus_fields.agent,
        "aggregate": {
            "suite_count": 1,
            "case_count": case_count,
            "prompt_identity_hash": prompt_identity_hash,
            "issue_codes": [issue["code"] for issue in report_issues],
        },
        "suites": [
            {
                "suite_id": corpus_fields.suite_id,
                "agent": corpus_fields.agent,
                "evaluator": corpus_fields.evaluator,
                "status": suite_status,
                "plan_hash": curator_plan_hash(plan),
                "prompt_identity_hash": prompt_identity_hash,
                "eval": eval_report,
            }
        ],
        "issues": report_issues,
        "next_action": ""
        if not report_issues
        else "review agent behavior corpus failures before accepting prompt changes",
    }
931
+
932
+
933
class _CorpusAggregateFields(ContractModel):
    """Typed status/count lens for bank-level corpus aggregation.

    Only the keys needed to merge several corpus reports are declared;
    ``extra="ignore"`` drops every other key of a validated report.
    """

    model_config = ConfigDict(extra="ignore")

    # Report status; the aggregator treats anything other than "pass" as a failure.
    status: str = ""
    # Per-report aggregate block (case_count, prompt_identity_hash, issue_codes are read from it).
    aggregate: JsonObject = Field(default_factory=dict)
    # Issues and suite entries are concatenated across reports by the aggregator.
    issues: list[JsonObject] = Field(default_factory=list)
    suites: list[JsonObject] = Field(default_factory=list)
942
+
943
+
944
def _aggregate_corpus_reports(reports: list[dict[str, Any]]) -> dict[str, Any]:
    """Merge several single-corpus reports into one bank-level report.

    Case counts are summed; issue codes, issues and suites are concatenated in
    report order; the first non-empty prompt identity hash is kept. The merged
    status is ``pass`` only when every input report passed.
    """
    typed_reports = [_CorpusAggregateFields.model_validate(raw) for raw in reports]

    merged_codes: list[str] = []
    merged_issues: list[dict[str, str]] = []
    merged_suites: list[dict[str, Any]] = []
    total_cases = 0
    first_hash = ""
    for typed in typed_reports:
        block = typed.aggregate
        total_cases += int(block.get("case_count") or 0)
        first_hash = first_hash or str(block.get("prompt_identity_hash") or "")
        merged_codes.extend(str(code) for code in block.get("issue_codes", []) if str(code))
        merged_issues.extend(entry for entry in typed.issues if isinstance(entry, dict))
        merged_suites.extend(entry for entry in typed.suites if isinstance(entry, dict))

    any_failure = any(typed.status != "pass" for typed in typed_reports)
    status = "needs_review" if any_failure else "pass"
    if status == "pass":
        next_action = ""
    else:
        next_action = "review agent behavior corpus failures before accepting prompt changes"
    return {
        "schema": AGENT_BEHAVIOR_CORPUS_REPORT_SCHEMA,
        "status": status,
        "suite_id": "agent_behavior_corpus_bank",
        "agent": "multiple",
        "aggregate": {
            "suite_count": len(reports),
            "case_count": total_cases,
            "prompt_identity_hash": first_hash,
            "issue_codes": merged_codes,
        },
        "suites": merged_suites,
        "issues": merged_issues,
        "next_action": next_action,
    }
977
+
978
+
979
def evaluate_agent_behavior_corpus(corpus_path: Path) -> dict[str, Any]:
    """Evaluate every corpus file found for *corpus_path*.

    A single corpus yields its own report unchanged; any other number of
    corpora is merged into one bank-level report.
    """
    reports = [_evaluate_single_agent_behavior_corpus(path) for path in _corpus_files(corpus_path)]
    return reports[0] if len(reports) == 1 else _aggregate_corpus_reports(reports)
985
+
986
+
987
+ def _json_payload_files(input_path: Path) -> list[Path]:
988
+ if input_path.is_dir():
989
+ return sorted(path for path in input_path.rglob("*.json") if path.is_file())
990
+ return [input_path]
991
+
992
+
993
+ def _evidence_payload_files(input_path: Path) -> list[Path]:
994
+ if input_path.is_dir():
995
+ return sorted(
996
+ path
997
+ for path in input_path.rglob("*")
998
+ if path.is_file() and path.suffix.lower() in {".json", ".md", ".markdown", ".txt"}
999
+ )
1000
+ return [input_path]
1001
+
1002
+
1003
+ def _read_json_any(path: Path) -> Any:
1004
+ try:
1005
+ return json.loads(path.read_text(encoding="utf-8"))
1006
+ except FileNotFoundError as exc:
1007
+ raise ValidationError(f"telemetry input not found: {path}") from exc
1008
+ except json.JSONDecodeError as exc:
1009
+ raise ValidationError(f"telemetry input is invalid JSON: {path}: {exc}") from exc
1010
+
1011
+
1012
def _schema_app(payload: dict[str, Any]) -> str:
    """Return the app prefix embedded in the payload's schema id.

    The prefix is the text before ``.workflow-telemetry-envelope.`` or, failing
    that, before ``.workflow-run-record.``; an empty string is returned when
    neither marker occurs.
    """
    schema = _telemetry_payload_lens(payload).schema_id
    for marker in (".workflow-telemetry-envelope.", ".workflow-run-record."):
        if marker in schema:
            return schema.split(marker, 1)[0]
    return ""
1020
+
1021
+
1022
def _payload_app(payload: dict[str, Any]) -> str:
    """Return the app name declared by *payload*, or ``""`` when none is found.

    Lookup order: the payload's own ``app``, then its client's ``app``, then
    the prefix of its schema id.
    """
    fields = _telemetry_payload_lens(payload)
    client_app = "" if fields.client is None else fields.client.app
    candidates = (fields.app, client_app, _schema_app(payload))
    return next((candidate for candidate in candidates if candidate), "")
1029
+
1030
+
1031
def _telemetry_records(input_path: Path) -> list[tuple[dict[str, Any], dict[str, Any], Path]]:
    """Collect ``(record, envelope, source_path)`` triples from telemetry files.

    Three payload shapes are accepted per file:

    * a dict envelope with a non-empty ``records`` list - each dict record is
      paired with the envelope;
    * any other dict - treated as a single record with an empty envelope;
    * a list - each dict item is a record with an empty envelope.

    Non-dict items and any other top-level JSON value are skipped.
    """
    records: list[tuple[dict[str, Any], dict[str, Any], Path]] = []
    for path in _json_payload_files(input_path):
        payload = _read_json_any(path)
        if isinstance(payload, dict):
            # The payload lens is applied to dicts only: its other callers pass
            # dicts, and list payloads are handled without it below.
            envelope_records = _telemetry_payload_lens(payload).records
            if envelope_records:
                records.extend(
                    (record, payload, path) for record in envelope_records if isinstance(record, dict)
                )
            else:
                records.append((payload, {}, path))
        elif isinstance(payload, list):
            records.extend((record, {}, path) for record in payload if isinstance(record, dict))
    return records
1048
+
1049
+
1050
def _record_app(record: dict[str, Any], envelope: dict[str, Any]) -> str:
    """Return the app for a record: its own, else the envelope's, else the default."""
    from_record = _payload_app(record)
    from_envelope = _payload_app(envelope)
    return from_record or from_envelope or DEFAULT_TELEMETRY_APP
1058
+
1059
+
1060
def _record_app_version(record: dict[str, Any], envelope: dict[str, Any]) -> str:
    """Return the first app version available for a record, or ``"unknown"``.

    Lookup order: the record's ``app_version``, the record client's version,
    the envelope client's version, then the version reported by the record's
    extension-integrity block.
    """
    record_fields = _telemetry_record_lens(record)
    envelope_fields = _telemetry_payload_lens(envelope)

    record_client = record_fields.client
    envelope_client = envelope_fields.client
    environment = record_fields.environment_context
    integrity = None if environment is None else environment.extension_integrity

    candidates = (
        record_fields.app_version,
        "" if record_client is None else record_client.app_version,
        "" if envelope_client is None else envelope_client.app_version,
        "" if integrity is None else integrity.app_version,
    )
    return next((version for version in candidates if version), "unknown")
1079
+
1080
+
1081
+ def _list_strings(value: Any) -> list[str]:
1082
+ if isinstance(value, list):
1083
+ return [str(item) for item in value if str(item or "")]
1084
+ if str(value or ""):
1085
+ return [str(value)]
1086
+ return []
1087
+
1088
+
1089
def _script_risk_codes(record: dict[str, Any]) -> list[str]:
    """Return the distinct risk codes of a record's generated scripts.

    Codes are kept in first-seen order. A missing or non-list
    ``generated_scripts`` value gives ``[]``; non-dict entries are ignored.
    """
    scripts = record.get("generated_scripts")
    if not isinstance(scripts, list):
        return []
    # dict.fromkeys de-duplicates while preserving insertion order.
    ordered = dict.fromkeys(
        code
        for script in scripts
        if isinstance(script, dict)
        for code in _list_strings(script.get("risk_codes"))
    )
    return list(ordered)
1101
+
1102
+
1103
def _agent_events(record: dict[str, Any]) -> list[_TelemetryAgentEventFields]:
    """Return the record's ``agent_events`` as typed event fields.

    A missing or non-list value gives ``[]``; non-dict entries are skipped.
    """
    raw_events = record.get("agent_events")
    if not isinstance(raw_events, list):
        return []
    return [
        _telemetry_agent_event_fields(JsonObjectAdapter.validate_python(raw))
        for raw in raw_events
        if isinstance(raw, dict)
    ]
1112
+
1113
+
1114
def _signals_for_record(record: dict[str, Any]) -> list[str]:
    """Collect the distinct behavior signals present in a telemetry record.

    Signals are gathered, in this order, from the agent behavior codes, the
    root cause code (only when it starts with ``agent.`` or has a default
    severity), the agent event codes, and the generated-script risk codes.
    """
    diagnostic = record.get("diagnostic_context")
    if not isinstance(diagnostic, dict):
        diagnostic = {}
    behavior = diagnostic.get("agent_behavior_context")
    if not isinstance(behavior, dict):
        behavior = {}

    signals: list[str] = []

    def note(code: str) -> None:
        # Keep first-seen order without duplicates.
        if code not in signals:
            signals.append(code)

    for behavior_code in _list_strings(behavior.get("codes")):
        note(behavior_code)

    root = str(diagnostic.get("root_cause_code") or "")
    if root and (root.startswith("agent.") or root in DEFAULT_SIGNAL_SEVERITY):
        note(root)

    for event in _agent_events(record):
        if event.code:
            note(event.code)

    risk_codes = set(_script_risk_codes(record))
    if risk_codes & RISK_CODES_THAT_CREATE_DRAFTS:
        note("agent.generated_script_workaround")
    if "extension_prompt_or_script_drift" in risk_codes:
        note("extension_prompt_or_script_drift")
    return signals
1138
+
1139
+
1140
def _severity_for_signal(record: dict[str, Any], signal: str) -> str:
    """Return the highest-ranked severity known for *signal* in *record*.

    Candidates are the lower-cased severities of agent events whose code or
    type equals *signal*, plus the signal's default severity (``"low"`` when it
    has none). Ranking uses ``SEVERITY_RANK``; unknown levels rank 0.
    """
    matching = [
        event.severity.lower()
        for event in _agent_events(record)
        if event.code == signal or event.type == signal
    ]
    candidates = [level for level in matching if level]
    candidates.append(DEFAULT_SIGNAL_SEVERITY.get(signal, "low"))
    return max(candidates, key=lambda level: SEVERITY_RANK.get(level, 0))
1149
+
1150
+
1151
def _passes_min_severity(record: dict[str, Any], signal: str, min_severity: str) -> bool:
    """Tell whether *signal*'s severity in *record* reaches *min_severity*.

    An unknown signal severity ranks 0; an unknown *min_severity* ranks 2.
    """
    actual_rank = SEVERITY_RANK.get(_severity_for_signal(record, signal), 0)
    required_rank = SEVERITY_RANK.get(min_severity, 2)
    return actual_rank >= required_rank
1153
+
1154
+
1155
+ def _clean_text(value: Any, *, max_chars: int = 320) -> str:
1156
+ text = str(value or "").replace("\r", " ").replace("\n", " ").strip()
1157
+ text = re.sub(
1158
+ r"(?i)(token|auth[_-]?token|api[_-]?key|secret|authorization|bearer)\s*[:=]\s*['\"]?[^'\"\s]+",
1159
+ r"\1=<redacted>",
1160
+ text,
1161
+ )
1162
+ text = re.sub(r"(?i)(RESEND_API_KEY|INGEST_TOKEN|OPENAI_API_KEY|ANTHROPIC_API_KEY)[^,\s]*", r"\1=<redacted>", text)
1163
+ return text[:max_chars]
1164
+
1165
+
1166
def _event_sample(event: _TelemetryAgentEventFields) -> dict[str, str]:
    """Return a cleaned sample of an agent event, limited to allow-listed fields.

    Fields whose value is falsy are omitted; the rest pass through ``_clean_text``.
    """
    sample: dict[str, str] = {}
    for field_name in (
        "code",
        "type",
        "severity",
        "phase",
        "expected_phase",
        "next_action_expected",
        "recovery_command",
        "command_family",
        "path",
    ):
        if getattr(event, field_name):
            sample[field_name] = _clean_text(getattr(event, field_name))
    return sample
1179
+
1180
+
1181
def _redacted_evidence(record: dict[str, Any], envelope: dict[str, Any], *, signal: str, source_path: Path) -> dict[str, Any]:
    """Build the cleaned evidence summary for one record and signal.

    Free-text fields pass through ``_clean_text``; at most three agent event
    samples are included. Entries equal to ``""``, ``[]`` or ``{}`` are dropped
    from the result.
    """
    diagnostic = record.get("diagnostic_context")
    if not isinstance(diagnostic, dict):
        diagnostic = {}
    behavior = diagnostic.get("agent_behavior_context")
    if not isinstance(behavior, dict):
        behavior = {}

    evidence: dict[str, Any] = {"source_path": _serialized_evidence_source_path(source_path)}
    for record_key in ("run_id", "workflow", "status", "phase", "blocked_reason", "next_action"):
        evidence[record_key] = _clean_text(record.get(record_key))
    for diagnostic_key in ("root_cause_code", "recovery_command"):
        evidence[diagnostic_key] = _clean_text(diagnostic.get(diagnostic_key))
    evidence["agent_behavior_codes"] = _list_strings(behavior.get("codes"))
    evidence["risk_codes"] = _script_risk_codes(record)
    evidence["event_samples"] = [_event_sample(event) for event in _agent_events(record)[:3]]
    evidence["payload_level"] = _clean_text(envelope.get("payload_level"))
    evidence["signal"] = signal

    return {key: value for key, value in evidence.items() if value not in ("", [], {})}
1205
+
1206
+
1207
def _workflow_key(workflow: str) -> str:
    """Reduce a workflow string to its short key.

    Known commands (keys of ``COMMAND_PROMPT_SOURCES``) yield the text after
    their last ``:`` with leading slashes removed. ``/mednotes:<name> ...``
    yields ``<name>``, anything starting with ``/flashcards`` yields
    ``"flashcards"``, and any other value yields its first token with
    ``/mednotes:`` and slashes removed. Blank input yields ``""``.
    """
    value = workflow.strip()
    if not value:
        return ""
    if value in COMMAND_PROMPT_SOURCES:
        return value.rsplit(":", 1)[-1].lstrip("/")
    if value.startswith("/mednotes:"):
        # A bare "/mednotes:" has no token after the colon; return "" instead
        # of raising IndexError on the empty split result.
        tail = value.split(":", 1)[1].split()
        return tail[0] if tail else ""
    if value.startswith("/flashcards"):
        return "flashcards"
    return value.split()[0].replace("/mednotes:", "").replace("/", "")
1218
+
1219
+
1220
def _command_source_for_workflow(workflow: str) -> str:
    """Return the command prompt source for the workflow's first token, or ``""``."""
    tokens = workflow.split()
    first_token = tokens[0] if tokens else ""
    return COMMAND_PROMPT_SOURCES.get(first_token, "")
1223
+
1224
+
1225
def _suggested_prompt_sources(workflow: str) -> list[str]:
    """Return the prompt sources to review for *workflow*.

    The command prompt source comes first when one exists, followed by the
    workflow's skill prompt source unless it is the same entry.
    """
    command_source = _command_source_for_workflow(workflow)
    skill_source = WORKFLOW_SKILL_PROMPT_SOURCES.get(_workflow_key(workflow))
    sources: list[str] = [command_source] if command_source else []
    if skill_source and skill_source not in sources:
        sources.append(skill_source)
    return sources
1234
+
1235
+
1236
def _prompt_snippet(relative_path: str, *, signal: str) -> str:
    """Return a short cleaned excerpt of a prompt file related to *signal*.

    The file is read relative to the extension root. The excerpt is up to three
    lines starting one line before the first line containing a keyword, or the
    first three lines when no keyword matches. An unreadable file gives ``""``.
    """
    # NOTE(review): relative_path is joined to the extension root unchecked;
    # confirm callers only pass trusted, bundle-relative paths.
    path = _extension_root() / relative_path
    try:
        lines = path.read_text(encoding="utf-8").splitlines()
    except OSError:
        return ""

    keywords = ["next_action", "blocked", "bloque", "script", "comando", "workflow"]
    if "tool" in signal or "command" in signal:
        keywords += ["exit code", "shell", "terminal"]
    if "script" in signal:
        keywords += ["workaround", "oficial", "manual"]

    start = next(
        (
            max(0, index - 1)
            for index, line in enumerate(lines)
            if any(keyword in line.casefold() for keyword in keywords)
        ),
        0,
    )
    stripped_window = (line.strip() for line in lines[start : start + 3])
    snippet = " ".join(piece for piece in stripped_window if piece)
    return _clean_text(snippet, max_chars=420)
1255
+
1256
+
1257
def _surface_items(value: Any, *, kind: str) -> list[dict[str, str]]:
    """Return cleaned surface entries from a list of dicts.

    For ``kind == "prompt"`` the kept keys are ``path``, ``snippet`` and
    ``reason``; otherwise ``path``, ``function_or_command`` and ``reason``.
    Keys whose cleaned text is empty are dropped, and so are entries without a
    ``path``. A non-list *value* gives ``[]``.
    """
    if not isinstance(value, list):
        return []
    if kind == "prompt":
        allowed = ("path", "snippet", "reason")
    else:
        allowed = ("path", "function_or_command", "reason")

    items: list[dict[str, str]] = []
    for entry in value:
        if not isinstance(entry, dict):
            continue
        clean: dict[str, str] = {}
        for key in allowed:
            # Emptiness does not depend on the truncation length, so a single
            # cleaning pass decides both inclusion and the stored text.
            text = _clean_text(entry.get(key), max_chars=420)
            if text:
                clean[key] = text
        if clean.get("path"):
            items.append(clean)
    return items
1273
+
1274
+
1275
def _suspect_prompts_from_sources(prompt_sources: list[str], *, signal: str) -> list[dict[str, str]]:
    """Describe each prompt source as a suspect surface for *signal*.

    Sources that clean to an empty path are skipped. Each entry carries the
    path, a snippet of the prompt (or a fixed placeholder when none is
    available) and a review reason naming the signal.
    """
    cleaned_paths = (_clean_text(source) for source in prompt_sources)
    return [
        {
            "path": path,
            "snippet": _prompt_snippet(path, signal=signal) or "Trecho não disponível no bundle local.",
            "reason": f"Fonte de prompt vinculada ao workflow/sinal {signal}; revisar se deveria prevenir o desvio.",
        }
        for path in cleaned_paths
        if path
    ]
1290
+
1291
+
1292
def _suspect_scripts_from_record(record: dict[str, Any]) -> list[dict[str, str]]:
    """List the scripts and commands captured in *record* as suspect surfaces.

    Generated scripts come first (skipping those without a path), followed by
    command events (skipping those without a command or command family).
    Non-dict entries and non-list containers are ignored.
    """
    suspects: list[dict[str, str]] = []

    generated = record.get("generated_scripts")
    for raw_script in generated if isinstance(generated, list) else ():
        if not isinstance(raw_script, dict):
            continue
        script = _GeneratedScriptEvidenceLens.model_validate(raw_script)
        script_path = _clean_text(script.path)
        if script_path:
            # At most five risk codes are quoted in the reason text.
            risk_summary = ", ".join(_list_strings(script.risk_codes)[:5])
            suspects.append(
                {
                    "path": script_path,
                    "function_or_command": _clean_text(script.function_or_command or "generated_script"),
                    "reason": f"Script capturado na evidência; risk_codes={risk_summary}"
                    if risk_summary
                    else "Script capturado na evidência.",
                }
            )

    commands = record.get("command_events")
    for raw_command in commands if isinstance(commands, list) else ():
        if not isinstance(raw_command, dict):
            continue
        event = _CommandEventEvidenceLens.model_validate(raw_command)
        command_text = _clean_text(event.command or event.command_family, max_chars=260)
        if command_text:
            suspects.append(
                {
                    "path": _clean_text(event.path or "terminal"),
                    "function_or_command": command_text,
                    "reason": _clean_text(event.status or "command_event"),
                }
            )
    return suspects
1328
+
1329
+
1330
+ def _prevention_owner_note(*, prompts: list[dict[str, str]], scripts: list[dict[str, str]]) -> str:
1331
+ if prompts or scripts:
1332
+ return "Superfícies suspeitas listadas para revisão; isso não prova culpa sem reprodução."
1333
+ return "Nenhum prompt ou script encarregado de prevenir este comportamento foi identificado na evidência redigida."
1334
+
1335
+
1336
def _target_suite(record: dict[str, Any]) -> str:
    """Pick the behavior suite id a draft case for *record* should target.

    Workflows with a command prompt source map to the commands suite; otherwise
    a record naming an agent maps to that agent's suite (hyphens become
    underscores); everything else maps to the skills suite.
    """
    fields = _telemetry_record_lens(record)
    if _command_source_for_workflow(fields.workflow):
        return "extension_commands.core_behavior.v1"
    if fields.agent:
        agent_slug = fields.agent.replace("-", "_")
        return f"{agent_slug}.core_behavior.v1"
    return "extension_skills.core_behavior.v1"
1346
+
1347
+
1348
+ def _suggested_assertions(signal: str) -> list[dict[str, Any]]:
1349
+ shared_block = [
1350
+ {"op": "path_equals", "path": "status", "value": "blocked"},
1351
+ {"op": "path_present", "path": "next_action"},
1352
+ ]
1353
+ mapping: dict[str, list[dict[str, Any]]] = {
1354
+ "agent.retry_loop": shared_block
1355
+ + [
1356
+ {"op": "path_present", "path": "diagnostic_context.agent_behavior_context.codes"},
1357
+ {"op": "path_present", "path": "error_context.retry_scope"},
1358
+ ],
1359
+ "agent.retry_without_input_change": shared_block
1360
+ + [
1361
+ {"op": "path_equals", "path": "blocked_reason", "value": "retry_without_input_change"},
1362
+ {"op": "path_present", "path": "error_context.input_hash"},
1363
+ ],
1364
+ "agent.ignored_next_action": shared_block
1365
+ + [
1366
+ {"op": "path_present", "path": "next_action_expected"},
1367
+ {"op": "path_equals", "path": "followed_next_action", "value": True},
1368
+ ],
1369
+ "agent.wrong_phase": shared_block
1370
+ + [
1371
+ {"op": "path_present", "path": "expected_phase"},
1372
+ {"op": "path_equals", "path": "mutated", "value": False},
1373
+ ],
1374
+ "agent.generated_script_workaround": shared_block
1375
+ + [
1376
+ {"op": "path_equals", "path": "used_official_recovery_command", "value": True},
1377
+ {"op": "path_equals", "path": "unsafe_workaround_created", "value": False},
1378
+ ],
1379
+ "agent.unsafe_generated_script_recovery_bypass": shared_block
1380
+ + [
1381
+ {"op": "path_equals", "path": "used_official_recovery_command", "value": True},
1382
+ {"op": "path_equals", "path": "unsafe_workaround_created", "value": False},
1383
+ ],
1384
+ "agent.missing_error_context": shared_block
1385
+ + [
1386
+ {"op": "path_present", "path": "error_context.cause"},
1387
+ {"op": "path_present", "path": "error_context.retry_scope"},
1388
+ ],
1389
+ "agent.script_or_prompt_drift": shared_block
1390
+ + [
1391
+ {"op": "path_equals", "path": "drift_classified", "value": True},
1392
+ {"op": "path_present", "path": "recovery_command"},
1393
+ ],
1394
+ "extension_prompt_or_script_drift": shared_block
1395
+ + [
1396
+ {"op": "path_equals", "path": "drift_classified", "value": True},
1397
+ {"op": "path_present", "path": "recovery_command"},
1398
+ ],
1399
+ "resource.version_control_policy_bypassed": [
1400
+ {"op": "path_equals", "path": "status", "value": "blocked"},
1401
+ {"op": "path_present", "path": "version_control_safety"},
1402
+ {"op": "path_equals", "path": "version_control_safety.mutation_without_guard", "value": False},
1403
+ {"op": "path_equals", "path": "version_control_safety.run_start_seen", "value": True},
1404
+ {"op": "path_equals", "path": "version_control_safety.run_finish_seen", "value": True},
1405
+ ],
1406
+ "resource.guard_missing": [
1407
+ {"op": "path_equals", "path": "status", "value": "blocked"},
1408
+ {"op": "path_equals", "path": "blocked_reason", "value": "vault_guard_required"},
1409
+ {"op": "path_equals", "path": "version_control_safety.mutation_without_guard", "value": False},
1410
+ {"op": "path_present", "path": "recovery_command"},
1411
+ ],
1412
+ "resource.run_finish_missing": [
1413
+ {"op": "path_equals", "path": "status", "value": "blocked"},
1414
+ {"op": "path_equals", "path": "version_control_safety.run_start_seen", "value": True},
1415
+ {"op": "path_equals", "path": "version_control_safety.run_finish_seen", "value": True},
1416
+ {"op": "path_present", "path": "version_control_safety.restore_point_after"},
1417
+ ],
1418
+ "resource.restore_point_after_mutation": [
1419
+ {"op": "path_equals", "path": "status", "value": "blocked"},
1420
+ {"op": "path_equals", "path": "version_control_safety.restore_point_before", "value": True},
1421
+ {"op": "path_equals", "path": "version_control_safety.restore_point_after", "value": True},
1422
+ ],
1423
+ "resource.direct_mutation_attempt": [
1424
+ {"op": "path_equals", "path": "status", "value": "blocked"},
1425
+ {"op": "path_equals", "path": "blocked_reason", "value": "direct_mutation_forbidden"},
1426
+ {"op": "path_equals", "path": "version_control_safety.direct_mutation_forbidden", "value": True},
1427
+ {"op": "path_present", "path": "recovery_command"},
1428
+ ],
1429
+ "agent.dry_run_without_apply": [
1430
+ {"op": "path_in", "path": "status", "value": ["ready_to_apply", "blocked", "discarded"]},
1431
+ {"op": "path_present", "path": "next_action"},
1432
+ {"op": "path_equals", "path": "dry_run_called_completed", "value": False},
1433
+ ],
1434
+ "dry_run_without_apply": [
1435
+ {"op": "path_in", "path": "status", "value": ["ready_to_apply", "blocked", "discarded"]},
1436
+ {"op": "path_present", "path": "next_action"},
1437
+ {"op": "path_equals", "path": "dry_run_called_completed", "value": False},
1438
+ ],
1439
+ }
1440
+ return mapping.get(
1441
+ signal,
1442
+ shared_block
1443
+ + [
1444
+ {"op": "path_present", "path": "diagnostic_context.root_cause_code"},
1445
+ {"op": "path_present", "path": "error_context.next_action"},
1446
+ ],
1447
+ )
1448
+
1449
+
1450
+ def _promotion_checklist(signal: str) -> list[str]:
1451
+ return [
1452
+ "Confirmar que a evidência está redigida e não contém conteúdo clínico bruto, HTML, tokens ou chaves.",
1453
+ "Escolher a suite final e criar output fixture que reproduza o comportamento corrigido.",
1454
+ "Manter ao menos duas assertions fortes e promover baseline somente após o corpus passar.",
1455
+ f"Verificar que o caso falharia antes da correção do prompt para {signal}.",
1456
+ ]
1457
+
1458
+
1459
+ def _slug(value: str) -> str:
1460
+ slug = re.sub(r"[^a-zA-Z0-9]+", "-", value.lower()).strip("-")
1461
+ return slug[:80] or "telemetry"
1462
+
1463
+
1464
def _draft_date(record: dict[str, Any], envelope: dict[str, Any]) -> str:
    """Pick the ``YYYY-MM-DD`` prefix used in a draft file name.

    The record's ``recorded_at`` is preferred, then the envelope's
    ``generated_at`` (only when it is a string). If neither starts with an
    ISO-style date, today's UTC date is used.
    """
    recorded_at = _telemetry_record_lens(record).recorded_at
    raw_generated_at = envelope.get("generated_at")
    generated_at = raw_generated_at if isinstance(raw_generated_at, str) else ""

    iso_day = re.compile(r"\d{4}-\d{2}-\d{2}")
    for stamp in (recorded_at, generated_at):
        if iso_day.match(stamp):
            # Keep only the date part of the timestamp.
            return stamp[:10]
    return datetime.now(UTC).date().isoformat()
1471
+
1472
+
1473
+ def _unique_draft_output_path(output_dir: Path, stem: str, reserved: set[Path]) -> Path:
1474
+ output_path = output_dir / f"{stem}.json"
1475
+ suffix = 2
1476
+ while output_path.exists() or output_path in reserved:
1477
+ output_path = output_dir / f"{stem}-{suffix}.json"
1478
+ suffix += 1
1479
+ reserved.add(output_path)
1480
+ return output_path
1481
+
1482
+
1483
def _draft_for_signal(
    *,
    record: dict[str, Any],
    envelope: dict[str, Any],
    signal: str,
    source_path: Path,
) -> dict[str, Any]:
    """Assemble the draft behavior case for one ``signal`` of a telemetry record.

    The draft always has ``status`` ``"draft"`` and ``source`` ``"telemetry"``.
    Everything else (app, version, severity, suite, suspects, evidence,
    assertions, checklist) is delegated to the module's helper functions.
    """
    lens = _telemetry_record_lens(record)
    workflow_name = lens.workflow
    suggested_sources = _suggested_prompt_sources(workflow_name)
    prompts = _suspect_prompts_from_sources(suggested_sources, signal=signal)
    scripts = _suspect_scripts_from_record(record)

    # Built in stages; the key order matches the serialized draft layout.
    draft: dict[str, Any] = {
        "schema": AGENT_BEHAVIOR_CASE_DRAFT_SCHEMA,
        "status": "draft",
        "source": "telemetry",
    }
    draft["app"] = _record_app(record, envelope)
    draft["app_version"] = _record_app_version(record, envelope)
    draft["workflow"] = _clean_text(workflow_name)
    draft["phase"] = _clean_text(lens.phase)
    draft["signal"] = signal
    draft["severity"] = _severity_for_signal(record, signal)
    draft["target_suite"] = _target_suite(record)
    draft["prompt_sources_suggested"] = suggested_sources
    draft["suspect_prompts"] = prompts
    draft["suspect_scripts"] = scripts
    draft["prevention_owner_note"] = _prevention_owner_note(prompts=prompts, scripts=scripts)
    draft["redacted_evidence"] = _redacted_evidence(record, envelope, signal=signal, source_path=source_path)
    draft["suggested_assertions"] = _suggested_assertions(signal)
    draft["promotion_checklist"] = _promotion_checklist(signal)
    return draft
1514
+
1515
+
1516
def suggest_agent_behavior_cases_from_telemetry(
    input_path: Path,
    *,
    output_dir: Path,
    app: str = DEFAULT_TELEMETRY_APP,
    app_version: str | None = None,
    min_severity: str = "medium",
) -> dict[str, Any]:
    """Create reviewable behavior-corpus draft cases from redacted telemetry JSON.

    Each telemetry record is filtered by ``app`` and, when given,
    ``app_version``; records with no signal passing ``min_severity`` are
    counted as skipped. One draft JSON file is written per selected signal
    into ``output_dir``, which is created only when there is something to write.

    Returns:
        A report dict with the schema id, a ``status`` of ``"drafts_created"``
        or ``"no_drafts"``, the filters used, aggregate counts, a summary of
        each draft written, and a ``next_action`` hint (empty when no draft
        was produced).
    """
    drafts: list[dict[str, Any]] = []
    reserved_paths: set[Path] = set()
    skipped = 0
    for record, envelope, source_path in _telemetry_records(input_path):
        if _record_app(record, envelope) != app:
            skipped += 1
            continue
        if app_version and _record_app_version(record, envelope) != app_version:
            skipped += 1
            continue
        selected = [
            signal
            for signal in _signals_for_record(record)
            if _passes_min_severity(record, signal, min_severity)
        ]
        if not selected:
            skipped += 1
            continue
        # These depend only on the record, so compute them once per record
        # instead of once per selected signal.
        date_prefix = _draft_date(record, envelope)
        workflow_slug = _slug(_telemetry_record_lens(record).workflow or "workflow")
        for signal in selected:
            draft = _draft_for_signal(record=record, envelope=envelope, signal=signal, source_path=source_path)
            output_path = _unique_draft_output_path(
                output_dir,
                f"{date_prefix}-{_slug(signal)}-{workflow_slug}",
                reserved_paths,
            )
            drafts.append({"path": str(output_path), "draft": draft})
    if drafts:
        output_dir.mkdir(parents=True, exist_ok=True)
        for item in drafts:
            Path(item["path"]).write_text(json.dumps(item["draft"], ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
    return {
        "schema": AGENT_BEHAVIOR_CASE_DRAFT_REPORT_SCHEMA,
        "status": "drafts_created" if drafts else "no_drafts",
        "app": app,
        "app_version": app_version or "",
        "min_severity": min_severity,
        "aggregate": {
            "draft_count": len(drafts),
            "skipped_record_count": skipped,
        },
        "drafts": [
            {
                "path": item["path"],
                "signal": item["draft"]["signal"],
                "target_suite": item["draft"]["target_suite"],
                "app_version": item["draft"]["app_version"],
            }
            for item in drafts
        ],
        "next_action": "review drafts, promote selected cases into a corpus suite, then rerun eval-agent-behavior-corpus"
        if drafts
        else "",
    }
1579
+
1580
+
1581
def _looks_like_telemetry_payload(payload: Any) -> bool:
    """Tell whether ``payload`` should be handled by the telemetry pipeline.

    Only dicts qualify. A dict is telemetry when its schema id contains one of
    the telemetry schema markers, or when its lens exposes a non-empty
    ``records`` collection.
    """
    if not isinstance(payload, dict):
        return False
    lens = _telemetry_payload_lens(payload)
    schema_id = lens.schema_id
    schema_markers = (".workflow-telemetry-envelope.", ".workflow-run-record.")
    if any(marker in schema_id for marker in schema_markers):
        return True
    return bool(lens.records)
1587
+
1588
+
1589
+ def _json_blocks_from_markdown(text: str) -> list[Any]:
1590
+ payloads: list[Any] = []
1591
+ stripped = text.strip()
1592
+ if stripped.startswith(("{", "[")):
1593
+ try:
1594
+ payloads.append(json.loads(stripped))
1595
+ except json.JSONDecodeError:
1596
+ pass
1597
+ for match in re.finditer(r"```(?:json)?\s*(.*?)```", text, flags=re.S | re.I):
1598
+ block = match.group(1).strip()
1599
+ if not block:
1600
+ continue
1601
+ try:
1602
+ payloads.append(json.loads(block))
1603
+ except json.JSONDecodeError:
1604
+ continue
1605
+ return payloads
1606
+
1607
+
1608
def _candidate_payloads(payload: Any) -> list[JsonObject]:
    """Flatten a report payload into a list of validated candidate objects.

    For a dict payload, candidates come from its top-level
    ``behavior_case_candidates`` and ``first_pass_prevention_candidates`` and
    from the same two collections on each entry of ``messages``. Prevention
    candidates default ``case_kind`` to ``"first_pass_prevention"``;
    message-level candidates default ``source_message_id`` and ``source_kind``
    from their message. For a list payload, only dict items with a truthy
    ``signal`` are kept. Any other payload yields an empty list.
    """

    def _with_defaults(item: Any, defaults: dict[str, Any]) -> JsonObject:
        # Copy first so the caller's object is never mutated by setdefault.
        merged = dict(item)
        for name, fallback in defaults.items():
            merged.setdefault(name, fallback)
        return JsonObjectAdapter.validate_python(merged)

    if isinstance(payload, list):
        return [
            JsonObjectAdapter.validate_python(entry)
            for entry in payload
            if isinstance(entry, dict) and entry.get("signal")
        ]
    if not isinstance(payload, dict):
        return []

    collected: list[JsonObject] = []
    top = _BehaviorCandidatePayloadLens.model_validate(payload)
    for entry in top.behavior_case_candidates:
        collected.append(JsonObjectAdapter.validate_python(entry))
    for entry in top.first_pass_prevention_candidates:
        collected.append(_with_defaults(entry, {"case_kind": "first_pass_prevention"}))
    for raw_message in top.messages:
        message = _BehaviorCandidateMessageLens.model_validate(raw_message)
        for entry in message.behavior_case_candidates:
            collected.append(
                _with_defaults(
                    entry,
                    {"source_message_id": message.id, "source_kind": message.source_kind},
                )
            )
        for entry in message.first_pass_prevention_candidates:
            collected.append(
                _with_defaults(
                    entry,
                    {
                        "case_kind": "first_pass_prevention",
                        "source_message_id": message.id,
                        "source_kind": message.source_kind,
                    },
                )
            )
    return collected
1633
+
1634
+
1635
+ def _sanitize_evidence(value: Any) -> Any:
1636
+ if isinstance(value, dict):
1637
+ sanitized: dict[str, Any] = {}
1638
+ for key, item in value.items():
1639
+ lower = str(key).lower()
1640
+ if any(token in lower for token in ("content", "body", "html", "markdown", "raw", "token", "secret", "api_key", "script")):
1641
+ continue
1642
+ sanitized[str(key)] = _sanitize_evidence(item)
1643
+ return {key: item for key, item in sanitized.items() if item not in ("", [], {})}
1644
+ if isinstance(value, list):
1645
+ return [_sanitize_evidence(item) for item in value if _sanitize_evidence(item) not in ("", [], {})]
1646
+ if isinstance(value, str):
1647
+ return _clean_text(value, max_chars=700)
1648
+ return value
1649
+
1650
+
1651
+ def _candidate_text_list(candidate: dict[str, Any], key: str) -> list[str]:
1652
+ value = candidate.get(key)
1653
+ if isinstance(value, list):
1654
+ return [_clean_text(item) for item in value if _clean_text(item)]
1655
+ if isinstance(value, str) and value.strip():
1656
+ return [_clean_text(value)]
1657
+ return []
1658
+
1659
+
1660
def _candidate_count_map(candidate: JsonObject, key: str) -> dict[str, int]:
    """Read ``candidate[key]`` as a mapping of cleaned names to integer counts.

    A non-dict value yields an empty mapping. Entries whose cleaned name is
    empty are skipped. A count that cannot be converted with ``int()`` falls
    back to ``1``.
    """
    value = candidate.get(key)
    if not isinstance(value, dict):
        return {}
    counts: dict[str, int] = {}
    for raw_key, raw_count in value.items():
        name = _clean_text(raw_key)
        if not name:
            continue
        try:
            counts[name] = int(raw_count)
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers float infinity, which json.loads accepts
            # as ``Infinity``; treat it like any other unparseable count.
            counts[name] = 1
    return counts
1674
+
1675
+
1676
+ def _candidate_signal(candidate: dict[str, Any]) -> str:
1677
+ signal = str(candidate.get("signal") or candidate.get("root_cause") or candidate.get("root_cause_code") or "")
1678
+ if signal:
1679
+ return signal
1680
+ evidence = json.dumps(candidate, ensure_ascii=False).lower()
1681
+ if "retry loop" in evidence or ("loop" in evidence and "retry" in evidence):
1682
+ return "agent.retry_loop"
1683
+ if "ignored next_action" in evidence or "ignorou next_action" in evidence:
1684
+ return "agent.ignored_next_action"
1685
+ if "wrong phase" in evidence or "fase errada" in evidence:
1686
+ return "agent.wrong_phase"
1687
+ if "generated script" in evidence or "script gerado" in evidence:
1688
+ return "agent.generated_script_workaround"
1689
+ if "missing error_context" in evidence or "sem error_context" in evidence:
1690
+ return "agent.missing_error_context"
1691
+ if "dry-run" in evidence and "apply" in evidence:
1692
+ return "dry_run_without_apply"
1693
+ return "agent.workflow_blocked"
1694
+
1695
+
1696
+ def _candidate_workflow(candidate: JsonObject) -> str:
1697
+ raw_workflow = candidate.get("workflow")
1698
+ workflow = raw_workflow.strip() if isinstance(raw_workflow, str) else ""
1699
+ if workflow:
1700
+ return workflow
1701
+ text = json.dumps(candidate, ensure_ascii=False)
1702
+ match = re.search(r"/(?:mednotes:[a-z0-9_-]+|flashcards)", text, flags=re.I)
1703
+ return match.group(0) if match else ""
1704
+
1705
+
1706
+ def _candidate_app_version(candidate: dict[str, Any]) -> str:
1707
+ for key in ("app_version", "version"):
1708
+ if str(candidate.get(key) or ""):
1709
+ return str(candidate[key])
1710
+ text = json.dumps(candidate, ensure_ascii=False)
1711
+ match = re.search(r"(?:app[_ ]version|vers[aã]o)\s*[:=` ]+\s*([0-9]+(?:\.[0-9]+){1,3})", text, flags=re.I)
1712
+ return match.group(1) if match else "unknown"
1713
+
1714
+
1715
def _draft_from_candidate(
    candidate: dict[str, Any],
    *,
    source_path: Path,
    confidence: str,
) -> dict[str, Any]:
    """Turn a structured or inferred candidate into a draft behavior case.

    Values supplied by the candidate are used when they have the expected
    shape; otherwise they are derived from the resolved signal and workflow.
    The evidence stored in the draft is always sanitized and always carries
    ``source_path`` and ``signal``. Candidates whose ``case_kind`` is
    ``"first_pass_prevention"`` get an extra ``first_pass_prevention`` section
    with empty values removed.
    """
    pick = candidate.get
    signal = _candidate_signal(candidate)
    workflow = _candidate_workflow(candidate)
    source = str(pick("source_kind") or pick("source") or "agent_report")

    # Prefer an explicit redacted_evidence dict; fall back to the whole candidate.
    declared_evidence = pick("redacted_evidence")
    evidence = declared_evidence if isinstance(declared_evidence, dict) else {}
    sanitized_evidence = _sanitize_evidence(evidence or candidate)
    if not isinstance(sanitized_evidence, dict):
        sanitized_evidence = {
            "summary": _clean_text(sanitized_evidence),
            "source_path": _serialized_evidence_source_path(source_path),
            "signal": signal,
        }
    else:
        sanitized_evidence.setdefault("source_path", _serialized_evidence_source_path(source_path))
        sanitized_evidence.setdefault("signal", signal)

    # Candidate assertions are accepted only as a list made solely of dicts.
    assertions = pick("suggested_assertions")
    assertions_usable = isinstance(assertions, list) and all(isinstance(item, dict) for item in assertions)
    if not assertions_usable:
        assertions = _suggested_assertions(signal)

    # First list found wins; otherwise derive the sources from the workflow.
    prompt_sources = next(
        (
            option
            for option in (pick("prompt_sources_suggested"), pick("prompt_surface"))
            if isinstance(option, list)
        ),
        None,
    )
    if prompt_sources is None:
        prompt_sources = _suggested_prompt_sources(workflow)
    prompt_sources = [str(item) for item in prompt_sources if str(item)]

    suspect_prompts = _surface_items(pick("suspect_prompts"), kind="prompt")
    if not suspect_prompts:
        suspect_prompts = _suspect_prompts_from_sources(prompt_sources, signal=signal)
    suspect_scripts = _surface_items(pick("suspect_scripts"), kind="script")

    target_suite = str(pick("target_suite") or "")
    if not target_suite:
        if _command_source_for_workflow(workflow):
            target_suite = "extension_commands.core_behavior.v1"
        else:
            target_suite = "extension_skills.core_behavior.v1"

    draft = {
        "schema": AGENT_BEHAVIOR_CASE_DRAFT_SCHEMA,
        "status": "draft",
        "source": source,
        "confidence": confidence,
        "case_kind": str(pick("case_kind") or "behavior_regression"),
        "app": str(pick("app") or DEFAULT_TELEMETRY_APP),
        "app_version": _candidate_app_version(candidate),
        "workflow": _clean_text(workflow),
        "phase": _clean_text(pick("phase")),
        "signal": signal,
        "severity": str(pick("severity") or DEFAULT_SIGNAL_SEVERITY.get(signal, "medium")),
        "target_suite": target_suite,
        "prompt_sources_suggested": prompt_sources,
        "suspect_prompts": suspect_prompts,
        "suspect_scripts": suspect_scripts,
        "prevention_owner_note": _prevention_owner_note(prompts=suspect_prompts, scripts=suspect_scripts),
        "redacted_evidence": sanitized_evidence,
        "suggested_assertions": assertions,
        "promotion_checklist": _promotion_checklist(signal),
    }

    if draft["case_kind"] == "first_pass_prevention":
        is_recovery_only = (
            str(pick("optimization_class") or "").lower() == "recovery_governance"
            or _clean_text(pick("prevention_type")) == "recovery_only"
        )
        prevention = {
            "prevention_type": _clean_text(pick("prevention_type")),
            "optimization_class": _clean_text(pick("optimization_class") or "first_pass_prevention"),
            "first_pass_failure_mode": _clean_text(pick("first_pass_failure_mode"), max_chars=700),
            "bad_artifact_type": _clean_text(pick("bad_artifact_type")),
            "failure_facets": _candidate_text_list(candidate, "failure_facets"),
            "suspected_upstream_prompt_source": _candidate_text_list(candidate, "suspected_upstream_prompt_source"),
            "desired_first_pass_behavior": _clean_text(pick("desired_first_pass_behavior"), max_chars=700),
            "recommended_prompt_change": _clean_text(pick("recommended_prompt_change"), max_chars=700),
            "recommended_contract_change": _clean_text(pick("recommended_contract_change"), max_chars=700),
            "suggested_fixture": _clean_text(pick("suggested_fixture")),
            "root_cause_counts": _candidate_count_map(candidate, "root_cause_counts"),
            "workflow_counts": _candidate_count_map(candidate, "workflow_counts"),
            "example_records": _sanitize_evidence(pick("example_records") or []),
            "prompt_optimization_ready": bool(prompt_sources and assertions),
            "recovery_only": is_recovery_only,
        }
        # Empty strings, lists and dicts are omitted; booleans are kept.
        draft["first_pass_prevention"] = {
            name: entry for name, entry in prevention.items() if entry not in ("", [], {})
        }

    message_id = str(pick("source_message_id") or "")
    if message_id:
        draft["source_message_id"] = message_id
    return draft
1797
+
1798
+
1799
+ def _freeform_mentions_workbench(text: str) -> bool:
1800
+ lowered = text.lower()
1801
+ return any(token in lowered for token in ("medical-notes-workbench", "wiki_medicina", "/mednotes:", "linker", "workbench"))
1802
+
1803
+
1804
def _freeform_candidate(text: str, *, source_kind: str) -> dict[str, Any] | None:
    """Infer a single candidate from freeform text, or return ``None``.

    ``None`` is returned when the text does not mention the workbench or when
    no known English/Portuguese failure phrase is found. Otherwise the result
    carries the inferred signal and its default severity, the workflow command
    and app version found in the text (``""`` / ``"unknown"`` when absent),
    and a cleaned summary of the text capped at 700 characters.
    """
    if not _freeform_mentions_workbench(text):
        return None
    haystack = text.lower()

    def has_any(*phrases: str) -> bool:
        return any(phrase in haystack for phrase in phrases)

    # Ordered: the first matching rule decides the signal.
    if has_any("retry loop", "repetiu diagnóstico") or ("retry" in haystack and "loop" in haystack):
        signal = "agent.retry_loop"
    elif has_any("ignored next_action", "ignorou next_action"):
        signal = "agent.ignored_next_action"
    elif has_any("wrong phase", "fase errada"):
        signal = "agent.wrong_phase"
    elif has_any("generated script", "script gerado", "criou script"):
        signal = "agent.generated_script_workaround"
    elif has_any("missing error_context", "sem error_context"):
        signal = "agent.missing_error_context"
    else:
        return None

    workflow_hit = re.search(r"/(?:mednotes:[a-z0-9_-]+|flashcards)", text, flags=re.I)
    version_hit = re.search(
        r"(?:app[_ ]version|vers[aã]o)\s*[:=` ]+\s*([0-9]+(?:\.[0-9]+){1,3})",
        text,
        flags=re.I,
    )
    app_version = version_hit.group(1) if version_hit else "unknown"
    workflow = workflow_hit.group(0) if workflow_hit else ""
    return {
        "source_kind": source_kind,
        "app_version": app_version,
        "workflow": workflow,
        "signal": signal,
        "severity": DEFAULT_SIGNAL_SEVERITY.get(signal, "medium"),
        "redacted_evidence": {"summary": _clean_text(text, max_chars=700)},
    }
1831
+
1832
+
1833
def _draft_items_from_evidence_payload(
    payload: Any,
    *,
    source_path: Path,
    source_kind: str,
) -> tuple[list[dict[str, Any]], str]:
    """Build drafts from one evidence payload and report how they were found.

    Returns ``(drafts, mode)`` where ``mode`` is:

    * ``"structured_candidates"``: the payload carried explicit candidates,
      and each becomes a medium-confidence draft;
    * ``"freeform_inference"``: one low-confidence draft inferred from the
      payload's text (the payload itself if it is a string, else its JSON
      serialization);
    * ``"no_candidate_signal"``: nothing usable was found, and ``drafts`` is
      empty.
    """
    structured = _candidate_payloads(payload)
    if structured:
        structured_drafts = [
            _draft_from_candidate(entry, source_path=source_path, confidence="medium")
            for entry in structured
        ]
        return structured_drafts, "structured_candidates"

    text = payload if isinstance(payload, str) else json.dumps(payload, ensure_ascii=False)
    inferred = _freeform_candidate(text, source_kind=source_kind)
    if not inferred:
        return [], "no_candidate_signal"
    return [_draft_from_candidate(inferred, source_path=source_path, confidence="low")], "freeform_inference"
1853
+
1854
+
1855
class _DraftReportAggregateFields(ContractModel):
    """Aggregate counters read from a draft-generation report.

    Unknown keys are ignored. Both counters default to zero, must be
    non-negative, and are validated strictly.
    """

    model_config = ConfigDict(extra="ignore")

    # Number of draft files the report says were produced.
    draft_count: int = Field(default=0, strict=True, ge=0)
    # Number of input records the report says were skipped.
    skipped_record_count: int = Field(default=0, strict=True, ge=0)
1862
+
1863
+
1864
class _DraftReportFields(ContractModel):
    """Typed view over a draft-generation report, used before merging reports.

    Unknown keys are ignored; only the aggregate counters and the list of
    draft summaries are exposed.
    """

    model_config = ConfigDict(extra="ignore")

    # Defaults to an aggregate with all counters at zero.
    aggregate: _DraftReportAggregateFields = Field(default_factory=_DraftReportAggregateFields)
    # One JSON object per draft; empty when the report lists none.
    drafts: list[JsonObject] = Field(default_factory=list)
1871
+
1872
+
1873
def _write_draft_items(
    draft_payloads: list[dict[str, Any]],
    *,
    output_dir: Path,
    source_path: Path,
    app: str,
    app_version: str | None,
    min_severity: str,
    skipped: int,
    mode: str,
) -> dict[str, Any]:
    """Filter drafts, write the survivors to ``output_dir`` and build the report.

    A draft is skipped when its ``app`` differs from ``app``, when
    ``app_version`` is given and differs, or when its severity ranks below
    ``min_severity`` (an unknown ``min_severity`` is treated as rank 2, an
    unknown or missing draft severity as rank 0). ``skipped`` is the number of
    records already skipped by the caller and is added to the reported total.

    File names are ``<utc-date>-<signal-slug>-<workflow-slug>.json``, using
    ``source_path.stem`` when a draft has no workflow. ``output_dir`` is
    created only when at least one draft is written.

    Returns:
        The report dict: schema id, status, the filters used, ``mode``,
        aggregate counts, a summary per written draft and a ``next_action``
        hint (empty when nothing was written).
    """
    # Loop-invariant values: resolve them once so every file written by this
    # call shares the same date prefix and severity threshold.
    minimum_rank = SEVERITY_RANK.get(min_severity, 2)
    date_prefix = datetime.now(UTC).date().isoformat()

    drafts: list[dict[str, Any]] = []
    reserved_paths: set[Path] = set()
    skipped_total = skipped
    for draft in draft_payloads:
        if draft.get("app") != app:
            skipped_total += 1
            continue
        if app_version and draft.get("app_version") != app_version:
            skipped_total += 1
            continue
        if SEVERITY_RANK.get(str(draft.get("severity") or "low"), 0) < minimum_rank:
            skipped_total += 1
            continue
        signal_slug = _slug(str(draft.get("signal") or "evidence"))
        workflow_slug = _slug(str(draft.get("workflow") or source_path.stem))
        output_path = _unique_draft_output_path(
            output_dir,
            f"{date_prefix}-{signal_slug}-{workflow_slug}",
            reserved_paths,
        )
        drafts.append({"path": str(output_path), "draft": draft})
    if drafts:
        output_dir.mkdir(parents=True, exist_ok=True)
        for item in drafts:
            Path(item["path"]).write_text(json.dumps(item["draft"], ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
    return {
        "schema": AGENT_BEHAVIOR_CASE_DRAFT_REPORT_SCHEMA,
        "status": "drafts_created" if drafts else "no_drafts",
        "app": app,
        "app_version": app_version or "",
        "min_severity": min_severity,
        "mode": mode,
        "aggregate": {
            "draft_count": len(drafts),
            "skipped_record_count": skipped_total,
        },
        "drafts": [
            {
                "path": item["path"],
                "signal": item["draft"]["signal"],
                "target_suite": item["draft"]["target_suite"],
                "app_version": item["draft"]["app_version"],
                "source": item["draft"].get("source", ""),
                "confidence": item["draft"].get("confidence", ""),
            }
            for item in drafts
        ],
        "next_action": "review drafts, promote selected cases into a corpus suite, then rerun eval-agent-behavior-corpus"
        if drafts
        else "",
    }
1935
+
1936
+
1937
def _merge_existing_draft_report(result: JsonObject, existing_drafts: list[JsonObject]) -> JsonObject:
    """Fold already-written drafts into a report without mutating ``result``.

    ``existing_drafts`` are listed before the report's own drafts. The merged
    copy always has status ``"drafts_created"``, a ``draft_count`` equal to
    the combined list length, and the standard review ``next_action``.
    """
    report = _DraftReportFields.model_validate(result)
    combined = [*existing_drafts, *report.drafts]
    aggregate = {
        **report.aggregate.model_dump(mode="json"),
        "draft_count": len(combined),
    }
    merged = {
        **result,
        "status": "drafts_created",
        "aggregate": aggregate,
        "drafts": combined,
        "next_action": "review drafts, promote selected cases into a corpus suite, then rerun eval-agent-behavior-corpus",
    }
    return JsonObjectAdapter.validate_python(merged)
1955
+
1956
+
1957
def _draft_items_from_evidence_text(
    text: str,
    *,
    source_path: Path,
    block_source_kind: str,
    freeform_source_kind: str,
) -> tuple[list[dict[str, Any]], str]:
    """Extract drafts from an evidence document that is not a plain JSON payload.

    Every JSON block embedded in ``text`` is tried first; if none yields a
    draft, a single low-confidence draft is inferred from the raw text.
    Returns ``(drafts, mode)``; ``mode`` is ``"freeform_inference"`` unless an
    embedded block produced drafts, in which case it is that block's mode (the
    last such block wins).
    """
    items: list[dict[str, Any]] = []
    mode = "freeform_inference"
    for payload in _json_blocks_from_markdown(text):
        block_items, block_mode = _draft_items_from_evidence_payload(
            payload,
            source_path=source_path,
            source_kind=block_source_kind,
        )
        if block_items:
            mode = block_mode
        items.extend(block_items)
    if not items:
        candidate = _freeform_candidate(text, source_kind=freeform_source_kind)
        if candidate:
            items.append(_draft_from_candidate(candidate, source_path=source_path, confidence="low"))
    return items, mode


def suggest_agent_behavior_cases_from_evidence(
    input_path: Path,
    *,
    output_dir: Path,
    app: str = DEFAULT_TELEMETRY_APP,
    app_version: str | None = None,
    min_severity: str = "medium",
    source_kind: str = "auto",
) -> dict[str, Any]:
    """Create reviewable behavior-corpus drafts from telemetry, reports, manifests, or freeform evidence.

    ``input_path`` may be a single file or a directory. Telemetry payloads are
    delegated to ``suggest_agent_behavior_cases_from_telemetry``; other
    payloads go through structured-candidate extraction with a freeform
    fallback. When reading or handling a file raises ``ValidationError``, the
    file is re-read as text and scanned for embedded JSON blocks and key
    phrases.

    With ``source_kind="auto"`` drafts are labelled ``"agent_report"``, except
    those coming from JSON blocks embedded in a text document, which are
    labelled ``"inbox_report"``. Any other value is used as given.

    Returns:
        The draft-generation report. For a directory, drafts written by the
        telemetry path are merged into the directory-level report.
    """
    report_kind = "agent_report" if source_kind == "auto" else source_kind
    inbox_kind = "inbox_report" if source_kind == "auto" else source_kind

    if input_path.is_dir():
        draft_payloads: list[dict[str, Any]] = []
        existing_drafts: list[dict[str, Any]] = []
        skipped = 0
        modes: set[str] = set()
        for path in _evidence_payload_files(input_path):
            try:
                payload = _read_json_any(path)
                if _looks_like_telemetry_payload(payload):
                    # The telemetry path writes its own files; only its report
                    # is folded into the directory-level result.
                    telemetry_result = suggest_agent_behavior_cases_from_telemetry(
                        path,
                        output_dir=output_dir,
                        app=app,
                        app_version=app_version,
                        min_severity=min_severity,
                    )
                    telemetry_fields = _DraftReportFields.model_validate(telemetry_result)
                    modes.add("telemetry")
                    skipped += telemetry_fields.aggregate.skipped_record_count
                    existing_drafts.extend(telemetry_fields.drafts)
                    continue
                items, mode = _draft_items_from_evidence_payload(
                    payload,
                    source_path=path,
                    source_kind=report_kind,
                )
            except ValidationError:
                items, mode = _draft_items_from_evidence_text(
                    path.read_text(encoding="utf-8"),
                    source_path=path,
                    block_source_kind=inbox_kind,
                    freeform_source_kind=report_kind,
                )
            if items:
                draft_payloads.extend(items)
                modes.add(mode)
        result = _write_draft_items(
            draft_payloads,
            output_dir=output_dir,
            source_path=input_path,
            app=app,
            app_version=app_version,
            min_severity=min_severity,
            skipped=skipped,
            mode="+".join(sorted(modes)) if modes else "no_candidate_signal",
        )
        if existing_drafts:
            result = _merge_existing_draft_report(JsonObjectAdapter.validate_python(result), existing_drafts)
        return result

    try:
        payload = _read_json_any(input_path)
        if _looks_like_telemetry_payload(payload):
            return suggest_agent_behavior_cases_from_telemetry(
                input_path,
                output_dir=output_dir,
                app=app,
                app_version=app_version,
                min_severity=min_severity,
            )
        draft_payloads, mode = _draft_items_from_evidence_payload(
            payload,
            source_path=input_path,
            source_kind=report_kind,
        )
    except ValidationError:
        draft_payloads, mode = _draft_items_from_evidence_text(
            input_path.read_text(encoding="utf-8"),
            source_path=input_path,
            block_source_kind=inbox_kind,
            freeform_source_kind=report_kind,
        )
    # ``mode`` is bound on every path that reaches this point: the try block
    # either returns early or assigns it, and the handler assigns it too.
    return _write_draft_items(
        draft_payloads,
        output_dir=output_dir,
        source_path=input_path,
        app=app,
        app_version=app_version,
        min_severity=min_severity,
        skipped=0,
        mode=mode,
    )