mednotes-opencode 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (430)
  1. package/.opencode/agents/med-chat-triager.md +204 -0
  2. package/.opencode/agents/med-flashcard-maker.md +63 -0
  3. package/.opencode/agents/med-knowledge-architect.md +230 -0
  4. package/.opencode/agents/med-link-graph-curator.md +177 -0
  5. package/.opencode/agents/med-publish-guard.md +62 -0
  6. package/.opencode/commands/flashcards.md +25 -0
  7. package/.opencode/commands/mednotes/create.md +25 -0
  8. package/.opencode/commands/mednotes/enrich.md +27 -0
  9. package/.opencode/commands/mednotes/fix-wiki.md +27 -0
  10. package/.opencode/commands/mednotes/history.md +22 -0
  11. package/.opencode/commands/mednotes/link-body.md +25 -0
  12. package/.opencode/commands/mednotes/link-related.md +27 -0
  13. package/.opencode/commands/mednotes/link.md +27 -0
  14. package/.opencode/commands/mednotes/pdf-library.md +27 -0
  15. package/.opencode/commands/mednotes/process-chats.md +23 -0
  16. package/.opencode/commands/mednotes/setup.md +21 -0
  17. package/.opencode/commands/mednotes/status.md +27 -0
  18. package/.opencode/commands/mednotes/telemetry.md +27 -0
  19. package/.opencode/commands/report.md +26 -0
  20. package/.opencode/mednotes/AGENTS.md +57 -0
  21. package/.opencode/mednotes/agents/med-chat-triager.md +197 -0
  22. package/.opencode/mednotes/agents/med-flashcard-maker.md +56 -0
  23. package/.opencode/mednotes/agents/med-knowledge-architect.md +224 -0
  24. package/.opencode/mednotes/agents/med-link-graph-curator.md +171 -0
  25. package/.opencode/mednotes/agents/med-publish-guard.md +55 -0
  26. package/.opencode/mednotes/contracts/.gitkeep +1 -0
  27. package/.opencode/mednotes/contracts/agents.json +116 -0
  28. package/.opencode/mednotes/contracts/opencode-plugin.json +70 -0
  29. package/.opencode/mednotes/docs/agent-prompt-hardening.md +567 -0
  30. package/.opencode/mednotes/docs/agent-role-contracts.md +94 -0
  31. package/.opencode/mednotes/docs/anki-mcp-twenty-rules.md +214 -0
  32. package/.opencode/mednotes/docs/anki-templates/README.md +39 -0
  33. package/.opencode/mednotes/docs/anki-templates/cloze.back.html +23 -0
  34. package/.opencode/mednotes/docs/anki-templates/cloze.front.html +14 -0
  35. package/.opencode/mednotes/docs/anki-templates/qa.back.html +24 -0
  36. package/.opencode/mednotes/docs/anki-templates/qa.front.html +14 -0
  37. package/.opencode/mednotes/docs/anki-templates/style.css +182 -0
  38. package/.opencode/mednotes/docs/atomicity-splitting-policy.md +113 -0
  39. package/.opencode/mednotes/docs/extension-docs.md +40 -0
  40. package/.opencode/mednotes/docs/flashcard-ingestion.md +278 -0
  41. package/.opencode/mednotes/docs/knowledge-architect.md +208 -0
  42. package/.opencode/mednotes/docs/merge-policy.md +110 -0
  43. package/.opencode/mednotes/docs/public-vocabulary.md +104 -0
  44. package/.opencode/mednotes/docs/semantic-linker.md +141 -0
  45. package/.opencode/mednotes/docs/taxonomy-policy.md +90 -0
  46. package/.opencode/mednotes/docs/triage-policy.md +187 -0
  47. package/.opencode/mednotes/docs/vault-version-control.md +758 -0
  48. package/.opencode/mednotes/docs/vocabulary-db-recovery.md +58 -0
  49. package/.opencode/mednotes/docs/workflow-output-contract.md +779 -0
  50. package/.opencode/mednotes/hooks/hooks.json +79 -0
  51. package/.opencode/mednotes/package-lock.json +6361 -0
  52. package/.opencode/mednotes/package.json +15 -0
  53. package/.opencode/mednotes/pyproject.toml +48 -0
  54. package/.opencode/mednotes/scripts/bootstrap_windows_python_uv.cmd +13 -0
  55. package/.opencode/mednotes/scripts/bootstrap_windows_python_uv.ps1 +172 -0
  56. package/.opencode/mednotes/scripts/enrich_notes.py +23 -0
  57. package/.opencode/mednotes/scripts/full_reset_windows_python_uv.cmd +13 -0
  58. package/.opencode/mednotes/scripts/hooks/antigravity_hook_status.mjs +212 -0
  59. package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/antigravity.mjs +169 -0
  60. package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/harness_payload.mjs +103 -0
  61. package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/opencode_plugin.mjs +341 -0
  62. package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/opencode_user_config_sync.mjs +177 -0
  63. package/.opencode/mednotes/scripts/hooks/mednotes_hook/anki_preflight.mjs +214 -0
  64. package/.opencode/mednotes/scripts/hooks/mednotes_hook/cli.mjs +143 -0
  65. package/.opencode/mednotes/scripts/hooks/mednotes_hook/diagnostics.mjs +11 -0
  66. package/.opencode/mednotes/scripts/hooks/mednotes_hook/domain/agent_directive_core.mjs +160 -0
  67. package/.opencode/mednotes/scripts/hooks/mednotes_hook/fsm_directive.mjs +1470 -0
  68. package/.opencode/mednotes/scripts/hooks/mednotes_hook/hook_errors.mjs +120 -0
  69. package/.opencode/mednotes/scripts/hooks/mednotes_hook/retention.mjs +114 -0
  70. package/.opencode/mednotes/scripts/hooks/mednotes_hook/runtime.mjs +174 -0
  71. package/.opencode/mednotes/scripts/hooks/mednotes_hook/telemetry_capture.mjs +511 -0
  72. package/.opencode/mednotes/scripts/hooks/mednotes_hook/vault_guard.mjs +624 -0
  73. package/.opencode/mednotes/scripts/hooks/mednotes_hook.mjs +5 -0
  74. package/.opencode/mednotes/scripts/mednotes/_runtime_paths.py +24 -0
  75. package/.opencode/mednotes/scripts/mednotes/anki_model_validator.py +18 -0
  76. package/.opencode/mednotes/scripts/mednotes/capture_extension_diff.py +1562 -0
  77. package/.opencode/mednotes/scripts/mednotes/feedback_report.py +16 -0
  78. package/.opencode/mednotes/scripts/mednotes/flashcard_index.py +18 -0
  79. package/.opencode/mednotes/scripts/mednotes/flashcard_pipeline.py +18 -0
  80. package/.opencode/mednotes/scripts/mednotes/flashcard_report.py +18 -0
  81. package/.opencode/mednotes/scripts/mednotes/flashcard_sources.py +18 -0
  82. package/.opencode/mednotes/scripts/mednotes/obsidian/README.md +6 -0
  83. package/.opencode/mednotes/scripts/mednotes/obsidian_note_utils.py +20 -0
  84. package/.opencode/mednotes/scripts/mednotes/pdf_library/cli.py +16 -0
  85. package/.opencode/mednotes/scripts/mednotes/project_fsm.py +229 -0
  86. package/.opencode/mednotes/scripts/mednotes/setup_telemetry_email.py +404 -0
  87. package/.opencode/mednotes/scripts/mednotes/sync_anki_twenty_rules.py +18 -0
  88. package/.opencode/mednotes/scripts/mednotes/sync_opencode_user_config.py +36 -0
  89. package/.opencode/mednotes/scripts/mednotes/wiki/cli.py +20 -0
  90. package/.opencode/mednotes/scripts/mednotes/wiki_graph.py +18 -0
  91. package/.opencode/mednotes/scripts/mednotes/wiki_tree.py +134 -0
  92. package/.opencode/mednotes/scripts/reset_windows_python_uv.ps1 +625 -0
  93. package/.opencode/mednotes/scripts/run_python.mjs +109 -0
  94. package/.opencode/mednotes/scripts/vault/vault_commit.ps1 +19 -0
  95. package/.opencode/mednotes/scripts/vault/vault_commit.sh +18 -0
  96. package/.opencode/mednotes/scripts/vault/vault_git.ps1 +19 -0
  97. package/.opencode/mednotes/scripts/vault/vault_git.py +3107 -0
  98. package/.opencode/mednotes/scripts/vault/vault_git.sh +18 -0
  99. package/.opencode/mednotes/scripts/vault/vault_precommit.ps1 +19 -0
  100. package/.opencode/mednotes/scripts/vault/vault_precommit.sh +18 -0
  101. package/.opencode/mednotes/skills/THIRD_PARTY_NOTICES.md +45 -0
  102. package/.opencode/mednotes/skills/create-medical-flashcards/SKILL.md +113 -0
  103. package/.opencode/mednotes/skills/create-medical-note/SKILL.md +90 -0
  104. package/.opencode/mednotes/skills/enrich-medical-note/SKILL.md +120 -0
  105. package/.opencode/mednotes/skills/fix-medical-wiki/SKILL.md +559 -0
  106. package/.opencode/mednotes/skills/link-medical-wiki/SKILL.md +224 -0
  107. package/.opencode/mednotes/skills/obsidian-cli/SKILL.md +118 -0
  108. package/.opencode/mednotes/skills/obsidian-markdown/SKILL.md +207 -0
  109. package/.opencode/mednotes/skills/obsidian-markdown/references/CALLOUTS.md +58 -0
  110. package/.opencode/mednotes/skills/obsidian-markdown/references/EMBEDS.md +63 -0
  111. package/.opencode/mednotes/skills/obsidian-markdown/references/PROPERTIES.md +61 -0
  112. package/.opencode/mednotes/skills/obsidian-ops/SKILL.md +136 -0
  113. package/.opencode/mednotes/skills/pdf-library/SKILL.md +45 -0
  114. package/.opencode/mednotes/skills/process-medical-chats/SKILL.md +246 -0
  115. package/.opencode/mednotes/skills/workflow-report/SKILL.md +100 -0
  116. package/.opencode/mednotes/src/mednotes/__init__.py +5 -0
  117. package/.opencode/mednotes/src/mednotes/domains/__init__.py +5 -0
  118. package/.opencode/mednotes/src/mednotes/domains/flashcards/README.md +26 -0
  119. package/.opencode/mednotes/src/mednotes/domains/flashcards/__init__.py +2 -0
  120. package/.opencode/mednotes/src/mednotes/domains/flashcards/build_demo_apkg.py +177 -0
  121. package/.opencode/mednotes/src/mednotes/domains/flashcards/contracts.py +385 -0
  122. package/.opencode/mednotes/src/mednotes/domains/flashcards/flashcards_machine.py +522 -0
  123. package/.opencode/mednotes/src/mednotes/domains/flashcards/fsm.py +817 -0
  124. package/.opencode/mednotes/src/mednotes/domains/flashcards/index.py +630 -0
  125. package/.opencode/mednotes/src/mednotes/domains/flashcards/install_models.py +445 -0
  126. package/.opencode/mednotes/src/mednotes/domains/flashcards/model.py +359 -0
  127. package/.opencode/mednotes/src/mednotes/domains/flashcards/obsidian_links.py +135 -0
  128. package/.opencode/mednotes/src/mednotes/domains/flashcards/obsidian_note_utils.py +546 -0
  129. package/.opencode/mednotes/src/mednotes/domains/flashcards/pipeline.py +580 -0
  130. package/.opencode/mednotes/src/mednotes/domains/flashcards/report.py +510 -0
  131. package/.opencode/mednotes/src/mednotes/domains/flashcards/sources.py +682 -0
  132. package/.opencode/mednotes/src/mednotes/domains/flashcards/sync_rules.py +184 -0
  133. package/.opencode/mednotes/src/mednotes/domains/history/__init__.py +1 -0
  134. package/.opencode/mednotes/src/mednotes/domains/history/history_fsm.py +852 -0
  135. package/.opencode/mednotes/src/mednotes/domains/history/history_machine.py +453 -0
  136. package/.opencode/mednotes/src/mednotes/domains/setup/__init__.py +7 -0
  137. package/.opencode/mednotes/src/mednotes/domains/setup/setup_fsm.py +808 -0
  138. package/.opencode/mednotes/src/mednotes/domains/setup/setup_machine.py +973 -0
  139. package/.opencode/mednotes/src/mednotes/domains/wiki/README.md +64 -0
  140. package/.opencode/mednotes/src/mednotes/domains/wiki/__init__.py +1 -0
  141. package/.opencode/mednotes/src/mednotes/domains/wiki/api.py +668 -0
  142. package/.opencode/mednotes/src/mednotes/domains/wiki/batch_state.py +102 -0
  143. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/__init__.py +1 -0
  144. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/atomicity/__init__.py +1 -0
  145. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/atomicity/atomicity.py +877 -0
  146. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/body_link/__init__.py +1 -0
  147. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/body_link/body_linker.py +1562 -0
  148. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/effects/__init__.py +1 -0
  149. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/effects/effect_adapters.py +949 -0
  150. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/effects/fix_wiki_runtime_adapters.py +433 -0
  151. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/__init__.py +1 -0
  152. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/coverage.py +413 -0
  153. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/graph.py +396 -0
  154. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/graph_fixes.py +161 -0
  155. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/hygiene/__init__.py +1 -0
  156. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/hygiene/hygiene.py +483 -0
  157. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/__init__.py +2 -0
  158. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/anchors.py +185 -0
  159. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/__init__.py +0 -0
  160. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/cache.py +223 -0
  161. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/config.py +131 -0
  162. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/download.py +224 -0
  163. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/frontmatter.py +59 -0
  164. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/insert.py +227 -0
  165. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/local_import.py +54 -0
  166. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/__init__.py +42 -0
  167. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/web_profiles.py +99 -0
  168. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/web_search.py +203 -0
  169. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/wikimedia.py +102 -0
  170. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/__init__.py +1 -0
  171. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/markdown_db_adapter.mjs +434 -0
  172. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/markdown_node_runtime.py +274 -0
  173. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/markdown_query.py +227 -0
  174. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/__init__.py +1 -0
  175. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/artifacts.py +605 -0
  176. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/canonical_merge.py +277 -0
  177. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/markdown_zones.py +85 -0
  178. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/meaning_planner.py +307 -0
  179. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_iter.py +67 -0
  180. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_merge.py +278 -0
  181. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_plan.py +409 -0
  182. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_policy.py +22 -0
  183. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/__init__.py +79 -0
  184. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/fixes.py +264 -0
  185. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/frontmatter.py +435 -0
  186. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/models.py +208 -0
  187. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/prompts.py +37 -0
  188. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/tables.py +236 -0
  189. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/validate.py +404 -0
  190. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/provenance.py +478 -0
  191. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/raw_chats.py +273 -0
  192. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/sources_backfill.py +235 -0
  193. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/__init__.py +10 -0
  194. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/anchors.py +16 -0
  195. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/captions.py +47 -0
  196. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/cli.py +179 -0
  197. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/cloud.py +52 -0
  198. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/config.py +196 -0
  199. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/context_packets.py +76 -0
  200. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/db.py +81 -0
  201. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/doctor.py +102 -0
  202. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/figure_ids.py +42 -0
  203. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/ingest.py +326 -0
  204. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/insert.py +316 -0
  205. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/mentions.py +57 -0
  206. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/ocr.py +71 -0
  207. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/paths.py +35 -0
  208. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/pdf_engine.py +77 -0
  209. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/schema.py +155 -0
  210. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/search.py +188 -0
  211. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/__init__.py +1 -0
  212. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/app.py +89 -0
  213. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/image_backend.py +29 -0
  214. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/state.py +65 -0
  215. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/__init__.py +1 -0
  216. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/publish.py +1139 -0
  217. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/publish_receipts.py +365 -0
  218. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/publish_recovery.py +240 -0
  219. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/__init__.py +1 -0
  220. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/agent_behavior_corpus.py +2069 -0
  221. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/agent_report_validation.py +4448 -0
  222. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/agent_run_audit.py +852 -0
  223. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/architect_prompt_eval.py +341 -0
  224. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/body_linker_eval.py +240 -0
  225. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/curator_output_validation.py +175 -0
  226. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/curator_prompt_eval.py +865 -0
  227. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/triager_prompt_eval.py +1295 -0
  228. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/related_notes/__init__.py +1 -0
  229. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/related_notes/related_notes.py +1920 -0
  230. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/related_notes/related_notes_headless.py +1186 -0
  231. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/__init__.py +1 -0
  232. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/plan_attestation.py +148 -0
  233. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/specialist_receipts.py +360 -0
  234. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/specialist_runtime.py +52 -0
  235. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/specialist_task_runner.py +2470 -0
  236. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/style/__init__.py +1 -0
  237. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/style/style.py +1952 -0
  238. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/subagents/__init__.py +1 -0
  239. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/subagents/agents.py +1767 -0
  240. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/__init__.py +1 -0
  241. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/alias_projection.py +331 -0
  242. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/link_terms.py +151 -0
  243. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/llm_disambiguation.py +182 -0
  244. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/__init__.py +116 -0
  245. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/audit.py +201 -0
  246. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/migration.py +314 -0
  247. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/normalize.py +72 -0
  248. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/policy.py +135 -0
  249. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/resolve.py +413 -0
  250. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/schema.py +157 -0
  251. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/status.py +137 -0
  252. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_bootstrap.py +509 -0
  253. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_curator_batch.py +1115 -0
  254. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_ingestion.py +632 -0
  255. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_map.py +930 -0
  256. package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_recovery.py +1388 -0
  257. package/.opencode/mednotes/src/mednotes/domains/wiki/cli.py +6665 -0
  258. package/.opencode/mednotes/src/mednotes/domains/wiki/common.py +69 -0
  259. package/.opencode/mednotes/src/mednotes/domains/wiki/config.py +210 -0
  260. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/__init__.py +74 -0
  261. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/agent_report.py +242 -0
  262. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/agent_run_audit.py +196 -0
  263. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/agents.py +601 -0
  264. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/curator.py +256 -0
  265. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/effect_payloads.py +519 -0
  266. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/happy_path.py +190 -0
  267. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/link_git.py +110 -0
  268. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/link_runtime_artifact.py +52 -0
  269. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/note_plan.py +75 -0
  270. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/paths.py +114 -0
  271. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/public_report.py +53 -0
  272. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/publish.py +111 -0
  273. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/raw_coverage.py +217 -0
  274. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/related_notes.py +136 -0
  275. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/related_notes_headless.py +153 -0
  276. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/related_notes_runtime.py +395 -0
  277. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/schema_registry.py +637 -0
  278. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/specialist.py +432 -0
  279. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/status.py +62 -0
  280. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/style_rewrite.py +568 -0
  281. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/vocabulary_ingestion.py +223 -0
  282. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_blockers.py +510 -0
  283. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_guardrails.py +637 -0
  284. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_outcomes.py +121 -0
  285. package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_receipts.py +100 -0
  286. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/__init__.py +1 -0
  287. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/__init__.py +1 -0
  288. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/__main__.py +4 -0
  289. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/cli.py +275 -0
  290. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/__init__.py +2 -0
  291. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/candidates.py +193 -0
  292. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/cli.py +189 -0
  293. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/gemini.py +220 -0
  294. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/inputs.py +120 -0
  295. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/models.py +34 -0
  296. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/parsing.py +48 -0
  297. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/prompts.py +216 -0
  298. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/quality.py +54 -0
  299. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/reporting.py +24 -0
  300. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/runner.py +433 -0
  301. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/utils.py +39 -0
  302. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/vault_guard_bridge.py +17 -0
  303. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/__init__.py +1 -0
  304. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_context_packets.py +454 -0
  305. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_decision_projection.py +133 -0
  306. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_effects.py +1260 -0
  307. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_fsm.py +2768 -0
  308. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_machine.py +1588 -0
  309. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_plan.py +306 -0
  310. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_primary_objective.py +316 -0
  311. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_problem.py +153 -0
  312. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_receipt_evidence.py +306 -0
  313. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_states.py +290 -0
  314. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_user_report.py +342 -0
  315. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/health.py +6332 -0
  316. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/__init__.py +1 -0
  317. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_fsm.py +1119 -0
  318. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_git.py +638 -0
  319. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_machine.py +1106 -0
  320. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_retry_governance.py +374 -0
  321. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_runtime_result.py +485 -0
  322. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_triggers.py +183 -0
  323. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/linking.py +2758 -0
  324. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/reference_repair.py +718 -0
  325. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/related_notes_fsm.py +1855 -0
  326. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link_related/__init__.py +1 -0
  327. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link_related/link_related_machine.py +834 -0
  328. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/__init__.py +1 -0
  329. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_fsm.py +1592 -0
  330. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_machine.py +3097 -0
  331. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_primary_objective.py +28 -0
  332. package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_runtime_result.py +185 -0
  333. package/.opencode/mednotes/src/mednotes/domains/wiki/performance.py +97 -0
  334. package/.opencode/mednotes/src/mednotes/kernel/__init__.py +6 -0
  335. package/.opencode/mednotes/src/mednotes/kernel/agent_directive.py +336 -0
  336. package/.opencode/mednotes/src/mednotes/kernel/base.py +51 -0
  337. package/.opencode/mednotes/src/mednotes/kernel/blockers.py +39 -0
  338. package/.opencode/mednotes/src/mednotes/kernel/effect_executor.py +55 -0
  339. package/.opencode/mednotes/src/mednotes/kernel/effect_intent.py +69 -0
  340. package/.opencode/mednotes/src/mednotes/kernel/effects.py +160 -0
  341. package/.opencode/mednotes/src/mednotes/kernel/errors.py +38 -0
  342. package/.opencode/mednotes/src/mednotes/kernel/fsm_event.py +35 -0
  343. package/.opencode/mednotes/src/mednotes/kernel/fsm_model.py +55 -0
  344. package/.opencode/mednotes/src/mednotes/kernel/fsm_transition_result.py +75 -0
  345. package/.opencode/mednotes/src/mednotes/kernel/guardrails.py +188 -0
  346. package/.opencode/mednotes/src/mednotes/kernel/progress.py +319 -0
  347. package/.opencode/mednotes/src/mednotes/kernel/public_report.py +346 -0
  348. package/.opencode/mednotes/src/mednotes/kernel/state_machine.py +164 -0
  349. package/.opencode/mednotes/src/mednotes/kernel/workflow.py +619 -0
  350. package/.opencode/mednotes/src/mednotes/platform/__init__.py +5 -0
  351. package/.opencode/mednotes/src/mednotes/platform/backup_policy.py +382 -0
  352. package/.opencode/mednotes/src/mednotes/platform/feedback/__init__.py +62 -0
  353. package/.opencode/mednotes/src/mednotes/platform/feedback/cli.py +275 -0
  354. package/.opencode/mednotes/src/mednotes/platform/feedback/contracts.py +83 -0
  355. package/.opencode/mednotes/src/mednotes/platform/feedback/core.py +4168 -0
  356. package/.opencode/mednotes/src/mednotes/platform/feedback/integrity.py +989 -0
  357. package/.opencode/mednotes/src/mednotes/platform/feedback/operational_contract.py +2293 -0
  358. package/.opencode/mednotes/src/mednotes/platform/feedback/telemetry.py +875 -0
  359. package/.opencode/mednotes/src/mednotes/platform/feedback/telemetry_config.py +65 -0
  360. package/.opencode/mednotes/src/mednotes/platform/opencode_runtime_config.py +182 -0
  361. package/.opencode/mednotes/src/mednotes/platform/paths/__init__.py +1560 -0
  362. package/.opencode/mednotes/src/mednotes/platform/secrets.py +89 -0
  363. package/.opencode/mednotes/src/mednotes/platform/user_config.py +103 -0
  364. package/.opencode/mednotes/src/mednotes/platform/vault_guard.py +214 -0
  365. package/.opencode/mednotes/uv.lock +932 -0
  366. package/.opencode/mednotes.generated.json +395 -0
  367. package/.opencode/opencode.json +31 -0
  368. package/.opencode/plugins/mednotes-fsm.mjs +7 -0
  369. package/.opencode/plugins/mednotes_hook/adapters/antigravity.mjs +169 -0
  370. package/.opencode/plugins/mednotes_hook/adapters/harness_payload.mjs +103 -0
  371. package/.opencode/plugins/mednotes_hook/adapters/opencode_plugin.mjs +341 -0
  372. package/.opencode/plugins/mednotes_hook/adapters/opencode_user_config_sync.mjs +177 -0
  373. package/.opencode/plugins/mednotes_hook/anki_preflight.mjs +214 -0
  374. package/.opencode/plugins/mednotes_hook/cli.mjs +143 -0
  375. package/.opencode/plugins/mednotes_hook/diagnostics.mjs +11 -0
  376. package/.opencode/plugins/mednotes_hook/domain/agent_directive_core.mjs +160 -0
  377. package/.opencode/plugins/mednotes_hook/fsm_directive.mjs +1470 -0
  378. package/.opencode/plugins/mednotes_hook/hook_errors.mjs +120 -0
  379. package/.opencode/plugins/mednotes_hook/retention.mjs +114 -0
  380. package/.opencode/plugins/mednotes_hook/runtime.mjs +174 -0
  381. package/.opencode/plugins/mednotes_hook/telemetry_capture.mjs +511 -0
  382. package/.opencode/plugins/mednotes_hook/vault_guard.mjs +624 -0
  383. package/AGENTS.md +57 -0
  384. package/README.md +194 -0
  385. package/adapters/antigravity/agents.json +80 -0
  386. package/adapters/antigravity/templates/med-chat-triager.md +214 -0
  387. package/adapters/antigravity/templates/med-flashcard-maker.md +72 -0
  388. package/adapters/antigravity/templates/med-knowledge-architect.md +241 -0
  389. package/adapters/antigravity/templates/med-link-graph-curator.md +187 -0
  390. package/adapters/antigravity/templates/med-publish-guard.md +71 -0
  391. package/adapters/gemini-cli/gemini-extension.json +14 -0
  392. package/adapters/gemini-cli/package.json +15 -0
  393. package/adapters/gemini-cli/pyproject.toml +48 -0
  394. package/bin/mednotes-opencode.mjs +155 -0
  395. package/contracts/agents.json +116 -0
  396. package/core/agents/med-chat-triager.md +197 -0
  397. package/core/agents/med-flashcard-maker.md +56 -0
  398. package/core/agents/med-knowledge-architect.md +224 -0
  399. package/core/agents/med-link-graph-curator.md +171 -0
  400. package/core/agents/med-publish-guard.md +55 -0
  401. package/core/commands/flashcards.toml +22 -0
  402. package/core/commands/mednotes/create.toml +22 -0
  403. package/core/commands/mednotes/enrich.toml +24 -0
  404. package/core/commands/mednotes/fix-wiki.toml +24 -0
  405. package/core/commands/mednotes/history.toml +19 -0
  406. package/core/commands/mednotes/link-body.toml +22 -0
  407. package/core/commands/mednotes/link-related.toml +24 -0
  408. package/core/commands/mednotes/link.toml +24 -0
  409. package/core/commands/mednotes/pdf-library.toml +24 -0
  410. package/core/commands/mednotes/process-chats.toml +20 -0
  411. package/core/commands/mednotes/setup.toml +18 -0
  412. package/core/commands/mednotes/status.toml +24 -0
  413. package/core/commands/mednotes/telemetry.toml +24 -0
  414. package/core/commands/report.toml +23 -0
  415. package/core/skills/THIRD_PARTY_NOTICES.md +45 -0
  416. package/core/skills/create-medical-flashcards/SKILL.md +113 -0
  417. package/core/skills/create-medical-note/SKILL.md +90 -0
  418. package/core/skills/enrich-medical-note/SKILL.md +120 -0
  419. package/core/skills/fix-medical-wiki/SKILL.md +559 -0
  420. package/core/skills/link-medical-wiki/SKILL.md +224 -0
  421. package/core/skills/obsidian-cli/SKILL.md +118 -0
  422. package/core/skills/obsidian-markdown/SKILL.md +207 -0
  423. package/core/skills/obsidian-markdown/references/CALLOUTS.md +58 -0
  424. package/core/skills/obsidian-markdown/references/EMBEDS.md +63 -0
  425. package/core/skills/obsidian-markdown/references/PROPERTIES.md +61 -0
  426. package/core/skills/obsidian-ops/SKILL.md +136 -0
  427. package/core/skills/pdf-library/SKILL.md +45 -0
  428. package/core/skills/process-medical-chats/SKILL.md +246 -0
  429. package/core/skills/workflow-report/SKILL.md +100 -0
  430. package/package.json +45 -0
@@ -0,0 +1,4448 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ import shlex
6
+ import unicodedata
7
+ from collections.abc import Iterable
8
+ from pathlib import Path
9
+ from typing import cast
10
+ from urllib.parse import unquote
11
+
12
+ from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr, ValidationError
13
+
14
+ from mednotes.domains.wiki.capabilities.quality.agent_run_audit import audit_agent_transcript
15
+ from mednotes.domains.wiki.common import SKILLS_RELPATH
16
+ from mednotes.domains.wiki.contracts.agent_report import (
17
+ AgentRunReportFinding,
18
+ AgentRunReportFindingCode,
19
+ AgentRunReportSeverity,
20
+ AgentRunReportValidation,
21
+ FixWikiPrimaryObjectiveSummary,
22
+ ProcessChatsPrimaryObjectiveSummary,
23
+ StyleRewriteAtomicApplyResult,
24
+ )
25
+ from mednotes.domains.wiki.contracts.agent_run_audit import (
26
+ AuditWorkflow,
27
+ WorkflowDeviationFinding,
28
+ WorkflowTranscriptAuditResult,
29
+ )
30
+ from mednotes.domains.wiki.contracts.happy_path import happy_path_metrics_from_findings
31
+ from mednotes.domains.wiki.contracts.public_report import WorkflowPublicObjectiveAnswer, WorkflowPublicReportViewModel
32
+ from mednotes.domains.wiki.flows.fix_wiki.fix_wiki_primary_objective import fix_wiki_primary_objective_summary
33
+ from mednotes.domains.wiki.flows.process_chats.process_chats_primary_objective import (
34
+ process_chats_primary_objective_summary,
35
+ )
36
+ from mednotes.kernel.agent_directive import AgentDirective, AgentEffect
37
+ from mednotes.kernel.base import JsonObject, JsonObjectAdapter
38
+ from mednotes.kernel.effects import WorkflowEffectKind
39
+ from mednotes.kernel.public_report import WorkflowPrimaryObjectiveSummary, WorkflowPublicReport
40
+ from mednotes.platform.feedback.operational_contract import (
41
+ PUBLIC_TOOL_TEXT_CONTRACT_VIOLATION,
42
+ TOOL_CALL_ERROR,
43
+ validate_agent_tool_calls,
44
+ )
45
+
46
# Workflow status values that do not represent a successful, finished run.
NON_SUCCESS_STATUSES = {
    "blocked",
    "failed",
    "waiting_agent",
    "waiting_external",
    "waiting_human",
    "completed_with_link_blockers",
}
# Result schema identifiers checked by _is_fsm_first_payload to recognise FSM-first payloads.
FSM_FIRST_SCHEMAS = {
    "medical-notes-workbench.fix-wiki-fsm-result.v1",
    "medical-notes-workbench.flashcards-fsm-result.v1",
    "medical-notes-workbench.link-fsm-result.v1",
    "medical-notes-workbench.link-related-fsm-result.v1",
    "medical-notes-workbench.process-chats-fsm-result.v1",
    "medical-notes-workbench.setup-fsm-result.v1",
    "medical-notes-workbench.history-fsm-result.v1",
}
# Union of every primary-objective summary type the validator may attach to its result.
PrimaryObjectiveSummary = (
    FixWikiPrimaryObjectiveSummary | ProcessChatsPrimaryObjectiveSummary | WorkflowPrimaryObjectiveSummary
)
# Schema identifiers for style-rewrite atomic apply results (agent stdout form and result form).
STYLE_REWRITE_APPLY_RESULT_SCHEMAS = {
    "medical-notes-workbench.style-rewrite-atomic-apply-agent-stdout.v1",
    "medical-notes-workbench.style-rewrite-atomic-apply-result.v1",
}
# Every recognised workflow status: the non-success set plus the success/progress values below.
KNOWN_WORKFLOW_STATUSES = NON_SUCCESS_STATUSES | {
    "no_pending",
    "completed",
    "completed_with_warnings",
    "ready_to_publish",
    "published",
    "preview_ready",
    "ready",
    "running",
}
80
# Translates transcript-audit finding codes (string form) into agent-report finding codes.
# Codes missing from this table fall back to WORKFLOW_CONTRACT_CONTRADICTION in
# _agent_report_code_for_audit.
TRANSCRIPT_AUDIT_FINDING_CODE_MAP: dict[str, AgentRunReportFindingCode] = {
    "agent.transcript_unreadable": AgentRunReportFindingCode.TRANSCRIPT_UNREADABLE,
    "agent.subagent_raw_content_contract_violation": (
        AgentRunReportFindingCode.SUBAGENT_RAW_CONTENT_CONTRACT_VIOLATION
    ),
    "agent.parent_canonical_artifact_write_before_subagent": (
        AgentRunReportFindingCode.PARENT_CANONICAL_ARTIFACT_WRITE_BEFORE_SUBAGENT
    ),
    "agent.parent_canonical_artifact_write_after_subagent": (
        AgentRunReportFindingCode.PARENT_CANONICAL_ARTIFACT_WRITE_AFTER_SUBAGENT
    ),
    "agent.parallel_human_decision_backlog": AgentRunReportFindingCode.PARALLEL_HUMAN_DECISION_BACKLOG,
    "agent.agy_materialized_skill_misclassified_as_stale": (
        AgentRunReportFindingCode.AGY_MATERIALIZED_SKILL_MISCLASSIFIED_AS_STALE
    ),
    "agent.recoverable_tool_error_observed": AgentRunReportFindingCode.RECOVERABLE_TOOL_ERROR_OBSERVED,
}
97
# Accent-free, lowercase fragments (several are word stems such as "conclu" and "publicad").
# NOTE(review): presumably matched against normalized report text to detect a success claim
# about the whole workflow; the consumer is outside this chunk — confirm.
GLOBAL_SUCCESS_CONTEXT_MARKERS = {
    "workflow",
    "fluxo",
    "wiki",
    "publicacao",
    "publicou",
    "publicad",
    "conclu",
    "pronto",
    "completo",
    "final",
}
# Fragments naming a single step or sub-scope (repairs, related notes, graph, body links).
# NOTE(review): presumably used to treat a success claim as scoped rather than global — confirm.
SCOPED_SUCCESS_CONTEXT_MARKERS = {
    "reparos deterministic",
    "reparo deterministic",
    "reparos iniciais",
    "reparo inicial",
    "reparos automatic",
    "reparo automatic",
    "etapa deterministic",
    "related notes",
    "notas relacionadas",
    "grafo",
    "body links",
    "links corporais",
}
123
# Whole-word success vocabulary in Portuguese and English, case-insensitive.
SUCCESS_CLAIM_RE = re.compile(
    r"\b("
    r"sucesso|conclu[ií]do|concluiu|completed|success|sem\s+desvios|sem\s+problemas|pronto"
    r")\b",
    re.IGNORECASE,
)
# Blocker / pause / duplicate / collision vocabulary, case-insensitive.
UNSUPPORTED_BLOCKER_CLAIM_RE = re.compile(
    r"\b("
    r"bloquead\w*|blocked|pausad\w*|interrompid\w*|bloqueio\s+preventivo|"
    r"duplicidade|duplicate|collision|colis[aã]o"
    r")\b",
    re.IGNORECASE,
)
# Phrases asserting that there were no deviations or probes, case-insensitive.
NO_TOOL_DEVIATION_CLAIM_RE = re.compile(
    r"(desvios?\s+do\s+happy\s+path\s*:\s*nenhum|nenhum\s+desvio|sem\s+desvios?|"
    r"n[aã]o\s+houve\s+desvios?|houve\s+desvios?[^?]{0,100}\?\s*n[aã]o\s+houve|"
    r"n[aã]o\s+foram\s+executados\s+probes?|no\s+deviations?|no\s+probes?)",
    re.IGNORECASE,
)
# "<count> nota(s)/arquivo(s) ... reescrit/rewrite" within one sentence; the number is
# captured in the named group "count".
SPECIALIST_REWRITE_COUNT_CLAIM_RE = re.compile(
    r"\b(?P<count>\d+)\s+"
    r"(?:nota(?:\(s\))?s?|arquivo(?:\(s\))?s?)"
    r"[^.!?\n]{0,80}\b(?:reescrit|rewrite)",
    re.IGNORECASE,
)
# A runtime/CLI mention, then a "lacks / unavailable" phrase, then a tool or subagent name,
# all within the same sentence.
RUNTIME_CONTINUATION_UNAVAILABLE_RE = re.compile(
    r"(runtime\s+headless|headless|cli)[^.!?\n]{0,160}"
    r"(n[aã]o\s+possui|sem|lacks?|unavailable|indispon[ií]vel)[^.!?\n]{0,160}"
    r"(invoke_agent|ferramenta|tool|subagente|subagent|med-knowledge-architect)",
    re.IGNORECASE,
)
# Lowercase snake_case tokens (case-sensitive: no IGNORECASE flag).
STATUS_VALUE_RE = re.compile(r"\b[a-z][a-z0-9_]*\b")
# A negation word followed by at most 24 word/space characters and anchored at the END of
# the searched string.
# NOTE(review): presumably applied to the text preceding a success match — confirm at the call site.
NEGATED_SUCCESS_PREFIX_RE = re.compile(r"\b(n[aã]o|not|never|sem)\b[\w\s]{0,24}$", re.IGNORECASE)
# A negation word followed by a success word within the same sentence (up to 160 characters apart).
NEGATED_SUCCESS_SENTENCE_RE = re.compile(
    r"\b(n[aã]o|not|never|sem)\b[^.!?\n]{0,160}"
    r"\b(sucesso|success|conclu[ií]do|concluiu|completed|pronto|completo)\b",
    re.IGNORECASE,
)
# Adversative conjunction ("mas", "porém", "contudo"), then a workflow-level noun, then a
# pending/blocked verb or status, all within one sentence.
SCOPED_SUCCESS_WITH_GLOBAL_BLOCKER_RE = re.compile(
    r"\b(mas|por[eé]m|contudo)\b[^.!?\n]{0,180}"
    r"\b(workflow|fluxo|wiki)\b[^.!?\n]{0,180}"
    r"\b(terminou|ficou|permanece|aguarda|bloque\w*|interromp\w*|waiting_agent|waiting_external|pendente)\b",
    re.IGNORECASE,
)
# Absolute POSIX path wrapped in backticks; the path is captured in group "path".
BACKTICK_ABSOLUTE_PATH_RE = re.compile(r"`(?P<path>/[^`]+)`")
# file:// URI with an absolute path; group "path" excludes the scheme.
FILE_URI_RE = re.compile(r"file://(?P<path>/[^\s\]`>\"']+)")
# Bare absolute path rooted at /Users, /tmp or /private/tmp, not preceded by a word
# character, ":" or "/".
PLAIN_ABSOLUTE_PATH_RE = re.compile(r"(?<![\w:/])(?P<path>/(?:Users|tmp|private/tmp)/[^\s)\]`>\"']+)")
# "File Path: `file://...`" marker; group "path" is everything after the scheme up to the
# closing backtick.
TOOL_CONTENT_FILE_PATH_RE = re.compile(r"File Path:\s*`file://(?P<path>[^`]+)`")
171
# Internal vocabulary (CLI flags, command names, field names, reason codes, harness phrases)
# in lowercase, in Portuguese and English, with accented and unaccented variants of some terms.
# NOTE(review): presumably these must not appear in user-facing output; the check that
# consumes this tuple is outside this chunk — confirm how matching is performed.
PUBLIC_OUTPUT_FORBIDDEN_TERMS = (
    "uv run",
    "--apply",
    "wiki/cli.py",
    "--json",
    "--dry-run",
    "blocked_reason",
    "receipt",
    "recibo",
    "schema",
    "hash",
    "fix-wiki --apply",
    "finalize-agy-specialist-task",
    "run-linker",
    "resource_guard_active",
    "compact-report",
    "full-report",
    "workflow_exit_code",
    "código de saída",
    "codigo de saida",
    "código de retorno",
    "codigo de retorno",
    "exit code",
    "returncode",
    "background task",
    "agy background fallback",
    "harness externo",
    "versionamento",
    "workflow",
    "linker",
    "atestação",
    "atestacao",
    "homologado",
    "logs",
    "progress_view_model",
    "process_chats_terminal_state",
    "specialist_model_quota_exhausted",
    "specialist_model_capacity_unavailable",
    "guard_lease_mismatch",
    "run_id",
    "i am waiting",
    "you will be notified",
    "waiting for completion",
    "no_pending",
)
216
# Keys naming nested containers in transcript records.
# NOTE(review): presumably the keys a transcript walker descends into; the walker is
# outside this chunk — confirm.
TRANSCRIPT_CHILD_CONTAINER_KEYS = (
    "$set",
    "content",
    "events",
    "items",
    "messages",
    "records",
    "response",
    "responses",
    "result",
    "toolCalls",
    "tool_calls",
    "transcript",
)
# Schema identifier of a CPU sample record (see _RuntimeCpuSample.schema_id).
CPU_SAMPLE_SCHEMA = "medical-notes-workbench.controlled-experiment-cpu-sample.v1"
# Thresholds for high-CPU detection: CPU percentage, minimum number of samples, and minimum
# time span in seconds.
# NOTE(review): how the three are combined is decided by code outside this chunk — confirm.
HIGH_CPU_PERCENT_THRESHOLD = 85.0
HIGH_CPU_MIN_SAMPLE_COUNT = 2
HIGH_CPU_MIN_SPAN_SECONDS = 10.0
# Runtime-log line announcing the selected model override; the model label is captured in
# group "label".
AGY_SELECTED_MODEL_RE = re.compile(r'Propagating selected model override to backend:\s+label="(?P<label>[^"]+)"')
# Matches the word "flash" or a "gemini ... flash" model name, case-insensitive.
FLASH_MODEL_RE = re.compile(r"\bflash\b|gemini[-\s\d.]*flash", re.IGNORECASE)
# Multiline match for a line starting with "D" or "deleted:" followed by a Markdown path that
# contains "Wiki_Medicina" or "wiki" (case-insensitive); the path is captured in group "path".
PROCESS_CHATS_WIKI_DELETION_RE = re.compile(
    r"(?m)^\s*(?:D|deleted:)\s+(?P<path>.*(?:Wiki_Medicina|wiki)[^\n]*\.md)\s*$",
    re.IGNORECASE,
)
240
# Maps a root-cause reason code to human-readable labels associated with that cause.
# NOTE(review): presumably a final report must mention at least one label for the active
# root cause; the consuming check is outside this chunk — confirm.
ROOT_CAUSE_PUBLIC_LABELS: dict[str, tuple[str, ...]] = {
    "environment_blocker.windows_path_or_venv": (
        "ambiente Python",
        "Acesso negado",
        "venv",
    ),
    "specialist_model_capacity_unavailable": (
        "cota",
        "quota",
        "capacidade",
        "modelo especialista",
    ),
    "specialist_model_quota_exhausted": (
        "cota",
        "quota",
        "capacidade",
        "modelo especialista",
    ),
    "vocabulary_curation_required": (
        "curadoria de vocabulário",
        "vocabulary curation",
        "vocabulário",
    ),
}
# Reason codes for which a specialist stop is considered legitimate.
# NOTE(review): consumed by code outside this chunk — confirm exact semantics.
LEGITIMATE_SPECIALIST_STOP_REASONS = {
    "rewrite_output_validation_errors",
    "specialist_model_capacity_unavailable",
    "specialist_model_quota_exhausted",
    "style_rewrite_agent_contract_violation",
    "style_rewrite_output_missing",
    "style_rewrite_still_requires_rewrite",
    "target_hash_changed",
}
# Agent, tool and command names related to continuing specialist work.
# NOTE(review): presumably searched for in transcripts/logs as evidence that a continuation
# was attempted — confirm at the call site.
WAITING_AGENT_CONTINUATION_MARKERS = (
    "med-knowledge-architect",
    "finalize-agy-specialist-task",
    "finalize-opencode-specialist-task",
    "invoke_agent",
    "define_subagent",
    "invoke_subagent",
    "finalize-style-rewrite-output",
    "collect-style-rewrite-outputs",
    "apply-specialist-style-rewrite",
    "apply-style-rewrite",
)
# Decision reason codes that are not treated as errors.
NON_ERROR_DECISION_REASON_CODES = {
    "style_rewrite_ready",
}
# For each non-success status, accent-free lowercase fragments of human wording associated
# with that status.
# NOTE(review): presumably a report must contain one of the fragments to count as having
# disclosed the status — confirm at the call site.
NON_SUCCESS_HUMAN_STATUS_MARKERS: dict[str, tuple[str, ...]] = {
    "blocked": (
        "bloquead",
        "nao concluiu",
        "nao foi conclu",
        "nao fixou",
        "pendente",
    ),
    "failed": (
        "falhou",
        "erro",
        "nao concluiu",
        "nao foi conclu",
    ),
    "waiting_agent": (
        "aguard",
        "bloquead",
        "cota",
        "quota",
        "modelo especialista",
        "nao fixou",
        "nao foi fixada por completo",
        "parcial",
        "pendente",
        "reescrita especializada",
    ),
    "waiting_external": (
        "aguard",
        "bloquead",
        "cota",
        "quota",
        "capacidade",
        "modelo especialista",
        "nao fixou",
        "pendente",
        "sem capacidade",
    ),
    "waiting_human": (
        "decisao humana",
        "escolha humana",
        "confirmacao",
        "confirmar",
        "aguard",
        "pendente",
    ),
    "completed_with_link_blockers": (
        "link",
        "grafo",
        "bloquead",
        "pendente",
    ),
}
340
+
341
+
342
def validate_agent_run_report(
    *,
    workflow_payload: JsonObject,
    transcript: object | None = None,
    final_report_text: str | None = None,
    runtime_log_text: str | None = None,
    workflow_payload_path: Path | None = None,
    transcript_path: Path | None = None,
    final_report_path: Path | None = None,
    runtime_log_paths: list[Path] | None = None,
) -> AgentRunReportValidation:
    """Validate the agent's final report against the workflow's typed truth.

    Every individual check returns a list of findings; they are concatenated in a fixed
    order. The validation status is "blocked" when at least one finding exists and
    "completed" otherwise.

    Args:
        workflow_payload: Official workflow payload; validated as a JSON object first.
        transcript: Optional in-memory transcript; transcript-based checks run only when
            it is not None.
        final_report_text: Optional final report text; the text that is actually checked
            is resolved from this value and the transcript by ``_final_report_text``.
        runtime_log_text: Optional runtime log text; None is treated as an empty string.
        workflow_payload_path: Optional payload path, echoed in the result and forwarded
            to the transcript audit.
        transcript_path: Optional transcript path; the path-based transcript audit runs
            only when it is provided.
        final_report_path: Optional final report path, echoed and forwarded likewise.
        runtime_log_paths: Optional runtime log paths forwarded to the transcript audit;
            None is treated as an empty list.

    Returns:
        The validation result carrying the findings, workflow truth fields, happy-path
        metrics, the public report view model and the optional transcript audit.
    """

    raw_payload = _json_object(workflow_payload)
    # Directive contract checks run on the raw payload, before diagnostic_context is sanitized.
    agent_directive_findings = _agent_directive_contract_findings(raw_payload)
    payload = _payload_with_safe_diagnostic_context(raw_payload)
    truth = _workflow_truth(payload)
    primary_objective = _workflow_primary_objective_summary(payload)
    final_text = _final_report_text(final_report_text=final_report_text, transcript=transcript)
    findings: list[AgentRunReportFinding] = list(agent_directive_findings)
    findings.extend(_legacy_specialist_route_findings(payload))
    final_report_present = bool(final_text)

    # Payload-only checks: independent of the final report text.
    findings.extend(_public_output_findings(payload))
    findings.extend(_public_report_pending_effect_success_findings(payload))
    findings.extend(_stale_next_action_findings(payload))
    if primary_objective is None:
        findings.extend(_missing_fsm_primary_objective_findings(payload))
    # NOTE(review): SOURCE is a flattened diff rendering; the nesting of this if/elif chain
    # is inferred from the statement sequence — confirm against the original file.
    if final_text:
        findings.extend(_final_report_permission_findings(payload, final_text))
        incomplete_findings = _final_report_incomplete_findings(final_text, truth)
        findings.extend(incomplete_findings)
        if incomplete_findings:
            # An incomplete report is reported as absent in the result.
            final_report_present = False
        findings.extend(_final_report_internal_term_findings(final_text))
        findings.extend(_status_mismatch_findings(final_text, truth, primary_objective))
        findings.extend(_unsupported_blocker_claim_findings(final_text, truth))
        findings.extend(_success_claim_findings(final_text, truth))
        findings.extend(_omitted_status_findings(final_text, truth))
        findings.extend(_error_context_root_cause_findings(payload, final_text))
        findings.extend(_final_report_local_path_leak_findings(final_text))
        findings.extend(_invalid_reported_artifact_path_findings(final_text))
        findings.extend(_workflow_payload_omission_findings(payload, final_text, transcript))
        if primary_objective is not None:
            findings.extend(_primary_objective_payload_findings(payload, primary_objective))
            findings.extend(_primary_objective_success_claim_findings(final_text, primary_objective))
            findings.extend(_primary_objective_omission_findings(final_text, primary_objective))
    elif primary_objective is not None:
        # Without report text only the payload-side primary-objective check applies.
        findings.extend(_primary_objective_payload_findings(payload, primary_objective))
    findings.extend(_workflow_payload_consistency_findings(payload))
    findings.extend(_runtime_log_findings(payload, runtime_log_text or "", final_text, transcript))
    # NOTE(review): the extent of this guard is inferred (flattened SOURCE); every call below
    # through _waiting_external_continuation_attempt_findings is assumed to be inside it.
    if transcript is not None:
        findings.extend(_tool_payload_contract_findings(transcript))
        findings.extend(_omitted_tool_error_findings(transcript, final_text))
        findings.extend(_omitted_tool_deviation_findings(transcript, final_text))
        findings.extend(_blocked_workflow_tool_result_findings(transcript, final_text))
        findings.extend(_update_topic_success_claim_findings(transcript, truth))
        findings.extend(_transcript_specialist_model_policy_findings(payload, transcript))
        findings.extend(_specialist_completed_apply_step_findings(transcript))
        findings.extend(_opencode_specialist_receipt_step_findings(payload, transcript))
        findings.extend(_style_rewrite_batch_progress_checkpoint_findings(payload, transcript))
        findings.extend(_specialist_rewrite_count_findings(transcript, final_text))
        findings.extend(
            _waiting_agent_continuation_findings(
                payload,
                transcript,
                final_text,
                runtime_log_text or "",
            )
        )
        findings.extend(
            _ready_continuation_stopped_findings(
                payload,
                transcript,
                final_text,
                runtime_log_text or "",
            )
        )
        findings.extend(_waiting_external_continuation_attempt_findings(payload, transcript))
    # Path-based audit; returns None when no transcript path was supplied.
    transcript_audit = _audit_agent_transcript_from_paths(
        truth=truth,
        workflow_payload_path=workflow_payload_path,
        transcript_path=transcript_path,
        final_report_path=final_report_path,
        runtime_log_paths=runtime_log_paths or [],
    )
    if transcript_audit is not None:
        findings.extend(_transcript_audit_findings(transcript_audit))

    # Any finding at all blocks the run report.
    status = "blocked" if findings else "completed"
    happy_path_metrics = happy_path_metrics_from_findings(
        workflow=truth.workflow or _optional_text(payload, "workflow"),
        run_id=truth.run_id or str(payload.get("run_id") or "unknown"),
        findings=findings,
        primary_objective_completed=_primary_objective_completed(primary_objective),
        legitimate_stop_reason=_legitimate_stop_reason(payload, primary_objective),
    )
    public_report_view_model = _public_report_view_model(payload, primary_objective)
    return AgentRunReportValidation(
        status=status,
        workflow=truth.workflow,
        run_id=truth.run_id,
        workflow_status=truth.workflow_status,
        workflow_phase=truth.workflow_phase,
        receipt_status=truth.receipt_status,
        blocked_reason="agent_final_report_contract_violation" if findings else "",
        next_action=(
            "Corrigir o relatório final do agente para refletir o payload oficial, reportar erros de tool "
            "e remover caminhos de artefatos inexistentes antes de concluir a rodada."
            if findings
            else ""
        ),
        final_report_present=final_report_present,
        # Either an in-memory transcript or a transcript path counts as present.
        transcript_present=transcript is not None or transcript_path is not None,
        workflow_payload_path=str(workflow_payload_path) if workflow_payload_path is not None else "",
        transcript_path=str(transcript_path) if transcript_path is not None else "",
        final_report_path=str(final_report_path) if final_report_path is not None else "",
        primary_objective=primary_objective,
        happy_path_metrics=happy_path_metrics,
        public_report_view_model=public_report_view_model,
        transcript_audit=transcript_audit,
        finding_count=len(findings),
        findings=findings,
    )
466
+
467
+
468
+ def _audit_agent_transcript_from_paths(
469
+ *,
470
+ truth: _WorkflowTruth,
471
+ workflow_payload_path: Path | None,
472
+ transcript_path: Path | None,
473
+ final_report_path: Path | None,
474
+ runtime_log_paths: list[Path],
475
+ ) -> WorkflowTranscriptAuditResult | None:
476
+ if transcript_path is None:
477
+ return None
478
+ return audit_agent_transcript(
479
+ transcript_path=transcript_path,
480
+ workflow=_audit_workflow(truth.workflow),
481
+ workflow_payload_path=workflow_payload_path,
482
+ final_report_path=final_report_path,
483
+ runtime_log_paths=runtime_log_paths,
484
+ )
485
+
486
+
487
+ def _audit_workflow(workflow: str) -> AuditWorkflow:
488
+ normalized = workflow.strip().lower()
489
+ if normalized.startswith("/"):
490
+ normalized = normalized[1:]
491
+ if normalized.startswith("mednotes:"):
492
+ normalized = normalized.split(":", 1)[1]
493
+ normalized = normalized.replace("_", "-")
494
+ if normalized in {"process-chats", "fix-wiki", "link"}:
495
+ return cast(AuditWorkflow, normalized)
496
+ return "unknown"
497
+
498
+
499
+ def _transcript_audit_findings(
500
+ transcript_audit: WorkflowTranscriptAuditResult,
501
+ ) -> list[AgentRunReportFinding]:
502
+ return [_transcript_audit_finding(audit_finding) for audit_finding in transcript_audit.findings]
503
+
504
+
505
def _transcript_audit_finding(audit_finding: WorkflowDeviationFinding) -> AgentRunReportFinding:
    """Translate one transcript-audit deviation into an agent-report finding.

    The next action is the deviation's promotion gate when it is non-empty, otherwise the
    string form of its recommended action. The observed behavior is used both as the
    actual value and as the message.
    """
    follow_up = audit_finding.promotion_gate
    if not follow_up:
        follow_up = str(audit_finding.recommended_action)
    report_code = _agent_report_code_for_audit(audit_finding)
    report_severity = _agent_report_severity_for_audit(audit_finding)
    observed = audit_finding.observed_behavior
    supporting_evidence = {
        "evidence_ref": audit_finding.evidence_ref,
        "recommended_action": audit_finding.recommended_action,
    }
    return AgentRunReportFinding(
        code=report_code,
        severity=report_severity,
        source="transcript_audit",
        source_field="transcript_audit.findings",
        expected=audit_finding.expected_contract,
        actual=observed,
        message=observed,
        next_action=follow_up,
        evidence=supporting_evidence,
    )
521
+
522
+
523
def _agent_report_code_for_audit(audit_finding: WorkflowDeviationFinding) -> AgentRunReportFindingCode:
    """Look up the agent-report code for an audit finding.

    Audit codes without an entry in the translation table are reported as a workflow
    contract contradiction.
    """
    try:
        return TRANSCRIPT_AUDIT_FINDING_CODE_MAP[audit_finding.code]
    except KeyError:
        return AgentRunReportFindingCode.WORKFLOW_CONTRACT_CONTRADICTION
528
+
529
+
530
+ def _agent_report_severity_for_audit(audit_finding: WorkflowDeviationFinding) -> AgentRunReportSeverity:
531
+ if audit_finding.severity == "blocking_candidate":
532
+ return "critical"
533
+ return "high"
534
+
535
+
536
def _json_object(value: object) -> JsonObject:
    """Validate ``value`` through ``JsonObjectAdapter`` and return the validated object."""
    validated: JsonObject = JsonObjectAdapter.validate_python(value)
    return validated
538
+
539
+
540
class _AgentReportFieldModel(BaseModel):
    """Base model for the private field projections declared in this module."""

    # Unknown fields are rejected and assignments are re-validated; subclasses may override.
    model_config = ConfigDict(extra="forbid", validate_assignment=True)
542
+
543
+
544
class _RuntimeCpuSample(_AgentReportFieldModel):
    """One CPU usage sample; every numeric field is constrained to be non-negative."""

    # Unlike the base model, unknown fields are ignored; fields may be set by name or alias.
    model_config = ConfigDict(extra="ignore", populate_by_name=True, validate_assignment=True)

    # Read from the "schema" key of the input.
    schema_id: StrictStr = Field(default="", alias="schema")
    elapsed_seconds: float = Field(default=0.0, ge=0)
    total_cpu_percent: float = Field(default=0.0, ge=0)
    max_cpu_percent: float = Field(default=0.0, ge=0)
    process_count: StrictInt = Field(default=0, ge=0)
    max_cpu_command: StrictStr = ""
553
+
554
+
555
class _SpecialistRuntimeBatchItem(_AgentReportFieldModel):
    """One item of a specialist batch: work id, agent and model policy/tier fields."""

    # Unknown fields are ignored rather than rejected.
    model_config = ConfigDict(extra="ignore", validate_assignment=True)

    # All fields are strict strings that default to the empty string.
    work_id: StrictStr = ""
    agent: StrictStr = ""
    model_policy: StrictStr = ""
    required_model_tier: StrictStr = ""
    preferred_model_tier: StrictStr = ""
563
+
564
+
565
class _WorkflowTruthPayloadFields(_AgentReportFieldModel):
    """Identity and status fields of a workflow payload.

    Inherits the base configuration, so unknown fields are rejected.
    """

    # All fields are strict strings that default to the empty string.
    workflow: StrictStr = ""
    run_id: StrictStr = ""
    status: StrictStr = ""
    phase: StrictStr = ""
    blocked_reason: StrictStr = ""
571
+
572
+
573
class _ProgressTruthFields(_AgentReportFieldModel):
    """Identity and status fields of a progress object, plus a continuation flag.

    Inherits the base configuration, so unknown fields are rejected.
    """

    workflow: StrictStr = ""
    run_id: StrictStr = ""
    status: StrictStr = ""
    phase: StrictStr = ""
    # None means the flag was not provided.
    can_continue_now: StrictBool | None = None
579
+
580
+
581
class _PublicProgressFields(_AgentReportFieldModel):
    """Single-field projection holding ``user_action``; unknown fields are rejected."""

    user_action: StrictStr = ""
583
+
584
+
585
class _PublicReceiptFields(_AgentReportFieldModel):
    """Single-field projection holding ``next_action``; unknown fields are rejected."""

    next_action: StrictStr = ""
587
+
588
+
589
class _HumanDecisionPacketFields(_AgentReportFieldModel):
    """Human-decision summary fields used only after payload shape validation."""

    # Unknown fields are ignored rather than rejected.
    model_config = ConfigDict(extra="ignore", validate_assignment=True)

    # All fields are strict strings that default to the empty string.
    why_this_needs_you: StrictStr = ""
    question: StrictStr = ""
    evidence_summary: StrictStr = ""
    type: StrictStr = ""
    kind: StrictStr = ""
599
+
600
+
601
class _AgentDirectiveCapabilities(_AgentReportFieldModel):
    """Capability flags of an agent directive; both default to False."""

    # Unknown fields are ignored; fields may be set by name or alias.
    model_config = ConfigDict(extra="ignore", populate_by_name=True, validate_assignment=True)

    # "continue" is a Python keyword, so the attribute has a trailing underscore and reads
    # the "continue" key through its alias.
    continue_: StrictBool = Field(False, alias="continue")
    final_report: StrictBool = False
606
+
607
+
608
class _AgentDirectiveEffect(_AgentReportFieldModel):
    """Projection of a directive effect that keeps only its ``kind``."""

    # Unknown fields are ignored rather than rejected.
    model_config = ConfigDict(extra="ignore", validate_assignment=True)

    kind: StrictStr = ""
612
+
613
+
614
class _AgentDirectiveControl(_AgentReportFieldModel):
    """Control fields of an agent directive: status, state, capabilities, effects, blockers, resume."""

    # Unknown fields are ignored rather than rejected.
    model_config = ConfigDict(extra="ignore", validate_assignment=True)

    status: StrictStr = ""
    state: StrictStr = ""
    # Factories give every instance its own default capabilities object and lists.
    capabilities: _AgentDirectiveCapabilities = Field(default_factory=_AgentDirectiveCapabilities)
    effects: list[_AgentDirectiveEffect] = Field(default_factory=list)
    blockers: list[StrictStr] = Field(default_factory=list)
    resume: StrictStr = ""
623
+
624
+
625
class _ReceiptTruthFields(_AgentReportFieldModel):
    """Workflow, run id and status fields of a receipt; unknown fields are rejected."""

    workflow: StrictStr = ""
    run_id: StrictStr = ""
    status: StrictStr = ""
629
+
630
+
631
class _StateMachineTruthFields(_AgentReportFieldModel):
    """Single-field projection holding ``current_state``; unknown fields are rejected."""

    current_state: StrictStr = ""
633
+
634
+
635
class _AgentReportRelatedRecoveryFields(_AgentReportFieldModel):
    """Single-field projection holding ``status``; unknown fields are rejected."""

    status: StrictStr = ""
637
+
638
+
639
class _AgentReportApplyFields(_AgentReportFieldModel):
    """Single-field projection holding ``requested_apply``; unknown fields are rejected."""

    # None means the flag was not provided.
    requested_apply: StrictBool | None = None
641
+
642
+
643
class _AgentReportOrchestrationPlanFields(_AgentReportFieldModel):
    """Status and tri-state flags of an orchestration plan; unknown fields are rejected."""

    status: StrictStr = ""
    # Each flag is None when absent, otherwise a strict bool.
    automatic: StrictBool | None = None
    executable_now: StrictBool | None = None
    human_decision_required: StrictBool | None = None
648
+
649
+
650
class _AgentReportVersionControlSafetyFields(_AgentReportFieldModel):
    """Version-control safety flags and texts; unknown fields are rejected."""

    # Each flag is None when absent, otherwise a strict bool.
    mutation_without_guard: StrictBool | None = None
    resource_guard_active: StrictBool | None = None
    run_finish_seen: StrictBool | None = None
    sync_status: StrictStr = ""
    agent_instruction: StrictStr = ""
656
+
657
+
658
class _ProcessChatsTerminalFields(_AgentReportFieldModel):
    """Terminal/backlog state and item counts of a process-chats payload; unknown fields are rejected."""

    workflow: StrictStr = ""
    status: StrictStr = ""
    phase: StrictStr = ""
    process_chats_terminal_state: StrictStr = ""
    process_chats_backlog_state: StrictStr = ""
    # Counts are None when absent, otherwise strict ints.
    item_count: StrictInt | None = None
    total_available_count: StrictInt | None = None
666
+
667
+
668
class _AgentReportHeadlessExportFields(_AgentReportFieldModel):
    """Single-field projection holding ``embedded_count``; unknown fields are rejected."""

    # None when absent, otherwise a strict int.
    embedded_count: StrictInt | None = None
670
+
671
+
672
class _AgentReportReportContractFields(_AgentReportFieldModel):
    """Report contract fields: a list of required items and a per-batch flag."""

    # A factory gives every instance its own empty list.
    must_include: list[StrictStr] = Field(default_factory=list)
    after_each_batch: StrictBool = False
675
+
676
+
677
class _SpecialistRuntimeBatch(_AgentReportFieldModel):
    """Executable specialist batch projected from agent_directive effects."""

    phase: StrictStr = ""
    # Factories give every instance its own item list and report contract.
    current_batch_items: list[_SpecialistRuntimeBatchItem] = Field(default_factory=list)
    report_contract: _AgentReportReportContractFields = Field(default_factory=_AgentReportReportContractFields)
683
+
684
+
685
class _TranscriptEventFields(_AgentReportFieldModel):
    """Fields read from one transcript event."""

    # Unknown fields are ignored; fields may be set by name or alias.
    model_config = ConfigDict(extra="ignore", populate_by_name=True, validate_assignment=True)

    # Read from the "type" key of the event.
    event_type: StrictStr = Field(default="", alias="type")
    tool_name: StrictStr = ""
    role: StrictStr = ""
    status: StrictStr = ""
    output: StrictStr = ""
    parameters: JsonObject = Field(default_factory=dict)
    # Typed as object, so any value is accepted without shape validation.
    content: object = ""
695
+
696
+
697
class _TranscriptTextParameters(_AgentReportFieldModel):
    """Text parameters that can influence transcript-derived decisions."""

    # Inherits the base configuration, so unknown fields are rejected.
    command: StrictStr = ""
    role: StrictStr = ""
702
+
703
+
704
class _OpenCodeSpecialistTaskMetadataFields(_AgentReportFieldModel):
    """Metadata fields of an OpenCode specialist task: ids, provider/model, tool sequence, prompt contract."""

    # Unknown fields are ignored; fields may be set by name or alias.
    model_config = ConfigDict(extra="ignore", populate_by_name=True, validate_assignment=True)

    # Read from the "schema" key of the input.
    schema_id: StrictStr = Field(default="", alias="schema")
    work_id: StrictStr = ""
    task_id: StrictStr = ""
    provider_id: StrictStr = ""
    model_id: StrictStr = ""
    model_tier: StrictStr = ""
    tool_sequence: list[StrictStr] = Field(default_factory=list)
    prompt_contract: StrictStr = ""
    # None means the flag was not provided.
    raw_content_embedded: StrictBool | None = None
716
+
717
+
718
class _SpecialistTaskRunnerResultFields(_AgentReportFieldModel):
    """Fields read from a specialist task runner result."""

    # Unknown fields are ignored; fields may be set by name or alias.
    model_config = ConfigDict(extra="ignore", populate_by_name=True, validate_assignment=True)

    # Read from the "schema" key of the input.
    schema_id: StrictStr = Field(default="", alias="schema")
    status: StrictStr = ""
    work_id: StrictStr = ""
    # None when the result carries no follow-up apply step.
    next_apply_step: JsonObject | None = None
725
+
726
+
727
class _BlockedWorkflowToolResult(_AgentReportFieldModel):
    tool_name: StrictStr = ""
    status: StrictStr = ""
    phase: StrictStr = ""
    # The only field without a default, so it must always be supplied.
    blocked_reason: StrictStr
    work_id: StrictStr = ""
733
+
734
+
735
+ def _field_payload(source: JsonObject, field_names: tuple[str, ...]) -> JsonObject:
736
+ payload: JsonObject = {}
737
+ for field_name in field_names:
738
+ if field_name in source:
739
+ payload[field_name] = source[field_name]
740
+ return payload
741
+
742
+
743
+ def _object_field(source: JsonObject, field_name: str) -> JsonObject:
744
+ if field_name not in source or source[field_name] is None:
745
+ return {}
746
+ value = source[field_name]
747
+ if not isinstance(value, dict):
748
+ if field_name == "diagnostic_context":
749
+ return {}
750
+ raise ValueError(f"{field_name} must be an object")
751
+ return _json_object(value)
752
+
753
+
754
+ def _list_field(source: JsonObject, field_name: str) -> list[object]:
755
+ value = source.get(field_name)
756
+ if not isinstance(value, list):
757
+ return []
758
+ return list(value)
759
+
760
+
761
def _is_fsm_first_payload(payload: JsonObject) -> bool:
    """Tell whether the payload's ``schema`` text is one of ``FSM_FIRST_SCHEMAS``."""
    schema = _optional_text(payload, "schema")
    return schema in FSM_FIRST_SCHEMAS
763
+
764
+
765
+ def _payload_with_safe_diagnostic_context(payload: JsonObject) -> JsonObject:
766
+ if isinstance(payload.get("diagnostic_context"), dict):
767
+ return payload
768
+ return {**payload, "diagnostic_context": {}}
769
+
770
+
771
+ def _agent_directive_from_payload(payload: JsonObject) -> tuple[AgentDirective | None, str]:
772
+ if "agent_directive" not in payload:
773
+ return None, "missing"
774
+ directive_payload = payload["agent_directive"]
775
+ if not isinstance(directive_payload, dict):
776
+ return None, "agent_directive_not_object"
777
+ try:
778
+ return AgentDirective.model_validate(directive_payload), ""
779
+ except ValidationError as exc:
780
+ first_error = exc.errors()[0] if exc.errors() else {}
781
+ location = ".".join(str(part) for part in first_error.get("loc", ())) or "agent_directive"
782
+ message = str(first_error.get("msg") or "invalid")
783
+ return None, f"{location}: {message}"
784
+
785
+
786
def _agent_directive_contract_findings(payload: JsonObject) -> list[AgentRunReportFinding]:
    """Report an invalid-directive finding for FSM-first payloads lacking a valid directive.

    Non-FSM-first payloads, and payloads whose directive validates, yield no findings.
    """
    if _is_fsm_first_payload(payload):
        directive, directive_error = _agent_directive_from_payload(payload)
        if directive is None:
            return [_agent_directive_invalid_finding(payload, directive_error)]
    return []
793
+
794
+
795
def _agent_directive_control(payload: JsonObject) -> _AgentDirectiveControl:
    """Build the control model from the payload's agent directive.

    Falls back to a default ``_AgentDirectiveControl`` when the directive is
    missing or invalid; otherwise only the listed control fields are validated.
    """
    directive, _error = _agent_directive_from_payload(payload)
    if directive is None:
        return _AgentDirectiveControl()
    control_fields = ("status", "state", "capabilities", "effects", "blockers", "resume")
    control_payload = directive.control.to_payload()
    projected = _field_payload(control_payload, control_fields)
    return _AgentDirectiveControl.model_validate(projected)
806
+
807
+
808
def _specialist_runtime_batch_from_agent_directive(payload: JsonObject) -> _SpecialistRuntimeBatch:
    """Collect specialist batch work from the effects of the root agent directive.

    Only ``CALL_SPECIALIST_MODEL`` effects recognised as style-rewrite work
    contribute. Their dict-shaped ``current_batch_items`` are accumulated, and
    the last non-empty ``report_contract`` seen wins. An empty batch is
    returned when the directive is missing or invalid.
    """
    directive, _directive_error = _agent_directive_from_payload(payload)
    if directive is None:
        return _SpecialistRuntimeBatch()

    collected_items: list[_SpecialistRuntimeBatchItem] = []
    contract = _AgentReportReportContractFields()
    for effect in directive.control.effects:
        effect_payload = effect.payload
        is_style_rewrite_call = (
            effect.kind == WorkflowEffectKind.CALL_SPECIALIST_MODEL
            and _is_style_rewrite_specialist_effect(effect, effect_payload)
        )
        if not is_style_rewrite_call:
            continue
        for raw_item in _list_field(effect_payload, "current_batch_items"):
            # Non-dict entries are silently skipped.
            if isinstance(raw_item, dict):
                collected_items.append(_SpecialistRuntimeBatchItem.model_validate(raw_item))
        raw_contract = _object_field(effect_payload, "report_contract")
        if raw_contract:
            contract = _AgentReportReportContractFields.model_validate(
                _field_payload(raw_contract, ("must_include", "after_each_batch"))
            )

    # The phase is only named when there is actual work in the batch.
    phase = "style_rewrite" if collected_items else ""
    return _SpecialistRuntimeBatch(
        phase=phase,
        current_batch_items=collected_items,
        report_contract=contract,
    )
837
+
838
+
839
+ def _is_style_rewrite_specialist_effect(effect: AgentEffect, effect_payload: JsonObject) -> bool:
840
+ """Identify fix-wiki style-rewrite work without consulting diagnostics."""
841
+
842
+ return (
843
+ str(effect_payload.get("kind") or "") == "style_rewrite"
844
+ or effect.target == "med-knowledge-architect"
845
+ or bool(_list_field(effect_payload, "current_batch_items"))
846
+ )
847
+
848
+
849
def _legacy_specialist_route_findings(payload: JsonObject) -> list[AgentRunReportFinding]:
    """Flag specialist batches that exist only inside ``diagnostic_context``.

    A critical finding is produced when the legacy plan under
    ``diagnostic_context`` lists batch items but the agent directive exposes
    none; every other combination yields no findings.
    """
    diagnostic_context = _object_field(payload, "diagnostic_context")
    legacy_plan = _object_field(diagnostic_context, "orchestration" + "_plan")
    if not _list_field(legacy_plan, "current_batch_items"):
        return []
    if _specialist_runtime_batch_from_agent_directive(payload).current_batch_items:
        return []
    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.WORKFLOW_AGENT_DIRECTIVE_INVALID,
        severity="critical",
        source="workflow_payload",
        source_field="diagnostic_context legacy specialist batch",
        expected="agent_directive.control.effects[].payload.current_batch_items",
        actual="specialist batch exposed only as diagnostic evidence",
        message="O payload tentou expor trabalho especialista executavel fora do agent_directive root.",
        next_action=(
            "Reemitir o payload FSM com agent_directive.control.effects[] e manter diagnostic_context "
            "apenas como evidencia."
        ),
    )
    return [finding]
874
+
875
+
876
+ class _WorkflowTruth:
877
+ def __init__(
878
+ self,
879
+ *,
880
+ workflow: str,
881
+ run_id: str,
882
+ workflow_status: str,
883
+ workflow_phase: str,
884
+ progress_status: str,
885
+ receipt_status: str,
886
+ blocked_reason: str,
887
+ ) -> None:
888
+ self.workflow = workflow
889
+ self.run_id = run_id
890
+ self.workflow_status = workflow_status
891
+ self.workflow_phase = workflow_phase
892
+ self.progress_status = progress_status
893
+ self.receipt_status = receipt_status
894
+ self.blocked_reason = blocked_reason
895
+
896
+
897
def _workflow_truth(payload: JsonObject) -> _WorkflowTruth:
    """Derive the canonical workflow facts from a payload.

    For FSM-first payloads the progress view model and receipt take
    precedence, root ``status``/``phase`` are not consulted and
    ``blocked_reason`` is always empty. For other payloads the root fields
    take precedence and progress/receipt act as fallbacks.
    """
    fsm_first = _is_fsm_first_payload(payload)

    root_fields = _field_payload(payload, ("workflow", "run_id", "status", "phase", "blocked_reason"))
    root = _WorkflowTruthPayloadFields.model_validate(root_fields)

    progress_fields = _field_payload(
        _object_field(payload, "progress_view_model"),
        ("workflow", "run_id", "status", "phase", "can_continue_now"),
    )
    progress = _ProgressTruthFields.model_validate(progress_fields)

    receipt_fields = _field_payload(_object_field(payload, "receipt"), ("workflow", "run_id", "status"))
    receipt = _ReceiptTruthFields.model_validate(receipt_fields)

    snapshot_fields = _field_payload(_object_field(payload, "state_machine_snapshot"), ("current_state",))
    snapshot = _StateMachineTruthFields.model_validate(snapshot_fields)

    if not fsm_first:
        return _WorkflowTruth(
            workflow=root.workflow or progress.workflow or receipt.workflow,
            run_id=root.run_id or progress.run_id or receipt.run_id,
            workflow_status=root.status or progress.status or receipt.status,
            workflow_phase=root.phase or progress.phase or snapshot.current_state,
            progress_status=progress.status or root.status,
            receipt_status=receipt.status or root.status,
            blocked_reason=root.blocked_reason,
        )
    return _WorkflowTruth(
        workflow=progress.workflow or receipt.workflow or root.workflow,
        run_id=progress.run_id or receipt.run_id or root.run_id,
        workflow_status=progress.status or receipt.status,
        workflow_phase=progress.phase or snapshot.current_state,
        progress_status=progress.status,
        receipt_status=receipt.status,
        blocked_reason="",
    )
930
+
931
+
932
+ def _final_report_text(*, final_report_text: str | None, transcript: object | None) -> str:
933
+ if final_report_text is not None:
934
+ return _strip_controlled_experiment_json_lines(final_report_text)
935
+ if transcript is None:
936
+ return ""
937
+ responses: list[str] = []
938
+ delta_parts: list[str] = []
939
+
940
+ def flush_delta_parts() -> None:
941
+ if not delta_parts:
942
+ return
943
+ responses.append("".join(delta_parts))
944
+ delta_parts.clear()
945
+
946
+ def append_response(text: str, *, delta: bool = False) -> None:
947
+ if not text.strip():
948
+ return
949
+ if delta:
950
+ delta_parts.append(text)
951
+ return
952
+ flush_delta_parts()
953
+ responses.append(text)
954
+
955
+ def visit(value: object) -> None:
956
+ if isinstance(value, list):
957
+ for item in value:
958
+ visit(item)
959
+ return
960
+ if not isinstance(value, dict):
961
+ return
962
+ event_type = str(value.get("type") or "").upper()
963
+ if event_type in {"TOOL_USE", "TOOL_RESULT"}:
964
+ flush_delta_parts()
965
+ if event_type == "PLANNER_RESPONSE":
966
+ for field in ("content", "text", "message", "response"):
967
+ raw = value.get(field)
968
+ if isinstance(raw, str) and raw.strip():
969
+ append_response(raw)
970
+ break
971
+ if event_type in {"GEMINI", "MESSAGE"}:
972
+ role = str(value.get("role") or "").lower()
973
+ if event_type == "GEMINI" or role in {"assistant", "model"}:
974
+ text = _transcript_message_text(value.get("content"))
975
+ if text.strip():
976
+ append_response(text, delta=bool(value.get("delta")))
977
+ for child in _transcript_child_containers(value):
978
+ visit(child)
979
+
980
+ visit(transcript)
981
+ flush_delta_parts()
982
+ return _strip_controlled_experiment_json_lines("\n\n".join(responses))
983
+
984
+
985
+ def _strip_controlled_experiment_json_lines(text: str) -> str:
986
+ lines: list[str] = []
987
+ for line in text.splitlines():
988
+ stripped = line.strip()
989
+ if stripped.startswith("{") and (
990
+ "medical-notes-workbench.controlled-experiment-cpu-summary.v1" in stripped
991
+ or "medical-notes-workbench.controlled-experiment-output-truncated.v1" in stripped
992
+ ):
993
+ continue
994
+ lines.append(line)
995
+ return "\n".join(lines)
996
+
997
+
998
+ def _transcript_message_text(value: object) -> str:
999
+ if isinstance(value, str):
1000
+ return value
1001
+ if isinstance(value, list):
1002
+ parts = [_transcript_message_text(item) for item in value]
1003
+ return "\n".join(part for part in parts if part.strip())
1004
+ if isinstance(value, dict):
1005
+ for field in ("text", "content", "message"):
1006
+ text = _transcript_message_text(value.get(field))
1007
+ if text.strip():
1008
+ return text
1009
+ parts = value.get("parts")
1010
+ if isinstance(parts, list):
1011
+ return _transcript_message_text(parts)
1012
+ return ""
1013
+
1014
+
1015
def _final_report_incomplete_findings(final_text: str, truth: _WorkflowTruth) -> list[AgentRunReportFinding]:
    """Flag a captured final report that reads like an intermediate progress message.

    Returns one high-severity finding carrying the first available workflow
    status (or ``"unknown"``) as evidence, or no findings when the text does
    not look progress-only.
    """
    if not _final_report_looks_like_progress_only(final_text):
        return []
    known_status = truth.workflow_status or truth.progress_status or truth.receipt_status or "unknown"
    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.FINAL_REPORT_INCOMPLETE,
        severity="high",
        source="final_report",
        source_field="final_report_text",
        expected="relatorio final com status publico, resultado primario, mutacoes, pendencias e erros",
        actual="progress_only",
        message="A resposta capturada parece mensagem intermediaria, nao relatorio final do workflow.",
        next_action=(
            "Tratar a rodada como incompleta e exigir fechamento que diga se a Wiki foi corrigida, "
            "o que mudou, o estado do grafo/Related Notes e qualquer bloqueio ou erro de runtime."
        ),
        evidence={"workflow_status": known_status},
    )
    return [finding]
1035
+
1036
+
1037
+ def _final_report_permission_findings(payload: JsonObject, final_text: str) -> list[AgentRunReportFinding]:
1038
+ if not final_text.strip():
1039
+ return []
1040
+ directive, _directive_error = _agent_directive_from_payload(payload)
1041
+ if directive is None:
1042
+ return []
1043
+ control = directive.control
1044
+ if control.capabilities.final_report:
1045
+ return []
1046
+ return [
1047
+ AgentRunReportFinding(
1048
+ code=AgentRunReportFindingCode.FINAL_REPORT_NOT_ALLOWED,
1049
+ severity="high",
1050
+ source="workflow_payload",
1051
+ source_field="agent_directive.control.capabilities.final_report",
1052
+ expected=f"status={control.status} final_report=false",
1053
+ actual="final_report_present",
1054
+ message="A diretiva oficial ainda não autoriza relatório final para este estado do workflow.",
1055
+ next_action=(
1056
+ "Continuar pela rota oficial ou reportar o bloqueio real antes de emitir uma resposta final."
1057
+ ),
1058
+ evidence={"directive_status": control.status, "directive_state": control.state},
1059
+ )
1060
+ ]
1061
+
1062
+
1063
def _agent_directive_invalid_finding(payload: JsonObject, directive_error: str) -> AgentRunReportFinding:
    """Build the finding for an FSM-first payload without a valid agent directive.

    ``directive_error`` becomes the ``actual`` value (``"invalid"`` when
    empty); the payload schema and progress status/continuation flag are
    attached as evidence.
    """
    progress_fields = _field_payload(
        _object_field(payload, "progress_view_model"),
        ("status", "can_continue_now"),
    )
    progress = _ProgressTruthFields.model_validate(progress_fields)
    evidence = {
        "schema": _optional_text(payload, "schema"),
        "progress_status": progress.status,
        "can_continue_now": progress.can_continue_now,
    }
    return AgentRunReportFinding(
        code=AgentRunReportFindingCode.WORKFLOW_AGENT_DIRECTIVE_INVALID,
        severity="high",
        source="workflow_payload",
        source_field="agent_directive.control",
        expected="agent_directive valido com control tipado para payload FSM-first",
        actual=directive_error or "invalid",
        message="Payload FSM-first nao trouxe agent_directive.control valido no root.",
        next_action=(
            "Corrigir o produtor FSM para emitir agent_directive antes de validar ou aceitar relatorio final."
        ),
        evidence=evidence,
    )
1084
+
1085
+
1086
+ def _final_report_looks_like_progress_only(final_text: str) -> bool:
1087
+ if len(final_text.strip()) > 600:
1088
+ return False
1089
+ folded = _fold_text(final_text)
1090
+ substance_markers = (
1091
+ "status:",
1092
+ "receipt status",
1093
+ "fixou a wiki",
1094
+ "wiki ficou",
1095
+ "nao fixou",
1096
+ "nao foi fixada",
1097
+ "mutacao",
1098
+ "arquivos",
1099
+ "grafo",
1100
+ "related notes",
1101
+ "notas relacionadas",
1102
+ "bloque",
1103
+ "pendente",
1104
+ "parcial",
1105
+ "cota",
1106
+ "quota",
1107
+ "erro",
1108
+ "falhou",
1109
+ )
1110
+ if _folded_contains_any(folded, substance_markers):
1111
+ return False
1112
+ lines = [line.strip() for line in final_text.splitlines() if line.strip()]
1113
+ if not lines:
1114
+ return False
1115
+ progress_markers = (
1116
+ "i have started",
1117
+ "i started",
1118
+ "started the",
1119
+ "waiting for",
1120
+ "waiting for completion",
1121
+ "waiting for the execution",
1122
+ "aguardando resultado",
1123
+ "aguardando o resultado",
1124
+ "aguardando conclusao",
1125
+ "aguardando a conclusao",
1126
+ "em andamento",
1127
+ "vou aguardar",
1128
+ )
1129
+ return all(_folded_contains_any(_fold_text(line), progress_markers) for line in lines)
1130
+
1131
+
1132
def _status_mismatch_findings(
    final_text: str,
    truth: _WorkflowTruth,
    primary_objective: PrimaryObjectiveSummary | None,
) -> list[AgentRunReportFinding]:
    """Compare statuses declared in the final report with the official ones.

    Up to three findings are returned, in this order: receipt status
    mismatch, progress status mismatch, and a public ``status`` that is not
    among the acceptable statuses. A comparison is skipped when either side
    of it is empty.
    """
    claimed = _reported_status_fields(final_text)
    findings: list[AgentRunReportFinding] = []

    claimed_receipt = claimed.get("receipt.status")
    official_receipt = truth.receipt_status
    if claimed_receipt and official_receipt and claimed_receipt != official_receipt:
        receipt_finding = AgentRunReportFinding(
            code=AgentRunReportFindingCode.RECEIPT_STATUS_MISMATCH,
            severity="high",
            source="final_report",
            source_field="receipt.status",
            expected=official_receipt,
            actual=claimed_receipt,
            message=(
                "O relatório final declarou um receipt.status diferente do recibo oficial do workflow."
            ),
            next_action="Reescrever o relatório usando receipt.status do payload oficial.",
        )
        findings.append(receipt_finding)

    claimed_progress = claimed.get("progress_view_model.status")
    official_progress = truth.progress_status
    if claimed_progress and official_progress and claimed_progress != official_progress:
        progress_finding = AgentRunReportFinding(
            code=AgentRunReportFindingCode.PROGRESS_STATUS_MISMATCH,
            severity="high",
            source="final_report",
            source_field="progress_view_model.status",
            expected=official_progress,
            actual=claimed_progress,
            message=(
                "O relatório final declarou um progress_view_model.status diferente do payload oficial."
            ),
            next_action="Reescrever o relatório usando progress_view_model.status como fonte canônica.",
        )
        findings.append(progress_finding)

    claimed_public = claimed.get("status")
    acceptable = _acceptable_public_statuses(truth, primary_objective)
    if claimed_public and acceptable and claimed_public not in acceptable:
        public_finding = AgentRunReportFinding(
            code=AgentRunReportFindingCode.PROGRESS_STATUS_MISMATCH,
            severity="medium",
            source="final_report",
            source_field="status",
            expected=", ".join(sorted(acceptable)),
            actual=claimed_public,
            message="O relatório final declarou um status diferente do status canônico do workflow.",
            next_action="Corrigir o status público do relatório final antes de concluir a rodada.",
        )
        findings.append(public_finding)

    return findings
1187
+
1188
+
1189
def _acceptable_public_statuses(
    truth: _WorkflowTruth,
    primary_objective: PrimaryObjectiveSummary | None,
) -> set[str]:
    """Collect the statuses a public final report may name without contradicting the FSM.

    The set holds the non-empty workflow status plus the status carried by a
    process-chats or generic primary objective summary, when one is given.
    """
    acceptable: set[str] = {truth.workflow_status} if truth.workflow_status else set()
    if isinstance(primary_objective, ProcessChatsPrimaryObjectiveSummary):
        acceptable.add(primary_objective.process_status)
    if isinstance(primary_objective, WorkflowPrimaryObjectiveSummary):
        acceptable.add(primary_objective.status)
    return acceptable
1202
+
1203
+
1204
+ def _reported_status_fields(final_text: str) -> dict[str, str]:
1205
+ reported: dict[str, str] = {}
1206
+ patterns: tuple[tuple[str, re.Pattern[str]], ...] = (
1207
+ (
1208
+ "receipt.status",
1209
+ re.compile(r"(?im)^\s*(?:[-*]\s*)?(?:receipt\s+status|receipt\.status)\s*[:=]\s*`?(?P<value>[a-z0-9_]+)`?"),
1210
+ ),
1211
+ (
1212
+ "progress_view_model.status",
1213
+ re.compile(
1214
+ r"(?im)^\s*(?:[-*]\s*)?(?:progress_view_model\.status|progress\s+status)\s*[:=]\s*`?(?P<value>[a-z0-9_]+)`?"
1215
+ ),
1216
+ ),
1217
+ (
1218
+ "status",
1219
+ re.compile(r"(?im)^\s*(?:[-*]\s*)?(?:status)\s*[:=]\s*`?(?P<value>[a-z0-9_]+)`?"),
1220
+ ),
1221
+ )
1222
+ for field, pattern in patterns:
1223
+ match = pattern.search(final_text)
1224
+ if not match:
1225
+ continue
1226
+ value = _normalize_status(match.group("value"))
1227
+ if value and value in KNOWN_WORKFLOW_STATUSES:
1228
+ reported[field] = value
1229
+ return reported
1230
+
1231
+
1232
def _normalize_status(value: str) -> str:
    """Return the first ``STATUS_VALUE_RE`` match in the stripped, lowercased value, or ``""``."""
    found = STATUS_VALUE_RE.search(value.strip().lower())
    if found is None:
        return ""
    return found.group(0)
1235
+
1236
+
1237
def _success_claim_findings(final_text: str, truth: _WorkflowTruth) -> list[AgentRunReportFinding]:
    """Flag success language in the report while the workflow status is non-success.

    The status checked is the first non-empty one among workflow, progress and
    receipt status; it must belong to ``NON_SUCCESS_STATUSES`` for a finding
    to be produced.
    """
    status = truth.workflow_status or truth.progress_status or truth.receipt_status
    if status not in NON_SUCCESS_STATUSES or not _has_positive_success_claim(final_text):
        return []
    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.SUCCESS_CLAIM_MISMATCH,
        severity="medium",
        source="final_report",
        source_field="final_report_text",
        expected=status,
        actual="success_claim",
        message="O relatório final usou linguagem de sucesso para um workflow que não está concluído.",
        next_action="Trocar linguagem de sucesso por progresso parcial, bloqueio ou espera externa conforme o payload oficial.",
    )
    return [finding]
1255
+
1256
+
1257
def _public_report_pending_effect_success_findings(payload: JsonObject) -> list[AgentRunReportFinding]:
    """Flag success claims in ``reports.*`` while the FSM still awaits the agent.

    The check applies only when the directive status is ``waiting_agent``,
    continuation is explicitly allowed, and there are pending effects or a
    non-blank resume text. ``reports.summary`` and, when present, the public
    report headline and lines are each inspected.
    """
    control = _agent_directive_control(payload)
    awaiting_agent = control.status == "waiting_agent" and control.capabilities.continue_ is True
    if not awaiting_agent:
        return []
    if not (control.effects or control.resume.strip()):
        return []

    reports = _object_field(payload, "reports")
    candidates = [("reports.summary", _optional_text(reports, "summary"))]
    if "public_report" in reports:
        public_report = WorkflowPublicReport.model_validate(reports["public_report"])
        candidates.append(("reports.public_report.headline", public_report.headline))
        for index, line in enumerate(public_report.lines):
            candidates.append((f"reports.public_report.lines[{index}]", line))

    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.SUCCESS_CLAIM_MISMATCH,
            severity="medium",
            source="workflow_payload",
            source_field=source_field,
            expected="waiting_agent",
            actual="success_claim",
            message="O relatório público declarou sucesso enquanto a FSM ainda exige continuação por agente.",
            next_action=(
                "Projetar reports.* a partir da transição FSM e manter linguagem de progresso parcial "
                "até agent_directive.control.capabilities.final_report=true."
            ),
        )
        for source_field, text in candidates
        if _has_positive_success_claim(text)
    ]
1293
+
1294
+
1295
def _unsupported_blocker_claim_findings(final_text: str, truth: _WorkflowTruth) -> list[AgentRunReportFinding]:
    """Flag a blocker claim in the report that the official payload does not back.

    Nothing is reported when the status is a non-success one or a
    ``blocked_reason`` exists. Otherwise the first claim whose sentence is not
    a negation ("sem bloque", "nao bloque", "não bloque") yields one finding.
    """
    status = truth.workflow_status or truth.progress_status or truth.receipt_status
    if status in NON_SUCCESS_STATUSES or truth.blocked_reason:
        return []
    negation_markers = ("sem bloque", "nao bloque", "não bloque")
    for claim in UNSUPPORTED_BLOCKER_CLAIM_RE.finditer(final_text):
        raw_sentence = _sentence_containing_match(final_text, claim.start(), claim.end())
        sentence = _fold_text(raw_sentence)
        if any(marker in sentence for marker in negation_markers):
            continue
        finding = AgentRunReportFinding(
            code=AgentRunReportFindingCode.WORKFLOW_CONTRACT_CONTRADICTION,
            severity="high",
            source="final_report",
            source_field="final_report_text",
            expected=status or "workflow sem blocked_reason",
            actual=sentence[:180],
            message="O relatório final declarou bloqueio/duplicidade que não existe no payload oficial.",
            next_action=(
                "Usar somente status, blocked_reason e decision oficiais para declarar bloqueio; "
                "se o agente suspeitar duplicidade, registrar como suspeita e seguir a próxima ação oficial."
            ),
        )
        return [finding]
    return []
1319
+
1320
+
1321
def _has_positive_success_claim(final_text: str) -> bool:
    """Tell whether the text contains at least one genuine success claim.

    A ``SUCCESS_CLAIM_RE`` match is discarded when the 32 characters before it
    match the negated-prefix pattern, when its folded sentence matches the
    negated-sentence pattern, or when that sentence is a partial-success one.
    """

    def is_genuine(claim: re.Match[str]) -> bool:
        window_start = max(0, claim.start() - 32)
        if NEGATED_SUCCESS_PREFIX_RE.search(final_text[window_start : claim.start()]):
            return False
        sentence = _fold_text(_sentence_containing_match(final_text, claim.start(), claim.end()))
        if NEGATED_SUCCESS_SENTENCE_RE.search(sentence):
            return False
        return not _is_partial_success_sentence(sentence)

    return any(is_genuine(claim) for claim in SUCCESS_CLAIM_RE.finditer(final_text))
1333
+
1334
+
1335
+ def _is_partial_success_sentence(sentence: str) -> bool:
1336
+ if "sem pendenc" in sentence or "sem blocker" in sentence or "sem bloque" in sentence:
1337
+ return False
1338
+ if not any(marker in sentence for marker in ("publicacao", "publicou", "publicad")):
1339
+ return False
1340
+ return any(marker in sentence for marker in ("pendenc", "pendente", "blocker", "bloque", "parcial"))
1341
+
1342
+
1343
+ def _is_component_success_sentence(sentence: str) -> bool:
1344
+ if any(marker in sentence for marker in ("wiki", "workflow", "fluxo")):
1345
+ return False
1346
+ return any(marker in sentence for marker in SCOPED_SUCCESS_CONTEXT_MARKERS)
1347
+
1348
+
1349
def _is_scoped_success_with_global_blocker(sentence: str) -> bool:
    """Tell whether a sentence has a scoped success marker and matches the global-blocker pattern."""
    if not any(marker in sentence for marker in SCOPED_SUCCESS_CONTEXT_MARKERS):
        return False
    return SCOPED_SUCCESS_WITH_GLOBAL_BLOCKER_RE.search(sentence) is not None
1354
+
1355
+
1356
+ def _sentence_containing_match(text: str, start: int, end: int) -> str:
1357
+ boundaries = "\n.!?"
1358
+ sentence_start = max(text.rfind(boundary, 0, start) for boundary in boundaries) + 1
1359
+ sentence_end_candidates = [
1360
+ index
1361
+ for boundary in boundaries
1362
+ if (index := text.find(boundary, end)) != -1
1363
+ ]
1364
+ sentence_end = min(sentence_end_candidates) if sentence_end_candidates else len(text)
1365
+ return text[sentence_start:sentence_end]
1366
+
1367
+
1368
def _omitted_status_findings(final_text: str, truth: _WorkflowTruth) -> list[AgentRunReportFinding]:
    """Flag a report that never conveys the workflow's non-success status.

    No finding is produced when the status is not a non-success one, when the
    lowercased text contains the status identifier, or when the text mentions
    it through one of its human-readable markers.
    """
    status = truth.workflow_status or truth.progress_status or truth.receipt_status
    if status not in NON_SUCCESS_STATUSES:
        return []
    status_is_conveyed = status in final_text.lower() or _mentions_non_success_status_publicly(final_text, status)
    if status_is_conveyed:
        return []
    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.WORKFLOW_STATUS_OMITTED,
        severity="medium",
        source="final_report",
        source_field="progress_view_model.status",
        expected=status,
        actual="omitted",
        message="O relatório final não deixou claro que o workflow ainda não está concluído.",
        next_action=(
            "Explicar em linguagem pública que o workflow ficou parcial, bloqueado ou aguardando "
            "continuação; o identificador técnico é opcional."
        ),
    )
    return [finding]
1389
+
1390
+
1391
def _mentions_non_success_status_publicly(final_text: str, status: str) -> bool:
    """Tell whether the folded text contains any human-readable marker registered for ``status``.

    Statuses without markers in ``NON_SUCCESS_HUMAN_STATUS_MARKERS`` always yield ``False``.
    """
    human_markers = NON_SUCCESS_HUMAN_STATUS_MARKERS.get(status, ())
    if human_markers:
        return _folded_contains_any(_fold_text(final_text), human_markers)
    return False
1397
+
1398
+
1399
def _workflow_primary_objective_summary(
    payload: JsonObject,
) -> PrimaryObjectiveSummary | None:
    """Return the first truthy primary objective summary for the payload.

    The fix-wiki builder is tried first, then process-chats, then the generic
    one; when none is truthy, the generic builder's result is returned.
    """
    builders = (
        fix_wiki_primary_objective_summary,
        process_chats_primary_objective_summary,
        _generic_primary_objective_summary,
    )
    summary = None
    for build in builders:
        summary = build(payload)
        if summary:
            break
    return summary
1407
+
1408
+
1409
def _generic_primary_objective_summary(payload: JsonObject) -> WorkflowPrimaryObjectiveSummary | None:
    """Validate ``reports.details.primary_objective_summary`` when the payload has one.

    Returns ``None`` when the key is absent.

    Raises:
        ValueError: if the summary is present but is not an object.
    """
    details = _object_field(_object_field(payload, "reports"), "details")
    if "primary_objective_summary" not in details:
        return None
    raw_summary = details["primary_objective_summary"]
    if isinstance(raw_summary, dict):
        return WorkflowPrimaryObjectiveSummary.model_validate(raw_summary)
    raise ValueError("reports.details.primary_objective_summary must be an object")
1418
+
1419
+
1420
+ def _primary_objective_completed(
1421
+ objective: PrimaryObjectiveSummary | None,
1422
+ ) -> bool:
1423
+ if objective is None:
1424
+ return False
1425
+ if isinstance(objective, FixWikiPrimaryObjectiveSummary):
1426
+ return objective.wiki_fixed == "yes"
1427
+ if isinstance(objective, WorkflowPrimaryObjectiveSummary):
1428
+ return objective.completed
1429
+ return objective.process_status in {
1430
+ "no_pending",
1431
+ "preview_ready",
1432
+ "ready_to_publish",
1433
+ "published",
1434
+ "completed_with_link_blockers",
1435
+ "completed",
1436
+ }
1437
+
1438
+
1439
def _legitimate_stop_reason(
    payload: JsonObject,
    objective: PrimaryObjectiveSummary | None,
) -> str:
    """Return ``"waiting_external"``, ``"waiting_human"`` or ``""`` for a stopped run.

    The payload status (progress view model first, then root) is honoured
    only with supporting evidence: external-wait evidence for
    ``waiting_external``, a human decision packet for ``waiting_human``.
    Failing that, the objective summary's own status is consulted.
    """
    progress = _object_field(payload, "progress_view_model")
    status = _optional_text(progress, "status") or _optional_text(payload, "status")
    if status == "waiting_external" and _payload_has_external_wait_evidence(payload):
        return "waiting_external"
    if status == "waiting_human" and _human_decision_packet(payload) is not None:
        return "waiting_human"
    if isinstance(objective, FixWikiPrimaryObjectiveSummary) and objective.wiki_fixed == "waiting_external":
        return "waiting_external"
    if isinstance(objective, WorkflowPrimaryObjectiveSummary):
        # External wait is checked before human wait.
        for wait_state in ("waiting_external", "waiting_human"):
            if objective.status == wait_state or wait_state in objective.status:
                return wait_state
    return ""
1457
+
1458
+
1459
def _payload_has_external_wait_evidence(payload: JsonObject) -> bool:
    """Tell whether the serialized payload mentions any external-wait marker.

    The payload is dumped to JSON (keys sorted, non-ASCII kept), folded, and
    searched for quota/capacity/external-wait substrings.
    """
    evidence_markers = (
        "quota",
        "cota",
        "capacity",
        "capacidade",
        "waiting_external",
        "external_wait",
    )
    serialized = json.dumps(payload, ensure_ascii=False, sort_keys=True)
    haystack = _fold_text(serialized)
    for marker in evidence_markers:
        if marker in haystack:
            return True
    return False
1472
+
1473
+
1474
+ def _public_report_view_model(
1475
+ payload: JsonObject,
1476
+ objective: PrimaryObjectiveSummary | None,
1477
+ ) -> WorkflowPublicReportViewModel | None:
1478
+ if objective is None:
1479
+ return None
1480
+ if isinstance(objective, FixWikiPrimaryObjectiveSummary):
1481
+ return _fix_wiki_public_report_view_model(payload, objective)
1482
+ if isinstance(objective, ProcessChatsPrimaryObjectiveSummary):
1483
+ return _process_chats_public_report_view_model(payload, objective)
1484
+ return _generic_public_report_view_model(payload, objective)
1485
+
1486
+
1487
def _fix_wiki_public_report_view_model(
    payload: JsonObject,
    objective: FixWikiPrimaryObjectiveSummary,
) -> WorkflowPublicReportViewModel:
    """Project a fix-wiki objective summary onto the public report view model.

    The mutation state is ``"changed"`` when either the mutation count or the
    written count is positive; user attention is required exactly when a
    human decision reason exists.
    """
    if objective.mutation_count > 0 or objective.written_count > 0:
        mutation_state = "changed"
    else:
        mutation_state = "unchanged"
    human_reason = _human_decision_reason(payload)
    run_id = str(payload.get("run_id") or "")
    objective_answer = _fix_wiki_public_objective_answer(objective.wiki_fixed)
    remaining_work = _join_public_parts(objective.graph_summary, objective.related_notes_summary)
    next_step = _public_next_step(payload, fallback=objective.related_notes_summary)
    return WorkflowPublicReportViewModel(
        workflow="/mednotes:fix-wiki",
        run_id=run_id,
        objective_answer=objective_answer,
        headline=objective.wiki_summary,
        mutation_state=mutation_state,
        mutation_summary=objective.mutation_summary,
        remaining_work_summary=remaining_work,
        next_step_summary=next_step,
        user_attention_required=bool(human_reason),
        human_reason=human_reason,
        internal_terms_present=False,
    )
1506
+
1507
+
1508
def _process_chats_public_report_view_model(
    payload: JsonObject,
    objective: ProcessChatsPrimaryObjectiveSummary,
) -> WorkflowPublicReportViewModel:
    """Project a process-chats objective summary onto the public report view model.

    The mutation state is ``"changed"`` only when notes were published and the
    note count is positive; user attention is required exactly when a human
    decision reason exists.
    """
    notes_were_published = objective.notes_status == "published" and objective.note_count > 0
    if notes_were_published:
        mutation_state = "changed"
    else:
        mutation_state = "unchanged"
    human_reason = _human_decision_reason(payload)
    run_id = str(payload.get("run_id") or "")
    objective_answer = _process_chats_public_objective_answer(objective.process_status)
    remaining_work = _join_public_parts(
        objective.raw_summary,
        objective.coverage_summary,
        objective.linker_summary,
    )
    next_step = _public_next_step(payload, fallback=objective.linker_summary)
    return WorkflowPublicReportViewModel(
        workflow="/mednotes:process-chats",
        run_id=run_id,
        objective_answer=objective_answer,
        headline=objective.process_summary,
        mutation_state=mutation_state,
        mutation_summary=objective.wiki_write_summary,
        remaining_work_summary=remaining_work,
        next_step_summary=next_step,
        user_attention_required=bool(human_reason),
        human_reason=human_reason,
        internal_terms_present=False,
    )
1527
+
1528
+
1529
def _generic_public_report_view_model(
    payload: JsonObject,
    objective: WorkflowPrimaryObjectiveSummary,
) -> WorkflowPublicReportViewModel:
    """Build the public report view model from a generic objective summary.

    Most fields are copied straight from ``objective``; only the objective
    answer, the next step and the human decision reason are derived.
    """
    decision_reason = _human_decision_reason(payload)
    answer = _generic_public_objective_answer(objective)
    next_step = _public_next_step(payload, fallback=objective.next_step_summary)
    return WorkflowPublicReportViewModel(
        workflow=objective.workflow,
        run_id=objective.run_id,
        objective_answer=answer,
        headline=objective.objective,
        mutation_state=objective.mutation_state,
        mutation_summary=objective.mutation_summary,
        remaining_work_summary=objective.remaining_work_summary,
        next_step_summary=next_step,
        user_attention_required=bool(decision_reason),
        human_reason=decision_reason,
        internal_terms_present=False,
    )
1547
+
1548
+
1549
+ def _fix_wiki_public_objective_answer(value: str) -> WorkflowPublicObjectiveAnswer:
1550
+ match value:
1551
+ case "yes":
1552
+ return "yes"
1553
+ case "waiting_agent":
1554
+ return "waiting_agent"
1555
+ case "waiting_external":
1556
+ return "waiting_external"
1557
+ case "failed":
1558
+ return "failed"
1559
+ case "no":
1560
+ return "no"
1561
+ case _:
1562
+ return "partial"
1563
+
1564
+
1565
+ def _process_chats_public_objective_answer(value: str) -> WorkflowPublicObjectiveAnswer:
1566
+ match value:
1567
+ case "published" | "completed" | "completed_with_link_blockers" | "no_pending":
1568
+ return "yes"
1569
+ case "blocked":
1570
+ return "no"
1571
+ case "failed":
1572
+ return "failed"
1573
+ case _:
1574
+ return "partial"
1575
+
1576
+
1577
+ def _generic_public_objective_answer(
1578
+ objective: WorkflowPrimaryObjectiveSummary,
1579
+ ) -> WorkflowPublicObjectiveAnswer:
1580
+ if objective.completed:
1581
+ return "yes"
1582
+ if objective.status == "failed" or "failed" in objective.status:
1583
+ return "failed"
1584
+ if objective.status == "waiting_external" or "waiting_external" in objective.status:
1585
+ return "waiting_external"
1586
+ if objective.status == "waiting_human" or "waiting_human" in objective.status:
1587
+ return "waiting_human"
1588
+ if objective.status == "blocked" or "blocked" in objective.status:
1589
+ return "no"
1590
+ if objective.status == "waiting_agent" or "waiting_agent" in objective.status:
1591
+ return "waiting_agent"
1592
+ return "partial"
1593
+
1594
+
1595
def _human_decision_reason(payload: JsonObject) -> str:
    """Return the first non-blank text describing a pending human decision.

    Returns ``""`` when the payload has no human decision packet. When a
    packet exists but all inspected fields are blank, a fixed placeholder
    sentence is returned instead.
    """
    packet = _human_decision_packet(payload)
    if packet is None:
        return ""
    # Priority order: the most user-oriented explanation first.
    candidates = (
        packet.why_this_needs_you,
        packet.question,
        packet.evidence_summary,
        packet.type,
        packet.kind,
    )
    stripped_candidates = (candidate.strip() for candidate in candidates)
    return next((text for text in stripped_candidates if text), "Decisao humana pendente.")
1603
+
1604
+
1605
def _human_decision_packet(payload: JsonObject) -> _HumanDecisionPacketFields | None:
    """Validate the payload's ``human_decision_packet`` into typed fields.

    Returns ``None`` when the packet object is missing or empty.
    """
    raw_packet = _object_field(payload, "human_decision_packet")
    if raw_packet:
        wanted_fields = ("why_this_needs_you", "question", "evidence_summary", "type", "kind")
        return _HumanDecisionPacketFields.model_validate(_field_payload(raw_packet, wanted_fields))
    return None
1612
+
1613
+
1614
def _public_next_step(payload: JsonObject, *, fallback: str) -> str:
    """Pick the next-step text to show publicly.

    Preference order: ``progress_view_model.user_action``, then
    ``receipt.next_action``, then ``fallback``. Blank (whitespace-only)
    values are skipped. The receipt is only validated when the progress
    action is blank.
    """
    progress_source = _object_field(payload, "progress_view_model")
    progress = _PublicProgressFields.model_validate(_field_payload(progress_source, ("user_action",)))
    if progress_action := progress.user_action.strip():
        return progress_action

    receipt_source = _object_field(payload, "receipt")
    receipt = _PublicReceiptFields.model_validate(_field_payload(receipt_source, ("next_action",)))
    if receipt_action := receipt.next_action.strip():
        return receipt_action

    return fallback
1626
+
1627
+
1628
+ def _join_public_parts(*parts: str) -> str:
1629
+ cleaned = [part.strip() for part in parts if part.strip()]
1630
+ if not cleaned:
1631
+ return "Sem pendencias descritas."
1632
+ return " ".join(cleaned)
1633
+
1634
+
1635
def _primary_objective_payload_findings(
    payload: JsonObject,
    objective: PrimaryObjectiveSummary,
) -> list[AgentRunReportFinding]:
    """Flag process-chats payloads whose primary objective is still unresolved.

    A finding is produced only for a process-chats objective with
    ``process_status == "unknown"`` whose payload belongs to the
    ``/mednotes:process-chats`` workflow and is either in one of the tracked
    phases or reports a non-zero item count.
    """
    is_unresolved_process_chats = (
        isinstance(objective, ProcessChatsPrimaryObjectiveSummary)
        and objective.process_status == "unknown"
    )
    if not is_unresolved_process_chats:
        return []

    terminal = _ProcessChatsTerminalFields.model_validate(
        _field_payload(payload, ("workflow", "phase", "status", "item_count"))
    )
    phase = terminal.phase
    status = terminal.status
    item_count = terminal.item_count or 0
    if terminal.workflow != "/mednotes:process-chats":
        return []

    in_tracked_phase = phase in {"triage", "architect", "publish_dry_run", "publish_apply"}
    if not in_tracked_phase and not item_count:
        return []

    unresolved = AgentRunReportFinding(
        code=AgentRunReportFindingCode.PROCESS_CHATS_PRIMARY_OBJECTIVE_UNRESOLVED,
        severity="high",
        source="workflow_payload",
        source_field="workflow/phase/status",
        expected="process-chats deve terminar em preview/publicação/linker ou blocker explícito antes do relatório final",
        actual=f"phase={phase or 'missing'} status={status or 'missing'} item_count={item_count}",
        message=(
            "O payload oficial ainda não prova que process-chats cumpriu o objetivo primário."
        ),
        next_action=(
            "Continuar a rota oficial de process-chats até publicar/preparar preview com coverage, "
            "rodar linker ou emitir blocker real antes de concluir."
        ),
    )
    return [unresolved]
1671
+
1672
+
1673
def _missing_fsm_primary_objective_findings(payload: JsonObject) -> list[AgentRunReportFinding]:
    """Build the finding for a payload that lacks the primary objective summary.

    Three outcomes, decided by the payload's ``schema`` and ``workflow``:

    * schema is one of ``FSM_FIRST_SCHEMAS``: a generic "primary objective
      omitted" finding;
    * otherwise, workflow is ``/mednotes:process-chats``: a process-chats
      specific "unresolved" finding;
    * otherwise: no finding.
    """
    schema = _optional_text(payload, "schema")
    workflow = _optional_text(payload, "workflow")

    if schema in FSM_FIRST_SCHEMAS:
        omitted = AgentRunReportFinding(
            code=AgentRunReportFindingCode.PRIMARY_OBJECTIVE_OMITTED,
            severity="high",
            source="workflow_payload",
            source_field="reports.details.primary_objective_summary",
            expected="payload FSM-first com reports.details.primary_objective_summary tipado",
            actual=schema or "schema ausente",
            message=f"O payload de {workflow or 'workflow FSM-first'} não trouxe o resumo primário canônico emitido pela FSM.",
            next_action="Corrigir a projeção FSM para emitir primary_objective_summary antes de validar relatório final.",
        )
        return [omitted]

    if workflow != "/mednotes:process-chats":
        return []

    unresolved = AgentRunReportFinding(
        code=AgentRunReportFindingCode.PROCESS_CHATS_PRIMARY_OBJECTIVE_UNRESOLVED,
        severity="high",
        source="workflow_payload",
        source_field="reports.details.primary_objective_summary",
        expected="process-chats-fsm-result.v1 com reports.details.primary_objective_summary tipado",
        actual=schema or "schema ausente",
        message="O payload não trouxe o resumo primário canônico emitido pela FSM de process-chats.",
        next_action=(
            "Reexecutar /mednotes:process-chats pela rota FSM-first antes de validar o relatório final."
        ),
    )
    return [unresolved]
1705
+
1706
+
1707
+ def _safe_positive_int(value: object) -> int:
1708
+ if isinstance(value, bool) or value is None:
1709
+ return 0
1710
+ if isinstance(value, int | float):
1711
+ return max(0, int(value))
1712
+ if isinstance(value, str):
1713
+ try:
1714
+ return max(0, int(value))
1715
+ except ValueError:
1716
+ return 0
1717
+ return 0
1718
+
1719
+
1720
def _public_output_findings(payload: JsonObject) -> list[AgentRunReportFinding]:
    """Report public payload texts that contain forbidden internal terms.

    Every public text source of the payload is lowercased and scanned for
    each entry of ``PUBLIC_OUTPUT_FORBIDDEN_TERMS`` (substring match); one
    medium-severity finding is produced per source with at least one hit.
    """
    findings: list[AgentRunReportFinding] = []
    for source_field, text in _public_text_sources(payload):
        haystack = text.lower()
        hits = [term for term in PUBLIC_OUTPUT_FORBIDDEN_TERMS if term in haystack]
        if hits:
            leak = AgentRunReportFinding(
                code=AgentRunReportFindingCode.PUBLIC_OUTPUT_INTERNAL_TERM_LEAK,
                severity="medium",
                source="workflow_payload",
                source_field=source_field,
                expected="linguagem pública sem comandos internos",
                actual=", ".join(hits),
                message="O payload público do workflow expôs termos internos de automação/desenvolvimento.",
                next_action=(
                    "Trocar o texto público por linguagem de usuário; deixe comandos, schemas, recibos e hashes "
                    "apenas em JSON/logs técnicos."
                ),
                evidence={"forbidden_terms": hits},
            )
            findings.append(leak)
    return findings
1744
+
1745
+
1746
def _final_report_internal_term_findings(final_text: str) -> list[AgentRunReportFinding]:
    """Report forbidden internal terms found in the agent's final answer.

    The text is lowercased and scanned for each entry of
    ``PUBLIC_OUTPUT_FORBIDDEN_TERMS`` (substring match). Returns a single
    medium-severity finding listing all hits, or an empty list.
    """
    haystack = final_text.lower()
    hits = [term for term in PUBLIC_OUTPUT_FORBIDDEN_TERMS if term in haystack]
    if hits:
        leak = AgentRunReportFinding(
            code=AgentRunReportFindingCode.PUBLIC_OUTPUT_INTERNAL_TERM_LEAK,
            severity="medium",
            source="final_report",
            source_field="final_report_text",
            expected="resposta pública sem nomes de campos, recibos, hashes ou estado técnico do guard",
            actual=", ".join(hits),
            message="A resposta final do agente expôs termos internos de automação/desenvolvimento.",
            next_action=(
                "Reescrever a resposta final em linguagem de usuário; deixe nomes de campos, recibos, "
                "hashes e detalhes técnicos do guard apenas em logs/JSON."
            ),
            evidence={"forbidden_terms": hits},
        )
        return [leak]
    return []
1767
+
1768
+
1769
def _public_text_sources(payload: JsonObject) -> list[tuple[str, str]]:
    """Collect the non-blank public texts of a payload as ``(field, text)`` pairs.

    Covers selected text fields of ``progress_view_model``, ``receipt``,
    ``decision`` and ``reports`` and, when ``reports.public_report`` is
    present, its headline and each of its lines. Field labels use dotted
    paths (lines are indexed, e.g. ``reports.public_report.lines[0]``).
    """
    progress = _object_field(payload, "progress_view_model")
    receipt = _object_field(payload, "receipt")
    decision = _object_field(payload, "decision")
    reports = _object_field(payload, "reports")

    labelled_texts = (
        ("progress_view_model.message", _optional_text(progress, "message")),
        ("progress_view_model.user_action", _optional_text(progress, "user_action")),
        ("receipt.next_action", _optional_text(receipt, "next_action")),
        ("decision.public_summary", _optional_text(decision, "public_summary")),
        ("decision.next_action", _optional_text(decision, "next_action")),
        ("reports.summary", _optional_text(reports, "summary")),
    )
    sources: list[tuple[str, str]] = [
        (field, value) for field, value in labelled_texts if value.strip()
    ]

    if "public_report" not in reports:
        return sources

    public_report = WorkflowPublicReport.model_validate(reports["public_report"])
    if public_report.headline.strip():
        sources.append(("reports.public_report.headline", public_report.headline))
    sources.extend(
        (f"reports.public_report.lines[{index}]", line)
        for index, line in enumerate(public_report.lines)
        if line.strip()
    )
    return sources
1793
+
1794
+
1795
+ def _optional_text(source: JsonObject, field_name: str) -> str:
1796
+ if field_name not in source or source[field_name] is None:
1797
+ return ""
1798
+ value = source[field_name]
1799
+ if not isinstance(value, str):
1800
+ raise ValueError(f"{field_name} must be text")
1801
+ return value
1802
+
1803
+
1804
def _stale_next_action_findings(payload: JsonObject) -> list[AgentRunReportFinding]:
    """Flag next-action texts that are stale or circular for the current state.

    Each candidate text (receipt/progress/decision actions) is folded and
    tested against three rules; when several rules match, the last matching
    rule provides the reason recorded in the finding's evidence:

    1. status is ``waiting_external`` and the text mentions a
       dry-run/preview/diagnostic;
    2. status is ``waiting_external``, the related-notes recovery state is
       ``waiting_for_retry`` and the text mentions "export" without "retom";
    3. an apply was requested, the status is in ``NON_SUCCESS_STATUSES`` and
       the text mentions a dry-run/preview.
    """
    progress = _object_field(payload, "progress_view_model")
    receipt = _object_field(payload, "receipt")
    decision = _object_field(payload, "decision")
    diagnostic = _object_field(payload, "diagnostic_context")
    related_state = _AgentReportRelatedRecoveryFields.model_validate(
        _field_payload(_object_field(diagnostic, "related_notes_recovery_state"), ("status",))
    )
    apply_context = _AgentReportApplyFields.model_validate(
        _field_payload(_object_field(diagnostic, "apply"), ("requested_apply",))
    )
    # Progress status wins; the receipt status is only a fallback.
    status = _optional_text(progress, "status") or _optional_text(receipt, "status")
    requested_apply = apply_context.requested_apply is True
    candidates = [
        ("receipt.next_action", _optional_text(receipt, "next_action")),
        ("progress_view_model.user_action", _optional_text(progress, "user_action")),
        ("progress_view_model.resume_action", _optional_text(progress, "resume_action")),
        ("decision.next_action", _optional_text(decision, "next_action")),
    ]

    findings: list[AgentRunReportFinding] = []
    for source_field, text in candidates:
        folded = _fold_text(text)
        if not folded:
            continue

        matched_reasons: list[str] = []
        if status == "waiting_external" and re.search(r"\b(dry-run|preview|previa|diagnostico)\b", folded):
            matched_reasons.append("waiting_external_next_action_repeats_preview")
        if (
            status == "waiting_external"
            and related_state.status == "waiting_for_retry"
            and "export" in folded
            and "retom" not in folded
        ):
            matched_reasons.append("related_notes_wait_next_action_regenerates_export")
        if requested_apply and status in NON_SUCCESS_STATUSES and re.search(r"\b(dry-run|preview|previa)\b", folded):
            matched_reasons.append("apply_block_next_action_loops_to_preview")
        if not matched_reasons:
            continue

        findings.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.STALE_NEXT_ACTION,
                severity="high",
                source="workflow_payload",
                source_field=source_field,
                expected="próxima ação coerente com status/estado FSM",
                actual=text,
                message="A próxima ação pública ficou stale ou circular em relação ao estado canônico do workflow.",
                next_action="Gerar next_action a partir de progress_view_model/decision/receipt canônicos e revalidar o payload.",
                # Later rules take precedence over earlier ones.
                evidence={"reason": matched_reasons[-1]},
            )
        )
    return findings
1856
+
1857
+
1858
def _workflow_payload_consistency_findings(payload: JsonObject) -> list[AgentRunReportFinding]:
    """Flag payloads whose agent directive and progress view disagree.

    When the agent directive requires a waiting-agent continuation, the
    progress view model must report ``status == "waiting_agent"`` with
    ``can_continue_now`` set to ``True``; anything else is a contradiction.
    """
    progress = _ProgressTruthFields.model_validate(
        _field_payload(_object_field(payload, "progress_view_model"), ("status", "can_continue_now"))
    )
    if not _agent_directive_requires_waiting_agent_continuation(payload):
        return []

    progress_allows_continuation = (
        progress.status == "waiting_agent" and progress.can_continue_now is True
    )
    if progress_allows_continuation:
        return []

    contradiction = AgentRunReportFinding(
        code=AgentRunReportFindingCode.WORKFLOW_CONTRACT_CONTRADICTION,
        severity="high",
        source="workflow_payload",
        source_field="agent_directive.control",
        expected="agent_directive com effects executáveis deve projetar progress_view_model.status=waiting_agent e can_continue_now=true",
        actual=f"status={progress.status or 'missing'} can_continue_now={progress.can_continue_now}",
        message=(
            "O payload oficial mistura continuação assistida executável com estado que não autoriza continuar."
        ),
        next_action=(
            "Corrigir a projeção FSM antes de confiar no relatório do agente ou repetir o experimento."
        ),
    )
    return [contradiction]
1882
+
1883
+
1884
def _waiting_agent_continuation_findings(
    payload: JsonObject,
    transcript: object,
    final_text: str,
    runtime_log_text: str,
) -> list[AgentRunReportFinding]:
    """Flag a run that ended without attempting a ready continuation.

    A finding is produced when the workflow is in ``waiting_agent`` with
    continuation allowed, the agent directive requires continuation, neither
    the transcript nor the runtime log shows a continuation attempt, and the
    final text does not report that runtime continuation was unavailable.
    """
    status, can_continue = _agent_continuation_status(payload)
    continuation_ready = status == "waiting_agent" and can_continue is True
    if not continuation_ready or not _agent_directive_requires_waiting_agent_continuation(payload):
        return []

    continuation_attempted = _transcript_attempted_waiting_agent_continuation(
        transcript
    ) or _runtime_log_attempted_waiting_agent_continuation(runtime_log_text)
    if continuation_attempted or _reported_runtime_continuation_unavailable(final_text):
        return []

    omitted = AgentRunReportFinding(
        code=AgentRunReportFindingCode.WAITING_AGENT_CONTINUATION_OMITTED,
        severity="high",
        source="transcript",
        source_field="progress_view_model.status",
        expected="agente deve continuar pelos effects do agent_directive antes do relatório final",
        actual="relatório final emitido sem subagente/aplicação de reescrita",
        message="O workflow ficou em waiting_agent com continuação automática pronta, mas o agente encerrou sem executar a continuação.",
        next_action="Continuar pelo agent_directive.control.effects ou reportar explicitamente a incapacidade da CLI de invocar o subagente.",
    )
    return [omitted]
1913
+
1914
+
1915
def _agent_directive_requires_waiting_agent_continuation(payload: JsonObject) -> bool:
    """Tell whether the agent directive demands a waiting-agent continuation.

    True only when the directive control is in ``waiting_agent``, its
    ``continue_`` capability is exactly ``True``, and it carries either
    effects or a non-blank resume text.
    """
    control = _agent_directive_control(payload)
    may_continue = control.status == "waiting_agent" and control.capabilities.continue_ is True
    if may_continue:
        return bool(control.effects or control.resume.strip())
    return False
1920
+
1921
+
1922
def _agent_continuation_status(payload: JsonObject) -> tuple[str, bool | None]:
    """Return ``(status, can_continue)`` for the payload.

    The agent directive control is authoritative whenever it has a status;
    otherwise the values come from ``progress_view_model``.
    """
    control = _agent_directive_control(payload)
    if not control.status:
        progress_source = _object_field(payload, "progress_view_model")
        progress = _ProgressTruthFields.model_validate(
            _field_payload(progress_source, ("status", "can_continue_now"))
        )
        return progress.status, progress.can_continue_now
    return control.status, control.capabilities.continue_
1930
+
1931
+
1932
def _ready_continuation_stopped_findings(
    payload: JsonObject,
    transcript: object,
    final_text: str,
    runtime_log_text: str,
) -> list[AgentRunReportFinding]:
    """Flag a run that started a ready continuation and then stopped early.

    Applies when continuation is ready and required, was attempted (in the
    transcript or the runtime log), no legitimate stop was reported, and the
    folded final text contains a "next action / resume / remaining" marker.
    """
    status, can_continue = _agent_continuation_status(payload)
    continuation_ready = status == "waiting_agent" and can_continue is True
    if not continuation_ready or not _agent_directive_requires_waiting_agent_continuation(payload):
        return []

    # Both sources are inspected unconditionally, as in the original flow.
    transcript_attempted = _transcript_attempted_waiting_agent_continuation(transcript)
    runtime_attempted = _runtime_log_attempted_waiting_agent_continuation(runtime_log_text)
    if not transcript_attempted and not runtime_attempted:
        return []

    if _reported_runtime_continuation_unavailable(final_text):
        return []

    stopped_legitimately = _transcript_reports_legitimate_specialist_stop(
        transcript,
        final_text,
    ) or _runtime_log_reports_legitimate_specialist_stop(runtime_log_text, final_text)
    if stopped_legitimately:
        return []

    folded = _fold_text(final_text)
    resume_markers = ("proxima acao", "próxima ação", "retomar", "restam", "restantes")
    if not any(marker in folded for marker in resume_markers):
        return []

    stopped = AgentRunReportFinding(
        code=AgentRunReportFindingCode.READY_CONTINUATION_STOPPED,
        severity="high",
        source="workflow_payload",
        source_field="progress_view_model.status",
        expected="waiting_agent/can_continue_now=true deve continuar pela rota oficial ate quota, capacidade, validacao ruim ou fila vazia",
        actual="relatório final encerrou a rodada com continuação executável ainda pronta",
        message=(
            "O agente começou a continuação automática, mas parou e pediu retomada mesmo com o workflow ainda executável."
        ),
        next_action=(
            "Continuar pelo agent_directive.control.effects em vez de encerrar; se parar, reporte quota/capacidade/validação real como blocker."
        ),
    )
    return [stopped]
1973
+
1974
+
1975
+ def _reported_runtime_continuation_unavailable(final_text: str) -> bool:
1976
+ if not final_text:
1977
+ return False
1978
+ return bool(RUNTIME_CONTINUATION_UNAVAILABLE_RE.search(final_text))
1979
+
1980
+
1981
def _transcript_reports_legitimate_specialist_stop(transcript: object, final_text: str) -> bool:
    """Tell whether a legitimate specialist stop in the transcript is reported.

    Only ``tool_result``/``run_command`` events are inspected. For a typed
    specialist-task-runner result, the stop counts when its status is
    blocked/failed/waiting_external, its ``blocked_reason`` is one of
    ``LEGITIMATE_SPECIALIST_STOP_REASONS`` and the folded final text mentions
    that reason or one of its public labels. Any other output falls back to
    the raw-text check.
    """
    folded = _fold_text(final_text)
    if not folded:
        return False

    runner_schema = "medical-notes-workbench.specialist-task-runner-result.v1"
    for event in _iter_transcript_events(transcript):
        if event.event_type.casefold() not in {"tool_result", "run_command"}:
            continue
        output_text = _transcript_tool_output_text(event)
        payload = _json_payload_from_tool_output(output_text)

        is_runner_result = bool(payload) and _optional_text(payload, "schema") == runner_schema
        if not is_runner_result:
            # Untyped output: look for the stop reason in the raw text instead.
            if _raw_transcript_output_reports_specialist_stop(output_text, folded):
                return True
            continue

        status = _optional_text(payload, "status")
        blocked_reason = _optional_text(payload, "blocked_reason")
        is_stop = status in {"blocked", "failed", "waiting_external"}
        if not is_stop or blocked_reason not in LEGITIMATE_SPECIALIST_STOP_REASONS:
            continue

        accepted_mentions = (blocked_reason, *ROOT_CAUSE_PUBLIC_LABELS.get(blocked_reason, ()))
        if _folded_contains_any(folded, accepted_mentions):
            return True
    return False
2011
+
2012
+
2013
def _raw_transcript_output_reports_specialist_stop(output_text: str, folded_final_text: str) -> bool:
    """Match a legitimate stop reason between raw tool output and final text.

    True when some reason from ``LEGITIMATE_SPECIALIST_STOP_REASONS`` occurs
    in the folded output and the folded final text mentions that reason or
    one of its public labels.
    """
    folded_output = _fold_text(output_text)
    if not (folded_output and folded_final_text):
        return False
    return any(
        _folded_contains_any(
            folded_final_text,
            (reason, *ROOT_CAUSE_PUBLIC_LABELS.get(reason, ())),
        )
        for reason in LEGITIMATE_SPECIALIST_STOP_REASONS
        if reason in folded_output
    )
2026
+
2027
+
2028
def _waiting_external_continuation_attempt_findings(
    payload: JsonObject,
    transcript: object,
) -> list[AgentRunReportFinding]:
    """Flag a continuation attempt made while the workflow was not executable.

    The check applies when the progress status is ``waiting_external`` or
    ``can_continue_now`` is exactly ``False``; a critical finding is produced
    if the transcript nevertheless shows a continuation attempt.
    """
    progress = _ProgressTruthFields.model_validate(
        _field_payload(_object_field(payload, "progress_view_model"), ("status", "can_continue_now"))
    )
    workflow_is_stopped = progress.status == "waiting_external" or progress.can_continue_now is False
    if not workflow_is_stopped:
        return []
    if not _transcript_attempted_waiting_agent_continuation(transcript):
        return []

    attempted = AgentRunReportFinding(
        code=AgentRunReportFindingCode.WAITING_EXTERNAL_CONTINUATION_ATTEMPTED,
        severity="critical",
        source="transcript",
        source_field="progress_view_model.status",
        expected="waiting_external/can_continue_now=false deve parar sem invocar especialista ou comandos internos",
        actual="transcript tentou continuação especializada após o hard stop do workflow",
        message=(
            "O agente ignorou um estado não executável do workflow e tentou continuar a reescrita especializada."
        ),
        next_action=(
            "Não aplicar outputs dessa tentativa; corrigir o relatório/agente e retomar somente quando "
            "um runner oficial produzir recibo tipado."
        ),
    )
    return [attempted]
2056
+
2057
+
2058
def _specialist_completed_apply_step_findings(transcript: object) -> list[AgentRunReportFinding]:
    """Flag a detour between a completed specialist task and its apply step.

    The transcript is scanned in order. A completed specialist-task-runner
    result opens a pending apply (work id plus expected command family).
    While it is pending, a ``tool_use`` that reads a file, or that runs one
    of the known detour commands, yields a finding; the matching apply
    command for the same work id clears the pending state.
    """
    runner_schema = "medical-notes-workbench.specialist-task-runner-result.v1"
    default_apply_command = "apply-specialist-style-rewrite"
    pending_work_id = ""
    pending_apply_command = ""

    for event in _iter_transcript_events(transcript):
        event_type = event.event_type.casefold()

        if event_type in {"tool_result", "run_command"}:
            payload = _json_payload_from_tool_output(_transcript_tool_output_text(event))
            result = _SpecialistTaskRunnerResultFields.model_validate(
                _field_payload(payload, ("schema", "status", "work_id", "next_apply_step"))
            )
            if result.schema_id == runner_schema and result.status == "completed":
                pending_work_id = result.work_id
                if result.next_apply_step:
                    pending_apply_command = _optional_text(result.next_apply_step, "command_family")
                # Fall back to the standard apply command when none is known.
                pending_apply_command = pending_apply_command or default_apply_command
            continue

        if event_type != "tool_use" or not pending_work_id:
            continue

        command = _event_parameter_text(event, "command")
        if not command:
            # Tool calls without a command only matter when they read a file.
            if event.tool_name.casefold() == "read_file":
                return [_specialist_apply_step_omitted_finding(pending_work_id, "read_file")]
            continue

        folded = _fold_text(command)
        is_expected_apply = (
            bool(pending_apply_command)
            and pending_apply_command in folded
            and pending_work_id in command
        )
        if is_expected_apply:
            pending_work_id = ""
            pending_apply_command = ""
            continue

        if _is_command_before_required_specialist_apply(folded):
            return [_specialist_apply_step_omitted_finding(pending_work_id, command)]

    return []
2091
+
2092
+
2093
def _opencode_specialist_receipt_step_findings(
    payload: JsonObject,
    transcript: object,
) -> list[AgentRunReportFinding]:
    """Flag an apply command issued before its OpenCode task was finalized.

    Only relevant when the directive's specialist batch is in the
    ``style_rewrite`` phase. Work ids seen in OpenCode task metadata are
    pending until a ``finalize-opencode-specialist-task`` command names them;
    an ``apply-specialist-style-rewrite`` command that targets a pending work
    id (or names none while some are pending) yields a finding.
    """
    batch = _specialist_runtime_batch_from_agent_directive(payload)
    if batch.phase != "style_rewrite":
        return []

    pending_work_ids: set[str] = set()
    for event in _iter_transcript_events(transcript):
        metadata = _opencode_task_metadata_from_event(event)
        if metadata is not None and metadata.work_id:
            pending_work_ids.add(metadata.work_id)
            continue
        if event.event_type.casefold() != "tool_use":
            continue
        command = _event_parameter_text(event, "command")
        if not command:
            continue

        folded = _fold_text(command)
        if "finalize-opencode-specialist-task" in folded:
            finalized_work_id = _command_argument(command, "--work-id")
        else:
            finalized_work_id = ""
        if finalized_work_id and finalized_work_id in pending_work_ids:
            pending_work_ids.remove(finalized_work_id)
            continue

        if "apply-specialist-style-rewrite" not in folded:
            continue
        work_id = _command_argument(command, "--work-id")
        targets_pending = not work_id or work_id in pending_work_ids
        if pending_work_ids and targets_pending:
            # Without an explicit work id, report the first pending one (sorted).
            reported_work_id = work_id or sorted(pending_work_ids)[0]
            return [_specialist_apply_step_omitted_finding(reported_work_id, command)]
    return []
2122
+
2123
+
2124
+ def _is_command_before_required_specialist_apply(folded_command: str) -> bool:
2125
+ return any(
2126
+ marker in folded_command
2127
+ for marker in (
2128
+ "fix-wiki --apply",
2129
+ "plan-subagents",
2130
+ "finalize-agy-specialist-task",
2131
+ "finalize-opencode-specialist-task",
2132
+ "finalize-style-rewrite-output",
2133
+ "collect-style-rewrite-outputs",
2134
+ "apply-style-rewrite",
2135
+ )
2136
+ )
2137
+
2138
+
2139
def _specialist_apply_step_omitted_finding(work_id: str, actual: str) -> AgentRunReportFinding:
    """Build the high-severity "specialist apply step omitted" finding.

    Args:
        work_id: Work id recorded in the finding's evidence.
        actual: What was observed instead of the apply step; stored verbatim
            in the finding's ``actual`` field.
    """
    expected_text = (
        "quando a etapa especialista retorna completed, o proximo comando relevante deve ser "
        "apply-specialist-style-rewrite para o mesmo work_id"
    )
    message_text = (
        "O agente recebeu uma reescrita especialista validada, mas desviou antes de aplicar o recibo oficial."
    )
    next_action_text = (
        "Usar next_apply_step.arguments imediatamente após a etapa especialista completed; "
        "não ler manifesto, rerodar fix-wiki, chamar plan-subagents ou lançar outro especialista antes do apply."
    )
    return AgentRunReportFinding(
        code=AgentRunReportFindingCode.SPECIALIST_APPLY_STEP_OMITTED,
        severity="high",
        source="transcript",
        source_field="tool_result.output.next_apply_step",
        expected=expected_text,
        actual=actual,
        message=message_text,
        next_action=next_action_text,
        evidence={"work_id": work_id},
    )
2159
+
2160
+
2161
def _transcript_attempted_waiting_agent_continuation(transcript: object) -> bool:
    """Tell whether the transcript shows a waiting-agent continuation attempt.

    The transcript is walked recursively (lists, dicts and their child
    containers). A node counts as an attempt when its case-folded ``repr``
    contains any of ``WAITING_AGENT_CONTINUATION_MARKERS`` and the node is a
    typed tool_use/tool_result event, a ``RUN_COMMAND`` entry, the
    ``tool_calls`` list of a ``PLANNER_RESPONSE`` entry, or looks like a
    saved Gemini tool call.
    """

    def has_marker(raw_text: str) -> bool:
        return any(marker in raw_text for marker in WAITING_AGENT_CONTINUATION_MARKERS)

    def visit(node: object) -> bool:
        if isinstance(node, list):
            return any(visit(item) for item in node)
        if not isinstance(node, dict):
            return False

        event = _transcript_event_fields(node)
        if event is not None and event.event_type.casefold() in {"tool_use", "tool_result"}:
            if has_marker(repr(event.model_dump(mode="json")).casefold()):
                return True

        event_type = str(node.get("type") or "").upper()
        if event_type == "RUN_COMMAND" and has_marker(repr(node).casefold()):
            return True
        if event_type == "PLANNER_RESPONSE":
            tool_calls = node.get("tool_calls")
            if isinstance(tool_calls, list) and has_marker(repr(tool_calls).casefold()):
                return True

        if _looks_like_saved_gemini_tool_call(node) and has_marker(repr(node).casefold()):
            return True

        return any(visit(child) for child in _transcript_child_containers(node))

    return visit(transcript)
2193
+
2194
+
2195
def _runtime_log_attempted_waiting_agent_continuation(runtime_log_text: str) -> bool:
    """Tell whether the folded runtime log contains a continuation marker.

    A log that is blank after folding never counts as an attempt.
    """
    folded_log = _fold_text(runtime_log_text)
    if folded_log.strip():
        return _folded_contains_any(folded_log, WAITING_AGENT_CONTINUATION_MARKERS)
    return False
2200
+
2201
+
2202
def _runtime_log_reports_legitimate_specialist_stop(runtime_log_text: str, final_text: str) -> bool:
    """Tell whether a legitimate specialist stop in the runtime log is reported.

    Requires a non-blank log and final text, plus evidence in the log that a
    waiting-agent continuation was attempted. The stop is accepted when:

    * a reason from ``LEGITIMATE_SPECIALIST_STOP_REASONS`` occurs in the log
      and the final text mentions that reason or one of its public labels; or
    * the log contains a quota/capacity marker and the final text mentions
      ``specialist_model_quota_exhausted`` or one of its public labels.

    Args:
        runtime_log_text: Raw runtime log text.
        final_text: The agent's final report text.

    Returns:
        ``True`` if a legitimate stop is both logged and reported.
    """
    folded_log = _fold_text(runtime_log_text)
    folded_final = _fold_text(final_text)
    if not folded_log.strip() or not folded_final.strip():
        return False
    if not _runtime_log_attempted_waiting_agent_continuation(runtime_log_text):
        return False

    for blocked_reason in LEGITIMATE_SPECIALIST_STOP_REASONS:
        if not _folded_contains_any(folded_log, (blocked_reason,)):
            continue
        if _folded_contains_any(
            folded_final,
            (blocked_reason, *ROOT_CAUSE_PUBLIC_LABELS.get(blocked_reason, ())),
        ):
            return True

    quota_markers = (
        "terminalquotaerror",
        "quota_exhausted",
        "exhausted your capacity",
        "capacity on this model",
    )
    quota_reason = "specialist_model_quota_exhausted"
    # Tolerate a missing label entry, like the lookups in the loop above,
    # instead of raising KeyError from inside a report check.
    quota_mentions = (quota_reason, *ROOT_CAUSE_PUBLIC_LABELS.get(quota_reason, ()))
    return bool(
        _folded_contains_any(folded_log, quota_markers)
        and _folded_contains_any(folded_final, quota_mentions)
    )
2232
+
2233
+
2234
def _transcript_used_native_specialist_invocation(transcript: object) -> bool:
    """Tell whether the transcript shows a specialist invocation.

    Two shapes are recognised: a native agent tool (invoke/define/send) whose
    dumped event mentions ``med-knowledge-architect`` or ``style_rewrite``,
    and a shell-command tool whose command mentions the architect agent or
    one of the style-rewrite finalize/apply commands.
    """
    native_tool_names = {"invoke_agent", "invoke_subagent", "define_subagent", "send_message"}
    shell_tool_names = {"run_command", "run_shell_command"}
    native_markers = ("med-knowledge-architect", "style_rewrite")
    command_markers = (
        "med-knowledge-architect",
        "finalize-style-rewrite-output",
        "apply-style-rewrite",
    )

    for event in _iter_transcript_events(transcript):
        tool_name = event.tool_name.casefold()
        raw_event = repr(event.model_dump(mode="json")).casefold()
        if tool_name in native_tool_names and any(marker in raw_event for marker in native_markers):
            return True
        if tool_name in shell_tool_names:
            command = _event_parameter_text(event, "command").casefold()
            if any(marker in command for marker in command_markers):
                return True
    return False
2252
+
2253
+
2254
+ def _looks_like_saved_gemini_tool_call(value: JsonObject) -> bool:
2255
+ return isinstance(value.get("name"), str) and (
2256
+ "args" in value
2257
+ or "functionResponse" in value
2258
+ or "result" in value
2259
+ or "resultDisplay" in value
2260
+ )
2261
+
2262
+
2263
def _blocked_workflow_tool_result_findings(
    transcript: object,
    final_text: str,
) -> list[AgentRunReportFinding]:
    """Flag blocked workflow payloads that the final report fails to mention.

    Blocked tool results are de-duplicated by tool name, phase, blocked
    reason and work id. A result is considered reported when the folded
    final text contains its folded blocked reason, or when the final text
    explains it in the accepted wording.
    """
    blocked_results = _blocked_workflow_tool_results(transcript)
    if not blocked_results:
        return []

    folded = _fold_text(final_text)
    findings: list[AgentRunReportFinding] = []
    already_checked: set[str] = set()
    for result in blocked_results:
        dedupe_key = f"{result.tool_name}:{result.phase}:{result.blocked_reason}:{result.work_id}"
        if dedupe_key in already_checked:
            continue
        already_checked.add(dedupe_key)

        reason_folded = _fold_text(result.blocked_reason)
        quoted_literally = bool(folded) and bool(reason_folded.strip()) and reason_folded in folded
        if quoted_literally or _final_report_explains_blocked_tool_result(result, folded):
            continue

        findings.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.BLOCKED_TOOL_RESULT_OMITTED,
                severity="high",
                source="transcript",
                source_field="tool_result.output.blocked_reason",
                expected="relatório final deve reportar qualquer payload de workflow bloqueado dentro de tool_result",
                actual=result.blocked_reason,
                message=(
                    "O transcript contém um comando com tool status=success, mas o payload oficial dentro "
                    "do output ficou bloqueado."
                ),
                next_action=(
                    "Reportar o blocked_reason literal, explicar o impacto no workflow e não tratar a "
                    "tool call como sucesso do workflow."
                ),
                evidence={
                    "tool_name": result.tool_name,
                    "phase": result.phase,
                    "work_id": result.work_id,
                    "status": result.status,
                },
            )
        )
    return findings
2308
+
2309
+
2310
def _final_report_explains_blocked_tool_result(result: _BlockedWorkflowToolResult, folded_text: str) -> bool:
    """Accept a paraphrased explanation of a ``style_rewrite_still_requires_rewrite`` block.

    Only that blocked reason can be paraphrased.  The folded report must contain
    at least one marker from each of three groups: rewrite context, "not
    applied" wording, and a style-related cause.
    """
    if not folded_text or result.blocked_reason != "style_rewrite_still_requires_rewrite":
        return False

    marker_groups = (
        # Rewrite context.
        ("reescrita", "rewrite"),
        # The rewrite was not applied.
        (
            "parou antes",
            "nao foi aplicada",
            "nao foi aplicado",
            "não foi aplicada",
            "não foi aplicado",
            "nenhuma nota",
            "pendente",
        ),
        # Style-related cause.
        (
            "criterios de estilo",
            "critérios de estilo",
            "nao atendeu",
            "não atendeu",
            "excesso de callouts",
            "visual didatico pendente",
            "visual didático pendente",
            "nota validada",
        ),
    )
    return all(any(marker in folded_text for marker in group) for group in marker_groups)
2342
+
2343
+
2344
def _blocked_workflow_tool_results(transcript: object) -> list[_BlockedWorkflowToolResult]:
    """Collect blocked workflow payloads embedded in tool outputs.

    Only ``tool_result`` and ``run_command`` events are inspected.  A payload is
    kept when its ``status`` is exactly ``"blocked"`` and it carries a non-empty
    ``blocked_reason``.
    """
    output_event_types = {"tool_result", "run_command"}
    collected: list[_BlockedWorkflowToolResult] = []
    for event in _iter_transcript_events(transcript):
        if event.event_type.casefold() not in output_event_types:
            continue
        workflow_payload = _workflow_payload_from_tool_output(_transcript_tool_output_text(event))
        if not workflow_payload:
            continue
        payload_status = _optional_text(workflow_payload, "status")
        reason = _optional_text(workflow_payload, "blocked_reason")
        if payload_status == "blocked" and reason:
            collected.append(
                _BlockedWorkflowToolResult(
                    tool_name=event.tool_name,
                    status=payload_status,
                    phase=_optional_text(workflow_payload, "phase"),
                    blocked_reason=reason,
                    work_id=_optional_text(workflow_payload, "work_id"),
                )
            )
    return collected
2366
+
2367
+
2368
def _json_payload_from_tool_output(output: str) -> JsonObject:
    """Extract the first JSON object found in a tool output string.

    When the output contains ``---``, only the text after its first occurrence
    is searched.  Decoding starts at the first ``{`` and ignores trailing text.
    Returns an empty dict when no brace is found, decoding fails, or the decoded
    value is not a dict.
    """
    _head, separator, tail = output.partition("---")
    text = tail if separator else output
    brace = text.find("{")
    if brace == -1:
        return {}
    try:
        # raw_decode tolerates trailing text after the JSON document.
        decoded, _consumed = json.JSONDecoder().raw_decode(text[brace:])
    except json.JSONDecodeError:
        return {}
    return _json_object(decoded) if isinstance(decoded, dict) else {}
2381
+
2382
+
2383
def _tool_payload_contract_findings(transcript: object) -> list[AgentRunReportFinding]:
    """Validate style-rewrite apply payloads found in tool outputs.

    For each ``tool_result`` / ``run_command`` event whose JSON payload declares
    a schema listed in ``STYLE_REWRITE_APPLY_RESULT_SCHEMAS``, the payload is
    validated with ``StyleRewriteAtomicApplyResult``; each validation failure
    becomes one finding.
    """
    violations: list[AgentRunReportFinding] = []
    for event in _iter_transcript_events(transcript):
        if event.event_type.casefold() not in {"tool_result", "run_command"}:
            continue
        tool_payload = _json_payload_from_tool_output(_transcript_tool_output_text(event))
        if tool_payload:
            declared_schema = _optional_text(tool_payload, "schema")
        else:
            declared_schema = ""
        if declared_schema not in STYLE_REWRITE_APPLY_RESULT_SCHEMAS:
            continue
        try:
            StyleRewriteAtomicApplyResult.model_validate(tool_payload)
        except ValidationError as error:
            violations.append(_effect_payload_contract_invalid_finding(declared_schema, error))
    return violations
2397
+
2398
+
2399
def _effect_payload_contract_invalid_finding(schema: str, exc: ValidationError) -> AgentRunReportFinding:
    """Build the critical finding for a tool payload that failed validation.

    ``actual`` shows the first reported error as ``<location>: <message>``.
    The location is the dotted ``loc`` path, or ``$`` when it is empty; the
    message falls back to ``str(exc)`` when the error has no ``msg``.
    """
    reported_errors = exc.errors()
    first = reported_errors[0] if reported_errors else {}
    dotted_location = ".".join(str(part) for part in first.get("loc", ()))
    location = dotted_location or "$"
    detail = str(first.get("msg") or str(exc))
    return AgentRunReportFinding(
        code=AgentRunReportFindingCode.EFFECT_PAYLOAD_CONTRACT_INVALID,
        severity="critical",
        source="transcript.tool_result.output",
        source_field=schema,
        expected="payload de efeito validado por modelo Pydantic fechado antes de dirigir relatório ou contagem",
        actual=f"{location}: {detail}",
        message=f"Tool output {schema} violou o contrato tipado antes de poder dirigir o workflow.",
        next_action=(
            "Reexecutar ou corrigir o produtor do efeito para emitir payload completo; não usar esse output "
            "para declarar apply, contagem ou conclusão."
        ),
    )
2416
+
2417
+
2418
def _transcript_tool_output_text(event: _TranscriptEventFields) -> str:
    """Return the event's tool output text.

    Prefers a truthy ``event.output``; otherwise uses ``event.content`` when it
    is a string; otherwise returns an empty string.
    """
    fallback = event.content if isinstance(event.content, str) else ""
    return event.output or fallback
2424
+
2425
+
2426
def _workflow_payload_from_tool_output(output: str) -> JsonObject:
    """Parse a tool output only when it could carry a blocked workflow payload.

    The substring test is a cheap pre-filter before JSON decoding.  Checking
    for ``"blocked_reason"`` alone is enough: any text containing it also
    contains ``"blocked"``.
    """
    if "blocked_reason" in output:
        return _json_payload_from_tool_output(output)
    return {}
2430
+
2431
+
2432
def _iter_transcript_events(transcript: object) -> list[_TranscriptEventFields]:
    """Flatten a nested transcript into a list of normalized events.

    Lists are walked item by item.  For each dict, the event parsed from the
    dict itself (if any) comes first, then any tool-call events synthesized from
    a planner response, then the events of its child containers.  Values that
    are neither lists nor dicts are ignored.
    """
    collected: list[_TranscriptEventFields] = []

    def walk(node: object) -> None:
        if isinstance(node, dict):
            parsed = _transcript_event_fields(node)
            if parsed is not None:
                collected.append(parsed)
            collected.extend(_planner_response_tool_call_events(node))
            for nested in _transcript_child_containers(node):
                walk(nested)
        elif isinstance(node, list):
            for entry in node:
                walk(entry)

    walk(transcript)
    return collected
2451
+
2452
+
2453
def _transcript_child_containers(value: JsonObject) -> list[object]:
    """Return the dict/list values stored under the known child-container keys.

    Keys are taken from ``TRANSCRIPT_CHILD_CONTAINER_KEYS`` in order; missing
    keys and values of any other type are skipped.
    """
    candidates = (value.get(key) for key in TRANSCRIPT_CHILD_CONTAINER_KEYS)
    return [child for child in candidates if isinstance(child, (dict, list))]
2460
+
2461
+
2462
def _planner_response_tool_call_events(value: JsonObject) -> list[_TranscriptEventFields]:
    """Synthesize ``tool_use`` events from a ``PLANNER_RESPONSE`` record.

    Returns an empty list unless ``value["type"]`` equals ``PLANNER_RESPONSE``
    (case-insensitively) and ``value["tool_calls"]`` is a list.  Each dict entry
    becomes one event.  Its command is read from ``args["command"]`` or
    ``args["CommandLine"]`` and kept only when it is a non-blank string.
    """
    if str(value.get("type") or "").upper() != "PLANNER_RESPONSE":
        return []
    raw_calls = value.get("tool_calls")
    if not isinstance(raw_calls, list):
        return []

    synthesized: list[_TranscriptEventFields] = []
    for call in raw_calls:
        if not isinstance(call, dict):
            continue
        call_parameters: JsonObject = {}
        call_args = call.get("args")
        if isinstance(call_args, dict):
            command_text = call_args.get("command") or call_args.get("CommandLine")
            if isinstance(command_text, str) and command_text.strip():
                call_parameters["command"] = command_text
        name = call.get("name")
        event_payload = {
            "type": "tool_use",
            "tool_name": name if isinstance(name, str) else "",
            "parameters": call_parameters,
            "content": call,
        }
        synthesized.append(_TranscriptEventFields.model_validate(event_payload))
    return synthesized
2491
+
2492
+
2493
def _transcript_event_fields(value: JsonObject) -> _TranscriptEventFields | None:
    """Normalize a raw transcript dict and parse it into event fields.

    Works on a shallow copy of *value*.  A string ``tool`` fills in a missing or
    empty ``tool_name``.  A top-level dict ``metadata`` is copied into
    ``parameters`` unless ``parameters`` already has a ``metadata`` key.
    Returns None when model validation raises ``ValueError``.
    """
    candidate = {**value}

    legacy_tool = candidate.get("tool")
    if not candidate.get("tool_name") and isinstance(legacy_tool, str):
        candidate["tool_name"] = legacy_tool

    raw_parameters = candidate.get("parameters")
    merged_parameters = {**raw_parameters} if isinstance(raw_parameters, dict) else {}
    top_level_metadata = candidate.get("metadata")
    if isinstance(top_level_metadata, dict):
        merged_parameters.setdefault("metadata", top_level_metadata)
    # Leave the original "parameters" value untouched when nothing was merged.
    if merged_parameters:
        candidate["parameters"] = merged_parameters

    try:
        return _TranscriptEventFields.model_validate(candidate)
    except ValueError:
        return None
2510
+
2511
+
2512
def _event_parameter_text(event: _TranscriptEventFields, field_name: str) -> str:
    """Read a text parameter (``command`` or ``role``) from a normalized event.

    Returns an empty string when the event parameters fail validation.

    Raises:
        ValueError: if *field_name* is not ``"command"`` or ``"role"`` and the
            parameters validated successfully.
    """
    try:
        text_parameters = _TranscriptTextParameters.model_validate(
            _field_payload(event.parameters, ("command", "role"))
        )
    except ValidationError:
        return ""
    if field_name == "command":
        return text_parameters.command
    if field_name == "role":
        return text_parameters.role
    raise ValueError(f"unsupported transcript text parameter: {field_name}")
2526
+
2527
+
2528
def _opencode_task_metadata_from_event(
    event: _TranscriptEventFields,
) -> _OpenCodeSpecialistTaskMetadataFields | None:
    """Extract specialist task metadata from a ``task`` tool event.

    Looks under the ``metadata``, ``task_metadata`` and ``taskMetadata``
    parameters, in that order, and returns the first dict candidate that parses.
    Returns None for other tools or when no candidate parses.
    """
    if event.tool_name.casefold() != "task":
        return None
    for parameter_key in ("metadata", "task_metadata", "taskMetadata"):
        raw_candidate = event.parameters.get(parameter_key)
        if isinstance(raw_candidate, dict):
            metadata = _opencode_task_metadata_from_candidate(
                JsonObjectAdapter.validate_python(raw_candidate)
            )
            if metadata is not None:
                return metadata
    return None
2545
+
2546
+
2547
def _opencode_task_metadata_from_candidate(candidate: JsonObject) -> _OpenCodeSpecialistTaskMetadataFields | None:
    """Parse task metadata from either the typed schema or the native model shape.

    A candidate that declares the v1 metadata schema is validated as-is.
    Otherwise a dict ``model`` entry with ``providerID``/``provider_id`` and
    ``modelID``/``model_id`` is adapted into that schema.  The model id is
    prefixed with the provider when both are present and the model id has no
    ``/``.  Returns None when nothing usable is present or validation fails.
    """
    metadata_schema = "medical-notes-workbench.opencode-specialist-task-metadata.v1"

    def _validated(data: object) -> _OpenCodeSpecialistTaskMetadataFields | None:
        try:
            return _OpenCodeSpecialistTaskMetadataFields.model_validate(data)
        except ValidationError:
            return None

    if str(candidate.get("schema") or "") == metadata_schema:
        return _validated(candidate)

    native_model = candidate.get("model")
    if not isinstance(native_model, dict):
        return None
    provider_id = str(native_model.get("providerID") or native_model.get("provider_id") or "").strip()
    native_model_id = str(native_model.get("modelID") or native_model.get("model_id") or "").strip()
    if not (provider_id or native_model_id):
        return None

    needs_provider_prefix = bool(provider_id) and bool(native_model_id) and "/" not in native_model_id
    model_id = f"{provider_id}/{native_model_id}" if needs_provider_prefix else native_model_id
    return _validated(
        {
            "schema": metadata_schema,
            "work_id": str(candidate.get("work_id") or candidate.get("workID") or ""),
            "task_id": str(candidate.get("task_id") or candidate.get("taskID") or ""),
            "provider_id": provider_id,
            "model_id": model_id,
            "model_tier": "specialist",
            "tool_sequence": ["task"],
            "prompt_contract": str(candidate.get("prompt_contract") or ""),
            "raw_content_embedded": None,
        }
    )
2578
+
2579
+
2580
def _workflow_payload_omission_findings(
    payload: JsonObject,
    final_text: str,
    transcript: object | None,
) -> list[AgentRunReportFinding]:
    """Run every payload-versus-report omission check and concatenate the results.

    Findings keep the order of the checks: agent events, version-control safety,
    API accounting, operational warnings, then content-quality audit.
    """
    folded_report = _fold_text(final_text)
    progress_only = _final_report_looks_like_progress_only(final_text)
    diagnostic_context = _object_field(payload, "diagnostic_context")
    return [
        *_omitted_agent_event_findings(diagnostic_context, folded_report),
        *_omitted_version_control_safety_findings(payload, folded_report, transcript),
        *_api_accounting_findings(payload, folded_report),
        *_omitted_operational_warning_findings(diagnostic_context, folded_report),
        *_content_quality_audit_findings(
            payload,
            folded_report,
            final_report_incomplete=progress_only,
        ),
    ]
2601
+
2602
+
2603
def _error_context_root_cause_findings(payload: JsonObject, final_text: str) -> list[AgentRunReportFinding]:
    """Flag a final report that omits the workflow's canonical root cause.

    No finding is produced when the payload has no root cause, or when the
    folded report contains the root-cause code or one of its labels from
    ``ROOT_CAUSE_PUBLIC_LABELS``.
    """
    root_cause, source_field = _canonical_root_cause(payload)
    if not root_cause:
        return []

    folded_report = _fold_text(final_text)
    accepted_mentions = (root_cause, *ROOT_CAUSE_PUBLIC_LABELS.get(root_cause, ()))
    if _folded_contains_any(folded_report, accepted_mentions):
        return []

    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.MISSING_ERROR_CONTEXT_ROOT_CAUSE,
        severity="high",
        source="workflow_payload",
        source_field=source_field,
        expected=root_cause,
        actual="omitted",
        message="O relatório final omitiu a causa raiz oficial do workflow.",
        next_action=(
            "Reescrever o relatório final priorizando error_context.root_cause/decision.reason_code "
            "antes de resumir exit code ou saída ruidosa da ferramenta."
        ),
        evidence={"root_cause": root_cause},
    )
    return [finding]
2629
+
2630
+
2631
def _canonical_root_cause(payload: JsonObject) -> tuple[str, str]:
    """Return ``(root_cause, source_field)`` from the first populated source.

    Sources are tried in order: ``error_context.root_cause``, then
    ``decision.reason_code`` (unless it is listed in
    ``NON_ERROR_DECISION_REASON_CODES``), then the top-level ``blocked_reason``.
    Returns ``("", "")`` when none applies.
    """
    if root_cause := _optional_text(_object_field(payload, "error_context"), "root_cause"):
        return root_cause, "error_context.root_cause"

    reason_code = _optional_text(_object_field(payload, "decision"), "reason_code")
    if reason_code and reason_code not in NON_ERROR_DECISION_REASON_CODES:
        return reason_code, "decision.reason_code"

    if blocked_reason := _optional_text(payload, "blocked_reason"):
        return blocked_reason, "blocked_reason"

    return "", ""
2647
+
2648
+
2649
def _omitted_agent_event_findings(diagnostic: JsonObject, folded_final_text: str) -> list[AgentRunReportFinding]:
    """Flag medium-or-higher agent events that the final report never mentions.

    An event counts as mentioned when the folded report contains its ``code``,
    ``root_cause_code`` or ``type``.  Blank identifiers are dropped before
    matching: an empty string is a substring of every text, so passing it on
    could make an event with a missing field look reported.  An event with no
    identifier at all cannot be confirmed and is treated as omitted.

    Returns a single aggregated finding naming up to five omitted events, or an
    empty list when nothing relevant was omitted.
    """
    reportable_severities = {"medium", "high", "critical"}
    relevant = [
        event
        for event in _collect_agent_events(diagnostic)
        if str(event.get("severity") or "").lower() in reportable_severities
    ]
    if not relevant:
        return []

    omitted = []
    for event in relevant:
        identifiers = tuple(
            text
            for key in ("code", "root_cause_code", "type")
            if (text := str(event.get(key) or "")).strip()
        )
        if not identifiers or not _folded_contains_any(folded_final_text, identifiers):
            omitted.append(event)
    if not omitted:
        return []

    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.AGENT_EVENT_OMITTED,
            severity="high",
            source="workflow_payload",
            source_field="diagnostic_context.agent_events",
            expected="eventos de agente medium+ devem aparecer no relatório final",
            actual=", ".join(str(event.get("code") or event.get("type") or "agent_event") for event in omitted[:5]),
            message="O relatório final omitiu agent_events relevantes emitidos pelo workflow.",
            next_action="Listar os agent_events relevantes e explicar impacto/mitigação no relatório da rodada.",
        )
    ]
2680
+
2681
+
2682
def _omitted_version_control_safety_findings(
    payload: JsonObject,
    folded_final_text: str,
    transcript: object | None,
) -> list[AgentRunReportFinding]:
    """Flag version-control safety signals that the final report left out.

    Two independent checks run on the payload's ``version_control_safety``:

    * ``mutation_without_guard`` is True and the report mentions none of the
      guard/version-control markers;
    * the resource guard is active with no run-finish seen, and neither the
      report nor the transcript accounts for it.
    """
    safety_fields = (
        "mutation_without_guard",
        "resource_guard_active",
        "run_finish_seen",
        "sync_status",
        "agent_instruction",
    )
    safety = _AgentReportVersionControlSafetyFields.model_validate(
        _field_payload(_object_field(payload, "version_control_safety"), safety_fields)
    )
    omissions: list[AgentRunReportFinding] = []

    mutation_markers = (
        "mutation_without_guard",
        "vault_guard",
        "version control",
        "controle de versao",
        "controle de versão",
    )
    if safety.mutation_without_guard is True and not _folded_contains_any(folded_final_text, mutation_markers):
        omissions.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.VERSION_CONTROL_SAFETY_OMITTED,
                severity="high",
                source="workflow_payload",
                source_field="version_control_safety.mutation_without_guard",
                expected="mutation_without_guard=true deve ser reportado",
                actual="omitted",
                message="O relatório final omitiu sinal de mutação sem guard de version control.",
                next_action="Reportar o sinal de version_control_safety e classificar se é limitação do harness ou bug do workflow.",
            )
        )

    guard_left_open = safety.resource_guard_active is True and safety.run_finish_seen is False
    if guard_left_open and not (
        _mentions_guard_finish_pending(folded_final_text)
        or _accepts_guard_finish_closed_confirmation(safety, folded_final_text)
        or _transcript_confirms_guard_finish_closed(transcript)
    ):
        omissions.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.RUN_FINISH_OMITTED,
                severity="high",
                source="workflow_payload",
                source_field="version_control_safety.run_finish_seen",
                expected="run_finish_seen=false com resource_guard_active=true deve ser reportado",
                actual="omitted",
                message="O relatório final omitiu que a proteção do vault ainda estava aberta.",
                next_action=(
                    "Fechar a proteção pela rota oficial ou reportar explicitamente que o workflow terminou "
                    "com pendência de proteção/version control."
                ),
                evidence={"sync_status": safety.sync_status},
            )
        )
    return omissions
2742
+
2743
+
2744
def _transcript_confirms_guard_finish_closed(transcript: object | None) -> bool:
    """Report whether a tool output in the transcript shows the vault guard closed.

    Only ``tool_result`` / ``run_command`` events are inspected.  An output
    confirms closure when its JSON payload passes
    ``_payload_confirms_guard_finish_closed``, or, as a text fallback, when the
    folded output names ``vault-run-finish-public`` and shows
    ``resource_guard_active`` followed by ``false`` and ``run_finish_seen``
    followed by ``true``.

    The fallback ties each value to its own key.  Checking only that the words
    "true" and "false" appear somewhere would also accept the opposite state
    (guard still active, finish not seen).
    """
    import re  # local import: keeps this block self-contained

    if transcript is None:
        return False

    # Allow a few separator characters (quotes, escapes, colon, spaces) between key and value.
    guard_inactive = re.compile(r"resource_guard_active\W{0,8}false\b")
    finish_seen = re.compile(r"run_finish_seen\W{0,8}true\b")

    for event in _iter_transcript_events(transcript):
        if event.event_type.casefold() not in {"tool_result", "run_command"}:
            continue
        output_text = _transcript_tool_output_text(event)
        payload = _json_payload_from_tool_output(output_text)
        if _payload_confirms_guard_finish_closed(payload):
            return True
        folded = _fold_text(output_text)
        if (
            "vault-run-finish-public" in folded
            and guard_inactive.search(folded) is not None
            and finish_seen.search(folded) is not None
        ):
            return True
    return False
2764
+
2765
+
2766
def _payload_confirms_guard_finish_closed(payload: JsonObject) -> bool:
    """Return True when a vault-run-finish payload reports the guard as closed.

    The payload must declare the ``vault-run-finish-public.v1`` schema and carry
    a dict ``version_control_safety`` whose ``resource_guard_active`` is exactly
    False and whose ``run_finish_seen`` is exactly True.
    """
    finish_schema = "medical-notes-workbench.vault-run-finish-public.v1"
    if _optional_text(payload, "schema") != finish_schema:
        return False
    safety = payload.get("version_control_safety")
    return (
        isinstance(safety, dict)
        and safety.get("resource_guard_active") is False
        and safety.get("run_finish_seen") is True
    )
2773
+
2774
+
2775
def _mentions_guard_finish_pending(folded_text: str) -> bool:
    """Return True when the folded report mentions the guard AND a pending state.

    Both marker groups are matched as plain substrings; an empty text never
    matches.
    """
    guard_markers = (
        "vault_guard",
        "run_finish",
        "run-finish",
        "protecao do vault",
        "proteção do vault",
        "version control",
        "controle de versao",
        "controle de versão",
        "alteracoes concorrentes",
        "alterações concorrentes",
        "bloqueio de escrita concorrente",
        "ponto de restauracao",
        "ponto de restauração",
    )
    pending_markers = (
        "pendente",
        "abert",
        "ativa",
        "nao encerr",
        "não encerr",
        "nao fech",
        "não fech",
        "pending_run_finish",
    )

    def _mentions_any(markers: tuple[str, ...]) -> bool:
        return any(marker in folded_text for marker in markers)

    return bool(folded_text) and _mentions_any(guard_markers) and _mentions_any(pending_markers)
2810
+
2811
+
2812
def _accepts_guard_finish_closed_confirmation(
    safety: _AgentReportVersionControlSafetyFields,
    folded_text: str,
) -> bool:
    """Accept a report that states the guard was closed.

    This applies only when the folded ``safety.agent_instruction`` contains
    ``antes do run-finish`` or ``before run-finish``.  The report must then
    mention a guard marker and a closed-state marker.  An empty report is never
    accepted.
    """
    if not folded_text:
        return False

    instruction = _fold_text(safety.agent_instruction)
    if not ("antes do run-finish" in instruction or "before run-finish" in instruction):
        return False

    guard_markers = (
        "protecao do vault",
        "proteção do vault",
        "protecao do repositorio",
        "proteção do repositório",
        "vault guard",
        "vault_guard",
        "version control",
        "controle de versao",
        "controle de versão",
    )
    closed_markers = (
        "encerrad",
        "fechad",
        "finalizad",
        "repositorio limpo",
        "repositório limpo",
        "clean",
    )
    mentions_guard = any(marker in folded_text for marker in guard_markers)
    mentions_closed = any(marker in folded_text for marker in closed_markers)
    return mentions_guard and mentions_closed
2847
+
2848
+
2849
def _runtime_log_findings(
    payload: JsonObject,
    runtime_log_text: str,
    final_text: str,
    transcript: object | None,
) -> list[AgentRunReportFinding]:
    """Collect every finding derived from the runtime log.

    Runs the performance, route-probe, vault-deletion and specialist-model
    checks first.  It then appends one ``RUNTIME_ERROR_OMITTED`` finding when
    the folded log contains runtime error labels whose report markers are all
    absent from the folded final report.
    """
    findings = _runtime_performance_findings(runtime_log_text)
    findings.extend(_runtime_route_probe_findings(payload, runtime_log_text))
    findings.extend(_runtime_process_chats_vault_deletion_findings(payload, runtime_log_text))
    findings.extend(_runtime_specialist_model_policy_findings(payload, runtime_log_text, transcript))

    folded_log = _fold_text(runtime_log_text)
    error_labels = _runtime_error_labels(folded_log) if folded_log else None
    if not error_labels:
        return findings

    folded_report = _fold_text(final_text)
    unreported = [
        label
        for label in error_labels
        if not _folded_contains_any(folded_report, _runtime_error_report_markers(label))
    ]
    if unreported:
        findings.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.RUNTIME_ERROR_OMITTED,
                severity="high",
                source="runtime_log",
                source_field="runtime_log",
                expected="erros de runtime/headless devem aparecer no relatório final da rodada",
                actual=", ".join(unreported),
                message="O log do runtime contém erro relevante que o relatório final do agente não reportou.",
                next_action=(
                    "Reescrever o relatório final incorporando o erro do runtime e seu impacto no workflow, "
                    "mesmo quando o processo headless retornou exit code 0."
                ),
                evidence={"runtime_errors": unreported},
            )
        )
    return findings
2890
+
2891
+
2892
def _runtime_process_chats_vault_deletion_findings(
    payload: JsonObject,
    runtime_log_text: str,
) -> list[AgentRunReportFinding]:
    """Flag Wiki note deletions seen in the runtime log during process-chats.

    Applies only when the payload's ``workflow`` is ``/mednotes:process-chats``
    and the log is non-empty.  Deleted paths come from the ``path`` group of
    ``PROCESS_CHATS_WIKI_DELETION_RE`` matches over the raw log text; blank
    paths are ignored.
    """
    if _optional_text(payload, "workflow") != "/mednotes:process-chats" or not runtime_log_text:
        return []

    removed_paths: list[str] = []
    for hit in PROCESS_CHATS_WIKI_DELETION_RE.finditer(runtime_log_text):
        path = hit.group("path").strip()
        if path:
            removed_paths.append(path)
    if not removed_paths:
        return []

    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.PROCESS_CHATS_VAULT_DELETION_WITHOUT_RECEIPT,
        severity="critical",
        source="runtime_log",
        source_field="git status",
        expected="process-chats não deve apagar notas Wiki sem recibo tipado de merge/delete",
        actual=", ".join(removed_paths[:5]),
        message=(
            "O runtime observou deleção de nota Wiki durante process-chats sem recibo tipado que autorize essa mutação."
        ),
        next_action=(
            "Parar a rodada, restaurar pelo vault guard/version control e repetir somente pela rota oficial "
            "de canonical merge/delete com receipt validado."
        ),
        evidence={"deleted_paths": removed_paths[:20]},
    )
    return [finding]
2926
+
2927
+
2928
def _runtime_specialist_model_policy_findings(
    payload: JsonObject,
    runtime_log_text: str,
    transcript: object | None,
) -> list[AgentRunReportFinding]:
    """Flag a Flash model selected by the runtime during a specialist style rewrite.

    A finding is produced only when all of these hold: the batch phase is
    ``style_rewrite``, at least one batch item requires specialist handling, the
    model observed in the runtime log matches ``FLASH_MODEL_RE``, and the
    transcript shows a native specialist invocation.
    """

    def _requires_specialist(item) -> bool:
        return (
            item.required_model_tier in {"specialist", "pro"}
            or item.preferred_model_tier == "pro"
            or item.model_policy == "medical_specialist_authoring.v1"
            or item.agent == "med-knowledge-architect"
        )

    batch = _specialist_runtime_batch_from_agent_directive(payload)
    if batch.phase != "style_rewrite" or not batch.current_batch_items:
        return []
    specialist_items = [item for item in batch.current_batch_items if _requires_specialist(item)]
    if not specialist_items:
        return []

    observed_model = _observed_agy_selected_model(runtime_log_text)
    if not observed_model or FLASH_MODEL_RE.search(observed_model) is None:
        return []
    if transcript is None or not _transcript_used_native_specialist_invocation(transcript):
        return []

    evidence = {
        "observed_model": observed_model,
        "transcript_specialist_invocation": "native",
        "work_ids": [item.work_id for item in specialist_items if item.work_id],
        "required_model_tiers": sorted({item.required_model_tier for item in specialist_items}),
        "model_policies": sorted({item.model_policy for item in specialist_items if item.model_policy}),
    }
    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.SPECIALIST_MODEL_POLICY_VIOLATION,
            severity="critical",
            source="runtime_log",
            source_field="runtime_log.selected_model+transcript.specialist_invocation",
            expected="tarefas médicas especializadas exigem modelo especialista/Pro sem fallback para Flash",
            actual=observed_model,
            message="O runtime selecionou Flash durante uma tarefa de reescrita médica especializada.",
            next_action=(
                "Não aplicar outputs desse lote; relançar a tarefa por runner oficial capaz de garantir "
                "modelo especialista/Pro e recibo atestado."
            ),
            evidence=evidence,
        )
    ]
2973
+
2974
+
2975
def _transcript_specialist_model_policy_findings(
    payload: JsonObject,
    transcript: object,
) -> list[AgentRunReportFinding]:
    """Flag model-policy violations visible in the transcript's ``tool_use`` events.

    Applies only to a ``style_rewrite`` batch with at least one specialist item.
    Two violations are reported, each at most once per distinct key:

    * an OpenCode ``task`` whose metadata model id is missing or matches
      ``FLASH_MODEL_RE`` (keyed by model id);
    * a command that uses the unverified-specialist-model escape variable.
    """

    def _requires_specialist(item) -> bool:
        return (
            item.required_model_tier in {"specialist", "pro"}
            or item.preferred_model_tier == "pro"
            or item.model_policy == "medical_specialist_authoring.v1"
            or item.agent == "med-knowledge-architect"
        )

    batch = _specialist_runtime_batch_from_agent_directive(payload)
    if batch.phase != "style_rewrite":
        return []
    specialist_items = [item for item in batch.current_batch_items if _requires_specialist(item)]
    if not specialist_items:
        return []

    violations: list[AgentRunReportFinding] = []
    reported: set[tuple[str, str]] = set()

    def _first_time(key: tuple[str, str]) -> bool:
        # True only the first time a key is offered.
        if key in reported:
            return False
        reported.add(key)
        return True

    def _batch_work_ids() -> list[str]:
        # Fresh list per finding so evidence payloads never share state.
        return [item.work_id for item in specialist_items if item.work_id]

    for event in _iter_transcript_events(transcript):
        if event.event_type.casefold() != "tool_use":
            continue

        task_metadata = _opencode_task_metadata_from_event(event)
        if task_metadata is not None:
            observed_model = task_metadata.model_id
            forbidden = not observed_model or FLASH_MODEL_RE.search(observed_model)
            if forbidden and _first_time(("opencode-task-model", observed_model)):
                violations.append(
                    AgentRunReportFinding(
                        code=AgentRunReportFindingCode.SPECIALIST_MODEL_POLICY_VIOLATION,
                        severity="critical",
                        source="transcript",
                        source_field="transcript.tool_use.parameters.metadata.model_id",
                        expected=(
                            "OpenCode task especialista deve provar modelo especialista/Pro via "
                            "opencode_task_metadata, sem fallback para Flash/Lite/Nano"
                        ),
                        actual=observed_model or "<missing>",
                        message=(
                            "A task OpenCode de autoria médica especializada registrou modelo ausente "
                            "ou proibido pela política de modelo."
                        ),
                        next_action=(
                            "Descartar outputs sem recibo valido e repetir a task OpenCode com modelo "
                            "especialista aceito antes de aplicar."
                        ),
                        evidence={
                            "harness": "opencode",
                            "observed_model": observed_model,
                            "provider_id": task_metadata.provider_id,
                            "task_id": task_metadata.task_id,
                            "work_id": task_metadata.work_id,
                            "work_ids": _batch_work_ids(),
                        },
                    )
                )

        command = _event_parameter_text(event, "command")
        if _command_uses_unverified_specialist_model_escape(command) and _first_time(
            ("unverified-specialist-model-escape", "public-workflow")
        ):
            violations.append(
                AgentRunReportFinding(
                    code=AgentRunReportFindingCode.SPECIALIST_MODEL_POLICY_VIOLATION,
                    severity="critical",
                    source="transcript",
                    source_field="transcript.tool_use.parameters.command.env",
                    expected=(
                        "fluxo publico não deve usar dev-escape para aceitar modelo especialista "
                        "não verificado pelo Workbench"
                    ),
                    actual="MEDNOTES_ALLOW_UNVERIFIED_SPECIALIST_MODEL",
                    message=(
                        "O agente tentou contornar a proveniência de modelo especialista com variável "
                        "de escape de desenvolvedor."
                    ),
                    next_action=(
                        "Descartar o output desse item, reportar a violação e retomar pela rota oficial "
                        "com recibo/proveniência validada pelo Workbench."
                    ),
                    evidence={
                        "work_ids": _batch_work_ids(),
                        "tool_name": event.tool_name,
                    },
                )
            )
    return violations
3064
+
3065
+
3066
def _command_uses_unverified_specialist_model_escape(command: str) -> bool:
    """Return True when a style-rewrite command sets the unverified-model escape.

    The command must contain ``MEDNOTES_ALLOW_UNVERIFIED_SPECIALIST_MODEL`` and
    one of the ``finalize-style-rewrite-output`` /
    ``apply-specialist-style-rewrite`` steps.  Matching is case-sensitive.
    """
    guarded_steps = ("finalize-style-rewrite-output", "apply-specialist-style-rewrite")
    return "MEDNOTES_ALLOW_UNVERIFIED_SPECIALIST_MODEL" in command and any(
        step in command for step in guarded_steps
    )
3070
+
3071
+
3072
def _style_rewrite_batch_progress_checkpoint_findings(
    payload: JsonObject,
    transcript: object,
) -> list[AgentRunReportFinding]:
    """Flag a next-batch command issued before the applied batch was reported.

    Applies only when the batch phase is ``style_rewrite`` and the report
    contract sets ``after_each_batch``.  The transcript is scanned in order:
    after a real apply command, assistant messages are buffered; when the next
    command arrives, the buffer is checked for a batch report.  If no report
    was seen and the command starts the next batch, a single finding is
    returned immediately.
    """
    batch = _specialist_runtime_batch_from_agent_directive(payload)
    if batch.phase != "style_rewrite":
        return []
    if not batch.report_contract.after_each_batch:
        return []
    # True between an apply command and the batch report that acknowledges it.
    saw_batch_apply = False
    # Assistant messages seen since the last apply command.
    assistant_message_buffer: list[str] = []
    for event in _iter_transcript_events(transcript):
        event_type = event.event_type.casefold()
        if event_type == "message":
            role = (event.role or _event_parameter_text(event, "role")).casefold()
            # Messages without a role are treated like assistant messages.
            if saw_batch_apply and role in {"", "assistant", "model"}:
                text = _transcript_message_text(event.content)
                if text.strip():
                    assistant_message_buffer.append(text)
            continue
        if event_type == "tool_result":
            continue
        if event_type != "tool_use":
            continue
        command = _event_parameter_text(event, "command")
        if not command:
            continue
        # Evaluate the buffered messages before classifying this command, so a
        # report emitted right after the apply clears the pending checkpoint.
        if saw_batch_apply and _looks_like_style_rewrite_batch_report("\n".join(assistant_message_buffer)):
            saw_batch_apply = False
            assistant_message_buffer = []
        if _is_real_style_rewrite_apply_command(command):
            # A new apply (re)starts the checkpoint with an empty buffer.
            saw_batch_apply = True
            assistant_message_buffer = []
            continue
        if saw_batch_apply and _is_next_style_rewrite_batch_command(command):
            return [
                AgentRunReportFinding(
                    code=AgentRunReportFindingCode.BATCH_PROGRESS_REPORT_OMITTED,
                    severity="high",
                    source="transcript",
                    source_field="transcript.tool_use.parameters.command",
                    expected=(
                        "após aplicar um lote de style-rewrite, o agente deve emitir resumo humano "
                        "com qualidade, preservação e pendências antes de planejar/rodar a próxima leva"
                    ),
                    actual=command,
                    message=(
                        "O agente continuou a próxima etapa de reescrita sem cumprir o checkpoint de relatório do lote."
                    ),
                    next_action=(
                        "Interromper a conclusão da rodada, reportar o lote aplicado em termos humanos e só então "
                        "retomar a próxima leva pela rota oficial."
                    ),
                    evidence={
                        "command": command,
                        "batch_work_ids": [item.work_id for item in batch.current_batch_items if item.work_id],
                    },
                )
            ]
    return []
3132
+
3133
+
3134
def _specialist_rewrite_count_findings(transcript: object, final_text: str) -> list[AgentRunReportFinding]:
    """Flag a final report whose rewritten-note count disagrees with the transcript.

    The applied work ids are gathered from the transcript first; the count
    claimed in ``final_text`` is only parsed when at least one apply was seen.

    Returns:
        A one-element list with a SPECIALIST_REWRITE_COUNT_MISMATCH finding, or
        an empty list when no apply was observed, no count was claimed, or the
        claimed count equals the number of applied work ids.
    """
    applied_ids = _applied_specialist_rewrite_work_ids(transcript)
    if not applied_ids:
        return []
    applied_total = len(applied_ids)
    claimed_total = _reported_specialist_rewrite_count(final_text)
    if claimed_total is None or claimed_total == applied_total:
        return []
    mismatch = AgentRunReportFinding(
        code=AgentRunReportFindingCode.SPECIALIST_REWRITE_COUNT_MISMATCH,
        severity="high",
        source="transcript",
        source_field="tool_result.output.style_rewrite_applied_count",
        expected=str(applied_total),
        actual=str(claimed_total),
        message="O relatório final declarou uma contagem de notas reescritas diferente dos applies oficiais observados.",
        next_action=(
            "Reescrever o relatório final usando a contagem real de applies oficiais e listar qualquer item aplicado, "
            "bloqueado ou pendente sem arredondar a evidência."
        ),
        evidence={"work_ids": applied_ids},
    )
    return [mismatch]
3157
+
3158
+
3159
def _applied_specialist_rewrite_work_ids(transcript: object) -> list[str]:
    """Collect, in first-seen order, the work ids written by style-rewrite applies.

    Only ``tool_result`` / ``run_command`` events whose JSON output carries one
    of the two atomic-apply schemas are considered, and outputs whose status is
    blocked, failed or waiting_external are ignored. Both the payload itself and
    its nested ``apply`` object (when present) are validated as
    ``StyleRewriteAtomicApplyResult``; shapes that fail validation are skipped.

    Returns:
        De-duplicated, stripped, non-empty work ids.
    """
    apply_schemas = {
        "medical-notes-workbench.style-rewrite-atomic-apply-agent-stdout.v1",
        "medical-notes-workbench.style-rewrite-atomic-apply-result.v1",
    }
    non_applied_statuses = {"blocked", "failed", "waiting_external"}
    # Dict keys preserve insertion order, which gives ordered de-duplication.
    seen_ids: dict[str, None] = {}

    def remember(raw_id: object) -> None:
        cleaned = str(raw_id or "").strip()
        if cleaned:
            seen_ids.setdefault(cleaned, None)

    for event in _iter_transcript_events(transcript):
        if event.event_type.casefold() not in {"tool_result", "run_command"}:
            continue
        payload = _json_payload_from_tool_output(_transcript_tool_output_text(event))
        if _optional_text(payload, "schema") not in apply_schemas:
            continue
        if _optional_text(payload, "status").casefold() in non_applied_statuses:
            continue
        nested = _object_field(payload, "apply")
        shapes = [payload, nested] if nested else [payload]
        for shape in shapes:
            try:
                parsed = StyleRewriteAtomicApplyResult.model_validate(shape)
            except ValidationError:
                continue
            # Items without their own id inherit the result-level (or payload-level) id.
            default_id = (parsed.work_id or _optional_text(payload, "work_id")).strip()
            for entry in parsed.items:
                if entry.written:
                    remember(entry.work_id or default_id)
            if parsed.written_count > 0:
                remember(default_id)
    return list(seen_ids)
3195
+
3196
+
3197
def _reported_specialist_rewrite_count(final_text: str) -> int | None:
    """Return the first rewrite count claimed in the final report, if any.

    The text is folded before ``SPECIALIST_REWRITE_COUNT_CLAIM_RE`` is applied;
    only the first match is used. Returns ``None`` when nothing matches.
    """
    claims = SPECIALIST_REWRITE_COUNT_CLAIM_RE.finditer(_fold_text(final_text))
    first_claim = next(claims, None)
    if first_claim is None:
        return None
    return _as_int(first_claim.group("count"))
3202
+
3203
+
3204
def _tool_result_has_style_rewrite_progress_checkpoint(output: str) -> bool:
    """Tell whether a tool output embeds a human progress checkpoint that reads as a batch report.

    When the payload uses the agent-stdout apply schema, the checkpoint is
    taken from its ``human_progress_checkpoint`` key; otherwise the payload
    itself is treated as the checkpoint. The checkpoint must be a dict with the
    human-progress-checkpoint schema, and the concatenation of its summary
    fields (plus any ``preserved`` list entries) must satisfy
    ``_looks_like_style_rewrite_batch_report``.
    """
    parsed = _json_payload_from_tool_output(output)
    if not parsed:
        return False
    checkpoint: object = parsed
    if _optional_text(parsed, "schema") == "medical-notes-workbench.style-rewrite-atomic-apply-agent-stdout.v1":
        checkpoint = parsed.get("human_progress_checkpoint")
    if not isinstance(checkpoint, dict):
        return False
    if checkpoint.get("schema") != "medical-notes-workbench.style-rewrite-human-progress-checkpoint.v1":
        return False
    sections = [
        str(checkpoint.get(field) or "")
        for field in ("summary", "content_quality", "linker_summary", "remaining_summary")
    ]
    preserved_entries = checkpoint.get("preserved")
    if isinstance(preserved_entries, list):
        sections.append("\n".join(str(entry) for entry in preserved_entries))
    return _looks_like_style_rewrite_batch_report("\n".join(sections))
3228
+
3229
+
3230
def _is_real_style_rewrite_apply_command(command: str) -> bool:
    """Tell whether ``command`` is a non-dry-run style-rewrite apply.

    A command mentioning ``apply-specialist-style-rewrite`` always counts; one
    mentioning ``apply-style-rewrite`` counts only without ``--dry-run``.
    """
    normalized = _fold_text(command)
    specialist_apply = "apply-specialist-style-rewrite" in normalized
    plain_apply = "apply-style-rewrite" in normalized and "--dry-run" not in normalized
    return specialist_apply or plain_apply
3235
+
3236
+
3237
def _is_next_style_rewrite_batch_command(command: str) -> bool:
    """Tell whether ``command`` starts the next style-rewrite batch.

    Matches either a ``plan-subagents`` call that also mentions
    ``style-rewrite``, or a ``fix-wiki`` call carrying ``--apply``.
    """
    normalized = _fold_text(command)
    plans_next_batch = "plan-subagents" in normalized and "style-rewrite" in normalized
    reruns_fix_wiki = "fix-wiki" in normalized and "--apply" in normalized
    return plans_next_batch or reruns_fix_wiki
3242
+
3243
+
3244
def _looks_like_style_rewrite_batch_report(text: str) -> bool:
    """Heuristically decide whether ``text`` is a human batch report.

    After folding, the text must mention all four topics: the batch itself,
    quality, preservation (YAML / provenance / links) and remaining work.
    Empty folded text is never a report.
    """
    normalized = _fold_text(text)
    if not normalized:
        return False
    # One tuple of accepted markers per required topic.
    required_topics = (
        ("lote", "batch"),
        ("qualidade", "quality"),
        (
            "yaml",
            "proveniencia",
            "proveniência",
            "links preserv",
            "preservou links",
            "preserved links",
        ),
        (
            "restam",
            "restante",
            "remaining",
            "pendente",
            "faltam",
            "continua",
        ),
    )
    return all(
        any(marker in normalized for marker in topic_markers)
        for topic_markers in required_topics
    )
3273
+
3274
+
3275
+ def _command_argument(command: str, option: str) -> str:
3276
+ try:
3277
+ parts = shlex.split(command)
3278
+ except ValueError:
3279
+ pattern = re.compile(rf"{re.escape(option)}\s+(?P<value>\S+)")
3280
+ match = pattern.search(command)
3281
+ return match.group("value") if match else ""
3282
+ for index, part in enumerate(parts[:-1]):
3283
+ if part == option:
3284
+ return parts[index + 1]
3285
+ return ""
3286
+
3287
+
3288
def _observed_runtime_model(runtime_log_text: str) -> str:
    """Return the model label observed in the runtime log, or ``""``.

    The last ``AGY_SELECTED_MODEL_RE`` match wins; when there is none, the
    first ``FLASH_MODEL_RE`` match is returned verbatim.
    """
    last_selected: str | None = None
    for selection in AGY_SELECTED_MODEL_RE.finditer(runtime_log_text):
        last_selected = selection.group("label").strip()
    if last_selected is not None:
        return last_selected
    flash_hit = FLASH_MODEL_RE.search(runtime_log_text)
    if flash_hit:
        return flash_hit.group(0)
    return ""
3294
+
3295
+
3296
def _observed_agy_selected_model(runtime_log_text: str) -> str:
    """Return the stripped label of the last ``AGY_SELECTED_MODEL_RE`` match, or ``""``."""
    last_selected = ""
    for selection in AGY_SELECTED_MODEL_RE.finditer(runtime_log_text):
        last_selected = selection.group("label").strip()
    return last_selected
3299
+
3300
+
3301
def _runtime_performance_findings(runtime_log_text: str) -> list[AgentRunReportFinding]:
    """Scan CPU samples in time order and report sustained high-CPU runs.

    Samples are grouped into runs per command family. A run is closed, and
    evaluated by ``_runtime_performance_findings_for_family``, when a sample of
    another family arrives or when a sample of the same family drops below
    ``HIGH_CPU_PERCENT_THRESHOLD``. Runs still open at the end are evaluated
    too.
    """
    samples = _runtime_cpu_samples(runtime_log_text)
    sample_total = len(samples)
    findings: list[AgentRunReportFinding] = []
    open_runs: dict[str, list[_RuntimeCpuSample]] = {}

    def close_run(family: str) -> None:
        # Remove the run from the open set and evaluate it as a whole.
        findings.extend(
            _runtime_performance_findings_for_family(
                family,
                open_runs.pop(family),
                total_sample_count=sample_total,
            )
        )

    for sample in sorted(samples, key=lambda item: item.elapsed_seconds):
        family = _cpu_command_family(sample.max_cpu_command)
        # A sample from a different family interrupts every other open run.
        for interrupted in [name for name in open_runs if name != family]:
            close_run(interrupted)
        peak = max(sample.total_cpu_percent, sample.max_cpu_percent)
        if peak >= HIGH_CPU_PERCENT_THRESHOLD:
            open_runs.setdefault(family, []).append(sample)
        elif family in open_runs:
            close_run(family)
    for family in tuple(open_runs):
        close_run(family)
    return findings
3337
+
3338
+
3339
def _runtime_route_probe_findings(
    payload: JsonObject,
    runtime_log_text: str,
) -> list[AgentRunReportFinding]:
    """Report discovery probes logged during a terminal no-pending process-chats run.

    Only applies when ``_is_process_chats_terminal_no_pending(payload)`` holds.
    The probe commands are the ``max_cpu_command`` values of CPU samples that
    ``_is_route_probe_command`` accepts, de-duplicated in first-seen order.

    Returns:
        A one-element list with a RUNTIME_ROUTE_PROBE_OBSERVED finding, or an
        empty list when the payload does not qualify or no probe was logged.
    """
    if not _is_process_chats_terminal_no_pending(payload):
        return []
    probe_commands = list(
        dict.fromkeys(
            sample.max_cpu_command
            for sample in _runtime_cpu_samples(runtime_log_text)
            if _is_route_probe_command(sample.max_cpu_command)
        )
    )
    if not probe_commands:
        return []
    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.RUNTIME_ROUTE_PROBE_OBSERVED,
        severity="medium",
        source="runtime_log",
        source_field="runtime_log.cpu_samples.max_cpu_command",
        expected=(
            "process-chats terminal sem chats novos deve executar a checagem oficial direta "
            "sem probes recursivos de descoberta"
        ),
        # Each command is truncated so the summary line stays readable.
        actual="; ".join(probe[:160] for probe in probe_commands),
        message=(
            "O runtime registrou busca/probe recursivo durante um fluxo terminal simples; "
            "isso é atrito de rota e deve aparecer no relatório da rodada."
        ),
        next_action=(
            "Endurecer launcher/runbook ou harness para iniciar pela porta pública `list-pending --summary` "
            "sem busca exploratória, e repetir a rodada validando transcript/runtime log."
        ),
        evidence={
            "schema": CPU_SAMPLE_SCHEMA,
            "commands": probe_commands[:5],
        },
    )
    return [finding]
3378
+
3379
+
3380
def _is_process_chats_terminal_no_pending(payload: JsonObject) -> bool:
    """Tell whether ``payload`` is a completed process-chats run with nothing pending.

    The workflow must be ``/mednotes:process-chats`` with status ``completed``,
    and at least one of these must hold: the terminal state is ``no_pending``;
    the phase is ``pending_backlog`` with backlog state ``no_pending_raws``; or
    both ``item_count`` and ``total_available_count`` are zero.
    """
    wanted_fields = (
        "workflow",
        "status",
        "phase",
        "process_chats_terminal_state",
        "process_chats_backlog_state",
        "item_count",
        "total_available_count",
    )
    fields = _ProcessChatsTerminalFields.model_validate(_field_payload(payload, wanted_fields))
    completed_process_chats = fields.workflow == "/mednotes:process-chats" and fields.status == "completed"
    if not completed_process_chats:
        return False
    declared_terminal = fields.process_chats_terminal_state == "no_pending"
    empty_backlog_phase = (
        fields.phase == "pending_backlog" and fields.process_chats_backlog_state == "no_pending_raws"
    )
    nothing_counted = fields.item_count == 0 and fields.total_available_count == 0
    return declared_terminal or empty_backlog_phase or nothing_counted
3404
+
3405
+
3406
+ def _is_route_probe_command(command: str) -> bool:
3407
+ parts = shlex.split(command)
3408
+ if not parts:
3409
+ return False
3410
+ executable = Path(parts[0]).name
3411
+ if executable == "grep" and "-r" in parts:
3412
+ return True
3413
+ if executable in {"find", "mdfind"}:
3414
+ return True
3415
+ if executable in {"rg", "ripgrep"} and any(part in {"-g", "--glob", "--files"} for part in parts):
3416
+ return True
3417
+ return False
3418
+
3419
+
3420
def _runtime_performance_findings_for_family(
    command_family: str,
    high_samples: list[_RuntimeCpuSample],
    *,
    total_sample_count: int,
) -> list[AgentRunReportFinding]:
    """Turn one run of high-CPU samples into a performance finding, if it is sustained.

    A run is reported only when it has at least ``HIGH_CPU_MIN_SAMPLE_COUNT``
    samples and its estimated span reaches ``HIGH_CPU_MIN_SPAN_SECONDS``.

    Args:
        command_family: Family label shared by the samples of this run.
        high_samples: The samples belonging to the run.
        total_sample_count: Number of CPU samples in the whole log; recorded
            as evidence only.

    Returns:
        A one-element list with a RUNTIME_PERFORMANCE_BUG finding, or an empty
        list when the run is too short.
    """
    run_length = len(high_samples)
    if run_length < HIGH_CPU_MIN_SAMPLE_COUNT:
        return []
    span_seconds = _estimated_high_cpu_span_seconds(high_samples)
    if span_seconds < HIGH_CPU_MIN_SPAN_SECONDS:
        return []

    def sample_peak(sample: _RuntimeCpuSample) -> float:
        return max(sample.total_cpu_percent, sample.max_cpu_percent)

    peak_total = max(sample.total_cpu_percent for sample in high_samples)
    peak_process = max(sample.max_cpu_percent for sample in high_samples)
    peak_overall = max(peak_total, peak_process)
    hottest_sample = max(high_samples, key=sample_peak)
    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.RUNTIME_PERFORMANCE_BUG,
        severity="medium",
        source="runtime_log",
        source_field="runtime_log.cpu_samples",
        expected="workflow longo deve manter CPU sob controle ou reportar progresso claro antes de monopolizar a sessão",
        actual=(
            f"{run_length} amostras acima de {HIGH_CPU_PERCENT_THRESHOLD:.0f}% "
            f"por {span_seconds:.1f}s; pico={peak_overall:.1f}%"
        ),
        message="A execução registrou CPU alta sustentada; isso é bug de performance/UX do workflow.",
        next_action=(
            "Investigar a fase do workflow que monopolizou CPU, adicionar progresso/limites quando necessário "
            "e reportar o impacto na próxima rodada de experimento."
        ),
        evidence={
            "schema": CPU_SAMPLE_SCHEMA,
            "command_family": command_family,
            "sample_count": run_length,
            "total_sample_count": total_sample_count,
            "threshold_percent": HIGH_CPU_PERCENT_THRESHOLD,
            "observed_span_seconds": round(span_seconds, 2),
            "max_cpu_percent": round(peak_overall, 2),
            "max_total_cpu_percent": round(peak_total, 2),
            "max_process_cpu_percent": round(peak_process, 2),
            # Long command lines are truncated in the evidence.
            "max_cpu_command": hottest_sample.max_cpu_command[:500],
        },
    )
    return [finding]
3465
+
3466
+
3467
+ def _estimated_high_cpu_span_seconds(high_samples: list[_RuntimeCpuSample]) -> float:
3468
+ elapsed_values = sorted(sample.elapsed_seconds for sample in high_samples)
3469
+ if len(elapsed_values) < 2:
3470
+ return 0.0
3471
+ gaps = [
3472
+ after - before
3473
+ for before, after in zip(elapsed_values, elapsed_values[1:], strict=False)
3474
+ if after > before
3475
+ ]
3476
+ sample_window = min(gaps) if gaps else 0.0
3477
+ return max(elapsed_values) - min(elapsed_values) + sample_window
3478
+
3479
+
3480
+ def _cpu_command_family(command: str) -> str:
3481
+ folded = command.casefold()
3482
+ if "mednotes/wiki/cli.py" in folded or "fix-wiki --apply" in folded:
3483
+ return "workbench_cli"
3484
+ if "/gemini" in folded or " gemini " in folded or folded.startswith("gemini "):
3485
+ return "external_model_runtime"
3486
+ return "other"
3487
+
3488
+
3489
def _runtime_cpu_samples(runtime_log_text: str) -> list[_RuntimeCpuSample]:
    """Extract the CPU sample records embedded in a runtime log.

    Every line that, once stripped, starts with ``{`` is parsed with
    ``_RuntimeCpuSample.model_validate_json``. Lines that raise ``ValueError``
    are ignored, as are records whose ``schema_id`` differs from
    ``CPU_SAMPLE_SCHEMA``. Log order is preserved.
    """
    parsed: list[_RuntimeCpuSample] = []
    for raw_line in runtime_log_text.splitlines():
        stripped = raw_line.strip()
        if not stripped.startswith("{"):
            continue
        try:
            record = _RuntimeCpuSample.model_validate_json(stripped)
        except ValueError:
            continue
        else:
            if record.schema_id == CPU_SAMPLE_SCHEMA:
                parsed.append(record)
    return parsed
3502
+
3503
+
3504
+ def _runtime_error_labels(folded_log: str) -> list[str]:
3505
+ labels: list[str] = []
3506
+ if "resource_exhausted" in folded_log or "code 429" in folded_log or " 429 " in folded_log:
3507
+ labels.append("RESOURCE_EXHAUSTED/429 quota")
3508
+ if "etimedout" in folded_log or "read timed out" in folded_log:
3509
+ labels.append("specialist model runtime timeout")
3510
+ if "agent executor error" in folded_log:
3511
+ labels.append("agent executor error")
3512
+ recovered_antigravity_auth = (
3513
+ "you are not logged into antigravity" in folded_log
3514
+ and ("auth succeeded" in folded_log or "silent auth succeeded" in folded_log)
3515
+ and "authentication timed out" not in folded_log
3516
+ )
3517
+ if (
3518
+ "authentication timed out" in folded_log
3519
+ or ("you are not logged into antigravity" in folded_log and not recovered_antigravity_auth)
3520
+ ):
3521
+ labels.append("antigravity authentication transient")
3522
+ return labels
3523
+
3524
+
3525
def _runtime_error_report_markers(label: str) -> tuple[str, ...]:
    """Return the report phrases that count as acknowledging a runtime error label.

    The folded label is matched against trigger words in a fixed priority
    order (quota, timeout, executor, authentication). When no trigger matches,
    the label itself is the only accepted marker.
    """
    normalized = _fold_text(label)
    # (trigger words, accepted report markers) pairs; the first match wins.
    marker_table = (
        (
            ("resource_exhausted", "429", "quota"),
            ("resource_exhausted", "429", "quota", "cota", "cota 429"),
        ),
        (
            ("timeout",),
            (
                "etimedout",
                "read etimedout",
                "read timed out",
                "timeout",
                "tempo esgotado",
                "modelo especialista",
            ),
        ),
        (
            ("executor",),
            ("agent executor error", "executor", "erro de executor"),
        ),
        (
            ("authentication", "antigravity"),
            (
                "not logged into antigravity",
                "authentication timed out",
                "auth timed out",
                "antigravity",
                "autenticacao",
                "autenticação",
            ),
        ),
    )
    for triggers, report_markers in marker_table:
        if any(trigger in normalized for trigger in triggers):
            return report_markers
    return (label,)
3550
+
3551
+
3552
def _content_quality_audit_findings(
    payload: JsonObject,
    folded_final_text: str,
    *,
    final_report_incomplete: bool = False,
) -> list[AgentRunReportFinding]:
    """Flag a final report that omits a required content-quality audit.

    Nothing is reported when the final report is already known to be
    incomplete, when the batch report contract does not list
    ``content_quality_audit`` in ``must_include``, or when the folded report
    text either contains the audit or explains why it is not applicable.

    Returns:
        A one-element list with a CONTENT_QUALITY_AUDIT_OMITTED finding, or an
        empty list.
    """
    if final_report_incomplete:
        return []
    contract = _specialist_runtime_batch_from_agent_directive(payload).report_contract
    audit_required = "content_quality_audit" in set(contract.must_include)
    if not audit_required:
        return []
    audit_addressed = _mentions_content_quality_audit(
        folded_final_text
    ) or _mentions_content_quality_audit_not_applicable(folded_final_text)
    if audit_addressed:
        return []
    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.CONTENT_QUALITY_AUDIT_OMITTED,
        severity="high",
        source="workflow_payload",
        source_field="agent_directive.control.effects[].payload.report_contract.must_include",
        expected="auditoria de conteúdo/qualidade antes-depois das notas reescritas",
        actual="omitted",
        message="O relatório final omitiu a auditoria de conteúdo exigida para notas reescritas.",
        next_action=(
            "Reescrever o relatório final com auditoria antes/depois por nota: preservação de YAML/proveniência/links, "
            "qualidade clínica/didática e classificação resolvida/parcial/não resolvida/piorou."
        ),
    )
    return [finding]
3583
+
3584
+
3585
+ def _mentions_content_quality_audit(folded_text: str) -> bool:
3586
+ has_audit = any(
3587
+ marker in folded_text
3588
+ for marker in ("auditoria de conteudo", "auditoria de qualidade", "content quality audit")
3589
+ )
3590
+ has_before_after = any(
3591
+ marker in folded_text
3592
+ for marker in ("antes/depois", "antes e depois", "before/after")
3593
+ )
3594
+ has_quality = any(
3595
+ marker in folded_text
3596
+ for marker in ("qualidade clinica", "qualidade de conteudo", "bug de conteudo", "bug de ux")
3597
+ )
3598
+ has_outcome_classification = any(
3599
+ marker in folded_text
3600
+ for marker in ("resolvid", "parcial", "nao resolvid", "não resolvid", "piorou")
3601
+ )
3602
+ return has_audit and has_before_after and has_quality and has_outcome_classification
3603
+
3604
+
3605
+ def _mentions_content_quality_audit_not_applicable(folded_text: str) -> bool:
3606
+ has_specialist_block = any(
3607
+ marker in folded_text
3608
+ for marker in (
3609
+ "specialist_model_quota_exhausted",
3610
+ "cota do modelo",
3611
+ "cota de uso do modelo",
3612
+ "cota no modelo",
3613
+ "quota do modelo",
3614
+ "capacidade do modelo",
3615
+ "capacidade externa do modelo",
3616
+ "limitacoes temporarias de cota",
3617
+ "limitações temporárias de cota",
3618
+ "modelo especialista",
3619
+ "modelo medico",
3620
+ "modelo médico",
3621
+ "modelo especializado",
3622
+ "modelo medico especializado",
3623
+ "modelo médico especializado",
3624
+ "modelo de ia especializado",
3625
+ "bloqueio imediato do modelo",
3626
+ "reescrita medica especializada",
3627
+ "reescrita médica especializada",
3628
+ "conteudo gerado",
3629
+ "conteúdo gerado",
3630
+ "criterios de estilo",
3631
+ "critérios de estilo",
3632
+ "visual didatico pendente",
3633
+ "visual didático pendente",
3634
+ )
3635
+ )
3636
+ has_rewrite_context = any(marker in folded_text for marker in ("reescrita", "rewrite", "conteudo clinico"))
3637
+ has_no_applied_output = any(
3638
+ marker in folded_text
3639
+ for marker in (
3640
+ "bloquead",
3641
+ "bloqueio",
3642
+ "interrompid",
3643
+ "nao foi aplicad",
3644
+ "não foi aplicad",
3645
+ "nenhuma nota",
3646
+ "nao avaliad",
3647
+ "não avaliad",
3648
+ "pendente",
3649
+ )
3650
+ )
3651
+ return has_specialist_block and has_rewrite_context and has_no_applied_output
3652
+
3653
+
3654
def _api_accounting_findings(payload: JsonObject, folded_final_text: str) -> list[AgentRunReportFinding]:
    """Flag a report that denies API work although the export embedded items.

    ``embedded_count`` is read from
    ``diagnostic_context.related_notes_export_recovery.headless_export``. A
    finding is produced only when that count is positive and the folded final
    text matches one of the "zero API calls" phrasings.

    Returns:
        A one-element list with an API_ACCOUNTING_MISMATCH finding, or an
        empty list.
    """
    recovery = _object_field(_object_field(payload, "diagnostic_context"), "related_notes_export_recovery")
    export_section = _object_field(recovery, "headless_export")
    headless = _AgentReportHeadlessExportFields.model_validate(
        _field_payload(export_section, ("embedded_count",))
    )
    embedded_count = _as_int(headless.embedded_count)
    if embedded_count <= 0:
        return []
    zero_api_claim = re.search(
        r"(api_calls\s*[:=]\s*0|0\s+chamadas?\s+(?:a|à|ao|de)?\s*api|"
        r"n[aã]o\s+houve\s+chamadas?|sem\s+chamadas?|no\s+api)",
        folded_final_text,
    )
    if zero_api_claim is None:
        return []
    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.API_ACCOUNTING_MISMATCH,
        severity="medium",
        source="workflow_payload",
        source_field="diagnostic_context.related_notes_export_recovery.headless_export.embedded_count",
        expected="relatório deve reconciliar embedded_count antes de afirmar zero chamadas de API",
        actual=f"embedded_count={embedded_count}",
        message="O relatório final afirmou zero trabalho de API apesar de o payload indicar embeddings gerados.",
        next_action="Explicar a diferença entre api_calls do workflow e embedded_count do export, ou corrigir os contadores.",
    )
    return [finding]
3688
+
3689
+
3690
def _omitted_operational_warning_findings(
    diagnostic: JsonObject,
    folded_final_text: str,
) -> list[AgentRunReportFinding]:
    """Flag a report that never mentions a ``catalog_missing`` graph warning.

    The warning codes come from ``_collect_graph_warnings``. The report counts
    as mentioning the warning when the folded text contains ``catalog``.

    Returns:
        A one-element list with an OPERATIONAL_WARNING_OMITTED finding, or an
        empty list.
    """
    warning_codes = {
        str(entry.get("code") or "")
        for entry in _collect_graph_warnings(diagnostic)
        if isinstance(entry, dict)
    }
    catalog_warning_raised = "catalog_missing" in warning_codes
    if not catalog_warning_raised or "catalog" in folded_final_text:
        return []
    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.OPERATIONAL_WARNING_OMITTED,
        severity="medium",
        source="workflow_payload",
        source_field="diagnostic_context.graph_audit_final.warnings",
        expected="warning catalog_missing deve aparecer no relatório de experimento",
        actual="omitted",
        message="O relatório final omitiu warning operacional catalog_missing.",
        next_action="Reportar o warning e decidir se CATALOGO_WIKI.json é legado ou artefato ainda obrigatório.",
    )
    return [finding]
3710
+
3711
+
3712
def _collect_agent_events(value: object) -> list[JsonObject]:
    """Gather every distinct agent event found anywhere inside ``value``.

    The structure is walked depth-first through nested dicts and lists. At
    each dict, the dict entries of its ``agent_events`` list (if any) are
    recorded before its child values are descended into. Events are
    de-duplicated on their ``(code, type, phase)`` text triple, keeping the
    first occurrence.
    """
    collected: list[JsonObject] = []
    seen_identities: set[tuple[str, str, str]] = set()

    def record(raw_event: dict) -> None:
        normalized = _json_object(raw_event)
        identity = (
            _optional_text(normalized, "code"),
            _optional_text(normalized, "type"),
            _optional_text(normalized, "phase"),
        )
        if identity not in seen_identities:
            seen_identities.add(identity)
            collected.append(normalized)

    def walk(node: object) -> None:
        if isinstance(node, list):
            for element in node:
                walk(element)
        elif isinstance(node, dict):
            declared_events = node.get("agent_events")
            if isinstance(declared_events, list):
                for raw_event in declared_events:
                    if isinstance(raw_event, dict):
                        record(raw_event)
            # Keep descending: events may also be nested deeper, including
            # inside the events that were just recorded.
            for nested in node.values():
                if isinstance(nested, (dict, list)):
                    walk(nested)

    walk(value)
    return collected
3743
+
3744
+
3745
def _collect_graph_warnings(diagnostic: JsonObject) -> list[JsonObject]:
    """Return the dict-shaped warnings under ``graph_audit_final.warnings``.

    Yields an empty list when ``warnings`` is not a list; non-dict entries are
    dropped and the remaining ones pass through ``_json_object``.
    """
    raw_warnings = _object_field(diagnostic, "graph_audit_final").get("warnings")
    if isinstance(raw_warnings, list):
        return [_json_object(entry) for entry in raw_warnings if isinstance(entry, dict)]
    return []
3751
+
3752
+
3753
def _folded_contains_any(folded_text: str, candidates: Iterable[object]) -> bool:
    """Tell whether any candidate, once folded and stripped, occurs in ``folded_text``.

    Candidates that fold to an empty or whitespace-only string are ignored.
    """
    needles = (_fold_text(str(candidate or "")).strip() for candidate in candidates)
    return any(bool(needle) and needle in folded_text for needle in needles)
3759
+
3760
+
3761
def _as_int(value: object) -> int:
    """Coerce ``value`` to an int by delegating to ``_safe_positive_int``."""
    coerced = _safe_positive_int(value)
    return coerced
3763
+
3764
+
3765
def _primary_objective_omission_findings(
    final_text: str,
    objective: PrimaryObjectiveSummary,
) -> list[AgentRunReportFinding]:
    """List the primary-objective questions the final report left unanswered.

    Process-chats and generic workflow summaries are delegated to their own
    checkers. Any other summary is checked against four questions: wiki
    outcome, mutations, graph outcome and related-notes outcome. One
    PRIMARY_OBJECTIVE_OMITTED finding is produced per unanswered question, in
    that order.
    """
    if isinstance(objective, ProcessChatsPrimaryObjectiveSummary):
        return _process_chats_primary_objective_omission_findings(final_text, objective)
    if isinstance(objective, WorkflowPrimaryObjectiveSummary):
        return _generic_primary_objective_omission_findings(final_text, objective)

    # (source field, answered in the report?, summary the report should convey)
    required_answers = (
        ("primary_objective.wiki_fixed", _mentions_wiki_outcome(final_text), objective.wiki_summary),
        ("primary_objective.mutation_summary", _mentions_mutation_outcome(final_text, objective), objective.mutation_summary),
        ("primary_objective.graph_summary", _mentions_graph_outcome(final_text, objective), objective.graph_summary),
        (
            "primary_objective.related_notes_summary",
            _mentions_related_notes_outcome(final_text, objective),
            objective.related_notes_summary,
        ),
    )
    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.PRIMARY_OBJECTIVE_OMITTED,
            severity="high",
            source="final_report",
            source_field=question_field,
            expected=expected_summary,
            actual="omitted",
            message="O relatório final não respondeu uma pergunta obrigatória do objetivo primário do fix-wiki.",
            next_action=(
                "Reescrever o relatório final respondendo: fixou a Wiki, o que mutou, "
                "se o grafo melhorou e se Notas Relacionadas foi atualizado ou ficou pendente."
            ),
        )
        for question_field, answered, expected_summary in required_answers
        if not answered
    ]
3804
+
3805
+
3806
def _primary_objective_success_claim_findings(
    final_text: str,
    objective: PrimaryObjectiveSummary,
) -> list[AgentRunReportFinding]:
    """Flag success language that the primary objective summary does not support.

    For a generic workflow summary, a positive success claim is a mismatch
    whenever ``objective.completed`` is falsy. For a process-chats summary it
    is a mismatch only when the process status is
    ``completed_with_link_blockers``. Other summary types are never flagged.

    Returns:
        A one-element list with a SUCCESS_CLAIM_MISMATCH finding, or an empty
        list.
    """
    if isinstance(objective, WorkflowPrimaryObjectiveSummary):
        overclaimed = not objective.completed and _has_positive_success_claim(final_text)
        if not overclaimed:
            return []
        unfinished_finding = AgentRunReportFinding(
            code=AgentRunReportFindingCode.SUCCESS_CLAIM_MISMATCH,
            severity="medium",
            source="final_report",
            source_field="primary_objective.completed",
            expected="completed=false",
            actual="success_claim",
            message="O relatório final declarou sucesso para um objetivo primário que a FSM ainda não concluiu.",
            next_action="Trocar sucesso simples por prévia, espera, bloqueio ou etapa pendente conforme primary_objective_summary.",
        )
        return [unfinished_finding]

    has_link_blockers = (
        isinstance(objective, ProcessChatsPrimaryObjectiveSummary)
        and objective.process_status == "completed_with_link_blockers"
    )
    if not has_link_blockers or not _has_positive_success_claim(final_text):
        return []
    blocker_finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.SUCCESS_CLAIM_MISMATCH,
        severity="medium",
        source="final_report",
        source_field="primary_objective.process_status",
        expected=objective.process_status,
        actual="success_claim",
        message="O relatório final usou linguagem de sucesso para process-chats com linker/grafo pendente.",
        next_action="Trocar sucesso simples por publicação concluída com pendência explícita de linker/grafo.",
    )
    return [blocker_finding]
3843
+
3844
+
3845
def _generic_primary_objective_omission_findings(
    final_text: str,
    objective: WorkflowPrimaryObjectiveSummary,
) -> list[AgentRunReportFinding]:
    """List the generic-workflow objective questions the final report left unanswered.

    The folded report must convey the objective status and echo fragments of
    the mutation, remaining-work and next-step summaries. One
    PRIMARY_OBJECTIVE_OMITTED finding is produced per missing answer, in that
    order.
    """
    normalized = _fold_text(final_text)
    # (source field, answered in the report?, summary the report should convey)
    required_answers = (
        ("primary_objective.objective_status", _mentions_generic_objective_status(normalized, objective), objective.status),
        (
            "primary_objective.mutation_summary",
            _mentions_summary_fragment(normalized, objective.mutation_summary),
            objective.mutation_summary,
        ),
        (
            "primary_objective.remaining_work_summary",
            _mentions_summary_fragment(normalized, objective.remaining_work_summary),
            objective.remaining_work_summary,
        ),
        (
            "primary_objective.next_step_summary",
            _mentions_summary_fragment(normalized, objective.next_step_summary),
            objective.next_step_summary,
        ),
    )
    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.PRIMARY_OBJECTIVE_OMITTED,
            severity="high",
            source="final_report",
            source_field=question_field,
            expected=expected_summary,
            actual="omitted",
            message="O relatório final não respondeu uma pergunta obrigatória do objetivo primário do workflow.",
            next_action="Reescrever o relatório final usando reports.details.primary_objective_summary.",
        )
        for question_field, answered, expected_summary in required_answers
        if not answered
    ]
3885
+
3886
+
3887
def _mentions_generic_objective_status(
    folded_text: str,
    objective: WorkflowPrimaryObjectiveSummary,
) -> bool:
    """Tell whether the folded report conveys the objective status.

    Either the raw status string occurs in the text, or one of the
    human-readable markers registered in ``NON_SUCCESS_HUMAN_STATUS_MARKERS``
    for the objective's public answer does.
    """
    if objective.status in folded_text:
        return True
    public_answer = _generic_public_objective_answer(objective)
    human_markers = NON_SUCCESS_HUMAN_STATUS_MARKERS.get(public_answer, ())
    return _folded_contains_any(folded_text, human_markers)
3896
+
3897
+
3898
def _mentions_summary_fragment(folded_text: str, summary: str) -> bool:
    """Tell whether the report echoes enough significant words of ``summary``.

    Significant words are folded tokens of five or more characters. Among the
    first eight of them, at least two must occur in ``folded_text`` (or one,
    when the summary has a single significant word). A summary without
    significant words never matches.
    """
    significant = [token for token in _fold_text(summary).split() if len(token) >= 5]
    if not significant:
        return False
    required_hits = min(2, len(significant))
    hits = 0
    for token in significant[:8]:
        if token in folded_text:
            hits += 1
    return hits >= required_hits
3903
+
3904
+
3905
def _process_chats_primary_objective_omission_findings(
    final_text: str,
    objective: ProcessChatsPrimaryObjectiveSummary,
) -> list[AgentRunReportFinding]:
    """List the process-chats objective questions the final report left unanswered.

    Five questions are checked: process status, raws, wiki writes, coverage
    and linker state. One PRIMARY_OBJECTIVE_OMITTED finding is produced per
    missing answer, in that order.
    """
    # (source field, answered in the report?, summary the report should convey)
    required_answers = (
        (
            "primary_objective.process_status",
            _mentions_process_chats_status(final_text, objective),
            objective.process_summary,
        ),
        ("primary_objective.raw_summary", _mentions_process_chats_raw(final_text, objective), objective.raw_summary),
        (
            "primary_objective.wiki_write_summary",
            _mentions_process_chats_wiki_write(final_text, objective),
            objective.wiki_write_summary,
        ),
        (
            "primary_objective.coverage_summary",
            _mentions_process_chats_coverage(final_text),
            objective.coverage_summary,
        ),
        (
            "primary_objective.linker_summary",
            _mentions_process_chats_linker(final_text, objective),
            objective.linker_summary,
        ),
    )
    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.PRIMARY_OBJECTIVE_OMITTED,
            severity="high",
            source="final_report",
            source_field=question_field,
            expected=expected_summary,
            actual="omitted",
            message="O relatório final não respondeu uma pergunta obrigatória do objetivo primário do process-chats.",
            next_action=(
                "Reescrever o relatório final respondendo: se publicou ou só preparou prévia, "
                "quais raws foram cobertos/processados, o que foi escrito na Wiki, "
                "se coverage/manifest bateram e qual foi o estado do linker/grafo."
            ),
        )
        for question_field, answered, expected_summary in required_answers
        if not answered
    ]
3953
+
3954
+
3955
def _mentions_wiki_outcome(final_text: str) -> bool:
    """Tell whether the report names the wiki together with an outcome word."""
    normalized = _fold_text(final_text)
    if "wiki" not in normalized:
        return False
    outcome_markers = ("fixou", "corrig", "parcial", "pendente", "nao")
    return any(marker in normalized for marker in outcome_markers)
3958
+
3959
+
3960
def _mentions_mutation_outcome(final_text: str, objective: FixWikiPrimaryObjectiveSummary) -> bool:
    """Tell whether the report states what was mutated, with the right numbers.

    The folded text must use a mutation-related word. With zero mutations it
    must also express "none"; otherwise it must contain the mutation count
    and, when a different non-zero written count exists, that count alongside
    a write-related word.
    """
    normalized = _fold_text(final_text)

    def mentions_any(markers: tuple[str, ...]) -> bool:
        return any(marker in normalized for marker in markers)

    if not mentions_any(("mutacao", "alterad", "modificad", "grav", "mudanca", "mudancas")):
        return False
    mutated = objective.mutation_count
    if mutated == 0:
        return mentions_any((" 0 ", ": 0", "0 arquivo", "nenhum", "nada"))
    if str(mutated) not in normalized:
        return False
    written = objective.written_count
    if not written or written == mutated:
        return True
    # Distinct written count: it must be reported too, next to a write verb.
    return str(written) in normalized and mentions_any(("grav", "salv", "escrit", "workflow"))
3973
+
3974
+
3975
+ def _mentions_graph_outcome(final_text: str, objective: FixWikiPrimaryObjectiveSummary) -> bool:
3976
+ folded = _fold_text(final_text)
3977
+ if "grafo" not in folded and "graph" not in folded:
3978
+ return False
3979
+ match objective.graph_status:
3980
+ case "clean":
3981
+ return any(
3982
+ marker in folded
3983
+ for marker in (
3984
+ "limpo",
3985
+ "sem bloqueio",
3986
+ "sem blockers",
3987
+ "sem erro",
3988
+ "sem comparacao",
3989
+ "sem comparação",
3990
+ "grafo limpo",
3991
+ "graph clean",
3992
+ "terminou sem bloqueios",
3993
+ "terminou sem erros",
3994
+ )
3995
+ )
3996
+ case "improved":
3997
+ return any(marker in folded for marker in ("melhor", "reduz", "corrig"))
3998
+ case "blocked":
3999
+ return any(marker in folded for marker in ("bloque", "pendente", "erro"))
4000
+ case "unchanged":
4001
+ return any(marker in folded for marker in ("nao melhorou", "não melhorou", "permaneceu", "inalter"))
4002
+ case "worse":
4003
+ return any(marker in folded for marker in ("pior", "regred"))
4004
+ case "unknown":
4005
+ return any(marker in folded for marker in ("sem comparacao", "sem comparação", "nao confirmou", "não confirmou"))
4006
+
4007
+
4008
def _mentions_related_notes_outcome(final_text: str, objective: FixWikiPrimaryObjectiveSummary) -> bool:
    """Check that the report covers the Related Notes outcome without contradicting it.

    Args:
        final_text: Final report text written by the agent.
        objective: Summary providing ``related_notes_status`` and
            ``related_notes_summary``.

    Returns:
        False when Related Notes are not mentioned at all, or when the status
        is ``"updated"`` but the report describes the work as still pending.
        When the status is ``"pending"`` and the summary cites a quota
        ("cota"), the report must mention the quota too. True otherwise.
    """
    folded = _fold_text(final_text)
    if "related notes" not in folded and "notas relacionadas" not in folded:
        return False
    if objective.related_notes_status == "pending" and "cota" in _fold_text(objective.related_notes_summary):
        return "cota" in folded or "quota" in folded
    # ``folded`` is accent-stripped, so every marker must be unaccented. The
    # former "está pendente" entry could never match; its folded form is used.
    pending_markers = (
        "convergencia total esta pendente",
        "convergencia pendente",
        "pendente da aplicacao",
        "pendente de aplicacao",
        "ficou pendente",
        "estao pendentes",
        "esta pendente",
    )
    if objective.related_notes_status == "updated" and any(marker in folded for marker in pending_markers):
        return False
    return True
4028
+
4029
+
4030
def _mentions_process_chats_status(final_text: str, objective: ProcessChatsPrimaryObjectiveSummary) -> bool:
    """Return True when the report uses wording that fits the process status.

    Preview-like statuses expect preview wording; any other status expects
    publication wording.
    """
    haystack = _fold_text(final_text)
    if objective.process_status in {"preview_ready", "ready_to_publish"}:
        expected_markers = ("previa", "preview", "pronta", "ready_to_publish")
    else:
        expected_markers = ("publicacao", "publicou", "publicad", "process-chats")
    for marker in expected_markers:
        if marker in haystack:
            return True
    return False
4035
+
4036
+
4037
def _mentions_process_chats_raw(final_text: str, objective: ProcessChatsPrimaryObjectiveSummary) -> bool:
    """Return True when the report says how many raw chats were covered.

    The report must mention raws/chats. For a zero count it must also contain
    a "none" style marker; otherwise the count itself must appear. Both checks
    are plain substring tests on the folded text.
    """
    haystack = _fold_text(final_text)
    if all(marker not in haystack for marker in ("raw", "chat", "chats")):
        return False
    count = objective.raw_count
    if count != 0:
        return str(count) in haystack
    return any(marker in haystack for marker in ("0", "nenhum", "nao processad", "ainda nao"))
4044
+
4045
+
4046
def _mentions_process_chats_wiki_write(final_text: str, objective: ProcessChatsPrimaryObjectiveSummary) -> bool:
    """Return True when the report says what was written to the Wiki.

    Requires the word "wiki", a write/file related word, and either the note
    count or (for a zero count) a "nothing yet" style marker. All checks are
    substring tests on the folded text.
    """
    haystack = _fold_text(final_text)

    def mentions_any(markers: tuple[str, ...]) -> bool:
        return any(marker in haystack for marker in markers)

    if "wiki" not in haystack:
        return False
    if not mentions_any(("arquivo", "nota", "escrit", "grav", "publicad")):
        return False
    notes = objective.note_count
    if notes != 0:
        return str(notes) in haystack
    return mentions_any(("0", "nenhum", "nada", "ainda nao"))
4055
+
4056
+
4057
def _mentions_process_chats_coverage(final_text: str) -> bool:
    """Return True when the report mentions both coverage and the manifest."""
    haystack = _fold_text(final_text)
    if "manifest" not in haystack:
        return False
    return "coverage" in haystack or "cobertura" in haystack
4060
+
4061
+
4062
def _mentions_process_chats_linker(final_text: str, objective: ProcessChatsPrimaryObjectiveSummary) -> bool:
    """Return True when the report covers the linker/graph state.

    The report must mention the linker, graph or related notes. For the
    ``"blocked"`` and ``"not_run"`` statuses it must also use wording that
    matches that status; any other status needs no extra wording.
    """
    haystack = _fold_text(final_text)
    topic_markers = ("linker", "grafo", "related notes", "notas relacionadas")
    if not any(marker in haystack for marker in topic_markers):
        return False

    status = objective.linker_status
    if status == "blocked":
        status_markers = ("pendente", "bloque", "blocker", "nao aplicado")
    elif status == "not_run":
        status_markers = ("nao rodou", "ainda nao", "nao foi confirmad", "publicacao nao")
    else:
        return True
    return any(marker in haystack for marker in status_markers)
4071
+
4072
+
4073
+ def _fold_text(text: str) -> str:
4074
+ normalized = unicodedata.normalize("NFKD", str(text or ""))
4075
+ without_marks = "".join(ch for ch in normalized if not unicodedata.combining(ch))
4076
+ return f" {without_marks.casefold()} "
4077
+
4078
+
4079
def _omitted_tool_error_findings(transcript: object, final_text: str) -> list[AgentRunReportFinding]:
    """Build one finding per failed tool call that the final report does not mention.

    Tool-call findings come from ``validate_agent_tool_calls``; only those
    whose code equals ``TOOL_CALL_ERROR`` are considered.
    """
    unreported = [
        error
        for error in validate_agent_tool_calls(transcript)
        if str(error.get("code") or "") == TOOL_CALL_ERROR
        and not _final_report_mentions_tool_error(final_text, error)
    ]
    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.OMITTED_TOOL_ERROR,
            severity=_finding_severity(error.get("severity")),
            source="transcript",
            source_field="tool_error",
            tool_error_type=str(error.get("error_type") or ""),
            message="O transcript contém tool call falha que o relatório final não reportou.",
            next_action="Reportar explicitamente a tool call falha e seu impacto, mesmo quando um retry posterior recuperar.",
            evidence={
                "tool_type": str(error.get("tool_type") or ""),
                "tool_error_message": str(error.get("message") or ""),
            },
        )
        for error in unreported
    ]
4107
+
4108
+
4109
+ def _finding_severity(value: object) -> AgentRunReportSeverity:
4110
+ text = str(value or "medium").strip().lower()
4111
+ if text in {"low", "medium", "high", "critical"}:
4112
+ return cast(AgentRunReportSeverity, text)
4113
+ return "medium"
4114
+
4115
+
4116
+ def _final_report_mentions_tool_error(final_text: str, error: JsonObject) -> bool:
4117
+ lowered = final_text.lower()
4118
+ if "tool" not in lowered:
4119
+ return False
4120
+ error_type = str(error.get("error_type") or "").lower()
4121
+ if error_type and error_type in lowered:
4122
+ return True
4123
+ return any(marker in lowered for marker in ("erro", "falh", "failed", "invalid tool", "invalid_tool"))
4124
+
4125
+
4126
def _omitted_tool_deviation_findings(transcript: object, final_text: str) -> list[AgentRunReportFinding]:
    """Flag a final report that hides off-script probes or tool calls.

    Returns an empty list when the transcript has no deviations or when the
    report mentions all of them. Otherwise returns a single finding whose
    wording depends on whether the report explicitly claimed "no deviations".
    """
    deviations, finding_codes = _transcript_tool_deviation_context(transcript)
    if not deviations or _final_report_mentions_tool_deviations(final_text, deviations):
        return []

    claimed_none = bool(final_text) and NO_TOOL_DEVIATION_CLAIM_RE.search(final_text) is not None
    if claimed_none:
        actual = "no_deviations_claim"
        message = "O relatório final afirmou que não houve desvios, mas o transcript contém probes ou tool calls fora do roteiro."
    else:
        actual = ",".join(deviations)
        message = "O relatório final omitiu probes ou tool calls fora do roteiro presentes no transcript."

    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.TOOL_DEVIATION_OMITTED,
        severity="high",
        source="transcript",
        source_field="final_report_text",
        expected="relatório final deve listar probes, permissões e comandos fora do roteiro quando ocorrerem",
        actual=actual,
        message=message,
        next_action=(
            "Reescrever a seção de avisos de execução listando os probes/tool calls observados "
            "e o impacto deles no experimento."
        ),
        evidence=_tool_deviation_evidence(deviations=deviations, finding_codes=finding_codes),
    )
    return [finding]
4153
+
4154
+
4155
+ def _tool_deviation_evidence(*, deviations: list[str], finding_codes: list[str]) -> JsonObject:
4156
+ evidence: JsonObject = {"tool_types": deviations}
4157
+ if finding_codes:
4158
+ evidence["finding_codes"] = finding_codes
4159
+ return evidence
4160
+
4161
+
4162
def _update_topic_success_claim_findings(
    transcript: object,
    truth: _WorkflowTruth,
) -> list[AgentRunReportFinding]:
    """Flag ``update_topic`` calls that claim success while the workflow is not successful.

    The effective status is the first truthy value among the workflow,
    progress and receipt statuses. Nothing is reported unless that status is
    in ``NON_SUCCESS_STATUSES``. Each ``update_topic`` tool use whose title,
    summary or strategic intent carries a positive success claim, without
    acknowledging the partial state, yields one finding.
    """
    status = truth.workflow_status or truth.progress_status or truth.receipt_status
    if status not in NON_SUCCESS_STATUSES:
        return []

    claim_fields = ("title", "summary", "strategic_intent")
    findings: list[AgentRunReportFinding] = []
    for event in _iter_transcript_events(transcript):
        is_update_topic = (
            event.event_type.casefold() == "tool_use"
            and event.tool_name.casefold() == "update_topic"
        )
        if not is_update_topic:
            continue
        text = "\n".join(str(event.parameters.get(field) or "") for field in claim_fields)
        if not _has_positive_success_claim(text) or _update_topic_acknowledges_partial_workflow(text):
            continue
        findings.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.SUCCESS_CLAIM_MISMATCH,
                severity="medium",
                source="transcript",
                source_field="transcript.tool_use.update_topic",
                expected=f"update_topic deve comunicar estado parcial/pendente quando workflow_status={status}",
                actual="success_claim",
                message="O update_topic usou linguagem de sucesso apesar de o workflow ainda estar parcial ou bloqueado.",
                next_action=(
                    "Atualizar a comunicação pública para dizer o que foi aplicado e o que ainda falta, "
                    "sem chamar o workflow parcial de sucesso."
                ),
                evidence={"workflow_status": status, "text": text},
            )
        )
    return findings
4198
+
4199
+
4200
def _update_topic_acknowledges_partial_workflow(text: str) -> bool:
    """Return True when the text admits the workflow is partial, pending or blocked."""
    haystack = _fold_text(text)
    partial_markers = (
        "parcial",
        "pendente",
        "aguard",
        "waiting",
        "bloque",
        "falta",
        "restam",
        "nao conclu",
        "não conclu",
        "nao fixou",
        "não fixou",
    )
    for marker in partial_markers:
        if marker in haystack:
            return True
    return False
4218
+
4219
+
4220
def _transcript_tool_deviation_context(transcript: object) -> tuple[list[str], list[str]]:
    """Collect off-script probe types and tool-call finding codes from a transcript.

    Returns ``(probe_types, finding_codes)``, both de-duplicated in first-seen
    order. Probe types come from walking the transcript structure and from the
    tool names of validator findings; finding codes come from
    ``validate_agent_tool_calls``, ignoring empty codes, ``TOOL_CALL_ERROR``
    and ``PUBLIC_TOOL_TEXT_CONTRACT_VIOLATION``.
    """
    probe_types: list[str] = []
    finding_codes: list[str] = []
    probe_event_types = {"VIEW_FILE", "LIST_DIRECTORY", "GREP_SEARCH"}

    def walk(node: object) -> None:
        if isinstance(node, list):
            for entry in node:
                walk(entry)
            return
        if not isinstance(node, dict):
            return

        event_type = str(node.get("type") or "").upper()
        tool_name = str(node.get("name") or node.get("tool_name") or "").strip()

        if event_type in probe_event_types:
            if event_type == "VIEW_FILE":
                # Expected reads are not deviations; returning here also skips
                # the event's child containers.
                if _is_expected_workflow_skill_read(node) or _is_expected_cpu_sample_read(node):
                    return
                if _is_agy_background_task_log_read(node):
                    _append_unique(probe_types, "AGY_BACKGROUND_TASK_LOG")
                    return
            _append_unique(probe_types, event_type)

        if event_type == "GENERIC" and tool_name == "list_permissions":
            _append_unique(probe_types, "GENERIC:list_permissions")

        for child in _transcript_child_containers(node):
            walk(child)

    walk(transcript)

    ignored_codes = ("", TOOL_CALL_ERROR, PUBLIC_TOOL_TEXT_CONTRACT_VIOLATION)
    for finding in validate_agent_tool_calls(transcript):
        code = str(finding.get("code") or "")
        if code in ignored_codes:
            continue
        _append_unique(finding_codes, code)
        # Fall back to the code itself when the finding names no tool.
        _append_unique(probe_types, str(finding.get("tool_name") or code))
    return probe_types, finding_codes
4257
+
4258
+
4259
def _is_expected_workflow_skill_read(event: JsonObject) -> bool:
    """Return True when the event reads one of the workflow's own SKILL.md files."""
    file_path = _transcript_event_file_path(event).replace("\\", "/")
    if not file_path.endswith("/SKILL.md"):
        return False
    expected_skill_files = (
        "/mednotes-fix-wiki/SKILL.md",
        "/fix-medical-wiki/SKILL.md",
        "/obsidian-ops/SKILL.md",
        f"/{SKILLS_RELPATH}/fix-medical-wiki/SKILL.md",
    )
    for marker in expected_skill_files:
        if marker in file_path:
            return True
    return False
4272
+
4273
+
4274
def _is_expected_cpu_sample_read(event: JsonObject) -> bool:
    """Return True when the event's file path ends with ``/cpu-samples.jsonl``."""
    file_path = _transcript_event_file_path(event)
    return file_path.replace("\\", "/").endswith("/cpu-samples.jsonl")
4277
+
4278
+
4279
def _is_agy_background_task_log_read(event: JsonObject) -> bool:
    """Return True when the event reads a ``.log`` file under the antigravity-cli task directories."""
    file_path = _transcript_event_file_path(event).replace("\\", "/")
    required_fragments = ("/.gemini/antigravity-cli/brain/", "/.system_generated/tasks/task-")
    if not all(fragment in file_path for fragment in required_fragments):
        return False
    return file_path.endswith(".log")
4282
+
4283
+
4284
def _transcript_event_file_path(event: JsonObject) -> str:
    """Return the file path an event refers to, or ``""`` when none is found.

    Lookup order: the event's ``path``, its ``file_path``, then
    ``parameters.args.path`` / ``parameters.args.file_path``, and finally a
    path extracted from the event's ``content`` text.
    """
    args_path = ""
    parameters = event.get("parameters")
    args = parameters.get("args") if isinstance(parameters, dict) else None
    if isinstance(args, dict):
        args_path = str(args.get("path") or args.get("file_path") or "")

    content_path = _tool_content_file_path(str(event.get("content") or ""))

    for candidate in (event.get("path"), event.get("file_path"), args_path, content_path):
        if candidate:
            return str(candidate)
    return ""
4299
+
4300
+
4301
def _tool_content_file_path(content: str) -> str:
    """Extract and URL-decode the ``path`` group matched in tool content, or ``""``."""
    found = TOOL_CONTENT_FILE_PATH_RE.search(content)
    return unquote(found.group("path")) if found is not None else ""
4306
+
4307
+
4308
def _final_report_mentions_tool_deviations(final_text: str, deviations: list[str]) -> bool:
    """Return True only when the report mentions every observed deviation.

    Args:
        final_text: Final report text written by the agent.
        deviations: Deviation identifiers collected from the transcript.

    Returns:
        False for an empty/blank report or when at least one deviation is
        neither named literally nor described through one of its aliases;
        True otherwise.
    """
    folded = _fold_text(final_text)
    # ``_fold_text`` pads its result with spaces, so a plain truthiness test
    # would never detect an empty report.
    if not folded.strip():
        return False
    # Aliases are compared with accent-stripped text, so they are unaccented.
    aliases: dict[str, tuple[str, ...]] = {
        "VIEW_FILE": ("view_file", "leu skill", "leitura de skill", "read file"),
        "AGY_BACKGROUND_TASK_LOG": (
            "agy background fallback",
            "task log",
            "background task",
            "fallback de background",
            "log indicado pela ferramenta",
            "log indicado pela propria ferramenta",
            "registro indicado pela ferramenta",
            "registro da ferramenta",
            "execucao em segundo plano",
            "segundo plano",
            "registro temporario de progresso",
        ),
        "LIST_DIRECTORY": ("list_directory", "listou diretorio", "listagem de diretorio"),
        "GREP_SEARCH": ("grep_search", "grep", "busca textual"),
    }
    for deviation in deviations:
        # Strip the padding added by ``_fold_text`` so the identifier is also
        # found inside backticks or next to punctuation.
        token = _fold_text(deviation).strip()
        if token and token in folded:
            continue
        if any(marker in folded for marker in aliases.get(deviation, ())):
            continue
        return False
    return True
4344
+
4345
+
4346
+ def _append_unique(values: list[str], value: str) -> None:
4347
+ if value not in values:
4348
+ values.append(value)
4349
+
4350
+
4351
def _final_report_local_path_leak_findings(final_text: str) -> list[AgentRunReportFinding]:
    """Build one finding per local machine path exposed in the final report."""
    leaked_paths = [
        candidate
        for candidate in _reported_absolute_paths(final_text)
        if _looks_like_local_path_leak(candidate)
    ]
    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.FINAL_REPORT_LOCAL_PATH_LEAK,
            severity="medium",
            source="final_report",
            source_field="final_report_text",
            path=path,
            # Last path component, with either separator style.
            artifact_name=path.replace("\\", "/").rsplit("/", 1)[-1],
            expected="resposta pública sem links file:// nem caminhos locais absolutos",
            actual=path,
            message="O relatório final expôs um caminho local da máquina no texto público.",
            next_action=(
                "Trocar o caminho local por uma descrição humana do item afetado ou por referência técnica "
                "apenas no log/JSON do experimento."
            ),
            evidence={"path": path},
        )
        for path in leaked_paths
    ]
4375
+
4376
+
4377
+ def _looks_like_local_path_leak(path: str) -> bool:
4378
+ normalized = path.replace("\\", "/")
4379
+ if normalized.startswith(("/mednotes:", "/flashcards")):
4380
+ return False
4381
+ return normalized.startswith(("/Users/", "/tmp/", "/private/tmp/", "/private/var/"))
4382
+
4383
+
4384
def _invalid_reported_artifact_path_findings(final_text: str) -> list[AgentRunReportFinding]:
    """Build one finding per artifact-like path in the report that is missing on disk."""
    missing_paths = [
        candidate
        for candidate in _reported_absolute_paths(final_text)
        if _looks_like_reported_artifact_path(candidate) and not Path(candidate).exists()
    ]
    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.REPORTED_ARTIFACT_PATH_INVALID,
            severity="medium",
            source="filesystem",
            source_field="final_report_text",
            path=path,
            # Last path component, with either separator style.
            artifact_name=path.replace("\\", "/").rsplit("/", 1)[-1],
            message="O relatório final citou caminho de artefato ou backup que não existe no filesystem.",
            next_action="Remover o caminho inventado ou substituir pelo caminho oficial existente antes de concluir a rodada.",
        )
        for path in missing_paths
    ]
4404
+
4405
+
4406
def _reported_absolute_paths(final_text: str) -> list[str]:
    """Return the distinct absolute paths cited in the report, in first-seen order.

    Backtick-quoted paths are scanned first, then ``file://`` URIs, then plain
    paths. Each match is URL-decoded, stripped of trailing ``.,;`` and passed
    through ``_normalize_reported_path_candidate``; empty results are dropped.
    """
    collected: list[str] = []
    patterns = (BACKTICK_ABSOLUTE_PATH_RE, FILE_URI_RE, PLAIN_ABSOLUTE_PATH_RE)
    for pattern in patterns:
        for found in pattern.finditer(final_text):
            decoded = unquote(found.group("path"))
            candidate = _normalize_reported_path_candidate(decoded.rstrip(".,;"))
            if not candidate or candidate in collected:
                continue
            collected.append(candidate)
    return collected
4414
+
4415
+
4416
+ def _normalize_reported_path_candidate(raw_path: str) -> str:
4417
+ stripped = raw_path.strip()
4418
+ for separator in ("\n", "\r"):
4419
+ if separator in stripped:
4420
+ stripped = stripped.split(separator, 1)[0].strip()
4421
+ if stripped.startswith(("/mednotes:", "/flashcards")):
4422
+ return stripped.split(maxsplit=1)[0]
4423
+ if stripped.endswith(")") and not Path(stripped).exists():
4424
+ markdown_link_candidate = stripped[:-1]
4425
+ if Path(markdown_link_candidate).exists() or _looks_like_reported_artifact_path(markdown_link_candidate):
4426
+ stripped = markdown_link_candidate
4427
+ return stripped
4428
+
4429
+
4430
+ def _looks_like_reported_artifact_path(path: str) -> bool:
4431
+ normalized = path.replace("\\", "/")
4432
+ if normalized.startswith(("/mednotes:", "/flashcards")):
4433
+ return False
4434
+ name = normalized.rsplit("/", 1)[-1]
4435
+ if name.endswith((".json", ".md", ".bak", ".log")):
4436
+ return True
4437
+ return any(
4438
+ marker in normalized
4439
+ for marker in (
4440
+ "/runs/",
4441
+ "/workflow-",
4442
+ "fix-wiki",
4443
+ "link-diagnosis",
4444
+ "run_state",
4445
+ "compact-report",
4446
+ "full-report",
4447
+ )
4448
+ )