mednotes-opencode 0.1.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/.opencode/agents/med-chat-triager.md +204 -0
- package/.opencode/agents/med-flashcard-maker.md +63 -0
- package/.opencode/agents/med-knowledge-architect.md +230 -0
- package/.opencode/agents/med-link-graph-curator.md +177 -0
- package/.opencode/agents/med-publish-guard.md +62 -0
- package/.opencode/commands/flashcards.md +25 -0
- package/.opencode/commands/mednotes/create.md +25 -0
- package/.opencode/commands/mednotes/enrich.md +27 -0
- package/.opencode/commands/mednotes/fix-wiki.md +27 -0
- package/.opencode/commands/mednotes/history.md +22 -0
- package/.opencode/commands/mednotes/link-body.md +25 -0
- package/.opencode/commands/mednotes/link-related.md +27 -0
- package/.opencode/commands/mednotes/link.md +27 -0
- package/.opencode/commands/mednotes/pdf-library.md +27 -0
- package/.opencode/commands/mednotes/process-chats.md +23 -0
- package/.opencode/commands/mednotes/setup.md +21 -0
- package/.opencode/commands/mednotes/status.md +27 -0
- package/.opencode/commands/mednotes/telemetry.md +27 -0
- package/.opencode/commands/report.md +26 -0
- package/.opencode/mednotes/AGENTS.md +57 -0
- package/.opencode/mednotes/agents/med-chat-triager.md +197 -0
- package/.opencode/mednotes/agents/med-flashcard-maker.md +56 -0
- package/.opencode/mednotes/agents/med-knowledge-architect.md +224 -0
- package/.opencode/mednotes/agents/med-link-graph-curator.md +171 -0
- package/.opencode/mednotes/agents/med-publish-guard.md +55 -0
- package/.opencode/mednotes/contracts/.gitkeep +1 -0
- package/.opencode/mednotes/contracts/agents.json +116 -0
- package/.opencode/mednotes/contracts/opencode-plugin.json +70 -0
- package/.opencode/mednotes/docs/agent-prompt-hardening.md +567 -0
- package/.opencode/mednotes/docs/agent-role-contracts.md +94 -0
- package/.opencode/mednotes/docs/anki-mcp-twenty-rules.md +214 -0
- package/.opencode/mednotes/docs/anki-templates/README.md +39 -0
- package/.opencode/mednotes/docs/anki-templates/cloze.back.html +23 -0
- package/.opencode/mednotes/docs/anki-templates/cloze.front.html +14 -0
- package/.opencode/mednotes/docs/anki-templates/qa.back.html +24 -0
- package/.opencode/mednotes/docs/anki-templates/qa.front.html +14 -0
- package/.opencode/mednotes/docs/anki-templates/style.css +182 -0
- package/.opencode/mednotes/docs/atomicity-splitting-policy.md +113 -0
- package/.opencode/mednotes/docs/extension-docs.md +40 -0
- package/.opencode/mednotes/docs/flashcard-ingestion.md +278 -0
- package/.opencode/mednotes/docs/knowledge-architect.md +208 -0
- package/.opencode/mednotes/docs/merge-policy.md +110 -0
- package/.opencode/mednotes/docs/public-vocabulary.md +104 -0
- package/.opencode/mednotes/docs/semantic-linker.md +141 -0
- package/.opencode/mednotes/docs/taxonomy-policy.md +90 -0
- package/.opencode/mednotes/docs/triage-policy.md +187 -0
- package/.opencode/mednotes/docs/vault-version-control.md +758 -0
- package/.opencode/mednotes/docs/vocabulary-db-recovery.md +58 -0
- package/.opencode/mednotes/docs/workflow-output-contract.md +779 -0
- package/.opencode/mednotes/hooks/hooks.json +79 -0
- package/.opencode/mednotes/package-lock.json +6361 -0
- package/.opencode/mednotes/package.json +15 -0
- package/.opencode/mednotes/pyproject.toml +48 -0
- package/.opencode/mednotes/scripts/bootstrap_windows_python_uv.cmd +13 -0
- package/.opencode/mednotes/scripts/bootstrap_windows_python_uv.ps1 +172 -0
- package/.opencode/mednotes/scripts/enrich_notes.py +23 -0
- package/.opencode/mednotes/scripts/full_reset_windows_python_uv.cmd +13 -0
- package/.opencode/mednotes/scripts/hooks/antigravity_hook_status.mjs +212 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/antigravity.mjs +169 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/harness_payload.mjs +103 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/opencode_plugin.mjs +341 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/adapters/opencode_user_config_sync.mjs +177 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/anki_preflight.mjs +214 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/cli.mjs +143 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/diagnostics.mjs +11 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/domain/agent_directive_core.mjs +160 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/fsm_directive.mjs +1470 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/hook_errors.mjs +120 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/retention.mjs +114 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/runtime.mjs +174 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/telemetry_capture.mjs +511 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook/vault_guard.mjs +624 -0
- package/.opencode/mednotes/scripts/hooks/mednotes_hook.mjs +5 -0
- package/.opencode/mednotes/scripts/mednotes/_runtime_paths.py +24 -0
- package/.opencode/mednotes/scripts/mednotes/anki_model_validator.py +18 -0
- package/.opencode/mednotes/scripts/mednotes/capture_extension_diff.py +1562 -0
- package/.opencode/mednotes/scripts/mednotes/feedback_report.py +16 -0
- package/.opencode/mednotes/scripts/mednotes/flashcard_index.py +18 -0
- package/.opencode/mednotes/scripts/mednotes/flashcard_pipeline.py +18 -0
- package/.opencode/mednotes/scripts/mednotes/flashcard_report.py +18 -0
- package/.opencode/mednotes/scripts/mednotes/flashcard_sources.py +18 -0
- package/.opencode/mednotes/scripts/mednotes/obsidian/README.md +6 -0
- package/.opencode/mednotes/scripts/mednotes/obsidian_note_utils.py +20 -0
- package/.opencode/mednotes/scripts/mednotes/pdf_library/cli.py +16 -0
- package/.opencode/mednotes/scripts/mednotes/project_fsm.py +229 -0
- package/.opencode/mednotes/scripts/mednotes/setup_telemetry_email.py +404 -0
- package/.opencode/mednotes/scripts/mednotes/sync_anki_twenty_rules.py +18 -0
- package/.opencode/mednotes/scripts/mednotes/sync_opencode_user_config.py +36 -0
- package/.opencode/mednotes/scripts/mednotes/wiki/cli.py +20 -0
- package/.opencode/mednotes/scripts/mednotes/wiki_graph.py +18 -0
- package/.opencode/mednotes/scripts/mednotes/wiki_tree.py +134 -0
- package/.opencode/mednotes/scripts/reset_windows_python_uv.ps1 +625 -0
- package/.opencode/mednotes/scripts/run_python.mjs +109 -0
- package/.opencode/mednotes/scripts/vault/vault_commit.ps1 +19 -0
- package/.opencode/mednotes/scripts/vault/vault_commit.sh +18 -0
- package/.opencode/mednotes/scripts/vault/vault_git.ps1 +19 -0
- package/.opencode/mednotes/scripts/vault/vault_git.py +3107 -0
- package/.opencode/mednotes/scripts/vault/vault_git.sh +18 -0
- package/.opencode/mednotes/scripts/vault/vault_precommit.ps1 +19 -0
- package/.opencode/mednotes/scripts/vault/vault_precommit.sh +18 -0
- package/.opencode/mednotes/skills/THIRD_PARTY_NOTICES.md +45 -0
- package/.opencode/mednotes/skills/create-medical-flashcards/SKILL.md +113 -0
- package/.opencode/mednotes/skills/create-medical-note/SKILL.md +90 -0
- package/.opencode/mednotes/skills/enrich-medical-note/SKILL.md +120 -0
- package/.opencode/mednotes/skills/fix-medical-wiki/SKILL.md +559 -0
- package/.opencode/mednotes/skills/link-medical-wiki/SKILL.md +224 -0
- package/.opencode/mednotes/skills/obsidian-cli/SKILL.md +118 -0
- package/.opencode/mednotes/skills/obsidian-markdown/SKILL.md +207 -0
- package/.opencode/mednotes/skills/obsidian-markdown/references/CALLOUTS.md +58 -0
- package/.opencode/mednotes/skills/obsidian-markdown/references/EMBEDS.md +63 -0
- package/.opencode/mednotes/skills/obsidian-markdown/references/PROPERTIES.md +61 -0
- package/.opencode/mednotes/skills/obsidian-ops/SKILL.md +136 -0
- package/.opencode/mednotes/skills/pdf-library/SKILL.md +45 -0
- package/.opencode/mednotes/skills/process-medical-chats/SKILL.md +246 -0
- package/.opencode/mednotes/skills/workflow-report/SKILL.md +100 -0
- package/.opencode/mednotes/src/mednotes/__init__.py +5 -0
- package/.opencode/mednotes/src/mednotes/domains/__init__.py +5 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/README.md +26 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/__init__.py +2 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/build_demo_apkg.py +177 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/contracts.py +385 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/flashcards_machine.py +522 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/fsm.py +817 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/index.py +630 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/install_models.py +445 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/model.py +359 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/obsidian_links.py +135 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/obsidian_note_utils.py +546 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/pipeline.py +580 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/report.py +510 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/sources.py +682 -0
- package/.opencode/mednotes/src/mednotes/domains/flashcards/sync_rules.py +184 -0
- package/.opencode/mednotes/src/mednotes/domains/history/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/history/history_fsm.py +852 -0
- package/.opencode/mednotes/src/mednotes/domains/history/history_machine.py +453 -0
- package/.opencode/mednotes/src/mednotes/domains/setup/__init__.py +7 -0
- package/.opencode/mednotes/src/mednotes/domains/setup/setup_fsm.py +808 -0
- package/.opencode/mednotes/src/mednotes/domains/setup/setup_machine.py +973 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/README.md +64 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/api.py +668 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/batch_state.py +102 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/atomicity/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/atomicity/atomicity.py +877 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/body_link/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/body_link/body_linker.py +1562 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/effects/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/effects/effect_adapters.py +949 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/effects/fix_wiki_runtime_adapters.py +433 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/coverage.py +413 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/graph.py +396 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/graph/graph_fixes.py +161 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/hygiene/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/hygiene/hygiene.py +483 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/__init__.py +2 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/anchors.py +185 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/__init__.py +0 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/cache.py +223 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/config.py +131 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/download.py +224 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/frontmatter.py +59 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/insert.py +227 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/core/local_import.py +54 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/__init__.py +42 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/web_profiles.py +99 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/web_search.py +203 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/illustrate/sources/wikimedia.py +102 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/markdown_db_adapter.mjs +434 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/markdown_node_runtime.py +274 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/markdown/markdown_query.py +227 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/artifacts.py +605 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/canonical_merge.py +277 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/markdown_zones.py +85 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/meaning_planner.py +307 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_iter.py +67 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_merge.py +278 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_plan.py +409 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_policy.py +22 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/__init__.py +79 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/fixes.py +264 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/frontmatter.py +435 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/models.py +208 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/prompts.py +37 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/tables.py +236 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/note_style/validate.py +404 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/provenance.py +478 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/raw_chats.py +273 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/notes/sources_backfill.py +235 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/__init__.py +10 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/anchors.py +16 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/captions.py +47 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/cli.py +179 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/cloud.py +52 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/config.py +196 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/context_packets.py +76 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/db.py +81 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/doctor.py +102 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/figure_ids.py +42 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/ingest.py +326 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/insert.py +316 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/mentions.py +57 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/ocr.py +71 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/paths.py +35 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/pdf_engine.py +77 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/schema.py +155 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/search.py +188 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/app.py +89 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/image_backend.py +29 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/pdf/tui/state.py +65 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/publish.py +1139 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/publish_receipts.py +365 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/publish/publish_recovery.py +240 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/agent_behavior_corpus.py +2069 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/agent_report_validation.py +4448 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/agent_run_audit.py +852 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/architect_prompt_eval.py +341 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/body_linker_eval.py +240 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/curator_output_validation.py +175 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/curator_prompt_eval.py +865 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/triager_prompt_eval.py +1295 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/related_notes/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/related_notes/related_notes.py +1920 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/related_notes/related_notes_headless.py +1186 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/plan_attestation.py +148 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/specialist_receipts.py +360 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/specialist_runtime.py +52 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/specialist/specialist_task_runner.py +2470 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/style/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/style/style.py +1952 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/subagents/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/subagents/agents.py +1767 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/alias_projection.py +331 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/link_terms.py +151 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/llm_disambiguation.py +182 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/__init__.py +116 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/audit.py +201 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/migration.py +314 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/normalize.py +72 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/policy.py +135 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/resolve.py +413 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/schema.py +157 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/taxonomy/status.py +137 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_bootstrap.py +509 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_curator_batch.py +1115 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_ingestion.py +632 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_map.py +930 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/vocabulary/vocabulary_recovery.py +1388 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/cli.py +6665 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/common.py +69 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/config.py +210 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/__init__.py +74 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/agent_report.py +242 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/agent_run_audit.py +196 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/agents.py +601 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/curator.py +256 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/effect_payloads.py +519 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/happy_path.py +190 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/link_git.py +110 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/link_runtime_artifact.py +52 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/note_plan.py +75 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/paths.py +114 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/public_report.py +53 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/publish.py +111 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/raw_coverage.py +217 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/related_notes.py +136 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/related_notes_headless.py +153 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/related_notes_runtime.py +395 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/schema_registry.py +637 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/specialist.py +432 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/status.py +62 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/style_rewrite.py +568 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/vocabulary_ingestion.py +223 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_blockers.py +510 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_guardrails.py +637 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_outcomes.py +121 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/contracts/workflow_receipts.py +100 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/__main__.py +4 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/cli.py +275 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/__init__.py +2 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/candidates.py +193 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/cli.py +189 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/gemini.py +220 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/inputs.py +120 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/models.py +34 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/parsing.py +48 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/prompts.py +216 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/quality.py +54 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/reporting.py +24 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/runner.py +433 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/utils.py +39 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/enrich/workflow/vault_guard_bridge.py +17 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_context_packets.py +454 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_decision_projection.py +133 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_effects.py +1260 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_fsm.py +2768 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_machine.py +1588 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_plan.py +306 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_primary_objective.py +316 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_problem.py +153 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_receipt_evidence.py +306 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_states.py +290 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/fix_wiki_user_report.py +342 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/fix_wiki/health.py +6332 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_fsm.py +1119 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_git.py +638 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_machine.py +1106 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_retry_governance.py +374 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_runtime_result.py +485 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/link_triggers.py +183 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/linking.py +2758 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/reference_repair.py +718 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link/related_notes_fsm.py +1855 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link_related/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/link_related/link_related_machine.py +834 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/__init__.py +1 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_fsm.py +1592 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_machine.py +3097 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_primary_objective.py +28 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/flows/process_chats/process_chats_runtime_result.py +185 -0
- package/.opencode/mednotes/src/mednotes/domains/wiki/performance.py +97 -0
- package/.opencode/mednotes/src/mednotes/kernel/__init__.py +6 -0
- package/.opencode/mednotes/src/mednotes/kernel/agent_directive.py +336 -0
- package/.opencode/mednotes/src/mednotes/kernel/base.py +51 -0
- package/.opencode/mednotes/src/mednotes/kernel/blockers.py +39 -0
- package/.opencode/mednotes/src/mednotes/kernel/effect_executor.py +55 -0
- package/.opencode/mednotes/src/mednotes/kernel/effect_intent.py +69 -0
- package/.opencode/mednotes/src/mednotes/kernel/effects.py +160 -0
- package/.opencode/mednotes/src/mednotes/kernel/errors.py +38 -0
- package/.opencode/mednotes/src/mednotes/kernel/fsm_event.py +35 -0
- package/.opencode/mednotes/src/mednotes/kernel/fsm_model.py +55 -0
- package/.opencode/mednotes/src/mednotes/kernel/fsm_transition_result.py +75 -0
- package/.opencode/mednotes/src/mednotes/kernel/guardrails.py +188 -0
- package/.opencode/mednotes/src/mednotes/kernel/progress.py +319 -0
- package/.opencode/mednotes/src/mednotes/kernel/public_report.py +346 -0
- package/.opencode/mednotes/src/mednotes/kernel/state_machine.py +164 -0
- package/.opencode/mednotes/src/mednotes/kernel/workflow.py +619 -0
- package/.opencode/mednotes/src/mednotes/platform/__init__.py +5 -0
- package/.opencode/mednotes/src/mednotes/platform/backup_policy.py +382 -0
- package/.opencode/mednotes/src/mednotes/platform/feedback/__init__.py +62 -0
- package/.opencode/mednotes/src/mednotes/platform/feedback/cli.py +275 -0
- package/.opencode/mednotes/src/mednotes/platform/feedback/contracts.py +83 -0
- package/.opencode/mednotes/src/mednotes/platform/feedback/core.py +4168 -0
- package/.opencode/mednotes/src/mednotes/platform/feedback/integrity.py +989 -0
- package/.opencode/mednotes/src/mednotes/platform/feedback/operational_contract.py +2293 -0
- package/.opencode/mednotes/src/mednotes/platform/feedback/telemetry.py +875 -0
- package/.opencode/mednotes/src/mednotes/platform/feedback/telemetry_config.py +65 -0
- package/.opencode/mednotes/src/mednotes/platform/opencode_runtime_config.py +182 -0
- package/.opencode/mednotes/src/mednotes/platform/paths/__init__.py +1560 -0
- package/.opencode/mednotes/src/mednotes/platform/secrets.py +89 -0
- package/.opencode/mednotes/src/mednotes/platform/user_config.py +103 -0
- package/.opencode/mednotes/src/mednotes/platform/vault_guard.py +214 -0
- package/.opencode/mednotes/uv.lock +932 -0
- package/.opencode/mednotes.generated.json +395 -0
- package/.opencode/opencode.json +31 -0
- package/.opencode/plugins/mednotes-fsm.mjs +7 -0
- package/.opencode/plugins/mednotes_hook/adapters/antigravity.mjs +169 -0
- package/.opencode/plugins/mednotes_hook/adapters/harness_payload.mjs +103 -0
- package/.opencode/plugins/mednotes_hook/adapters/opencode_plugin.mjs +341 -0
- package/.opencode/plugins/mednotes_hook/adapters/opencode_user_config_sync.mjs +177 -0
- package/.opencode/plugins/mednotes_hook/anki_preflight.mjs +214 -0
- package/.opencode/plugins/mednotes_hook/cli.mjs +143 -0
- package/.opencode/plugins/mednotes_hook/diagnostics.mjs +11 -0
- package/.opencode/plugins/mednotes_hook/domain/agent_directive_core.mjs +160 -0
- package/.opencode/plugins/mednotes_hook/fsm_directive.mjs +1470 -0
- package/.opencode/plugins/mednotes_hook/hook_errors.mjs +120 -0
- package/.opencode/plugins/mednotes_hook/retention.mjs +114 -0
- package/.opencode/plugins/mednotes_hook/runtime.mjs +174 -0
- package/.opencode/plugins/mednotes_hook/telemetry_capture.mjs +511 -0
- package/.opencode/plugins/mednotes_hook/vault_guard.mjs +624 -0
- package/AGENTS.md +57 -0
- package/README.md +194 -0
- package/adapters/antigravity/agents.json +80 -0
- package/adapters/antigravity/templates/med-chat-triager.md +214 -0
- package/adapters/antigravity/templates/med-flashcard-maker.md +72 -0
- package/adapters/antigravity/templates/med-knowledge-architect.md +241 -0
- package/adapters/antigravity/templates/med-link-graph-curator.md +187 -0
- package/adapters/antigravity/templates/med-publish-guard.md +71 -0
- package/adapters/gemini-cli/gemini-extension.json +14 -0
- package/adapters/gemini-cli/package.json +15 -0
- package/adapters/gemini-cli/pyproject.toml +48 -0
- package/bin/mednotes-opencode.mjs +155 -0
- package/contracts/agents.json +116 -0
- package/core/agents/med-chat-triager.md +197 -0
- package/core/agents/med-flashcard-maker.md +56 -0
- package/core/agents/med-knowledge-architect.md +224 -0
- package/core/agents/med-link-graph-curator.md +171 -0
- package/core/agents/med-publish-guard.md +55 -0
- package/core/commands/flashcards.toml +22 -0
- package/core/commands/mednotes/create.toml +22 -0
- package/core/commands/mednotes/enrich.toml +24 -0
- package/core/commands/mednotes/fix-wiki.toml +24 -0
- package/core/commands/mednotes/history.toml +19 -0
- package/core/commands/mednotes/link-body.toml +22 -0
- package/core/commands/mednotes/link-related.toml +24 -0
- package/core/commands/mednotes/link.toml +24 -0
- package/core/commands/mednotes/pdf-library.toml +24 -0
- package/core/commands/mednotes/process-chats.toml +20 -0
- package/core/commands/mednotes/setup.toml +18 -0
- package/core/commands/mednotes/status.toml +24 -0
- package/core/commands/mednotes/telemetry.toml +24 -0
- package/core/commands/report.toml +23 -0
- package/core/skills/THIRD_PARTY_NOTICES.md +45 -0
- package/core/skills/create-medical-flashcards/SKILL.md +113 -0
- package/core/skills/create-medical-note/SKILL.md +90 -0
- package/core/skills/enrich-medical-note/SKILL.md +120 -0
- package/core/skills/fix-medical-wiki/SKILL.md +559 -0
- package/core/skills/link-medical-wiki/SKILL.md +224 -0
- package/core/skills/obsidian-cli/SKILL.md +118 -0
- package/core/skills/obsidian-markdown/SKILL.md +207 -0
- package/core/skills/obsidian-markdown/references/CALLOUTS.md +58 -0
- package/core/skills/obsidian-markdown/references/EMBEDS.md +63 -0
- package/core/skills/obsidian-markdown/references/PROPERTIES.md +61 -0
- package/core/skills/obsidian-ops/SKILL.md +136 -0
- package/core/skills/pdf-library/SKILL.md +45 -0
- package/core/skills/process-medical-chats/SKILL.md +246 -0
- package/core/skills/workflow-report/SKILL.md +100 -0
- package/package.json +45 -0
package/.opencode/mednotes/src/mednotes/domains/wiki/capabilities/quality/agent_report_validation.py
ADDED
|
@@ -0,0 +1,4448 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
import shlex
|
|
6
|
+
import unicodedata
|
|
7
|
+
from collections.abc import Iterable
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import cast
|
|
10
|
+
from urllib.parse import unquote
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr, ValidationError
|
|
13
|
+
|
|
14
|
+
from mednotes.domains.wiki.capabilities.quality.agent_run_audit import audit_agent_transcript
|
|
15
|
+
from mednotes.domains.wiki.common import SKILLS_RELPATH
|
|
16
|
+
from mednotes.domains.wiki.contracts.agent_report import (
|
|
17
|
+
AgentRunReportFinding,
|
|
18
|
+
AgentRunReportFindingCode,
|
|
19
|
+
AgentRunReportSeverity,
|
|
20
|
+
AgentRunReportValidation,
|
|
21
|
+
FixWikiPrimaryObjectiveSummary,
|
|
22
|
+
ProcessChatsPrimaryObjectiveSummary,
|
|
23
|
+
StyleRewriteAtomicApplyResult,
|
|
24
|
+
)
|
|
25
|
+
from mednotes.domains.wiki.contracts.agent_run_audit import (
|
|
26
|
+
AuditWorkflow,
|
|
27
|
+
WorkflowDeviationFinding,
|
|
28
|
+
WorkflowTranscriptAuditResult,
|
|
29
|
+
)
|
|
30
|
+
from mednotes.domains.wiki.contracts.happy_path import happy_path_metrics_from_findings
|
|
31
|
+
from mednotes.domains.wiki.contracts.public_report import WorkflowPublicObjectiveAnswer, WorkflowPublicReportViewModel
|
|
32
|
+
from mednotes.domains.wiki.flows.fix_wiki.fix_wiki_primary_objective import fix_wiki_primary_objective_summary
|
|
33
|
+
from mednotes.domains.wiki.flows.process_chats.process_chats_primary_objective import (
|
|
34
|
+
process_chats_primary_objective_summary,
|
|
35
|
+
)
|
|
36
|
+
from mednotes.kernel.agent_directive import AgentDirective, AgentEffect
|
|
37
|
+
from mednotes.kernel.base import JsonObject, JsonObjectAdapter
|
|
38
|
+
from mednotes.kernel.effects import WorkflowEffectKind
|
|
39
|
+
from mednotes.kernel.public_report import WorkflowPrimaryObjectiveSummary, WorkflowPublicReport
|
|
40
|
+
from mednotes.platform.feedback.operational_contract import (
|
|
41
|
+
PUBLIC_TOOL_TEXT_CONTRACT_VIOLATION,
|
|
42
|
+
TOOL_CALL_ERROR,
|
|
43
|
+
validate_agent_tool_calls,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# Statuses grouped as non-success; they are also folded into KNOWN_WORKFLOW_STATUSES.
NON_SUCCESS_STATUSES = {
    "blocked",
    "completed_with_link_blockers",
    "failed",
    "waiting_agent",
    "waiting_external",
    "waiting_human",
}
# Schema identifiers that _is_fsm_first_payload() accepts as FSM-first payloads.
FSM_FIRST_SCHEMAS = {
    "medical-notes-workbench.fix-wiki-fsm-result.v1",
    "medical-notes-workbench.flashcards-fsm-result.v1",
    "medical-notes-workbench.history-fsm-result.v1",
    "medical-notes-workbench.link-fsm-result.v1",
    "medical-notes-workbench.link-related-fsm-result.v1",
    "medical-notes-workbench.process-chats-fsm-result.v1",
    "medical-notes-workbench.setup-fsm-result.v1",
}
|
|
63
|
+
# Type alias: union of the three primary-objective summary models imported above.
PrimaryObjectiveSummary = (
    FixWikiPrimaryObjectiveSummary
    | ProcessChatsPrimaryObjectiveSummary
    | WorkflowPrimaryObjectiveSummary
)
|
|
66
|
+
# Schema identifiers of style-rewrite atomic apply results (agent stdout and result forms).
STYLE_REWRITE_APPLY_RESULT_SCHEMAS = {
    "medical-notes-workbench.style-rewrite-atomic-apply-agent-stdout.v1",
    "medical-notes-workbench.style-rewrite-atomic-apply-result.v1",
}
# Every recognised workflow status: the non-success set plus the remaining values below.
KNOWN_WORKFLOW_STATUSES = {
    *NON_SUCCESS_STATUSES,
    "no_pending",
    "completed",
    "completed_with_warnings",
    "ready_to_publish",
    "published",
    "preview_ready",
    "ready",
    "running",
}
|
|
80
|
+
# Maps transcript-audit finding codes ("agent.*") to agent-report finding codes.
# Codes missing from this table fall back to WORKFLOW_CONTRACT_CONTRADICTION in
# _agent_report_code_for_audit().
TRANSCRIPT_AUDIT_FINDING_CODE_MAP: dict[str, AgentRunReportFindingCode] = {
    "agent.transcript_unreadable": AgentRunReportFindingCode.TRANSCRIPT_UNREADABLE,
    "agent.subagent_raw_content_contract_violation": (
        AgentRunReportFindingCode.SUBAGENT_RAW_CONTENT_CONTRACT_VIOLATION
    ),
    "agent.parent_canonical_artifact_write_before_subagent": (
        AgentRunReportFindingCode.PARENT_CANONICAL_ARTIFACT_WRITE_BEFORE_SUBAGENT
    ),
    "agent.parent_canonical_artifact_write_after_subagent": (
        AgentRunReportFindingCode.PARENT_CANONICAL_ARTIFACT_WRITE_AFTER_SUBAGENT
    ),
    "agent.parallel_human_decision_backlog": (
        AgentRunReportFindingCode.PARALLEL_HUMAN_DECISION_BACKLOG
    ),
    "agent.agy_materialized_skill_misclassified_as_stale": (
        AgentRunReportFindingCode.AGY_MATERIALIZED_SKILL_MISCLASSIFIED_AS_STALE
    ),
    "agent.recoverable_tool_error_observed": (
        AgentRunReportFindingCode.RECOVERABLE_TOOL_ERROR_OBSERVED
    ),
}
|
|
97
|
+
# Substrings marking a success claim as being about the run as a whole.
# NOTE(review): entries are unaccented stems ("publicad", "conclu"), so they are
# presumably matched against normalised lowercase text - confirm at the call site.
GLOBAL_SUCCESS_CONTEXT_MARKERS = {
    "completo",
    "conclu",
    "final",
    "fluxo",
    "pronto",
    "publicacao",
    "publicad",
    "publicou",
    "wiki",
    "workflow",
}
# Substrings marking a success claim as limited to one step or sub-scope.
SCOPED_SUCCESS_CONTEXT_MARKERS = {
    "body links",
    "etapa deterministic",
    "grafo",
    "links corporais",
    "notas relacionadas",
    "related notes",
    "reparo automatic",
    "reparo deterministic",
    "reparo inicial",
    "reparos automatic",
    "reparos deterministic",
    "reparos iniciais",
}
|
|
123
|
+
# --- Claim detection in report text (Portuguese and English wording) ---

# A whole-word success/completion claim.
SUCCESS_CLAIM_RE = re.compile(
    r"\b("
    r"sucesso|conclu[ií]do|concluiu|completed|success|sem\s+desvios|sem\s+problemas|pronto"
    r")\b",
    re.IGNORECASE,
)
# Wording that claims a blocker, pause, interruption, duplicate or collision.
UNSUPPORTED_BLOCKER_CLAIM_RE = re.compile(
    r"\b("
    r"bloquead\w*|blocked|pausad\w*|interrompid\w*|bloqueio\s+preventivo|"
    r"duplicidade|duplicate|collision|colis[aã]o"
    r")\b",
    re.IGNORECASE,
)
# Wording that claims there were no deviations or probes.
NO_TOOL_DEVIATION_CLAIM_RE = re.compile(
    r"(desvios?\s+do\s+happy\s+path\s*:\s*nenhum|nenhum\s+desvio|sem\s+desvios?|"
    r"n[aã]o\s+houve\s+desvios?|houve\s+desvios?[^?]{0,100}\?\s*n[aã]o\s+houve|"
    r"n[aã]o\s+foram\s+executados\s+probes?|no\s+deviations?|no\s+probes?)",
    re.IGNORECASE,
)
# "<count> nota(s)/arquivo(s) ... reescrit*/rewrite" within one sentence; the number is group "count".
SPECIALIST_REWRITE_COUNT_CLAIM_RE = re.compile(
    r"\b(?P<count>\d+)\s+"
    r"(?:nota(?:\(s\))?s?|arquivo(?:\(s\))?s?)"
    r"[^.!?\n]{0,80}\b(?:reescrit|rewrite)",
    re.IGNORECASE,
)
# One sentence saying the headless runtime/CLI lacks the agent or tool needed to continue.
RUNTIME_CONTINUATION_UNAVAILABLE_RE = re.compile(
    r"(runtime\s+headless|headless|cli)[^.!?\n]{0,160}"
    r"(n[aã]o\s+possui|sem|lacks?|unavailable|indispon[ií]vel)[^.!?\n]{0,160}"
    r"(invoke_agent|ferramenta|tool|subagente|subagent|med-knowledge-architect)",
    re.IGNORECASE,
)
# A lowercase snake_case token (case-sensitive on purpose: no IGNORECASE flag).
STATUS_VALUE_RE = re.compile(r"\b[a-z][a-z0-9_]*\b")
# A negation word at most 24 word/space characters before the end of the text.
NEGATED_SUCCESS_PREFIX_RE = re.compile(r"\b(n[aã]o|not|never|sem)\b[\w\s]{0,24}$", re.IGNORECASE)
# A negation followed by a success word inside the same sentence.
NEGATED_SUCCESS_SENTENCE_RE = re.compile(
    r"\b(n[aã]o|not|never|sem)\b[^.!?\n]{0,160}"
    r"\b(sucesso|success|conclu[ií]do|concluiu|completed|pronto|completo)\b",
    re.IGNORECASE,
)
# "mas/porém/contudo ... workflow/fluxo/wiki ... <still pending or blocked>" in one sentence.
SCOPED_SUCCESS_WITH_GLOBAL_BLOCKER_RE = re.compile(
    r"\b(mas|por[eé]m|contudo)\b[^.!?\n]{0,180}"
    r"\b(workflow|fluxo|wiki)\b[^.!?\n]{0,180}"
    r"\b(terminou|ficou|permanece|aguarda|bloque\w*|interromp\w*|waiting_agent|waiting_external|pendente)\b",
    re.IGNORECASE,
)

# --- Path extraction; each pattern exposes the path as group "path" ---

# An absolute path wrapped in backticks.
BACKTICK_ABSOLUTE_PATH_RE = re.compile(r"`(?P<path>/[^`]+)`")
# The absolute path part of a file:// URI.
FILE_URI_RE = re.compile(r"file://(?P<path>/[^\s\]`>\"']+)")
# A bare path under /Users, /tmp or /private/tmp that is not the tail of a longer token or URL.
PLAIN_ABSOLUTE_PATH_RE = re.compile(r"(?<![\w:/])(?P<path>/(?:Users|tmp|private/tmp)/[^\s)\]`>\"']+)")
# The "File Path: `file://...`" line found in tool content.
TOOL_CONTENT_FILE_PATH_RE = re.compile(r"File Path:\s*`file://(?P<path>[^`]+)`")
|
|
171
|
+
# Terms that must not appear in public-facing output. Order is kept as declared.
# Accented and unaccented spellings are both listed where they differ.
PUBLIC_OUTPUT_FORBIDDEN_TERMS = (
    # command-line invocations and flags
    "uv run",
    "--apply",
    "wiki/cli.py",
    "--json",
    "--dry-run",
    # internal field and artifact vocabulary
    "blocked_reason",
    "receipt",
    "recibo",
    "schema",
    "hash",
    "fix-wiki --apply",
    "finalize-agy-specialist-task",
    "run-linker",
    "resource_guard_active",
    "compact-report",
    "full-report",
    # process exit status wording
    "workflow_exit_code",
    "código de saída",
    "codigo de saida",
    "código de retorno",
    "codigo de retorno",
    "exit code",
    "returncode",
    # runtime and infrastructure jargon
    "background task",
    "agy background fallback",
    "harness externo",
    "versionamento",
    "workflow",
    "linker",
    "atestação",
    "atestacao",
    "homologado",
    "logs",
    # internal identifiers and reason codes
    "progress_view_model",
    "process_chats_terminal_state",
    "specialist_model_quota_exhausted",
    "specialist_model_capacity_unavailable",
    "guard_lease_mismatch",
    "run_id",
    # waiting phrases
    "i am waiting",
    "you will be notified",
    "waiting for completion",
    "no_pending",
)
# Keys whose values are treated as nested containers of a transcript structure.
TRANSCRIPT_CHILD_CONTAINER_KEYS = (
    "$set",
    "content",
    "events",
    "items",
    "messages",
    "records",
    "response",
    "responses",
    "result",
    "toolCalls",
    "tool_calls",
    "transcript",
)
|
|
230
|
+
# Schema identifier of a CPU sample record.
CPU_SAMPLE_SCHEMA = "medical-notes-workbench.controlled-experiment-cpu-sample.v1"
# High-CPU thresholds: CPU percentage, minimum sample count and minimum span in seconds.
# NOTE(review): how the three combine is decided by code outside this chunk.
HIGH_CPU_PERCENT_THRESHOLD = 85.0
HIGH_CPU_MIN_SAMPLE_COUNT = 2
HIGH_CPU_MIN_SPAN_SECONDS = 10.0
# Runtime log line announcing the selected model; the model label is group "label".
AGY_SELECTED_MODEL_RE = re.compile(
    r'Propagating selected model override to backend:\s+label="(?P<label>[^"]+)"'
)
# The word "flash" alone, or "gemini" followed by dashes, spaces, digits or dots and then "flash".
FLASH_MODEL_RE = re.compile(r"\bflash\b|gemini[-\s\d.]*flash", re.IGNORECASE)
# A line starting with "D" or "deleted:" followed by a ".md" path containing
# "Wiki_Medicina" or "wiki"; the path is group "path".
PROCESS_CHATS_WIKI_DELETION_RE = re.compile(
    r"(?m)^\s*(?:D|deleted:)\s+(?P<path>.*(?:Wiki_Medicina|wiki)[^\n]*\.md)\s*$",
    re.IGNORECASE,
)
|
|
240
|
+
# For each root-cause code, the user-facing labels associated with it.
# NOTE(review): presumably a report must mention one of the labels - confirm at the call site.
ROOT_CAUSE_PUBLIC_LABELS: dict[str, tuple[str, ...]] = {
    "environment_blocker.windows_path_or_venv": ("ambiente Python", "Acesso negado", "venv"),
    "specialist_model_capacity_unavailable": ("cota", "quota", "capacidade", "modelo especialista"),
    "specialist_model_quota_exhausted": ("cota", "quota", "capacidade", "modelo especialista"),
    "vocabulary_curation_required": ("curadoria de vocabulário", "vocabulary curation", "vocabulário"),
}
# Reason codes treated as legitimate reasons for a specialist step to stop.
LEGITIMATE_SPECIALIST_STOP_REASONS = {
    "rewrite_output_validation_errors",
    "specialist_model_capacity_unavailable",
    "specialist_model_quota_exhausted",
    "style_rewrite_agent_contract_violation",
    "style_rewrite_output_missing",
    "style_rewrite_still_requires_rewrite",
    "target_hash_changed",
}
# Agent, tool and command names used as continuation markers for a waiting_agent run.
WAITING_AGENT_CONTINUATION_MARKERS = (
    "med-knowledge-architect",
    "finalize-agy-specialist-task",
    "finalize-opencode-specialist-task",
    "invoke_agent",
    "define_subagent",
    "invoke_subagent",
    "finalize-style-rewrite-output",
    "collect-style-rewrite-outputs",
    "apply-specialist-style-rewrite",
    "apply-style-rewrite",
)
# Decision reason codes that are not errors.
NON_ERROR_DECISION_REASON_CODES = {"style_rewrite_ready"}
|
|
288
|
+
# For each non-success status, the wording fragments associated with it in human-readable text.
# The keys are the same six values as NON_SUCCESS_STATUSES.
# NOTE(review): fragments are unaccented ("nao", "decisao"), so they are presumably
# matched against accent-stripped lowercase text - confirm at the call site.
NON_SUCCESS_HUMAN_STATUS_MARKERS: dict[str, tuple[str, ...]] = {
    "blocked": ("bloquead", "nao concluiu", "nao foi conclu", "nao fixou", "pendente"),
    "failed": ("falhou", "erro", "nao concluiu", "nao foi conclu"),
    "waiting_agent": (
        "aguard",
        "bloquead",
        "cota",
        "quota",
        "modelo especialista",
        "nao fixou",
        "nao foi fixada por completo",
        "parcial",
        "pendente",
        "reescrita especializada",
    ),
    "waiting_external": (
        "aguard",
        "bloquead",
        "cota",
        "quota",
        "capacidade",
        "modelo especialista",
        "nao fixou",
        "pendente",
        "sem capacidade",
    ),
    "waiting_human": (
        "decisao humana",
        "escolha humana",
        "confirmacao",
        "confirmar",
        "aguard",
        "pendente",
    ),
    "completed_with_link_blockers": ("link", "grafo", "bloquead", "pendente"),
}
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def validate_agent_run_report(
    *,
    workflow_payload: JsonObject,
    transcript: object | None = None,
    final_report_text: str | None = None,
    runtime_log_text: str | None = None,
    workflow_payload_path: Path | None = None,
    transcript_path: Path | None = None,
    final_report_path: Path | None = None,
    runtime_log_paths: list[Path] | None = None,
) -> AgentRunReportValidation:
    """Check an agent run's final report, transcript and logs against the workflow payload.

    Every check appends to one ordered list of findings. The result has status
    ``"blocked"`` when that list is non-empty and ``"completed"`` otherwise.

    Args:
        workflow_payload: Workflow result payload; validated as a JSON object first.
        transcript: Optional in-memory transcript. Transcript checks run only when it is not ``None``.
        final_report_text: Optional report text, resolved together with ``transcript``.
        runtime_log_text: Optional runtime log text; ``None`` is treated as ``""``.
        workflow_payload_path: Optional payload path, echoed in the result and given to the audit.
        transcript_path: Optional transcript path; the file-based audit runs only when it is set.
        final_report_path: Optional report path, echoed in the result and given to the audit.
        runtime_log_paths: Optional log paths for the audit; ``None`` means no paths.

    Returns:
        The validation result carrying the workflow truth fields, the findings and derived metrics.
    """

    raw_payload = _json_object(workflow_payload)
    # Directive checks read the payload as received, before diagnostic_context is normalised.
    directive_findings = _agent_directive_contract_findings(raw_payload)
    payload = _payload_with_safe_diagnostic_context(raw_payload)
    truth = _workflow_truth(payload)
    primary_objective = _workflow_primary_objective_summary(payload)
    final_text = _final_report_text(final_report_text=final_report_text, transcript=transcript)
    log_text = runtime_log_text or ""

    # The order of the checks below fixes the order of the reported findings.
    findings: list[AgentRunReportFinding] = [*directive_findings]
    findings += _legacy_specialist_route_findings(payload)
    final_report_present = bool(final_text)

    # Checks that need only the payload.
    findings += _public_output_findings(payload)
    findings += _public_report_pending_effect_success_findings(payload)
    findings += _stale_next_action_findings(payload)
    if primary_objective is None:
        findings += _missing_fsm_primary_objective_findings(payload)

    if final_text:
        # Checks that compare the final report text with the payload.
        findings += _final_report_permission_findings(payload, final_text)
        incomplete_findings = _final_report_incomplete_findings(final_text, truth)
        findings += incomplete_findings
        # A report flagged as incomplete is recorded as not present.
        final_report_present = not incomplete_findings
        findings += _final_report_internal_term_findings(final_text)
        findings += _status_mismatch_findings(final_text, truth, primary_objective)
        findings += _unsupported_blocker_claim_findings(final_text, truth)
        findings += _success_claim_findings(final_text, truth)
        findings += _omitted_status_findings(final_text, truth)
        findings += _error_context_root_cause_findings(payload, final_text)
        findings += _final_report_local_path_leak_findings(final_text)
        findings += _invalid_reported_artifact_path_findings(final_text)
        findings += _workflow_payload_omission_findings(payload, final_text, transcript)
        if primary_objective is not None:
            findings += _primary_objective_payload_findings(payload, primary_objective)
            findings += _primary_objective_success_claim_findings(final_text, primary_objective)
            findings += _primary_objective_omission_findings(final_text, primary_objective)
    elif primary_objective is not None:
        # Without report text only the payload side of the primary objective can be checked.
        findings += _primary_objective_payload_findings(payload, primary_objective)

    findings += _workflow_payload_consistency_findings(payload)
    findings += _runtime_log_findings(payload, log_text, final_text, transcript)

    if transcript is not None:
        # Checks that need the in-memory transcript.
        findings += _tool_payload_contract_findings(transcript)
        findings += _omitted_tool_error_findings(transcript, final_text)
        findings += _omitted_tool_deviation_findings(transcript, final_text)
        findings += _blocked_workflow_tool_result_findings(transcript, final_text)
        findings += _update_topic_success_claim_findings(transcript, truth)
        findings += _transcript_specialist_model_policy_findings(payload, transcript)
        findings += _specialist_completed_apply_step_findings(transcript)
        findings += _opencode_specialist_receipt_step_findings(payload, transcript)
        findings += _style_rewrite_batch_progress_checkpoint_findings(payload, transcript)
        findings += _specialist_rewrite_count_findings(transcript, final_text)
        findings += _waiting_agent_continuation_findings(payload, transcript, final_text, log_text)
        findings += _ready_continuation_stopped_findings(payload, transcript, final_text, log_text)
        findings += _waiting_external_continuation_attempt_findings(payload, transcript)

    # The file-based audit is keyed on transcript_path, not on the in-memory transcript.
    transcript_audit = _audit_agent_transcript_from_paths(
        truth=truth,
        workflow_payload_path=workflow_payload_path,
        transcript_path=transcript_path,
        final_report_path=final_report_path,
        runtime_log_paths=runtime_log_paths or [],
    )
    if transcript_audit is not None:
        findings += _transcript_audit_findings(transcript_audit)

    has_findings = bool(findings)
    happy_path_metrics = happy_path_metrics_from_findings(
        workflow=truth.workflow or _optional_text(payload, "workflow"),
        run_id=truth.run_id or str(payload.get("run_id") or "unknown"),
        findings=findings,
        primary_objective_completed=_primary_objective_completed(primary_objective),
        legitimate_stop_reason=_legitimate_stop_reason(payload, primary_objective),
    )
    public_report_view_model = _public_report_view_model(payload, primary_objective)

    if has_findings:
        status = "blocked"
        blocked_reason = "agent_final_report_contract_violation"
        next_action = (
            "Corrigir o relatório final do agente para refletir o payload oficial, reportar erros de tool "
            "e remover caminhos de artefatos inexistentes antes de concluir a rodada."
        )
    else:
        status = "completed"
        blocked_reason = ""
        next_action = ""

    return AgentRunReportValidation(
        status=status,
        workflow=truth.workflow,
        run_id=truth.run_id,
        workflow_status=truth.workflow_status,
        workflow_phase=truth.workflow_phase,
        receipt_status=truth.receipt_status,
        blocked_reason=blocked_reason,
        next_action=next_action,
        final_report_present=final_report_present,
        transcript_present=not (transcript is None and transcript_path is None),
        workflow_payload_path="" if workflow_payload_path is None else str(workflow_payload_path),
        transcript_path="" if transcript_path is None else str(transcript_path),
        final_report_path="" if final_report_path is None else str(final_report_path),
        primary_objective=primary_objective,
        happy_path_metrics=happy_path_metrics,
        public_report_view_model=public_report_view_model,
        transcript_audit=transcript_audit,
        finding_count=len(findings),
        findings=findings,
    )
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def _audit_agent_transcript_from_paths(
    *,
    truth: _WorkflowTruth,
    workflow_payload_path: Path | None,
    transcript_path: Path | None,
    final_report_path: Path | None,
    runtime_log_paths: list[Path],
) -> WorkflowTranscriptAuditResult | None:
    """Run the file-based transcript audit, or return ``None`` when no transcript path is given.

    The workflow name from ``truth`` is normalised with ``_audit_workflow`` before
    it is handed to ``audit_agent_transcript``; the paths are passed through unchanged.
    """
    if transcript_path is not None:
        audit_workflow = _audit_workflow(truth.workflow)
        return audit_agent_transcript(
            transcript_path=transcript_path,
            workflow=audit_workflow,
            workflow_payload_path=workflow_payload_path,
            final_report_path=final_report_path,
            runtime_log_paths=runtime_log_paths,
        )
    return None
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def _audit_workflow(workflow: str) -> AuditWorkflow:
|
|
488
|
+
normalized = workflow.strip().lower()
|
|
489
|
+
if normalized.startswith("/"):
|
|
490
|
+
normalized = normalized[1:]
|
|
491
|
+
if normalized.startswith("mednotes:"):
|
|
492
|
+
normalized = normalized.split(":", 1)[1]
|
|
493
|
+
normalized = normalized.replace("_", "-")
|
|
494
|
+
if normalized in {"process-chats", "fix-wiki", "link"}:
|
|
495
|
+
return cast(AuditWorkflow, normalized)
|
|
496
|
+
return "unknown"
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
def _transcript_audit_findings(
    transcript_audit: WorkflowTranscriptAuditResult,
) -> list[AgentRunReportFinding]:
    """Convert each finding of a transcript audit into an agent-report finding, keeping order."""
    return list(map(_transcript_audit_finding, transcript_audit.findings))
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
def _transcript_audit_finding(audit_finding: WorkflowDeviationFinding) -> AgentRunReportFinding:
    """Build the agent-report finding for one transcript-audit deviation.

    ``next_action`` is the deviation's promotion gate when it is truthy, otherwise
    the string form of its recommended action. The observed behaviour is used
    both as ``actual`` and as ``message``.
    """
    recommended_action = audit_finding.recommended_action
    next_action = audit_finding.promotion_gate or str(recommended_action)
    observed = audit_finding.observed_behavior
    evidence = {
        "evidence_ref": audit_finding.evidence_ref,
        "recommended_action": recommended_action,
    }
    return AgentRunReportFinding(
        code=_agent_report_code_for_audit(audit_finding),
        severity=_agent_report_severity_for_audit(audit_finding),
        source="transcript_audit",
        source_field="transcript_audit.findings",
        expected=audit_finding.expected_contract,
        actual=observed,
        message=observed,
        next_action=next_action,
        evidence=evidence,
    )
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
def _agent_report_code_for_audit(audit_finding: WorkflowDeviationFinding) -> AgentRunReportFindingCode:
    """Map a transcript-audit code to its report code; unmapped codes become WORKFLOW_CONTRACT_CONTRADICTION."""
    fallback = AgentRunReportFindingCode.WORKFLOW_CONTRACT_CONTRADICTION
    return TRANSCRIPT_AUDIT_FINDING_CODE_MAP.get(audit_finding.code, fallback)
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def _agent_report_severity_for_audit(audit_finding: WorkflowDeviationFinding) -> AgentRunReportSeverity:
|
|
531
|
+
if audit_finding.severity == "blocking_candidate":
|
|
532
|
+
return "critical"
|
|
533
|
+
return "high"
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def _json_object(value: object) -> JsonObject:
    """Validate ``value`` with ``JsonObjectAdapter`` and return the validated JSON object."""
    validated: JsonObject = JsonObjectAdapter.validate_python(value)
    return validated
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
class _AgentReportFieldModel(BaseModel):
    """Base for the typed field views in this module: unknown keys are rejected, assignments validated."""

    model_config = ConfigDict(validate_assignment=True, extra="forbid")
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
class _RuntimeCpuSample(_AgentReportFieldModel):
    """One CPU usage sample; unknown keys are ignored and numeric fields must be non-negative."""

    model_config = ConfigDict(validate_assignment=True, populate_by_name=True, extra="ignore")

    # Read from the "schema" key; the field name is accepted too (populate_by_name).
    schema_id: StrictStr = Field("", alias="schema")
    elapsed_seconds: float = Field(0.0, ge=0)
    total_cpu_percent: float = Field(0.0, ge=0)
    max_cpu_percent: float = Field(0.0, ge=0)
    process_count: StrictInt = Field(0, ge=0)
    max_cpu_command: StrictStr = ""
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
class _SpecialistRuntimeBatchItem(_AgentReportFieldModel):
    """One item of a specialist batch: work id, agent and model policy/tier text; unknown keys are ignored."""

    model_config = ConfigDict(validate_assignment=True, extra="ignore")

    work_id: StrictStr = ""
    agent: StrictStr = ""
    model_policy: StrictStr = ""
    required_model_tier: StrictStr = ""
    preferred_model_tier: StrictStr = ""
|
|
563
|
+
|
|
564
|
+
|
|
565
|
+
class _WorkflowTruthPayloadFields(_AgentReportFieldModel):
    """Strict string view of workflow, run id, status, phase and blocked reason; each defaults to ""."""

    workflow: StrictStr = ""
    run_id: StrictStr = ""
    status: StrictStr = ""
    phase: StrictStr = ""
    blocked_reason: StrictStr = ""
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
class _ProgressTruthFields(_AgentReportFieldModel):
    """Strict view of progress fields: workflow, run id, status, phase and ``can_continue_now``."""

    workflow: StrictStr = ""
    run_id: StrictStr = ""
    status: StrictStr = ""
    phase: StrictStr = ""
    # None when the flag is not provided.
    can_continue_now: StrictBool | None = None
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
class _PublicProgressFields(_AgentReportFieldModel):
    """Strict view of the single ``user_action`` text field."""

    user_action: StrictStr = ""
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
class _PublicReceiptFields(_AgentReportFieldModel):
    """Strict view of the single ``next_action`` text field."""

    next_action: StrictStr = ""
|
|
587
|
+
|
|
588
|
+
|
|
589
|
+
class _HumanDecisionPacketFields(_AgentReportFieldModel):
    """Summary fields of a human-decision packet, read only after the payload shape was validated.

    Unknown keys are ignored.
    """

    model_config = ConfigDict(validate_assignment=True, extra="ignore")

    why_this_needs_you: StrictStr = ""
    question: StrictStr = ""
    evidence_summary: StrictStr = ""
    type: StrictStr = ""
    kind: StrictStr = ""
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
class _AgentDirectiveCapabilities(_AgentReportFieldModel):
    """Boolean capability flags of an agent directive; unknown keys are ignored."""

    model_config = ConfigDict(validate_assignment=True, populate_by_name=True, extra="ignore")

    # "continue" is a Python keyword, so the flag is read through an alias.
    continue_: StrictBool = Field(default=False, alias="continue")
    final_report: StrictBool = False
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
class _AgentDirectiveEffect(_AgentReportFieldModel):
    """One effect of an agent directive, reduced to its ``kind``; unknown keys are ignored."""

    model_config = ConfigDict(validate_assignment=True, extra="ignore")

    kind: StrictStr = ""
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
class _AgentDirectiveControl(_AgentReportFieldModel):
    """Control fields of an agent directive: status, state, capabilities, effects, blockers, resume.

    Unknown keys are ignored. The nested defaults are built per instance through factories.
    """

    model_config = ConfigDict(validate_assignment=True, extra="ignore")

    status: StrictStr = ""
    state: StrictStr = ""
    capabilities: _AgentDirectiveCapabilities = Field(default_factory=_AgentDirectiveCapabilities)
    effects: list[_AgentDirectiveEffect] = Field(default_factory=list)
    blockers: list[StrictStr] = Field(default_factory=list)
    resume: StrictStr = ""
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
class _ReceiptTruthFields(_AgentReportFieldModel):
    """Strict string view of a receipt's workflow, run id and status; each defaults to ""."""

    workflow: StrictStr = ""
    run_id: StrictStr = ""
    status: StrictStr = ""
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
class _StateMachineTruthFields(_AgentReportFieldModel):
    """Strict view of the single ``current_state`` text field."""

    current_state: StrictStr = ""
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
class _AgentReportRelatedRecoveryFields(_AgentReportFieldModel):
    """Strict view of the single ``status`` text field of a related-recovery object."""

    status: StrictStr = ""
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
class _AgentReportApplyFields(_AgentReportFieldModel):
    """Strict view of the ``requested_apply`` flag; ``None`` when the flag is not provided."""

    requested_apply: StrictBool | None = None
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
class _AgentReportOrchestrationPlanFields(_AgentReportFieldModel):
    """Strict view of an orchestration plan: status text plus three optional boolean flags."""

    status: StrictStr = ""
    # Each flag is None when it is not provided.
    automatic: StrictBool | None = None
    executable_now: StrictBool | None = None
    human_decision_required: StrictBool | None = None
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
class _AgentReportVersionControlSafetyFields(_AgentReportFieldModel):
    """Strict view of version-control safety fields: three optional flags and two text fields."""

    # Each flag is None when it is not provided.
    mutation_without_guard: StrictBool | None = None
    resource_guard_active: StrictBool | None = None
    run_finish_seen: StrictBool | None = None
    sync_status: StrictStr = ""
    agent_instruction: StrictStr = ""
|
|
656
|
+
|
|
657
|
+
|
|
658
|
+
class _ProcessChatsTerminalFields(_AgentReportFieldModel):
    """Strict view of process-chats terminal fields: status/phase text, two state texts, two counts."""

    workflow: StrictStr = ""
    status: StrictStr = ""
    phase: StrictStr = ""
    process_chats_terminal_state: StrictStr = ""
    process_chats_backlog_state: StrictStr = ""
    # Counts are None when they are not provided.
    item_count: StrictInt | None = None
    total_available_count: StrictInt | None = None
|
|
666
|
+
|
|
667
|
+
|
|
668
|
+
class _AgentReportHeadlessExportFields(_AgentReportFieldModel):
    """Strict view of the ``embedded_count`` integer; ``None`` when it is not provided."""

    embedded_count: StrictInt | None = None
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
class _AgentReportReportContractFields(_AgentReportFieldModel):
    """Strict view of a report contract: the ``must_include`` strings and the ``after_each_batch`` flag."""

    must_include: list[StrictStr] = Field(default_factory=list)
    after_each_batch: StrictBool = False
|
|
675
|
+
|
|
676
|
+
|
|
677
|
+
class _SpecialistRuntimeBatch(_AgentReportFieldModel):
    """Executable specialist batch, as projected from the effects of an agent_directive."""

    phase: StrictStr = ""
    current_batch_items: list[_SpecialistRuntimeBatchItem] = Field(default_factory=list)
    report_contract: _AgentReportReportContractFields = Field(
        default_factory=_AgentReportReportContractFields
    )
|
|
683
|
+
|
|
684
|
+
|
|
685
|
+
class _TranscriptEventFields(_AgentReportFieldModel):
    """Typed view of one transcript event; unknown keys are ignored."""

    model_config = ConfigDict(validate_assignment=True, populate_by_name=True, extra="ignore")

    # Read from the "type" key; the field name is accepted too (populate_by_name).
    event_type: StrictStr = Field("", alias="type")
    tool_name: StrictStr = ""
    role: StrictStr = ""
    status: StrictStr = ""
    output: StrictStr = ""
    parameters: JsonObject = Field(default_factory=dict)
    # Typed as object, so any value is accepted here.
    content: object = ""
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
class _TranscriptTextParameters(_AgentReportFieldModel):
    """Text parameters (``command`` and ``role``) that can affect transcript-derived decisions."""

    command: StrictStr = ""
    role: StrictStr = ""
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
class _OpenCodeSpecialistTaskMetadataFields(_AgentReportFieldModel):
    """Typed view of OpenCode specialist task metadata; unknown keys are ignored."""

    model_config = ConfigDict(validate_assignment=True, populate_by_name=True, extra="ignore")

    # Read from the "schema" key; the field name is accepted too (populate_by_name).
    schema_id: StrictStr = Field("", alias="schema")
    work_id: StrictStr = ""
    task_id: StrictStr = ""
    provider_id: StrictStr = ""
    model_id: StrictStr = ""
    model_tier: StrictStr = ""
    tool_sequence: list[StrictStr] = Field(default_factory=list)
    prompt_contract: StrictStr = ""
    # None when the flag is not provided.
    raw_content_embedded: StrictBool | None = None
|
|
716
|
+
|
|
717
|
+
|
|
718
|
+
class _SpecialistTaskRunnerResultFields(_AgentReportFieldModel):
    """Typed view of a specialist task runner result; unknown keys are ignored."""

    model_config = ConfigDict(validate_assignment=True, populate_by_name=True, extra="ignore")

    # Read from the "schema" key; the field name is accepted too (populate_by_name).
    schema_id: StrictStr = Field("", alias="schema")
    status: StrictStr = ""
    work_id: StrictStr = ""
    # None when the result carries no next apply step.
    next_apply_step: JsonObject | None = None
|
|
725
|
+
|
|
726
|
+
|
|
727
|
+
class _BlockedWorkflowToolResult(_AgentReportFieldModel):
    """Strict view of a blocked workflow tool result; ``blocked_reason`` is the only required field."""

    tool_name: StrictStr = ""
    status: StrictStr = ""
    phase: StrictStr = ""
    # No default: validation fails when the reason is missing.
    blocked_reason: StrictStr
    work_id: StrictStr = ""
|
|
733
|
+
|
|
734
|
+
|
|
735
|
+
def _field_payload(source: JsonObject, field_names: tuple[str, ...]) -> JsonObject:
|
|
736
|
+
payload: JsonObject = {}
|
|
737
|
+
for field_name in field_names:
|
|
738
|
+
if field_name in source:
|
|
739
|
+
payload[field_name] = source[field_name]
|
|
740
|
+
return payload
|
|
741
|
+
|
|
742
|
+
|
|
743
|
+
def _object_field(source: JsonObject, field_name: str) -> JsonObject:
|
|
744
|
+
if field_name not in source or source[field_name] is None:
|
|
745
|
+
return {}
|
|
746
|
+
value = source[field_name]
|
|
747
|
+
if not isinstance(value, dict):
|
|
748
|
+
if field_name == "diagnostic_context":
|
|
749
|
+
return {}
|
|
750
|
+
raise ValueError(f"{field_name} must be an object")
|
|
751
|
+
return _json_object(value)
|
|
752
|
+
|
|
753
|
+
|
|
754
|
+
def _list_field(source: JsonObject, field_name: str) -> list[object]:
|
|
755
|
+
value = source.get(field_name)
|
|
756
|
+
if not isinstance(value, list):
|
|
757
|
+
return []
|
|
758
|
+
return list(value)
|
|
759
|
+
|
|
760
|
+
|
|
761
|
+
def _is_fsm_first_payload(payload: JsonObject) -> bool:
    """Tell whether the payload's ``schema`` text is listed in ``FSM_FIRST_SCHEMAS``."""
    declared_schema = _optional_text(payload, "schema")
    return declared_schema in FSM_FIRST_SCHEMAS
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
def _payload_with_safe_diagnostic_context(payload: JsonObject) -> JsonObject:
|
|
766
|
+
if isinstance(payload.get("diagnostic_context"), dict):
|
|
767
|
+
return payload
|
|
768
|
+
return {**payload, "diagnostic_context": {}}
|
|
769
|
+
|
|
770
|
+
|
|
771
|
+
def _agent_directive_from_payload(payload: JsonObject) -> tuple[AgentDirective | None, str]:
|
|
772
|
+
if "agent_directive" not in payload:
|
|
773
|
+
return None, "missing"
|
|
774
|
+
directive_payload = payload["agent_directive"]
|
|
775
|
+
if not isinstance(directive_payload, dict):
|
|
776
|
+
return None, "agent_directive_not_object"
|
|
777
|
+
try:
|
|
778
|
+
return AgentDirective.model_validate(directive_payload), ""
|
|
779
|
+
except ValidationError as exc:
|
|
780
|
+
first_error = exc.errors()[0] if exc.errors() else {}
|
|
781
|
+
location = ".".join(str(part) for part in first_error.get("loc", ())) or "agent_directive"
|
|
782
|
+
message = str(first_error.get("msg") or "invalid")
|
|
783
|
+
return None, f"{location}: {message}"
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
def _agent_directive_contract_findings(payload: JsonObject) -> list[AgentRunReportFinding]:
    """Report an invalid or missing ``agent_directive`` on FSM-first payloads.

    Payloads that are not FSM-first, or whose directive parses, produce no
    findings.
    """
    if _is_fsm_first_payload(payload):
        directive, directive_error = _agent_directive_from_payload(payload)
        if directive is None:
            return [_agent_directive_invalid_finding(payload, directive_error)]
    return []
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
def _agent_directive_control(payload: JsonObject) -> _AgentDirectiveControl:
    """Build the typed control view of the payload's ``agent_directive``.

    Falls back to a default ``_AgentDirectiveControl()`` when the directive is
    missing or does not validate.
    """
    directive, _error = _agent_directive_from_payload(payload)
    if directive is None:
        return _AgentDirectiveControl()
    control_fields = ("status", "state", "capabilities", "effects", "blockers", "resume")
    control_payload = directive.control.to_payload()
    return _AgentDirectiveControl.model_validate(_field_payload(control_payload, control_fields))
|
|
806
|
+
|
|
807
|
+
|
|
808
|
+
def _specialist_runtime_batch_from_agent_directive(payload: JsonObject) -> _SpecialistRuntimeBatch:
    """Read executable specialist work only from the root agent directive.

    Collects the ``current_batch_items`` objects of every
    ``CALL_SPECIALIST_MODEL`` effect recognised as style-rewrite work.  The
    report contract comes from the last such effect that carries a non-empty
    ``report_contract``.  An empty batch is returned when the directive is
    missing or invalid.
    """
    directive, _directive_error = _agent_directive_from_payload(payload)
    if directive is None:
        return _SpecialistRuntimeBatch()

    collected_items: list[_SpecialistRuntimeBatchItem] = []
    contract = _AgentReportReportContractFields()
    for effect in directive.control.effects:
        effect_payload = effect.payload
        is_specialist_call = effect.kind == WorkflowEffectKind.CALL_SPECIALIST_MODEL
        if not (is_specialist_call and _is_style_rewrite_specialist_effect(effect, effect_payload)):
            continue
        # Non-object entries in the batch list are silently skipped.
        for raw_item in _list_field(effect_payload, "current_batch_items"):
            if isinstance(raw_item, dict):
                collected_items.append(_SpecialistRuntimeBatchItem.model_validate(raw_item))
        raw_contract = _object_field(effect_payload, "report_contract")
        if raw_contract:
            contract = _AgentReportReportContractFields.model_validate(
                _field_payload(raw_contract, ("must_include", "after_each_batch"))
            )

    # The phase is only named when there is actual work in the batch.
    phase = "style_rewrite" if collected_items else ""
    return _SpecialistRuntimeBatch(
        phase=phase,
        current_batch_items=collected_items,
        report_contract=contract,
    )
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
def _is_style_rewrite_specialist_effect(effect: AgentEffect, effect_payload: JsonObject) -> bool:
|
|
840
|
+
"""Identify fix-wiki style-rewrite work without consulting diagnostics."""
|
|
841
|
+
|
|
842
|
+
return (
|
|
843
|
+
str(effect_payload.get("kind") or "") == "style_rewrite"
|
|
844
|
+
or effect.target == "med-knowledge-architect"
|
|
845
|
+
or bool(_list_field(effect_payload, "current_batch_items"))
|
|
846
|
+
)
|
|
847
|
+
|
|
848
|
+
|
|
849
|
+
def _legacy_specialist_route_findings(payload: JsonObject) -> list[AgentRunReportFinding]:
    """Reject old diagnostic-only specialist batches as a contract violation.

    A critical finding is produced when ``diagnostic_context`` carries a
    legacy plan with batch items while the root agent directive exposes no
    executable batch; otherwise the result is empty.
    """
    diagnostic_context = _object_field(payload, "diagnostic_context")
    legacy_plan = _object_field(diagnostic_context, "orchestration" + "_plan")
    legacy_items = _list_field(legacy_plan, "current_batch_items")
    if not legacy_items:
        return []
    # The same work is also routed through the directive: nothing to flag.
    if _specialist_runtime_batch_from_agent_directive(payload).current_batch_items:
        return []
    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.WORKFLOW_AGENT_DIRECTIVE_INVALID,
        severity="critical",
        source="workflow_payload",
        source_field="diagnostic_context legacy specialist batch",
        expected="agent_directive.control.effects[].payload.current_batch_items",
        actual="specialist batch exposed only as diagnostic evidence",
        message="O payload tentou expor trabalho especialista executavel fora do agent_directive root.",
        next_action=(
            "Reemitir o payload FSM com agent_directive.control.effects[] e manter diagnostic_context "
            "apenas como evidencia."
        ),
    )
    return [finding]
|
|
874
|
+
|
|
875
|
+
|
|
876
|
+
class _WorkflowTruth:
|
|
877
|
+
def __init__(
|
|
878
|
+
self,
|
|
879
|
+
*,
|
|
880
|
+
workflow: str,
|
|
881
|
+
run_id: str,
|
|
882
|
+
workflow_status: str,
|
|
883
|
+
workflow_phase: str,
|
|
884
|
+
progress_status: str,
|
|
885
|
+
receipt_status: str,
|
|
886
|
+
blocked_reason: str,
|
|
887
|
+
) -> None:
|
|
888
|
+
self.workflow = workflow
|
|
889
|
+
self.run_id = run_id
|
|
890
|
+
self.workflow_status = workflow_status
|
|
891
|
+
self.workflow_phase = workflow_phase
|
|
892
|
+
self.progress_status = progress_status
|
|
893
|
+
self.receipt_status = receipt_status
|
|
894
|
+
self.blocked_reason = blocked_reason
|
|
895
|
+
|
|
896
|
+
|
|
897
|
+
def _workflow_truth(payload: JsonObject) -> _WorkflowTruth:
    """Resolve the canonical workflow facts from a payload.

    Four sections are read: the payload root, ``progress_view_model``,
    ``receipt`` and ``state_machine_snapshot``.  For FSM-first payloads the
    progress view model and receipt take precedence, root ``status``/``phase``
    are not consulted, and ``blocked_reason`` is always empty.  For other
    payloads the root values take precedence and fill in missing progress or
    receipt statuses.
    """
    is_fsm_first = _is_fsm_first_payload(payload)

    root_fields = _field_payload(payload, ("workflow", "run_id", "status", "phase", "blocked_reason"))
    root = _WorkflowTruthPayloadFields.model_validate(root_fields)

    progress_fields = _field_payload(
        _object_field(payload, "progress_view_model"),
        ("workflow", "run_id", "status", "phase", "can_continue_now"),
    )
    progress = _ProgressTruthFields.model_validate(progress_fields)

    receipt_fields = _field_payload(_object_field(payload, "receipt"), ("workflow", "run_id", "status"))
    receipt = _ReceiptTruthFields.model_validate(receipt_fields)

    snapshot_fields = _field_payload(_object_field(payload, "state_machine_snapshot"), ("current_state",))
    snapshot = _StateMachineTruthFields.model_validate(snapshot_fields)

    if not is_fsm_first:
        # Legacy shape: root values win, sections are fallbacks.
        return _WorkflowTruth(
            workflow=root.workflow or progress.workflow or receipt.workflow,
            run_id=root.run_id or progress.run_id or receipt.run_id,
            workflow_status=root.status or progress.status or receipt.status,
            workflow_phase=root.phase or progress.phase or snapshot.current_state,
            progress_status=progress.status or root.status,
            receipt_status=receipt.status or root.status,
            blocked_reason=root.blocked_reason,
        )

    # FSM-first shape: sections win; the root only supplies identity fallbacks.
    return _WorkflowTruth(
        workflow=progress.workflow or receipt.workflow or root.workflow,
        run_id=progress.run_id or receipt.run_id or root.run_id,
        workflow_status=progress.status or receipt.status,
        workflow_phase=progress.phase or snapshot.current_state,
        progress_status=progress.status,
        receipt_status=receipt.status,
        blocked_reason="",
    )
|
|
930
|
+
|
|
931
|
+
|
|
932
|
+
def _final_report_text(*, final_report_text: str | None, transcript: object | None) -> str:
|
|
933
|
+
if final_report_text is not None:
|
|
934
|
+
return _strip_controlled_experiment_json_lines(final_report_text)
|
|
935
|
+
if transcript is None:
|
|
936
|
+
return ""
|
|
937
|
+
responses: list[str] = []
|
|
938
|
+
delta_parts: list[str] = []
|
|
939
|
+
|
|
940
|
+
def flush_delta_parts() -> None:
|
|
941
|
+
if not delta_parts:
|
|
942
|
+
return
|
|
943
|
+
responses.append("".join(delta_parts))
|
|
944
|
+
delta_parts.clear()
|
|
945
|
+
|
|
946
|
+
def append_response(text: str, *, delta: bool = False) -> None:
|
|
947
|
+
if not text.strip():
|
|
948
|
+
return
|
|
949
|
+
if delta:
|
|
950
|
+
delta_parts.append(text)
|
|
951
|
+
return
|
|
952
|
+
flush_delta_parts()
|
|
953
|
+
responses.append(text)
|
|
954
|
+
|
|
955
|
+
def visit(value: object) -> None:
|
|
956
|
+
if isinstance(value, list):
|
|
957
|
+
for item in value:
|
|
958
|
+
visit(item)
|
|
959
|
+
return
|
|
960
|
+
if not isinstance(value, dict):
|
|
961
|
+
return
|
|
962
|
+
event_type = str(value.get("type") or "").upper()
|
|
963
|
+
if event_type in {"TOOL_USE", "TOOL_RESULT"}:
|
|
964
|
+
flush_delta_parts()
|
|
965
|
+
if event_type == "PLANNER_RESPONSE":
|
|
966
|
+
for field in ("content", "text", "message", "response"):
|
|
967
|
+
raw = value.get(field)
|
|
968
|
+
if isinstance(raw, str) and raw.strip():
|
|
969
|
+
append_response(raw)
|
|
970
|
+
break
|
|
971
|
+
if event_type in {"GEMINI", "MESSAGE"}:
|
|
972
|
+
role = str(value.get("role") or "").lower()
|
|
973
|
+
if event_type == "GEMINI" or role in {"assistant", "model"}:
|
|
974
|
+
text = _transcript_message_text(value.get("content"))
|
|
975
|
+
if text.strip():
|
|
976
|
+
append_response(text, delta=bool(value.get("delta")))
|
|
977
|
+
for child in _transcript_child_containers(value):
|
|
978
|
+
visit(child)
|
|
979
|
+
|
|
980
|
+
visit(transcript)
|
|
981
|
+
flush_delta_parts()
|
|
982
|
+
return _strip_controlled_experiment_json_lines("\n\n".join(responses))
|
|
983
|
+
|
|
984
|
+
|
|
985
|
+
def _strip_controlled_experiment_json_lines(text: str) -> str:
|
|
986
|
+
lines: list[str] = []
|
|
987
|
+
for line in text.splitlines():
|
|
988
|
+
stripped = line.strip()
|
|
989
|
+
if stripped.startswith("{") and (
|
|
990
|
+
"medical-notes-workbench.controlled-experiment-cpu-summary.v1" in stripped
|
|
991
|
+
or "medical-notes-workbench.controlled-experiment-output-truncated.v1" in stripped
|
|
992
|
+
):
|
|
993
|
+
continue
|
|
994
|
+
lines.append(line)
|
|
995
|
+
return "\n".join(lines)
|
|
996
|
+
|
|
997
|
+
|
|
998
|
+
def _transcript_message_text(value: object) -> str:
|
|
999
|
+
if isinstance(value, str):
|
|
1000
|
+
return value
|
|
1001
|
+
if isinstance(value, list):
|
|
1002
|
+
parts = [_transcript_message_text(item) for item in value]
|
|
1003
|
+
return "\n".join(part for part in parts if part.strip())
|
|
1004
|
+
if isinstance(value, dict):
|
|
1005
|
+
for field in ("text", "content", "message"):
|
|
1006
|
+
text = _transcript_message_text(value.get(field))
|
|
1007
|
+
if text.strip():
|
|
1008
|
+
return text
|
|
1009
|
+
parts = value.get("parts")
|
|
1010
|
+
if isinstance(parts, list):
|
|
1011
|
+
return _transcript_message_text(parts)
|
|
1012
|
+
return ""
|
|
1013
|
+
|
|
1014
|
+
|
|
1015
|
+
def _final_report_incomplete_findings(final_text: str, truth: _WorkflowTruth) -> list[AgentRunReportFinding]:
    """Flag a captured "final report" that is only a progress message.

    Produces one high-severity finding when the text looks progress-only, and
    nothing otherwise.  The first known status (workflow, progress, receipt,
    else ``"unknown"``) is attached as evidence.
    """
    if not _final_report_looks_like_progress_only(final_text):
        return []
    known_status = truth.workflow_status or truth.progress_status or truth.receipt_status or "unknown"
    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.FINAL_REPORT_INCOMPLETE,
        severity="high",
        source="final_report",
        source_field="final_report_text",
        expected="relatorio final com status publico, resultado primario, mutacoes, pendencias e erros",
        actual="progress_only",
        message="A resposta capturada parece mensagem intermediaria, nao relatorio final do workflow.",
        next_action=(
            "Tratar a rodada como incompleta e exigir fechamento que diga se a Wiki foi corrigida, "
            "o que mudou, o estado do grafo/Related Notes e qualquer bloqueio ou erro de runtime."
        ),
        evidence={"workflow_status": known_status},
    )
    return [finding]
|
|
1035
|
+
|
|
1036
|
+
|
|
1037
|
+
def _final_report_permission_findings(payload: JsonObject, final_text: str) -> list[AgentRunReportFinding]:
|
|
1038
|
+
if not final_text.strip():
|
|
1039
|
+
return []
|
|
1040
|
+
directive, _directive_error = _agent_directive_from_payload(payload)
|
|
1041
|
+
if directive is None:
|
|
1042
|
+
return []
|
|
1043
|
+
control = directive.control
|
|
1044
|
+
if control.capabilities.final_report:
|
|
1045
|
+
return []
|
|
1046
|
+
return [
|
|
1047
|
+
AgentRunReportFinding(
|
|
1048
|
+
code=AgentRunReportFindingCode.FINAL_REPORT_NOT_ALLOWED,
|
|
1049
|
+
severity="high",
|
|
1050
|
+
source="workflow_payload",
|
|
1051
|
+
source_field="agent_directive.control.capabilities.final_report",
|
|
1052
|
+
expected=f"status={control.status} final_report=false",
|
|
1053
|
+
actual="final_report_present",
|
|
1054
|
+
message="A diretiva oficial ainda não autoriza relatório final para este estado do workflow.",
|
|
1055
|
+
next_action=(
|
|
1056
|
+
"Continuar pela rota oficial ou reportar o bloqueio real antes de emitir uma resposta final."
|
|
1057
|
+
),
|
|
1058
|
+
evidence={"directive_status": control.status, "directive_state": control.state},
|
|
1059
|
+
)
|
|
1060
|
+
]
|
|
1061
|
+
|
|
1062
|
+
|
|
1063
|
+
def _agent_directive_invalid_finding(payload: JsonObject, directive_error: str) -> AgentRunReportFinding:
    """Build the finding for an FSM-first payload without a valid directive.

    ``directive_error`` becomes the ``actual`` value (``"invalid"`` when
    empty).  The payload schema and the progress view model's ``status`` and
    ``can_continue_now`` are attached as evidence.
    """
    progress_view = _object_field(payload, "progress_view_model")
    progress = _ProgressTruthFields.model_validate(
        _field_payload(progress_view, ("status", "can_continue_now"))
    )
    evidence = {
        "schema": _optional_text(payload, "schema"),
        "progress_status": progress.status,
        "can_continue_now": progress.can_continue_now,
    }
    return AgentRunReportFinding(
        code=AgentRunReportFindingCode.WORKFLOW_AGENT_DIRECTIVE_INVALID,
        severity="high",
        source="workflow_payload",
        source_field="agent_directive.control",
        expected="agent_directive valido com control tipado para payload FSM-first",
        actual=directive_error or "invalid",
        message="Payload FSM-first nao trouxe agent_directive.control valido no root.",
        next_action=(
            "Corrigir o produtor FSM para emitir agent_directive antes de validar ou aceitar relatorio final."
        ),
        evidence=evidence,
    )
|
|
1084
|
+
|
|
1085
|
+
|
|
1086
|
+
def _final_report_looks_like_progress_only(final_text: str) -> bool:
|
|
1087
|
+
if len(final_text.strip()) > 600:
|
|
1088
|
+
return False
|
|
1089
|
+
folded = _fold_text(final_text)
|
|
1090
|
+
substance_markers = (
|
|
1091
|
+
"status:",
|
|
1092
|
+
"receipt status",
|
|
1093
|
+
"fixou a wiki",
|
|
1094
|
+
"wiki ficou",
|
|
1095
|
+
"nao fixou",
|
|
1096
|
+
"nao foi fixada",
|
|
1097
|
+
"mutacao",
|
|
1098
|
+
"arquivos",
|
|
1099
|
+
"grafo",
|
|
1100
|
+
"related notes",
|
|
1101
|
+
"notas relacionadas",
|
|
1102
|
+
"bloque",
|
|
1103
|
+
"pendente",
|
|
1104
|
+
"parcial",
|
|
1105
|
+
"cota",
|
|
1106
|
+
"quota",
|
|
1107
|
+
"erro",
|
|
1108
|
+
"falhou",
|
|
1109
|
+
)
|
|
1110
|
+
if _folded_contains_any(folded, substance_markers):
|
|
1111
|
+
return False
|
|
1112
|
+
lines = [line.strip() for line in final_text.splitlines() if line.strip()]
|
|
1113
|
+
if not lines:
|
|
1114
|
+
return False
|
|
1115
|
+
progress_markers = (
|
|
1116
|
+
"i have started",
|
|
1117
|
+
"i started",
|
|
1118
|
+
"started the",
|
|
1119
|
+
"waiting for",
|
|
1120
|
+
"waiting for completion",
|
|
1121
|
+
"waiting for the execution",
|
|
1122
|
+
"aguardando resultado",
|
|
1123
|
+
"aguardando o resultado",
|
|
1124
|
+
"aguardando conclusao",
|
|
1125
|
+
"aguardando a conclusao",
|
|
1126
|
+
"em andamento",
|
|
1127
|
+
"vou aguardar",
|
|
1128
|
+
)
|
|
1129
|
+
return all(_folded_contains_any(_fold_text(line), progress_markers) for line in lines)
|
|
1130
|
+
|
|
1131
|
+
|
|
1132
|
+
def _status_mismatch_findings(
    final_text: str,
    truth: _WorkflowTruth,
    primary_objective: PrimaryObjectiveSummary | None,
) -> list[AgentRunReportFinding]:
    """Compare the statuses written in the final report with the official ones.

    Up to three findings are produced, in this order: receipt status mismatch,
    progress status mismatch, and a public ``status`` that is not among the
    acceptable statuses.  A check is skipped when either side of the
    comparison is missing.
    """
    reported = _reported_status_fields(final_text)
    findings: list[AgentRunReportFinding] = []

    reported_receipt = reported.get("receipt.status")
    if reported_receipt and truth.receipt_status and reported_receipt != truth.receipt_status:
        findings.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.RECEIPT_STATUS_MISMATCH,
                severity="high",
                source="final_report",
                source_field="receipt.status",
                expected=truth.receipt_status,
                actual=reported_receipt,
                message=(
                    "O relatório final declarou um receipt.status diferente do recibo oficial do workflow."
                ),
                next_action="Reescrever o relatório usando receipt.status do payload oficial.",
            )
        )

    reported_progress = reported.get("progress_view_model.status")
    if reported_progress and truth.progress_status and reported_progress != truth.progress_status:
        findings.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.PROGRESS_STATUS_MISMATCH,
                severity="high",
                source="final_report",
                source_field="progress_view_model.status",
                expected=truth.progress_status,
                actual=reported_progress,
                message=(
                    "O relatório final declarou um progress_view_model.status diferente do payload oficial."
                ),
                next_action="Reescrever o relatório usando progress_view_model.status como fonte canônica.",
            )
        )

    reported_public = reported.get("status")
    acceptable = _acceptable_public_statuses(truth, primary_objective)
    if reported_public and acceptable and reported_public not in acceptable:
        findings.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.PROGRESS_STATUS_MISMATCH,
                severity="medium",
                source="final_report",
                source_field="status",
                expected=", ".join(sorted(acceptable)),
                actual=reported_public,
                message="O relatório final declarou um status diferente do status canônico do workflow.",
                next_action="Corrigir o status público do relatório final antes de concluir a rodada.",
            )
        )
    return findings
|
|
1187
|
+
|
|
1188
|
+
|
|
1189
|
+
def _acceptable_public_statuses(
    truth: _WorkflowTruth,
    primary_objective: PrimaryObjectiveSummary | None,
) -> set[str]:
    """Statuses a public final report may name without contradicting the FSM.

    The set holds the workflow status (when non-empty) plus the status carried
    by a process-chats or generic workflow objective summary.
    """
    acceptable: set[str] = {truth.workflow_status} if truth.workflow_status else set()
    if isinstance(primary_objective, ProcessChatsPrimaryObjectiveSummary):
        acceptable.add(primary_objective.process_status)
    if isinstance(primary_objective, WorkflowPrimaryObjectiveSummary):
        acceptable.add(primary_objective.status)
    return acceptable
|
|
1202
|
+
|
|
1203
|
+
|
|
1204
|
+
def _reported_status_fields(final_text: str) -> dict[str, str]:
|
|
1205
|
+
reported: dict[str, str] = {}
|
|
1206
|
+
patterns: tuple[tuple[str, re.Pattern[str]], ...] = (
|
|
1207
|
+
(
|
|
1208
|
+
"receipt.status",
|
|
1209
|
+
re.compile(r"(?im)^\s*(?:[-*]\s*)?(?:receipt\s+status|receipt\.status)\s*[:=]\s*`?(?P<value>[a-z0-9_]+)`?"),
|
|
1210
|
+
),
|
|
1211
|
+
(
|
|
1212
|
+
"progress_view_model.status",
|
|
1213
|
+
re.compile(
|
|
1214
|
+
r"(?im)^\s*(?:[-*]\s*)?(?:progress_view_model\.status|progress\s+status)\s*[:=]\s*`?(?P<value>[a-z0-9_]+)`?"
|
|
1215
|
+
),
|
|
1216
|
+
),
|
|
1217
|
+
(
|
|
1218
|
+
"status",
|
|
1219
|
+
re.compile(r"(?im)^\s*(?:[-*]\s*)?(?:status)\s*[:=]\s*`?(?P<value>[a-z0-9_]+)`?"),
|
|
1220
|
+
),
|
|
1221
|
+
)
|
|
1222
|
+
for field, pattern in patterns:
|
|
1223
|
+
match = pattern.search(final_text)
|
|
1224
|
+
if not match:
|
|
1225
|
+
continue
|
|
1226
|
+
value = _normalize_status(match.group("value"))
|
|
1227
|
+
if value and value in KNOWN_WORKFLOW_STATUSES:
|
|
1228
|
+
reported[field] = value
|
|
1229
|
+
return reported
|
|
1230
|
+
|
|
1231
|
+
|
|
1232
|
+
def _normalize_status(value: str) -> str:
    """Return the first ``STATUS_VALUE_RE`` match in the trimmed, lower-cased value, or ``""``."""
    found = STATUS_VALUE_RE.search(value.strip().lower())
    if found is None:
        return ""
    return found.group(0)
|
|
1235
|
+
|
|
1236
|
+
|
|
1237
|
+
def _success_claim_findings(final_text: str, truth: _WorkflowTruth) -> list[AgentRunReportFinding]:
    """Flag success language in a report about a workflow that did not succeed.

    Only applies when the first known status (workflow, progress, receipt) is
    in ``NON_SUCCESS_STATUSES`` and the text contains a positive success claim.
    """
    effective_status = truth.workflow_status or truth.progress_status or truth.receipt_status
    if effective_status not in NON_SUCCESS_STATUSES or not _has_positive_success_claim(final_text):
        return []
    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.SUCCESS_CLAIM_MISMATCH,
        severity="medium",
        source="final_report",
        source_field="final_report_text",
        expected=effective_status,
        actual="success_claim",
        message="O relatório final usou linguagem de sucesso para um workflow que não está concluído.",
        next_action="Trocar linguagem de sucesso por progresso parcial, bloqueio ou espera externa conforme o payload oficial.",
    )
    return [finding]
|
|
1255
|
+
|
|
1256
|
+
|
|
1257
|
+
def _public_report_pending_effect_success_findings(payload: JsonObject) -> list[AgentRunReportFinding]:
    """Ensure the human-facing report cannot outrank pending FSM effects.

    Applies only while the directive is ``waiting_agent``, may continue, and
    still has effects or a non-blank resume.  Each public text
    (``reports.summary``, the public report headline and its lines) that makes
    a positive success claim yields one finding.
    """
    control = _agent_directive_control(payload)
    agent_must_continue = control.status == "waiting_agent" and control.capabilities.continue_ is True
    if not agent_must_continue:
        return []
    has_pending_work = bool(control.effects) or bool(control.resume.strip())
    if not has_pending_work:
        return []

    reports = _object_field(payload, "reports")
    public_texts = [("reports.summary", _optional_text(reports, "summary"))]
    if "public_report" in reports:
        public_report = WorkflowPublicReport.model_validate(reports["public_report"])
        public_texts.append(("reports.public_report.headline", public_report.headline))
        for position, line in enumerate(public_report.lines):
            public_texts.append((f"reports.public_report.lines[{position}]", line))

    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.SUCCESS_CLAIM_MISMATCH,
            severity="medium",
            source="workflow_payload",
            source_field=origin,
            expected="waiting_agent",
            actual="success_claim",
            message="O relatório público declarou sucesso enquanto a FSM ainda exige continuação por agente.",
            next_action=(
                "Projetar reports.* a partir da transição FSM e manter linguagem de progresso parcial "
                "até agent_directive.control.capabilities.final_report=true."
            ),
        )
        for origin, text in public_texts
        if _has_positive_success_claim(text)
    ]
|
|
1293
|
+
|
|
1294
|
+
|
|
1295
|
+
def _unsupported_blocker_claim_findings(final_text: str, truth: _WorkflowTruth) -> list[AgentRunReportFinding]:
    """Flag a blocker claimed by the report but absent from the payload.

    Skipped when the workflow status is a non-success one or a
    ``blocked_reason`` exists.  Otherwise the first blocker-claim match whose
    sentence is not a negation ("sem bloque...", "nao bloque...") yields a
    single finding; at most one finding is returned.
    """
    effective_status = truth.workflow_status or truth.progress_status or truth.receipt_status
    if effective_status in NON_SUCCESS_STATUSES or truth.blocked_reason:
        return []
    negations = ("sem bloque", "nao bloque", "não bloque")
    for claim in UNSUPPORTED_BLOCKER_CLAIM_RE.finditer(final_text):
        sentence = _fold_text(_sentence_containing_match(final_text, claim.start(), claim.end()))
        if any(negation in sentence for negation in negations):
            continue
        return [
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.WORKFLOW_CONTRACT_CONTRADICTION,
                severity="high",
                source="final_report",
                source_field="final_report_text",
                expected=effective_status or "workflow sem blocked_reason",
                # Keep the quoted evidence short.
                actual=sentence[:180],
                message="O relatório final declarou bloqueio/duplicidade que não existe no payload oficial.",
                next_action=(
                    "Usar somente status, blocked_reason e decision oficiais para declarar bloqueio; "
                    "se o agente suspeitar duplicidade, registrar como suspeita e seguir a próxima ação oficial."
                ),
            )
        ]
    return []
|
|
1319
|
+
|
|
1320
|
+
|
|
1321
|
+
def _has_positive_success_claim(final_text: str) -> bool:
    """Tell whether the text contains a success claim that is not qualified.

    A ``SUCCESS_CLAIM_RE`` match is discounted when the 32 characters before
    it match the negation prefix pattern, when its folded sentence matches the
    negated-sentence pattern, or when the sentence reads as partial success.
    """

    def is_unqualified(claim: re.Match[str]) -> bool:
        lead_in = final_text[max(0, claim.start() - 32) : claim.start()]
        if NEGATED_SUCCESS_PREFIX_RE.search(lead_in):
            return False
        sentence = _fold_text(_sentence_containing_match(final_text, claim.start(), claim.end()))
        if NEGATED_SUCCESS_SENTENCE_RE.search(sentence) or _is_partial_success_sentence(sentence):
            return False
        return True

    return any(is_unqualified(claim) for claim in SUCCESS_CLAIM_RE.finditer(final_text))
|
|
1333
|
+
|
|
1334
|
+
|
|
1335
|
+
def _is_partial_success_sentence(sentence: str) -> bool:
|
|
1336
|
+
if "sem pendenc" in sentence or "sem blocker" in sentence or "sem bloque" in sentence:
|
|
1337
|
+
return False
|
|
1338
|
+
if not any(marker in sentence for marker in ("publicacao", "publicou", "publicad")):
|
|
1339
|
+
return False
|
|
1340
|
+
return any(marker in sentence for marker in ("pendenc", "pendente", "blocker", "bloque", "parcial"))
|
|
1341
|
+
|
|
1342
|
+
|
|
1343
|
+
def _is_component_success_sentence(sentence: str) -> bool:
|
|
1344
|
+
if any(marker in sentence for marker in ("wiki", "workflow", "fluxo")):
|
|
1345
|
+
return False
|
|
1346
|
+
return any(marker in sentence for marker in SCOPED_SUCCESS_CONTEXT_MARKERS)
|
|
1347
|
+
|
|
1348
|
+
|
|
1349
|
+
def _is_scoped_success_with_global_blocker(sentence: str) -> bool:
    """Tell whether a sentence contains a scoped-success marker and matches the global-blocker pattern.

    Both conditions are required: one of ``SCOPED_SUCCESS_CONTEXT_MARKERS``
    must occur in the sentence and ``SCOPED_SUCCESS_WITH_GLOBAL_BLOCKER_RE``
    must match it.
    """
    has_scoped_marker = any(marker in sentence for marker in SCOPED_SUCCESS_CONTEXT_MARKERS)
    if not has_scoped_marker:
        return False
    return SCOPED_SUCCESS_WITH_GLOBAL_BLOCKER_RE.search(sentence) is not None
|
|
1354
|
+
|
|
1355
|
+
|
|
1356
|
+
def _sentence_containing_match(text: str, start: int, end: int) -> str:
|
|
1357
|
+
boundaries = "\n.!?"
|
|
1358
|
+
sentence_start = max(text.rfind(boundary, 0, start) for boundary in boundaries) + 1
|
|
1359
|
+
sentence_end_candidates = [
|
|
1360
|
+
index
|
|
1361
|
+
for boundary in boundaries
|
|
1362
|
+
if (index := text.find(boundary, end)) != -1
|
|
1363
|
+
]
|
|
1364
|
+
sentence_end = min(sentence_end_candidates) if sentence_end_candidates else len(text)
|
|
1365
|
+
return text[sentence_start:sentence_end]
|
|
1366
|
+
|
|
1367
|
+
|
|
1368
|
+
def _omitted_status_findings(final_text: str, truth: _WorkflowTruth) -> list[AgentRunReportFinding]:
    """Flag a report that hides a non-success workflow status.

    Applies when the first known status (workflow, progress, receipt) is in
    ``NON_SUCCESS_STATUSES``.  The report passes when it contains the status
    identifier (case-insensitively) or one of its human-language markers.
    """
    effective_status = truth.workflow_status or truth.progress_status or truth.receipt_status
    if effective_status not in NON_SUCCESS_STATUSES:
        return []
    names_identifier = effective_status in final_text.lower()
    if names_identifier or _mentions_non_success_status_publicly(final_text, effective_status):
        return []
    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.WORKFLOW_STATUS_OMITTED,
        severity="medium",
        source="final_report",
        source_field="progress_view_model.status",
        expected=effective_status,
        actual="omitted",
        message="O relatório final não deixou claro que o workflow ainda não está concluído.",
        next_action=(
            "Explicar em linguagem pública que o workflow ficou parcial, bloqueado ou aguardando "
            "continuação; o identificador técnico é opcional."
        ),
    )
    return [finding]
|
|
1389
|
+
|
|
1390
|
+
|
|
1391
|
+
def _mentions_non_success_status_publicly(final_text: str, status: str) -> bool:
    """Tell whether the text conveys ``status`` using its human-language markers.

    Markers come from ``NON_SUCCESS_HUMAN_STATUS_MARKERS``; a status without
    markers never matches.  Matching is done on the folded text.
    """
    human_markers = NON_SUCCESS_HUMAN_STATUS_MARKERS.get(status, ())
    if human_markers:
        return _folded_contains_any(_fold_text(final_text), human_markers)
    return False
|
|
1397
|
+
|
|
1398
|
+
|
|
1399
|
+
def _workflow_primary_objective_summary(
    payload: JsonObject,
) -> PrimaryObjectiveSummary | None:
    """Return the first truthy objective summary for the payload.

    The fix-wiki builder is tried first, then process-chats, then the generic
    one.  When none is truthy, the generic builder's result is returned.
    """
    summary = fix_wiki_primary_objective_summary(payload)
    if summary:
        return summary
    summary = process_chats_primary_objective_summary(payload)
    if summary:
        return summary
    return _generic_primary_objective_summary(payload)
|
|
1407
|
+
|
|
1408
|
+
|
|
1409
|
+
def _generic_primary_objective_summary(payload: JsonObject) -> WorkflowPrimaryObjectiveSummary | None:
    """Read ``reports.details.primary_objective_summary`` as a typed summary.

    Returns ``None`` when the key is absent.

    Raises:
        ValueError: when the key is present but its value is not an object.
    """
    details = _object_field(_object_field(payload, "reports"), "details")
    if "primary_objective_summary" not in details:
        return None
    raw_summary = details["primary_objective_summary"]
    if isinstance(raw_summary, dict):
        return WorkflowPrimaryObjectiveSummary.model_validate(raw_summary)
    raise ValueError("reports.details.primary_objective_summary must be an object")
|
|
1418
|
+
|
|
1419
|
+
|
|
1420
|
+
def _primary_objective_completed(
|
|
1421
|
+
objective: PrimaryObjectiveSummary | None,
|
|
1422
|
+
) -> bool:
|
|
1423
|
+
if objective is None:
|
|
1424
|
+
return False
|
|
1425
|
+
if isinstance(objective, FixWikiPrimaryObjectiveSummary):
|
|
1426
|
+
return objective.wiki_fixed == "yes"
|
|
1427
|
+
if isinstance(objective, WorkflowPrimaryObjectiveSummary):
|
|
1428
|
+
return objective.completed
|
|
1429
|
+
return objective.process_status in {
|
|
1430
|
+
"no_pending",
|
|
1431
|
+
"preview_ready",
|
|
1432
|
+
"ready_to_publish",
|
|
1433
|
+
"published",
|
|
1434
|
+
"completed_with_link_blockers",
|
|
1435
|
+
"completed",
|
|
1436
|
+
}
|
|
1437
|
+
|
|
1438
|
+
|
|
1439
|
+
def _legitimate_stop_reason(
    payload: JsonObject,
    objective: PrimaryObjectiveSummary | None,
) -> str:
    """Return ``"waiting_external"``, ``"waiting_human"`` or ``""`` for this run.

    The payload status (``progress_view_model.status``, falling back to the
    top-level ``status``) is consulted first and must be backed by evidence in
    the payload; otherwise the objective summary itself is inspected.
    """
    status = _optional_text(_object_field(payload, "progress_view_model"), "status")
    if not status:
        status = _optional_text(payload, "status")

    if status == "waiting_external":
        if _payload_has_external_wait_evidence(payload):
            return "waiting_external"
    elif status == "waiting_human":
        if _human_decision_packet(payload) is not None:
            return "waiting_human"

    if isinstance(objective, FixWikiPrimaryObjectiveSummary):
        if objective.wiki_fixed == "waiting_external":
            return "waiting_external"
    if isinstance(objective, WorkflowPrimaryObjectiveSummary):
        # External waits take precedence over human waits.
        for reason in ("waiting_external", "waiting_human"):
            if objective.status == reason or reason in objective.status:
                return reason
    return ""
|
|
1457
|
+
|
|
1458
|
+
|
|
1459
|
+
def _payload_has_external_wait_evidence(payload: JsonObject) -> bool:
    """Search the folded JSON dump of ``payload`` for external-wait markers.

    The whole payload is serialized with sorted keys and scanned for plain
    substrings, so a marker matches wherever it appears (keys or values).
    """
    # NOTE(review): a payload whose status is literally "waiting_external"
    # presumably always matches the "waiting_external" marker after folding,
    # which would make this check vacuous for that caller — confirm against
    # what _fold_text does to underscores.
    evidence_markers = (
        "quota",
        "cota",
        "capacity",
        "capacidade",
        "waiting_external",
        "external_wait",
    )
    serialized = json.dumps(payload, ensure_ascii=False, sort_keys=True)
    haystack = _fold_text(serialized)
    for marker in evidence_markers:
        if marker in haystack:
            return True
    return False
|
|
1472
|
+
|
|
1473
|
+
|
|
1474
|
+
def _public_report_view_model(
|
|
1475
|
+
payload: JsonObject,
|
|
1476
|
+
objective: PrimaryObjectiveSummary | None,
|
|
1477
|
+
) -> WorkflowPublicReportViewModel | None:
|
|
1478
|
+
if objective is None:
|
|
1479
|
+
return None
|
|
1480
|
+
if isinstance(objective, FixWikiPrimaryObjectiveSummary):
|
|
1481
|
+
return _fix_wiki_public_report_view_model(payload, objective)
|
|
1482
|
+
if isinstance(objective, ProcessChatsPrimaryObjectiveSummary):
|
|
1483
|
+
return _process_chats_public_report_view_model(payload, objective)
|
|
1484
|
+
return _generic_public_report_view_model(payload, objective)
|
|
1485
|
+
|
|
1486
|
+
|
|
1487
|
+
def _fix_wiki_public_report_view_model(
    payload: JsonObject,
    objective: FixWikiPrimaryObjectiveSummary,
) -> WorkflowPublicReportViewModel:
    """Build the public report view model for a ``/mednotes:fix-wiki`` run.

    The mutation state is ``"changed"`` when either the mutation count or the
    written count of the objective is positive. User attention is flagged
    whenever a human-decision reason can be derived from the payload.
    """
    if objective.mutation_count > 0 or objective.written_count > 0:
        mutation_state = "changed"
    else:
        mutation_state = "unchanged"
    attention_reason = _human_decision_reason(payload)
    return WorkflowPublicReportViewModel(
        workflow="/mednotes:fix-wiki",
        run_id=str(payload.get("run_id") or ""),
        objective_answer=_fix_wiki_public_objective_answer(objective.wiki_fixed),
        headline=objective.wiki_summary,
        mutation_state=mutation_state,
        mutation_summary=objective.mutation_summary,
        remaining_work_summary=_join_public_parts(
            objective.graph_summary,
            objective.related_notes_summary,
        ),
        next_step_summary=_public_next_step(
            payload,
            fallback=objective.related_notes_summary,
        ),
        user_attention_required=bool(attention_reason),
        human_reason=attention_reason,
        internal_terms_present=False,
    )
|
|
1506
|
+
|
|
1507
|
+
|
|
1508
|
+
def _process_chats_public_report_view_model(
    payload: JsonObject,
    objective: ProcessChatsPrimaryObjectiveSummary,
) -> WorkflowPublicReportViewModel:
    """Build the public report view model for a ``/mednotes:process-chats`` run.

    The mutation state is ``"changed"`` only when notes were published and at
    least one note is counted. User attention is flagged whenever a
    human-decision reason can be derived from the payload.
    """
    notes_were_published = objective.notes_status == "published" and objective.note_count > 0
    if notes_were_published:
        mutation_state = "changed"
    else:
        mutation_state = "unchanged"
    attention_reason = _human_decision_reason(payload)
    return WorkflowPublicReportViewModel(
        workflow="/mednotes:process-chats",
        run_id=str(payload.get("run_id") or ""),
        objective_answer=_process_chats_public_objective_answer(objective.process_status),
        headline=objective.process_summary,
        mutation_state=mutation_state,
        mutation_summary=objective.wiki_write_summary,
        remaining_work_summary=_join_public_parts(
            objective.raw_summary,
            objective.coverage_summary,
            objective.linker_summary,
        ),
        next_step_summary=_public_next_step(payload, fallback=objective.linker_summary),
        user_attention_required=bool(attention_reason),
        human_reason=attention_reason,
        internal_terms_present=False,
    )
|
|
1527
|
+
|
|
1528
|
+
|
|
1529
|
+
def _generic_public_report_view_model(
    payload: JsonObject,
    objective: WorkflowPrimaryObjectiveSummary,
) -> WorkflowPublicReportViewModel:
    """Build the public report view model from a generic workflow summary.

    Most fields are copied straight from ``objective``; the next step prefers
    what the payload advertises and falls back to the objective's own summary.
    """
    attention_reason = _human_decision_reason(payload)
    return WorkflowPublicReportViewModel(
        workflow=objective.workflow,
        run_id=objective.run_id,
        objective_answer=_generic_public_objective_answer(objective),
        headline=objective.objective,
        mutation_state=objective.mutation_state,
        mutation_summary=objective.mutation_summary,
        remaining_work_summary=objective.remaining_work_summary,
        next_step_summary=_public_next_step(
            payload,
            fallback=objective.next_step_summary,
        ),
        user_attention_required=bool(attention_reason),
        human_reason=attention_reason,
        internal_terms_present=False,
    )
|
|
1547
|
+
|
|
1548
|
+
|
|
1549
|
+
def _fix_wiki_public_objective_answer(value: str) -> WorkflowPublicObjectiveAnswer:
|
|
1550
|
+
match value:
|
|
1551
|
+
case "yes":
|
|
1552
|
+
return "yes"
|
|
1553
|
+
case "waiting_agent":
|
|
1554
|
+
return "waiting_agent"
|
|
1555
|
+
case "waiting_external":
|
|
1556
|
+
return "waiting_external"
|
|
1557
|
+
case "failed":
|
|
1558
|
+
return "failed"
|
|
1559
|
+
case "no":
|
|
1560
|
+
return "no"
|
|
1561
|
+
case _:
|
|
1562
|
+
return "partial"
|
|
1563
|
+
|
|
1564
|
+
|
|
1565
|
+
def _process_chats_public_objective_answer(value: str) -> WorkflowPublicObjectiveAnswer:
|
|
1566
|
+
match value:
|
|
1567
|
+
case "published" | "completed" | "completed_with_link_blockers" | "no_pending":
|
|
1568
|
+
return "yes"
|
|
1569
|
+
case "blocked":
|
|
1570
|
+
return "no"
|
|
1571
|
+
case "failed":
|
|
1572
|
+
return "failed"
|
|
1573
|
+
case _:
|
|
1574
|
+
return "partial"
|
|
1575
|
+
|
|
1576
|
+
|
|
1577
|
+
def _generic_public_objective_answer(
    objective: WorkflowPrimaryObjectiveSummary,
) -> WorkflowPublicObjectiveAnswer:
    """Derive the public objective answer from a generic workflow summary.

    A completed objective answers ``"yes"``. Otherwise the status is checked
    against an ordered list of tokens (equality or containment) and the first
    hit decides; with no hit the answer is ``"partial"``.
    """
    if objective.completed:
        return "yes"
    # Order matters: earlier tokens win when a status contains several.
    token_answers = (
        ("failed", "failed"),
        ("waiting_external", "waiting_external"),
        ("waiting_human", "waiting_human"),
        ("blocked", "no"),
        ("waiting_agent", "waiting_agent"),
    )
    for token, answer in token_answers:
        if objective.status == token or token in objective.status:
            return answer
    return "partial"
|
|
1593
|
+
|
|
1594
|
+
|
|
1595
|
+
def _human_decision_reason(payload: JsonObject) -> str:
    """Return the first non-blank human-decision text found in ``payload``.

    Returns ``""`` when the payload has no human decision packet, and a fixed
    fallback sentence when the packet exists but all of its texts are blank.
    """
    packet = _human_decision_packet(payload)
    if packet is None:
        return ""
    candidates = (
        packet.why_this_needs_you,
        packet.question,
        packet.evidence_summary,
        packet.type,
        packet.kind,
    )
    for candidate in candidates:
        if stripped := candidate.strip():
            return stripped
    return "Decisao humana pendente."
|
|
1603
|
+
|
|
1604
|
+
|
|
1605
|
+
def _human_decision_packet(payload: JsonObject) -> _HumanDecisionPacketFields | None:
    """Validate the payload's ``human_decision_packet``, if it has a non-empty one.

    Only the fields read by the report helpers are forwarded for validation.
    """
    raw_packet = _object_field(payload, "human_decision_packet")
    if raw_packet:
        wanted_fields = ("why_this_needs_you", "question", "evidence_summary", "type", "kind")
        return _HumanDecisionPacketFields.model_validate(_field_payload(raw_packet, wanted_fields))
    return None
|
|
1612
|
+
|
|
1613
|
+
|
|
1614
|
+
def _public_next_step(payload: JsonObject, *, fallback: str) -> str:
    """Pick the next-step text to show publicly.

    Preference order: ``progress_view_model.user_action``, then
    ``receipt.next_action``, then ``fallback``. Blank texts are skipped and
    the chosen payload text is returned stripped.
    """
    progress_fields = _field_payload(_object_field(payload, "progress_view_model"), ("user_action",))
    user_action = _PublicProgressFields.model_validate(progress_fields).user_action.strip()
    if user_action:
        return user_action
    # The receipt is only validated when the progress view gave nothing.
    receipt_fields = _field_payload(_object_field(payload, "receipt"), ("next_action",))
    next_action = _PublicReceiptFields.model_validate(receipt_fields).next_action.strip()
    return next_action or fallback
|
|
1626
|
+
|
|
1627
|
+
|
|
1628
|
+
def _join_public_parts(*parts: str) -> str:
|
|
1629
|
+
cleaned = [part.strip() for part in parts if part.strip()]
|
|
1630
|
+
if not cleaned:
|
|
1631
|
+
return "Sem pendencias descritas."
|
|
1632
|
+
return " ".join(cleaned)
|
|
1633
|
+
|
|
1634
|
+
|
|
1635
|
+
def _primary_objective_payload_findings(
    payload: JsonObject,
    objective: PrimaryObjectiveSummary,
) -> list[AgentRunReportFinding]:
    """Flag a process-chats payload whose primary objective is still unresolved.

    A finding is produced only when the objective is a process-chats summary
    with ``process_status == "unknown"``, the payload workflow is
    ``/mednotes:process-chats``, and either the phase is one of the tracked
    phases or the payload reports a non-zero item count.
    """
    is_unknown_process_chats = (
        isinstance(objective, ProcessChatsPrimaryObjectiveSummary)
        and objective.process_status == "unknown"
    )
    if not is_unknown_process_chats:
        return []

    terminal = _ProcessChatsTerminalFields.model_validate(
        _field_payload(payload, ("workflow", "phase", "status", "item_count"))
    )
    phase = terminal.phase
    status = terminal.status
    item_count = terminal.item_count or 0
    if terminal.workflow != "/mednotes:process-chats":
        return []

    tracked_phases = {"triage", "architect", "publish_dry_run", "publish_apply"}
    if phase not in tracked_phases and not item_count:
        return []

    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.PROCESS_CHATS_PRIMARY_OBJECTIVE_UNRESOLVED,
        severity="high",
        source="workflow_payload",
        source_field="workflow/phase/status",
        expected="process-chats deve terminar em preview/publicação/linker ou blocker explícito antes do relatório final",
        actual=f"phase={phase or 'missing'} status={status or 'missing'} item_count={item_count}",
        message=(
            "O payload oficial ainda não prova que process-chats cumpriu o objetivo primário."
        ),
        next_action=(
            "Continuar a rota oficial de process-chats até publicar/preparar preview com coverage, "
            "rodar linker ou emitir blocker real antes de concluir."
        ),
    )
    return [finding]
|
|
1671
|
+
|
|
1672
|
+
|
|
1673
|
+
def _missing_fsm_primary_objective_findings(payload: JsonObject) -> list[AgentRunReportFinding]:
    """Build the finding for a payload lacking its primary-objective summary.

    Payloads whose schema is in ``FSM_FIRST_SCHEMAS`` get a
    ``PRIMARY_OBJECTIVE_OMITTED`` finding. Other payloads get a process-chats
    specific finding when the workflow is ``/mednotes:process-chats`` and no
    finding otherwise.
    """
    schema = _optional_text(payload, "schema")
    workflow = _optional_text(payload, "workflow")

    if schema in FSM_FIRST_SCHEMAS:
        return [
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.PRIMARY_OBJECTIVE_OMITTED,
                severity="high",
                source="workflow_payload",
                source_field="reports.details.primary_objective_summary",
                expected="payload FSM-first com reports.details.primary_objective_summary tipado",
                actual=schema or "schema ausente",
                message=f"O payload de {workflow or 'workflow FSM-first'} não trouxe o resumo primário canônico emitido pela FSM.",
                next_action="Corrigir a projeção FSM para emitir primary_objective_summary antes de validar relatório final.",
            )
        ]

    if workflow != "/mednotes:process-chats":
        return []

    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.PROCESS_CHATS_PRIMARY_OBJECTIVE_UNRESOLVED,
            severity="high",
            source="workflow_payload",
            source_field="reports.details.primary_objective_summary",
            expected="process-chats-fsm-result.v1 com reports.details.primary_objective_summary tipado",
            actual=schema or "schema ausente",
            message="O payload não trouxe o resumo primário canônico emitido pela FSM de process-chats.",
            next_action=(
                "Reexecutar /mednotes:process-chats pela rota FSM-first antes de validar o relatório final."
            ),
        )
    ]
|
|
1705
|
+
|
|
1706
|
+
|
|
1707
|
+
def _safe_positive_int(value: object) -> int:
|
|
1708
|
+
if isinstance(value, bool) or value is None:
|
|
1709
|
+
return 0
|
|
1710
|
+
if isinstance(value, int | float):
|
|
1711
|
+
return max(0, int(value))
|
|
1712
|
+
if isinstance(value, str):
|
|
1713
|
+
try:
|
|
1714
|
+
return max(0, int(value))
|
|
1715
|
+
except ValueError:
|
|
1716
|
+
return 0
|
|
1717
|
+
return 0
|
|
1718
|
+
|
|
1719
|
+
|
|
1720
|
+
def _public_output_findings(payload: JsonObject) -> list[AgentRunReportFinding]:
    """Report every public payload text that contains a forbidden internal term.

    Each text from ``_public_text_sources`` is lower-cased and scanned for the
    entries of ``PUBLIC_OUTPUT_FORBIDDEN_TERMS``; one finding is emitted per
    offending source field, listing all terms found in it.
    """
    findings: list[AgentRunReportFinding] = []
    for source_field, text in _public_text_sources(payload):
        haystack = text.lower()
        leaked_terms = [term for term in PUBLIC_OUTPUT_FORBIDDEN_TERMS if term in haystack]
        if leaked_terms:
            findings.append(
                AgentRunReportFinding(
                    code=AgentRunReportFindingCode.PUBLIC_OUTPUT_INTERNAL_TERM_LEAK,
                    severity="medium",
                    source="workflow_payload",
                    source_field=source_field,
                    expected="linguagem pública sem comandos internos",
                    actual=", ".join(leaked_terms),
                    message="O payload público do workflow expôs termos internos de automação/desenvolvimento.",
                    next_action=(
                        "Trocar o texto público por linguagem de usuário; deixe comandos, schemas, recibos e hashes "
                        "apenas em JSON/logs técnicos."
                    ),
                    evidence={"forbidden_terms": leaked_terms},
                )
            )
    return findings
|
|
1744
|
+
|
|
1745
|
+
|
|
1746
|
+
def _final_report_internal_term_findings(final_text: str) -> list[AgentRunReportFinding]:
    """Report forbidden internal terms found in the agent's final answer.

    The text is lower-cased and scanned for ``PUBLIC_OUTPUT_FORBIDDEN_TERMS``;
    at most one finding is returned, listing every term that was found.
    """
    haystack = final_text.lower()
    leaked_terms = [term for term in PUBLIC_OUTPUT_FORBIDDEN_TERMS if term in haystack]
    if leaked_terms:
        return [
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.PUBLIC_OUTPUT_INTERNAL_TERM_LEAK,
                severity="medium",
                source="final_report",
                source_field="final_report_text",
                expected="resposta pública sem nomes de campos, recibos, hashes ou estado técnico do guard",
                actual=", ".join(leaked_terms),
                message="A resposta final do agente expôs termos internos de automação/desenvolvimento.",
                next_action=(
                    "Reescrever a resposta final em linguagem de usuário; deixe nomes de campos, recibos, "
                    "hashes e detalhes técnicos do guard apenas em logs/JSON."
                ),
                evidence={"forbidden_terms": leaked_terms},
            )
        ]
    return []
|
|
1767
|
+
|
|
1768
|
+
|
|
1769
|
+
def _public_text_sources(payload: JsonObject) -> list[tuple[str, str]]:
    """Collect the non-blank public texts of ``payload`` with their field paths.

    Returns:
        ``(field_path, text)`` pairs, first for the fixed progress / receipt /
        decision / reports fields, then for the headline and lines of
        ``reports.public_report`` when that key is present.
    """
    progress = _object_field(payload, "progress_view_model")
    receipt = _object_field(payload, "receipt")
    decision = _object_field(payload, "decision")
    reports = _object_field(payload, "reports")

    candidates = [
        ("progress_view_model.message", _optional_text(progress, "message")),
        ("progress_view_model.user_action", _optional_text(progress, "user_action")),
        ("receipt.next_action", _optional_text(receipt, "next_action")),
        ("decision.public_summary", _optional_text(decision, "public_summary")),
        ("decision.next_action", _optional_text(decision, "next_action")),
        ("reports.summary", _optional_text(reports, "summary")),
    ]
    # Blank texts are dropped, but kept texts are stored unstripped.
    sources: list[tuple[str, str]] = [(path, text) for path, text in candidates if text.strip()]

    if "public_report" not in reports:
        return sources

    public_report = WorkflowPublicReport.model_validate(reports["public_report"])
    if public_report.headline.strip():
        sources.append(("reports.public_report.headline", public_report.headline))
    sources.extend(
        (f"reports.public_report.lines[{index}]", line)
        for index, line in enumerate(public_report.lines)
        if line.strip()
    )
    return sources
|
|
1793
|
+
|
|
1794
|
+
|
|
1795
|
+
def _optional_text(source: JsonObject, field_name: str) -> str:
|
|
1796
|
+
if field_name not in source or source[field_name] is None:
|
|
1797
|
+
return ""
|
|
1798
|
+
value = source[field_name]
|
|
1799
|
+
if not isinstance(value, str):
|
|
1800
|
+
raise ValueError(f"{field_name} must be text")
|
|
1801
|
+
return value
|
|
1802
|
+
|
|
1803
|
+
|
|
1804
|
+
def _stale_next_action_findings(payload: JsonObject) -> list[AgentRunReportFinding]:
    """Flag next-action texts that are stale or circular for the current status.

    Four public next-action fields are folded and checked against three rules.
    When several rules match the same text, the apply-block rule wins over the
    related-notes rule, which wins over the waiting-external preview rule.
    """
    progress = _object_field(payload, "progress_view_model")
    receipt = _object_field(payload, "receipt")
    decision = _object_field(payload, "decision")
    diagnostic = _object_field(payload, "diagnostic_context")
    related_state = _AgentReportRelatedRecoveryFields.model_validate(
        _field_payload(_object_field(diagnostic, "related_notes_recovery_state"), ("status",))
    )
    apply_context = _AgentReportApplyFields.model_validate(
        _field_payload(_object_field(diagnostic, "apply"), ("requested_apply",))
    )
    status = _optional_text(progress, "status") or _optional_text(receipt, "status")
    requested_apply = apply_context.requested_apply is True
    waiting_external = status == "waiting_external"

    candidates = [
        ("receipt.next_action", _optional_text(receipt, "next_action")),
        ("progress_view_model.user_action", _optional_text(progress, "user_action")),
        ("progress_view_model.resume_action", _optional_text(progress, "resume_action")),
        ("decision.next_action", _optional_text(decision, "next_action")),
    ]

    findings: list[AgentRunReportFinding] = []
    for source_field, text in candidates:
        folded = _fold_text(text)
        if not folded:
            continue

        # Rules are tested from highest to lowest precedence.
        if (
            requested_apply
            and status in NON_SUCCESS_STATUSES
            and re.search(r"\b(dry-run|preview|previa)\b", folded)
        ):
            reason = "apply_block_next_action_loops_to_preview"
        elif (
            waiting_external
            and related_state.status == "waiting_for_retry"
            and "export" in folded
            and "retom" not in folded
        ):
            reason = "related_notes_wait_next_action_regenerates_export"
        elif waiting_external and re.search(r"\b(dry-run|preview|previa|diagnostico)\b", folded):
            reason = "waiting_external_next_action_repeats_preview"
        else:
            continue

        findings.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.STALE_NEXT_ACTION,
                severity="high",
                source="workflow_payload",
                source_field=source_field,
                expected="próxima ação coerente com status/estado FSM",
                actual=text,
                message="A próxima ação pública ficou stale ou circular em relação ao estado canônico do workflow.",
                next_action="Gerar next_action a partir de progress_view_model/decision/receipt canônicos e revalidar o payload.",
                evidence={"reason": reason},
            )
        )
    return findings
|
|
1856
|
+
|
|
1857
|
+
|
|
1858
|
+
def _workflow_payload_consistency_findings(payload: JsonObject) -> list[AgentRunReportFinding]:
    """Flag a payload whose agent directive and progress view disagree.

    A finding is produced when the agent directive requires a waiting-agent
    continuation but the progress view is not ``status == "waiting_agent"``
    with ``can_continue_now`` set to ``True``.
    """
    progress = _ProgressTruthFields.model_validate(
        _field_payload(_object_field(payload, "progress_view_model"), ("status", "can_continue_now"))
    )
    directive_requires_continuation = _agent_directive_requires_waiting_agent_continuation(payload)
    progress_allows_continuation = progress.status == "waiting_agent" and progress.can_continue_now is True
    if not directive_requires_continuation or progress_allows_continuation:
        return []

    contradiction = AgentRunReportFinding(
        code=AgentRunReportFindingCode.WORKFLOW_CONTRACT_CONTRADICTION,
        severity="high",
        source="workflow_payload",
        source_field="agent_directive.control",
        expected="agent_directive com effects executáveis deve projetar progress_view_model.status=waiting_agent e can_continue_now=true",
        actual=f"status={progress.status or 'missing'} can_continue_now={progress.can_continue_now}",
        message=(
            "O payload oficial mistura continuação assistida executável com estado que não autoriza continuar."
        ),
        next_action=(
            "Corrigir a projeção FSM antes de confiar no relatório do agente ou repetir o experimento."
        ),
    )
    return [contradiction]
|
|
1882
|
+
|
|
1883
|
+
|
|
1884
|
+
def _waiting_agent_continuation_findings(
    payload: JsonObject,
    transcript: object,
    final_text: str,
    runtime_log_text: str,
) -> list[AgentRunReportFinding]:
    """Flag a run that ended without attempting a ready waiting-agent continuation.

    No finding is produced when the workflow is not waiting on the agent with
    continuation enabled, when the directive does not require a continuation,
    when the transcript or runtime log shows an attempt, or when the final
    text reports that the runtime could not continue.
    """
    status, can_continue = _agent_continuation_status(payload)
    if not (status == "waiting_agent" and can_continue is True):
        return []
    if not _agent_directive_requires_waiting_agent_continuation(payload):
        return []

    # The runtime log is only consulted when the transcript shows no attempt.
    continuation_attempted = _transcript_attempted_waiting_agent_continuation(
        transcript
    ) or _runtime_log_attempted_waiting_agent_continuation(runtime_log_text)
    if continuation_attempted or _reported_runtime_continuation_unavailable(final_text):
        return []

    omitted = AgentRunReportFinding(
        code=AgentRunReportFindingCode.WAITING_AGENT_CONTINUATION_OMITTED,
        severity="high",
        source="transcript",
        source_field="progress_view_model.status",
        expected="agente deve continuar pelos effects do agent_directive antes do relatório final",
        actual="relatório final emitido sem subagente/aplicação de reescrita",
        message="O workflow ficou em waiting_agent com continuação automática pronta, mas o agente encerrou sem executar a continuação.",
        next_action="Continuar pelo agent_directive.control.effects ou reportar explicitamente a incapacidade da CLI de invocar o subagente.",
    )
    return [omitted]
|
|
1913
|
+
|
|
1914
|
+
|
|
1915
|
+
def _agent_directive_requires_waiting_agent_continuation(payload: JsonObject) -> bool:
    """Tell whether the agent directive asks the agent to keep going.

    True only when the directive control is ``waiting_agent``, its ``continue_``
    capability is exactly ``True``, and it carries effects or a non-blank
    resume text.
    """
    control = _agent_directive_control(payload)
    if control.status == "waiting_agent" and control.capabilities.continue_ is True:
        return bool(control.effects or control.resume.strip())
    return False
|
|
1920
|
+
|
|
1921
|
+
|
|
1922
|
+
def _agent_continuation_status(payload: JsonObject) -> tuple[str, bool | None]:
    """Return ``(status, can_continue)`` for the agent continuation.

    The agent directive control is authoritative whenever it has a status;
    otherwise the values come from ``progress_view_model``.
    """
    control = _agent_directive_control(payload)
    if not control.status:
        progress = _ProgressTruthFields.model_validate(
            _field_payload(_object_field(payload, "progress_view_model"), ("status", "can_continue_now"))
        )
        return progress.status, progress.can_continue_now
    return control.status, control.capabilities.continue_
|
|
1930
|
+
|
|
1931
|
+
|
|
1932
|
+
def _ready_continuation_stopped_findings(
    payload: JsonObject,
    transcript: object,
    final_text: str,
    runtime_log_text: str,
) -> list[AgentRunReportFinding]:
    """Flag a run that started the continuation but stopped while it was still ready.

    The finding requires: a ready waiting-agent continuation, an attempt seen
    in the transcript or runtime log, no reported runtime unavailability, no
    legitimate specialist stop, and a final text containing one of the
    "resume / remaining work" markers.
    """
    status, can_continue = _agent_continuation_status(payload)
    if not (status == "waiting_agent" and can_continue is True):
        return []
    if not _agent_directive_requires_waiting_agent_continuation(payload):
        return []

    # Both sources are always inspected, as in the original flow.
    transcript_attempted = _transcript_attempted_waiting_agent_continuation(transcript)
    runtime_attempted = _runtime_log_attempted_waiting_agent_continuation(runtime_log_text)
    if not transcript_attempted and not runtime_attempted:
        return []
    if _reported_runtime_continuation_unavailable(final_text):
        return []

    legitimate_stop = _transcript_reports_legitimate_specialist_stop(
        transcript,
        final_text,
    ) or _runtime_log_reports_legitimate_specialist_stop(runtime_log_text, final_text)
    if legitimate_stop:
        return []

    folded = _fold_text(final_text)
    resume_markers = ("proxima acao", "próxima ação", "retomar", "restam", "restantes")
    asks_to_resume = False
    for marker in resume_markers:
        if marker in folded:
            asks_to_resume = True
            break
    if not asks_to_resume:
        return []

    stopped = AgentRunReportFinding(
        code=AgentRunReportFindingCode.READY_CONTINUATION_STOPPED,
        severity="high",
        source="workflow_payload",
        source_field="progress_view_model.status",
        expected="waiting_agent/can_continue_now=true deve continuar pela rota oficial ate quota, capacidade, validacao ruim ou fila vazia",
        actual="relatório final encerrou a rodada com continuação executável ainda pronta",
        message=(
            "O agente começou a continuação automática, mas parou e pediu retomada mesmo com o workflow ainda executável."
        ),
        next_action=(
            "Continuar pelo agent_directive.control.effects em vez de encerrar; se parar, reporte quota/capacidade/validação real como blocker."
        ),
    )
    return [stopped]
|
|
1973
|
+
|
|
1974
|
+
|
|
1975
|
+
def _reported_runtime_continuation_unavailable(final_text: str) -> bool:
    """Tell whether ``final_text`` matches ``RUNTIME_CONTINUATION_UNAVAILABLE_RE``.

    An empty text never matches and is not handed to the pattern.
    """
    if final_text:
        return RUNTIME_CONTINUATION_UNAVAILABLE_RE.search(final_text) is not None
    return False
|
|
1979
|
+
|
|
1980
|
+
|
|
1981
|
+
def _transcript_reports_legitimate_specialist_stop(transcript: object, final_text: str) -> bool:
    """Tell whether a tool output justifies the stop that ``final_text`` reports.

    Only ``tool_result`` / ``run_command`` events are inspected. Outputs that
    are not a specialist task-runner result fall back to a raw-text check.
    Typed results count when their status is blocked/failed/waiting_external,
    their ``blocked_reason`` is a legitimate stop reason, and the folded final
    text mentions that reason or one of its public labels.
    """
    folded_final = _fold_text(final_text)
    if not folded_final:
        return False

    runner_schema = "medical-notes-workbench.specialist-task-runner-result.v1"
    stopping_statuses = {"blocked", "failed", "waiting_external"}
    inspected_event_types = {"tool_result", "run_command"}

    for event in _iter_transcript_events(transcript):
        if event.event_type.casefold() not in inspected_event_types:
            continue
        output_text = _transcript_tool_output_text(event)
        payload = _json_payload_from_tool_output(output_text)

        # Untyped output, or JSON with another schema: use the raw-text check.
        if not payload or _optional_text(payload, "schema") != runner_schema:
            if _raw_transcript_output_reports_specialist_stop(output_text, folded_final):
                return True
            continue

        status = _optional_text(payload, "status")
        blocked_reason = _optional_text(payload, "blocked_reason")
        if status in stopping_statuses and blocked_reason in LEGITIMATE_SPECIALIST_STOP_REASONS:
            public_labels = (blocked_reason, *ROOT_CAUSE_PUBLIC_LABELS.get(blocked_reason, ()))
            if _folded_contains_any(folded_final, public_labels):
                return True
    return False
|
|
2011
|
+
|
|
2012
|
+
|
|
2013
|
+
def _raw_transcript_output_reports_specialist_stop(output_text: str, folded_final_text: str) -> bool:
    """Check raw tool output for a stop reason that the final text also mentions.

    A reason from ``LEGITIMATE_SPECIALIST_STOP_REASONS`` counts when it appears
    in the folded output and the (already folded) final text contains the
    reason itself or one of its ``ROOT_CAUSE_PUBLIC_LABELS``.
    """
    folded_output = _fold_text(output_text)
    if not (folded_output and folded_final_text):
        return False
    for reason in LEGITIMATE_SPECIALIST_STOP_REASONS:
        if reason in folded_output:
            public_labels = (reason, *ROOT_CAUSE_PUBLIC_LABELS.get(reason, ()))
            if _folded_contains_any(folded_final_text, public_labels):
                return True
    return False
|
|
2026
|
+
|
|
2027
|
+
|
|
2028
|
+
def _waiting_external_continuation_attempt_findings(
    payload: JsonObject,
    transcript: object,
) -> list[AgentRunReportFinding]:
    """Flag a specialist continuation attempted while the workflow was hard-stopped.

    The workflow counts as hard-stopped when the progress view status is
    ``waiting_external`` or ``can_continue_now`` is exactly ``False``. A
    critical finding is produced when the transcript still shows a
    waiting-agent continuation attempt.
    """
    progress = _ProgressTruthFields.model_validate(
        _field_payload(_object_field(payload, "progress_view_model"), ("status", "can_continue_now"))
    )
    hard_stopped = progress.status == "waiting_external" or progress.can_continue_now is False
    if not hard_stopped:
        return []
    if not _transcript_attempted_waiting_agent_continuation(transcript):
        return []

    attempted = AgentRunReportFinding(
        code=AgentRunReportFindingCode.WAITING_EXTERNAL_CONTINUATION_ATTEMPTED,
        severity="critical",
        source="transcript",
        source_field="progress_view_model.status",
        expected="waiting_external/can_continue_now=false deve parar sem invocar especialista ou comandos internos",
        actual="transcript tentou continuação especializada após o hard stop do workflow",
        message=(
            "O agente ignorou um estado não executável do workflow e tentou continuar a reescrita especializada."
        ),
        next_action=(
            "Não aplicar outputs dessa tentativa; corrigir o relatório/agente e retomar somente quando "
            "um runner oficial produzir recibo tipado."
        ),
    )
    return [attempted]
|
|
2056
|
+
|
|
2057
|
+
|
|
2058
|
+
def _specialist_completed_apply_step_findings(transcript: object) -> list[AgentRunReportFinding]:
    """Flag a completed specialist task whose apply step was skipped.

    The transcript is walked in order. A completed specialist task-runner
    result opens a pending work id together with the command expected to apply
    it. A later ``tool_use`` naming both closes it. Before that, a
    ``read_file`` tool use without a command, or a command recognized by
    ``_is_command_before_required_specialist_apply``, yields the finding.
    """
    # NOTE(review): SOURCE lost its indentation; the default apply command is
    # assumed to be set inside the "completed" branch — confirm upstream.
    runner_schema = "medical-notes-workbench.specialist-task-runner-result.v1"
    result_fields = ("schema", "status", "work_id", "next_apply_step")
    pending_work_id = ""
    pending_apply_command = ""

    for event in _iter_transcript_events(transcript):
        event_type = event.event_type.casefold()

        if event_type in {"tool_result", "run_command"}:
            payload = _json_payload_from_tool_output(_transcript_tool_output_text(event))
            result = _SpecialistTaskRunnerResultFields.model_validate(
                _field_payload(payload, result_fields)
            )
            if result.schema_id == runner_schema and result.status == "completed":
                pending_work_id = result.work_id
                if result.next_apply_step:
                    pending_apply_command = _optional_text(result.next_apply_step, "command_family")
                if not pending_apply_command:
                    pending_apply_command = "apply-specialist-style-rewrite"
            continue

        # Only tool invocations made while a task is pending are relevant.
        if event_type != "tool_use" or not pending_work_id:
            continue

        command = _event_parameter_text(event, "command")
        if not command:
            if event.tool_name.casefold() == "read_file":
                return [_specialist_apply_step_omitted_finding(pending_work_id, "read_file")]
            continue

        folded = _fold_text(command)
        applied_pending_task = (
            pending_apply_command
            and pending_apply_command in folded
            and pending_work_id in command
        )
        if applied_pending_task:
            pending_work_id = ""
            pending_apply_command = ""
        elif _is_command_before_required_specialist_apply(folded):
            return [_specialist_apply_step_omitted_finding(pending_work_id, command)]
    return []
|
|
2091
|
+
|
|
2092
|
+
|
|
2093
|
+
def _opencode_specialist_receipt_step_findings(
    payload: JsonObject,
    transcript: object,
) -> list[AgentRunReportFinding]:
    """Report a style-rewrite apply issued while a specialist task is still unfinalized.

    Only runs when the batch derived from ``payload`` is in the
    ``style_rewrite`` phase. Work ids found in task metadata are tracked as
    pending; a ``finalize-opencode-specialist-task`` command for a tracked id
    clears it. An ``apply-specialist-style-rewrite`` command seen while ids
    are still pending (and naming either no id or a pending one) produces the
    finding.
    """
    if _specialist_runtime_batch_from_agent_directive(payload).phase != "style_rewrite":
        return []

    awaiting_receipt: set[str] = set()
    for event in _iter_transcript_events(transcript):
        task_metadata = _opencode_task_metadata_from_event(event)
        if task_metadata is not None and task_metadata.work_id:
            awaiting_receipt.add(task_metadata.work_id)
            continue
        if event.event_type.casefold() != "tool_use":
            continue
        command = _event_parameter_text(event, "command")
        if not command:
            continue
        folded = _fold_text(command)

        if "finalize-opencode-specialist-task" in folded:
            finalized = _command_argument(command, "--work-id")
            if finalized and finalized in awaiting_receipt:
                awaiting_receipt.remove(finalized)
                continue

        if "apply-specialist-style-rewrite" not in folded:
            continue
        applied = _command_argument(command, "--work-id")
        if awaiting_receipt and (not applied or applied in awaiting_receipt):
            # Without an explicit id, blame the lexicographically first pending one.
            blamed = applied or min(awaiting_receipt)
            return [_specialist_apply_step_omitted_finding(blamed, command)]
    return []
|
|
2122
|
+
|
|
2123
|
+
|
|
2124
|
+
def _is_command_before_required_specialist_apply(folded_command: str) -> bool:
|
|
2125
|
+
return any(
|
|
2126
|
+
marker in folded_command
|
|
2127
|
+
for marker in (
|
|
2128
|
+
"fix-wiki --apply",
|
|
2129
|
+
"plan-subagents",
|
|
2130
|
+
"finalize-agy-specialist-task",
|
|
2131
|
+
"finalize-opencode-specialist-task",
|
|
2132
|
+
"finalize-style-rewrite-output",
|
|
2133
|
+
"collect-style-rewrite-outputs",
|
|
2134
|
+
"apply-style-rewrite",
|
|
2135
|
+
)
|
|
2136
|
+
)
|
|
2137
|
+
|
|
2138
|
+
|
|
2139
|
+
def _specialist_apply_step_omitted_finding(work_id: str, actual: str) -> AgentRunReportFinding:
    """Build the high-severity finding for a skipped specialist apply step.

    Args:
        work_id: Work id recorded in the finding's evidence.
        actual: What the agent did instead (stored verbatim as ``actual``).
    """
    expected_text = (
        "quando a etapa especialista retorna completed, o proximo comando relevante deve ser "
        "apply-specialist-style-rewrite para o mesmo work_id"
    )
    summary = "O agente recebeu uma reescrita especialista validada, mas desviou antes de aplicar o recibo oficial."
    remediation = (
        "Usar next_apply_step.arguments imediatamente após a etapa especialista completed; "
        "não ler manifesto, rerodar fix-wiki, chamar plan-subagents ou lançar outro especialista antes do apply."
    )
    return AgentRunReportFinding(
        code=AgentRunReportFindingCode.SPECIALIST_APPLY_STEP_OMITTED,
        severity="high",
        source="transcript",
        source_field="tool_result.output.next_apply_step",
        expected=expected_text,
        actual=actual,
        message=summary,
        next_action=remediation,
        evidence={"work_id": work_id},
    )
|
|
2159
|
+
|
|
2160
|
+
|
|
2161
|
+
def _transcript_attempted_waiting_agent_continuation(transcript: object) -> bool:
    """Return True when any inspected transcript node mentions a continuation marker.

    The transcript is traversed recursively through lists and the child
    containers of dicts. For each dict, the case-folded ``repr`` of the
    normalized tool event, of a ``RUN_COMMAND`` node, of a
    ``PLANNER_RESPONSE`` node's ``tool_calls`` list, or of a saved Gemini
    tool call is searched for ``WAITING_AGENT_CONTINUATION_MARKERS``.
    """

    def mentions_marker(raw: object) -> bool:
        rendered = repr(raw).casefold()
        return any(marker in rendered for marker in WAITING_AGENT_CONTINUATION_MARKERS)

    def visit(node: object) -> bool:
        if isinstance(node, list):
            return any(visit(entry) for entry in node)
        if not isinstance(node, dict):
            return False

        parsed = _transcript_event_fields(node)
        if (
            parsed is not None
            and parsed.event_type.casefold() in {"tool_use", "tool_result"}
            and mentions_marker(parsed.model_dump(mode="json"))
        ):
            return True

        kind = str(node.get("type") or "").upper()
        if kind == "RUN_COMMAND" and mentions_marker(node):
            return True
        if kind == "PLANNER_RESPONSE":
            planned_calls = node.get("tool_calls")
            if isinstance(planned_calls, list) and mentions_marker(planned_calls):
                return True

        if _looks_like_saved_gemini_tool_call(node) and mentions_marker(node):
            return True

        return any(visit(child) for child in _transcript_child_containers(node))

    return visit(transcript)
|
|
2193
|
+
|
|
2194
|
+
|
|
2195
|
+
def _runtime_log_attempted_waiting_agent_continuation(runtime_log_text: str) -> bool:
    """Return whether the folded runtime log contains a continuation marker.

    A log that is empty or whitespace-only after folding never matches.
    """
    folded_log = _fold_text(runtime_log_text)
    if folded_log.strip():
        return _folded_contains_any(folded_log, WAITING_AGENT_CONTINUATION_MARKERS)
    return False
|
|
2200
|
+
|
|
2201
|
+
|
|
2202
|
+
def _runtime_log_reports_legitimate_specialist_stop(runtime_log_text: str, final_text: str) -> bool:
    """Decide whether a continuation attempt was justified and reported.

    Returns True only when the runtime log contains a continuation marker
    and either:

    * one of ``LEGITIMATE_SPECIALIST_STOP_REASONS`` appears in the log and
      that reason (or one of its public labels) appears in the final text; or
    * the log contains a quota marker and the final text mentions
      ``specialist_model_quota_exhausted`` or one of its public labels.
    """
    log_folded = _fold_text(runtime_log_text)
    report_folded = _fold_text(final_text)
    if not (log_folded.strip() and report_folded.strip()):
        return False
    if not _runtime_log_attempted_waiting_agent_continuation(runtime_log_text):
        return False

    for reason in LEGITIMATE_SPECIALIST_STOP_REASONS:
        if _folded_contains_any(log_folded, (reason,)) and _folded_contains_any(
            report_folded,
            (reason, *ROOT_CAUSE_PUBLIC_LABELS.get(reason, ())),
        ):
            return True

    quota_log_markers = (
        "terminalquotaerror",
        "quota_exhausted",
        "exhausted your capacity",
        "capacity on this model",
    )
    if not _folded_contains_any(log_folded, quota_log_markers):
        return False
    quota_report_markers = (
        "specialist_model_quota_exhausted",
        *ROOT_CAUSE_PUBLIC_LABELS["specialist_model_quota_exhausted"],
    )
    return bool(_folded_contains_any(report_folded, quota_report_markers))
|
|
2232
|
+
|
|
2233
|
+
|
|
2234
|
+
def _transcript_used_native_specialist_invocation(transcript: object) -> bool:
    """Return True when the transcript shows a native specialist invocation.

    Two signals are recognised:

    * a native agent tool (``invoke_agent``, ``invoke_subagent``,
      ``define_subagent``, ``send_message``) whose serialized event mentions
      ``med-knowledge-architect`` or ``style_rewrite``;
    * a shell tool (``run_command``, ``run_shell_command``) whose command
      mentions ``med-knowledge-architect``, ``finalize-style-rewrite-output``
      or ``apply-style-rewrite``.
    """
    native_tool_names = {"invoke_agent", "invoke_subagent", "define_subagent", "send_message"}
    shell_tool_names = {"run_command", "run_shell_command"}
    shell_markers = ("med-knowledge-architect", "finalize-style-rewrite-output", "apply-style-rewrite")
    for event in _iter_transcript_events(transcript):
        tool_name = event.tool_name.casefold()
        if tool_name in native_tool_names:
            # Serialize only events that can actually match; dumping every
            # event up front was wasted work for all other tool names.
            raw_event = repr(event.model_dump(mode="json")).casefold()
            if "med-knowledge-architect" in raw_event or "style_rewrite" in raw_event:
                return True
        elif tool_name in shell_tool_names:
            command = _event_parameter_text(event, "command").casefold()
            if any(marker in command for marker in shell_markers):
                return True
    return False
|
|
2252
|
+
|
|
2253
|
+
|
|
2254
|
+
def _looks_like_saved_gemini_tool_call(value: JsonObject) -> bool:
|
|
2255
|
+
return isinstance(value.get("name"), str) and (
|
|
2256
|
+
"args" in value
|
|
2257
|
+
or "functionResponse" in value
|
|
2258
|
+
or "result" in value
|
|
2259
|
+
or "resultDisplay" in value
|
|
2260
|
+
)
|
|
2261
|
+
|
|
2262
|
+
|
|
2263
|
+
def _blocked_workflow_tool_result_findings(
    transcript: object,
    final_text: str,
) -> list[AgentRunReportFinding]:
    """Report blocked workflow payloads that the final text does not mention.

    Each distinct blocked result (keyed by tool name, phase, blocked reason
    and work id) is checked once. It is considered reported when its folded
    ``blocked_reason`` appears in the folded final text, or when
    ``_final_report_explains_blocked_tool_result`` accepts the text.
    """
    blocked = _blocked_workflow_tool_results(transcript)
    if not blocked:
        return []

    report_folded = _fold_text(final_text)
    already_checked: set[str] = set()
    omissions: list[AgentRunReportFinding] = []
    for item in blocked:
        dedupe_key = f"{item.tool_name}:{item.phase}:{item.blocked_reason}:{item.work_id}"
        if dedupe_key in already_checked:
            continue
        already_checked.add(dedupe_key)

        folded_reason = _fold_text(item.blocked_reason)
        quoted_literally = bool(report_folded and folded_reason.strip() and folded_reason in report_folded)
        if quoted_literally or _final_report_explains_blocked_tool_result(item, report_folded):
            continue

        omissions.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.BLOCKED_TOOL_RESULT_OMITTED,
                severity="high",
                source="transcript",
                source_field="tool_result.output.blocked_reason",
                expected="relatório final deve reportar qualquer payload de workflow bloqueado dentro de tool_result",
                actual=item.blocked_reason,
                message=(
                    "O transcript contém um comando com tool status=success, mas o payload oficial dentro "
                    "do output ficou bloqueado."
                ),
                next_action=(
                    "Reportar o blocked_reason literal, explicar o impacto no workflow e não tratar a "
                    "tool call como sucesso do workflow."
                ),
                evidence={
                    "tool_name": item.tool_name,
                    "phase": item.phase,
                    "work_id": item.work_id,
                    "status": item.status,
                },
            )
        )
    return omissions
|
|
2308
|
+
|
|
2309
|
+
|
|
2310
|
+
def _final_report_explains_blocked_tool_result(result: _BlockedWorkflowToolResult, folded_text: str) -> bool:
|
|
2311
|
+
if not folded_text:
|
|
2312
|
+
return False
|
|
2313
|
+
if result.blocked_reason != "style_rewrite_still_requires_rewrite":
|
|
2314
|
+
return False
|
|
2315
|
+
has_rewrite_context = any(marker in folded_text for marker in ("reescrita", "rewrite"))
|
|
2316
|
+
has_not_applied = any(
|
|
2317
|
+
marker in folded_text
|
|
2318
|
+
for marker in (
|
|
2319
|
+
"parou antes",
|
|
2320
|
+
"nao foi aplicada",
|
|
2321
|
+
"nao foi aplicado",
|
|
2322
|
+
"não foi aplicada",
|
|
2323
|
+
"não foi aplicado",
|
|
2324
|
+
"nenhuma nota",
|
|
2325
|
+
"pendente",
|
|
2326
|
+
)
|
|
2327
|
+
)
|
|
2328
|
+
has_style_cause = any(
|
|
2329
|
+
marker in folded_text
|
|
2330
|
+
for marker in (
|
|
2331
|
+
"criterios de estilo",
|
|
2332
|
+
"critérios de estilo",
|
|
2333
|
+
"nao atendeu",
|
|
2334
|
+
"não atendeu",
|
|
2335
|
+
"excesso de callouts",
|
|
2336
|
+
"visual didatico pendente",
|
|
2337
|
+
"visual didático pendente",
|
|
2338
|
+
"nota validada",
|
|
2339
|
+
)
|
|
2340
|
+
)
|
|
2341
|
+
return has_rewrite_context and has_not_applied and has_style_cause
|
|
2342
|
+
|
|
2343
|
+
|
|
2344
|
+
def _blocked_workflow_tool_results(transcript: object) -> list[_BlockedWorkflowToolResult]:
    """Collect tool results whose payload has ``status == "blocked"`` and a reason.

    Only ``tool_result`` and ``run_command`` events are inspected; results
    are returned in transcript order.
    """
    collected: list[_BlockedWorkflowToolResult] = []
    for event in _iter_transcript_events(transcript):
        if event.event_type.casefold() not in {"tool_result", "run_command"}:
            continue
        payload = _workflow_payload_from_tool_output(_transcript_tool_output_text(event))
        if not payload:
            continue
        status = _optional_text(payload, "status")
        reason = _optional_text(payload, "blocked_reason")
        if status == "blocked" and reason:
            collected.append(
                _BlockedWorkflowToolResult(
                    tool_name=event.tool_name,
                    status=status,
                    phase=_optional_text(payload, "phase"),
                    blocked_reason=reason,
                    work_id=_optional_text(payload, "work_id"),
                )
            )
    return collected
|
|
2366
|
+
|
|
2367
|
+
|
|
2368
|
+
def _json_payload_from_tool_output(output: str) -> JsonObject:
|
|
2369
|
+
candidate = output.split("---", 1)[1] if "---" in output else output
|
|
2370
|
+
start = candidate.find("{")
|
|
2371
|
+
if start < 0:
|
|
2372
|
+
return {}
|
|
2373
|
+
decoder = json.JSONDecoder()
|
|
2374
|
+
try:
|
|
2375
|
+
parsed, _end = decoder.raw_decode(candidate[start:])
|
|
2376
|
+
except json.JSONDecodeError:
|
|
2377
|
+
return {}
|
|
2378
|
+
if not isinstance(parsed, dict):
|
|
2379
|
+
return {}
|
|
2380
|
+
return _json_object(parsed)
|
|
2381
|
+
|
|
2382
|
+
|
|
2383
|
+
def _tool_payload_contract_findings(transcript: object) -> list[AgentRunReportFinding]:
    """Validate style-rewrite apply payloads found in tool results.

    For every ``tool_result``/``run_command`` event whose payload schema is in
    ``STYLE_REWRITE_APPLY_RESULT_SCHEMAS``, the payload is validated against
    ``StyleRewriteAtomicApplyResult``; each validation failure becomes one
    finding.
    """
    violations: list[AgentRunReportFinding] = []
    for event in _iter_transcript_events(transcript):
        if event.event_type.casefold() not in {"tool_result", "run_command"}:
            continue
        payload = _json_payload_from_tool_output(_transcript_tool_output_text(event))
        schema = ""
        if payload:
            schema = _optional_text(payload, "schema")
        if schema in STYLE_REWRITE_APPLY_RESULT_SCHEMAS:
            try:
                StyleRewriteAtomicApplyResult.model_validate(payload)
            except ValidationError as exc:
                violations.append(_effect_payload_contract_invalid_finding(schema, exc))
    return violations
|
|
2397
|
+
|
|
2398
|
+
|
|
2399
|
+
def _effect_payload_contract_invalid_finding(schema: str, exc: ValidationError) -> AgentRunReportFinding:
    """Build the critical finding for an effect payload that failed validation.

    The first reported validation error supplies the dotted location
    (``$`` when empty) and the message; ``str(exc)`` is the message fallback.
    """
    reported = exc.errors()
    first_error = reported[0] if reported else {}
    dotted_location = ".".join(str(part) for part in first_error.get("loc", ()))
    if not dotted_location:
        dotted_location = "$"
    detail = str(first_error.get("msg") or str(exc))
    return AgentRunReportFinding(
        code=AgentRunReportFindingCode.EFFECT_PAYLOAD_CONTRACT_INVALID,
        severity="critical",
        source="transcript.tool_result.output",
        source_field=schema,
        expected="payload de efeito validado por modelo Pydantic fechado antes de dirigir relatório ou contagem",
        actual=f"{dotted_location}: {detail}",
        message=f"Tool output {schema} violou o contrato tipado antes de poder dirigir o workflow.",
        next_action=(
            "Reexecutar ou corrigir o produtor do efeito para emitir payload completo; não usar esse output "
            "para declarar apply, contagem ou conclusão."
        ),
    )
|
|
2416
|
+
|
|
2417
|
+
|
|
2418
|
+
def _transcript_tool_output_text(event: _TranscriptEventFields) -> str:
|
|
2419
|
+
if event.output:
|
|
2420
|
+
return event.output
|
|
2421
|
+
if isinstance(event.content, str):
|
|
2422
|
+
return event.content
|
|
2423
|
+
return ""
|
|
2424
|
+
|
|
2425
|
+
|
|
2426
|
+
def _workflow_payload_from_tool_output(output: str) -> JsonObject:
|
|
2427
|
+
if "blocked_reason" not in output or "blocked" not in output:
|
|
2428
|
+
return {}
|
|
2429
|
+
return _json_payload_from_tool_output(output)
|
|
2430
|
+
|
|
2431
|
+
|
|
2432
|
+
def _iter_transcript_events(transcript: object) -> list[_TranscriptEventFields]:
    """Flatten a nested transcript into normalized events, depth-first.

    For each dict, its own normalized event (if any) comes first, then the
    events synthesized from planner tool calls, then the events of its child
    containers. Lists are traversed in order; other values are ignored.
    """

    def walk(node: object):
        if isinstance(node, list):
            for entry in node:
                yield from walk(entry)
        elif isinstance(node, dict):
            parsed = _transcript_event_fields(node)
            if parsed is not None:
                yield parsed
            yield from _planner_response_tool_call_events(node)
            for nested in _transcript_child_containers(node):
                yield from walk(nested)

    return list(walk(transcript))
|
|
2451
|
+
|
|
2452
|
+
|
|
2453
|
+
def _transcript_child_containers(value: JsonObject) -> list[object]:
    """Return the dict/list values stored under the known child-container keys.

    Keys are read in the order of ``TRANSCRIPT_CHILD_CONTAINER_KEYS``; missing
    keys and non-container values are skipped.
    """
    candidates = (value.get(key) for key in TRANSCRIPT_CHILD_CONTAINER_KEYS)
    return [candidate for candidate in candidates if isinstance(candidate, (dict, list))]
|
|
2460
|
+
|
|
2461
|
+
|
|
2462
|
+
def _planner_response_tool_call_events(value: JsonObject) -> list[_TranscriptEventFields]:
    """Synthesize ``tool_use`` events from a ``PLANNER_RESPONSE`` node.

    Each dict in ``tool_calls`` becomes one event. The command is taken from
    ``args["command"]`` or ``args["CommandLine"]`` when it is a non-blank
    string; the raw tool call is kept as the event content.
    """
    if str(value.get("type") or "").upper() != "PLANNER_RESPONSE":
        return []
    planned_calls = value.get("tool_calls")
    if not isinstance(planned_calls, list):
        return []

    synthesized: list[_TranscriptEventFields] = []
    for call in planned_calls:
        if not isinstance(call, dict):
            continue
        parameters: JsonObject = {}
        call_args = call.get("args")
        if isinstance(call_args, dict):
            command = call_args.get("command") or call_args.get("CommandLine")
            if isinstance(command, str) and command.strip():
                parameters["command"] = command
        declared_name = call.get("name")
        event_fields = {
            "type": "tool_use",
            "tool_name": declared_name if isinstance(declared_name, str) else "",
            "parameters": parameters,
            "content": call,
        }
        synthesized.append(_TranscriptEventFields.model_validate(event_fields))
    return synthesized
|
|
2491
|
+
|
|
2492
|
+
|
|
2493
|
+
def _transcript_event_fields(value: JsonObject) -> _TranscriptEventFields | None:
    """Normalize a raw transcript dict into ``_TranscriptEventFields``.

    A string ``tool`` fills in a missing ``tool_name``, and a top-level
    ``metadata`` dict is copied into the parameters unless they already
    carry one. Returns ``None`` when validation raises ``ValueError``.
    """
    fields = dict(value)

    if not fields.get("tool_name") and isinstance(fields.get("tool"), str):
        fields["tool_name"] = fields["tool"]

    raw_parameters = fields.get("parameters")
    merged_parameters = dict(raw_parameters) if isinstance(raw_parameters, dict) else {}
    top_level_metadata = fields.get("metadata")
    if isinstance(top_level_metadata, dict) and "metadata" not in merged_parameters:
        merged_parameters["metadata"] = top_level_metadata
    if merged_parameters:
        fields["parameters"] = merged_parameters

    try:
        return _TranscriptEventFields.model_validate(fields)
    except ValueError:
        return None
|
|
2510
|
+
|
|
2511
|
+
|
|
2512
|
+
def _event_parameter_text(event: _TranscriptEventFields, field_name: str) -> str:
    """Read transcript tool parameters only after the event was normalized.

    Returns ``""`` when the ``command``/``role`` parameters fail validation.

    Raises:
        ValueError: If ``field_name`` is neither ``"command"`` nor ``"role"``
            (only reached when validation succeeded).
    """
    text_fields = _field_payload(event.parameters, ("command", "role"))
    try:
        parsed = _TranscriptTextParameters.model_validate(text_fields)
    except ValidationError:
        return ""
    if field_name == "command":
        return parsed.command
    if field_name == "role":
        return parsed.role
    raise ValueError(f"unsupported transcript text parameter: {field_name}")
|
|
2526
|
+
|
|
2527
|
+
|
|
2528
|
+
def _opencode_task_metadata_from_event(
    event: _TranscriptEventFields,
) -> _OpenCodeSpecialistTaskMetadataFields | None:
    """Return the specialist task metadata attached to a ``task`` tool event.

    The parameters ``metadata``, ``task_metadata`` and ``taskMetadata`` are
    tried in that order; the first dict that parses wins. Events from other
    tools, or with no parseable candidate, yield ``None``.
    """
    if event.tool_name.casefold() != "task":
        return None
    for parameter_name in ("metadata", "task_metadata", "taskMetadata"):
        candidate = event.parameters.get(parameter_name)
        if isinstance(candidate, dict):
            parsed = _opencode_task_metadata_from_candidate(JsonObjectAdapter.validate_python(candidate))
            if parsed is not None:
                return parsed
    return None
|
|
2545
|
+
|
|
2546
|
+
|
|
2547
|
+
def _opencode_task_metadata_from_candidate(candidate: JsonObject) -> _OpenCodeSpecialistTaskMetadataFields | None:
    """Parse task metadata from either the typed schema or a native model record.

    A candidate that declares the metadata schema is validated as-is.
    Otherwise a payload is assembled from the candidate's ``model`` dict
    (``providerID``/``provider_id`` and ``modelID``/``model_id``), prefixing
    the model id with the provider when it has no ``/``. Returns ``None`` when
    no model information is present or validation fails.
    """
    metadata_schema = "medical-notes-workbench.opencode-specialist-task-metadata.v1"

    def first_text(mapping: JsonObject, primary: str, fallback: str) -> str:
        return str(mapping.get(primary) or mapping.get(fallback) or "")

    if str(candidate.get("schema") or "") == metadata_schema:
        raw_fields = candidate
    else:
        native_model = candidate.get("model")
        if not isinstance(native_model, dict):
            return None
        provider_id = first_text(native_model, "providerID", "provider_id").strip()
        native_model_id = first_text(native_model, "modelID", "model_id").strip()
        if not (provider_id or native_model_id):
            return None
        needs_prefix = bool(provider_id and native_model_id and "/" not in native_model_id)
        raw_fields = {
            "schema": metadata_schema,
            "work_id": first_text(candidate, "work_id", "workID"),
            "task_id": first_text(candidate, "task_id", "taskID"),
            "provider_id": provider_id,
            "model_id": f"{provider_id}/{native_model_id}" if needs_prefix else native_model_id,
            "model_tier": "specialist",
            "tool_sequence": ["task"],
            "prompt_contract": str(candidate.get("prompt_contract") or ""),
            "raw_content_embedded": None,
        }

    try:
        return _OpenCodeSpecialistTaskMetadataFields.model_validate(raw_fields)
    except ValidationError:
        return None
|
|
2578
|
+
|
|
2579
|
+
|
|
2580
|
+
def _workflow_payload_omission_findings(
    payload: JsonObject,
    final_text: str,
    transcript: object | None,
) -> list[AgentRunReportFinding]:
    """Aggregate the omission checks that compare the payload with the final text.

    Findings are concatenated in a fixed order: agent events, version-control
    safety, API accounting, operational warnings, content-quality audit.
    """
    report_folded = _fold_text(final_text)
    progress_only = _final_report_looks_like_progress_only(final_text)
    diagnostic = _object_field(payload, "diagnostic_context")
    return [
        *_omitted_agent_event_findings(diagnostic, report_folded),
        *_omitted_version_control_safety_findings(payload, report_folded, transcript),
        *_api_accounting_findings(payload, report_folded),
        *_omitted_operational_warning_findings(diagnostic, report_folded),
        *_content_quality_audit_findings(
            payload,
            report_folded,
            final_report_incomplete=progress_only,
        ),
    ]
|
|
2601
|
+
|
|
2602
|
+
|
|
2603
|
+
def _error_context_root_cause_findings(payload: JsonObject, final_text: str) -> list[AgentRunReportFinding]:
    """Report a final text that omits the workflow's canonical root cause.

    No finding is produced when the payload has no root cause, or when the
    folded final text contains the root cause or one of its public labels.
    """
    root_cause, source_field = _canonical_root_cause(payload)
    if not root_cause:
        return []

    accepted_mentions = (root_cause, *ROOT_CAUSE_PUBLIC_LABELS.get(root_cause, ()))
    if _folded_contains_any(_fold_text(final_text), accepted_mentions):
        return []

    omission = AgentRunReportFinding(
        code=AgentRunReportFindingCode.MISSING_ERROR_CONTEXT_ROOT_CAUSE,
        severity="high",
        source="workflow_payload",
        source_field=source_field,
        expected=root_cause,
        actual="omitted",
        message="O relatório final omitiu a causa raiz oficial do workflow.",
        next_action=(
            "Reescrever o relatório final priorizando error_context.root_cause/decision.reason_code "
            "antes de resumir exit code ou saída ruidosa da ferramenta."
        ),
        evidence={"root_cause": root_cause},
    )
    return [omission]
|
|
2629
|
+
|
|
2630
|
+
|
|
2631
|
+
def _canonical_root_cause(payload: JsonObject) -> tuple[str, str]:
    """Pick the root cause and the payload field it came from.

    Precedence: ``error_context.root_cause``, then ``decision.reason_code``
    (unless listed in ``NON_ERROR_DECISION_REASON_CODES``), then the top-level
    ``blocked_reason``. Returns ``("", "")`` when none is set.
    """
    if root_cause := _optional_text(_object_field(payload, "error_context"), "root_cause"):
        return root_cause, "error_context.root_cause"

    reason_code = _optional_text(_object_field(payload, "decision"), "reason_code")
    if reason_code and reason_code not in NON_ERROR_DECISION_REASON_CODES:
        return reason_code, "decision.reason_code"

    if blocked_reason := _optional_text(payload, "blocked_reason"):
        return blocked_reason, "blocked_reason"

    return "", ""
|
|
2647
|
+
|
|
2648
|
+
|
|
2649
|
+
def _omitted_agent_event_findings(diagnostic: JsonObject, folded_final_text: str) -> list[AgentRunReportFinding]:
    """Report medium-or-higher agent events that the final text never names.

    An event counts as mentioned when the final text contains its ``code``,
    ``root_cause_code`` or ``type``. At most the first five omitted events
    are listed in the finding's ``actual`` field.

    Returns:
        A one-element list with the omission finding, or an empty list.
    """

    def identifying_markers(event: JsonObject) -> tuple[str, ...]:
        # Drop missing/empty identifiers: an empty marker is a substring of
        # any text and would make every event look "mentioned" if the
        # matching helper does not filter it out itself.
        candidates = (str(event.get(key) or "") for key in ("code", "root_cause_code", "type"))
        return tuple(marker for marker in candidates if marker)

    relevant = [
        event
        for event in _collect_agent_events(diagnostic)
        if str(event.get("severity") or "").lower() in {"medium", "high", "critical"}
    ]
    if not relevant:
        return []
    omitted = [
        event
        for event in relevant
        if not _folded_contains_any(folded_final_text, identifying_markers(event))
    ]
    if not omitted:
        return []
    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.AGENT_EVENT_OMITTED,
            severity="high",
            source="workflow_payload",
            source_field="diagnostic_context.agent_events",
            expected="eventos de agente medium+ devem aparecer no relatório final",
            actual=", ".join(str(event.get("code") or event.get("type") or "agent_event") for event in omitted[:5]),
            message="O relatório final omitiu agent_events relevantes emitidos pelo workflow.",
            next_action="Listar os agent_events relevantes e explicar impacto/mitigação no relatório da rodada.",
        )
    ]
|
|
2680
|
+
|
|
2681
|
+
|
|
2682
|
+
def _omitted_version_control_safety_findings(
    payload: JsonObject,
    folded_final_text: str,
    transcript: object | None,
) -> list[AgentRunReportFinding]:
    """Report version-control safety signals missing from the final text.

    Two independent checks run against ``payload["version_control_safety"]``:

    * ``mutation_without_guard is True`` must be mentioned in the final text;
    * an active resource guard with ``run_finish_seen is False`` must be
      reported as pending, unless the text carries an accepted "closed"
      confirmation or the transcript confirms the guard was closed.
    """
    safety_fields = (
        "mutation_without_guard",
        "resource_guard_active",
        "run_finish_seen",
        "sync_status",
        "agent_instruction",
    )
    safety = _AgentReportVersionControlSafetyFields.model_validate(
        _field_payload(_object_field(payload, "version_control_safety"), safety_fields)
    )
    findings: list[AgentRunReportFinding] = []

    mutation_markers = (
        "mutation_without_guard",
        "vault_guard",
        "version control",
        "controle de versao",
        "controle de versão",
    )
    if safety.mutation_without_guard is True and not _folded_contains_any(folded_final_text, mutation_markers):
        findings.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.VERSION_CONTROL_SAFETY_OMITTED,
                severity="high",
                source="workflow_payload",
                source_field="version_control_safety.mutation_without_guard",
                expected="mutation_without_guard=true deve ser reportado",
                actual="omitted",
                message="O relatório final omitiu sinal de mutação sem guard de version control.",
                next_action="Reportar o sinal de version_control_safety e classificar se é limitação do harness ou bug do workflow.",
            )
        )

    guard_left_open = safety.resource_guard_active is True and safety.run_finish_seen is False
    if (
        guard_left_open
        and not _mentions_guard_finish_pending(folded_final_text)
        and not _accepts_guard_finish_closed_confirmation(safety, folded_final_text)
        and not _transcript_confirms_guard_finish_closed(transcript)
    ):
        findings.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.RUN_FINISH_OMITTED,
                severity="high",
                source="workflow_payload",
                source_field="version_control_safety.run_finish_seen",
                expected="run_finish_seen=false com resource_guard_active=true deve ser reportado",
                actual="omitted",
                message="O relatório final omitiu que a proteção do vault ainda estava aberta.",
                next_action=(
                    "Fechar a proteção pela rota oficial ou reportar explicitamente que o workflow terminou "
                    "com pendência de proteção/version control."
                ),
                evidence={"sync_status": safety.sync_status},
            )
        )
    return findings
|
|
2742
|
+
|
|
2743
|
+
|
|
2744
|
+
def _transcript_confirms_guard_finish_closed(transcript: object | None) -> bool:
    """Return True when a tool result shows the vault guard was closed.

    A ``tool_result``/``run_command`` event confirms closure either through
    its parsed payload (see ``_payload_confirms_guard_finish_closed``) or,
    as a textual fallback, when its folded output contains all of the
    required tokens.
    """
    if transcript is None:
        return False
    textual_tokens = (
        "vault-run-finish-public",
        "resource_guard_active",
        "false",
        "run_finish_seen",
        "true",
    )
    for event in _iter_transcript_events(transcript):
        if event.event_type.casefold() not in {"tool_result", "run_command"}:
            continue
        output_text = _transcript_tool_output_text(event)
        if _payload_confirms_guard_finish_closed(_json_payload_from_tool_output(output_text)):
            return True
        folded_output = _fold_text(output_text)
        if all(token in folded_output for token in textual_tokens):
            return True
    return False
|
|
2764
|
+
|
|
2765
|
+
|
|
2766
|
+
def _payload_confirms_guard_finish_closed(payload: JsonObject) -> bool:
    """Check a ``vault-run-finish-public.v1`` payload for a closed guard.

    True only when ``version_control_safety`` is a dict with
    ``resource_guard_active is False`` and ``run_finish_seen is True``.
    """
    expected_schema = "medical-notes-workbench.vault-run-finish-public.v1"
    if _optional_text(payload, "schema") != expected_schema:
        return False
    safety = payload.get("version_control_safety")
    if isinstance(safety, dict):
        guard_released = safety.get("resource_guard_active") is False
        finish_recorded = safety.get("run_finish_seen") is True
        return guard_released and finish_recorded
    return False
|
|
2773
|
+
|
|
2774
|
+
|
|
2775
|
+
def _mentions_guard_finish_pending(folded_text: str) -> bool:
|
|
2776
|
+
if not folded_text:
|
|
2777
|
+
return False
|
|
2778
|
+
has_guard = any(
|
|
2779
|
+
marker in folded_text
|
|
2780
|
+
for marker in (
|
|
2781
|
+
"vault_guard",
|
|
2782
|
+
"run_finish",
|
|
2783
|
+
"run-finish",
|
|
2784
|
+
"protecao do vault",
|
|
2785
|
+
"proteção do vault",
|
|
2786
|
+
"version control",
|
|
2787
|
+
"controle de versao",
|
|
2788
|
+
"controle de versão",
|
|
2789
|
+
"alteracoes concorrentes",
|
|
2790
|
+
"alterações concorrentes",
|
|
2791
|
+
"bloqueio de escrita concorrente",
|
|
2792
|
+
"ponto de restauracao",
|
|
2793
|
+
"ponto de restauração",
|
|
2794
|
+
)
|
|
2795
|
+
)
|
|
2796
|
+
has_pending = any(
|
|
2797
|
+
marker in folded_text
|
|
2798
|
+
for marker in (
|
|
2799
|
+
"pendente",
|
|
2800
|
+
"abert",
|
|
2801
|
+
"ativa",
|
|
2802
|
+
"nao encerr",
|
|
2803
|
+
"não encerr",
|
|
2804
|
+
"nao fech",
|
|
2805
|
+
"não fech",
|
|
2806
|
+
"pending_run_finish",
|
|
2807
|
+
)
|
|
2808
|
+
)
|
|
2809
|
+
return has_guard and has_pending
|
|
2810
|
+
|
|
2811
|
+
|
|
2812
|
+
def _accepts_guard_finish_closed_confirmation(
    safety: _AgentReportVersionControlSafetyFields,
    folded_text: str,
) -> bool:
    """Decide whether report text may count as confirmation that the guard closed.

    The text is accepted only when the folded ``safety.agent_instruction``
    contains "antes do run-finish" or "before run-finish", and ``folded_text``
    contains both a guard/version-control marker and a closure marker.
    Empty text is never accepted.
    """
    guard_markers = (
        "protecao do vault",
        "proteção do vault",
        "protecao do repositorio",
        "proteção do repositório",
        "vault guard",
        "vault_guard",
        "version control",
        "controle de versao",
        "controle de versão",
    )
    closed_markers = (
        "encerrad",
        "fechad",
        "finalizad",
        "repositorio limpo",
        "repositório limpo",
        "clean",
    )
    if not folded_text:
        return False
    instruction = _fold_text(safety.agent_instruction)
    instruction_allows = any(
        phrase in instruction for phrase in ("antes do run-finish", "before run-finish")
    )
    if not instruction_allows:
        return False
    mentions_guard = any(marker in folded_text for marker in guard_markers)
    mentions_closed = any(marker in folded_text for marker in closed_markers)
    return mentions_guard and mentions_closed
|
|
2847
|
+
|
|
2848
|
+
|
|
2849
|
+
def _runtime_log_findings(
    payload: JsonObject,
    runtime_log_text: str,
    final_text: str,
    transcript: object | None,
) -> list[AgentRunReportFinding]:
    """Collect every finding derived from the runtime log.

    Aggregates, in order, the performance, route-probe, process-chats
    deletion and specialist-model findings. It then adds one
    ``RUNTIME_ERROR_OMITTED`` finding when the log contains runtime error
    labels that the final report text does not mention through any of the
    label's report markers.
    """
    findings = [
        *_runtime_performance_findings(runtime_log_text),
        *_runtime_route_probe_findings(payload, runtime_log_text),
        *_runtime_process_chats_vault_deletion_findings(payload, runtime_log_text),
        *_runtime_specialist_model_policy_findings(payload, runtime_log_text, transcript),
    ]
    folded_log = _fold_text(runtime_log_text)
    # An empty log cannot carry error labels, so the label scan is skipped.
    error_labels = _runtime_error_labels(folded_log) if folded_log else []
    if not error_labels:
        return findings
    folded_final = _fold_text(final_text)
    unreported: list[str] = []
    for label in error_labels:
        if _folded_contains_any(folded_final, _runtime_error_report_markers(label)):
            continue
        unreported.append(label)
    if unreported:
        findings.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.RUNTIME_ERROR_OMITTED,
                severity="high",
                source="runtime_log",
                source_field="runtime_log",
                expected="erros de runtime/headless devem aparecer no relatório final da rodada",
                actual=", ".join(unreported),
                message="O log do runtime contém erro relevante que o relatório final do agente não reportou.",
                next_action=(
                    "Reescrever o relatório final incorporando o erro do runtime e seu impacto no workflow, "
                    "mesmo quando o processo headless retornou exit code 0."
                ),
                evidence={"runtime_errors": unreported},
            )
        )
    return findings
|
|
2890
|
+
|
|
2891
|
+
|
|
2892
|
+
def _runtime_process_chats_vault_deletion_findings(
    payload: JsonObject,
    runtime_log_text: str,
) -> list[AgentRunReportFinding]:
    """Flag Wiki note deletions seen in the runtime log of a process-chats run.

    Applies only when the payload workflow is ``/mednotes:process-chats``.
    The raw (unfolded) log text is scanned with
    ``PROCESS_CHATS_WIKI_DELETION_RE``; every non-blank ``path`` capture is
    collected, and a single critical finding is returned when any were found.
    """
    if _optional_text(payload, "workflow") != "/mednotes:process-chats":
        return []
    if not runtime_log_text:
        return []
    deleted_paths: list[str] = []
    for match in PROCESS_CHATS_WIKI_DELETION_RE.finditer(runtime_log_text):
        path = match.group("path").strip()
        if path:
            deleted_paths.append(path)
    if not deleted_paths:
        return []
    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.PROCESS_CHATS_VAULT_DELETION_WITHOUT_RECEIPT,
        severity="critical",
        source="runtime_log",
        source_field="git status",
        expected="process-chats não deve apagar notas Wiki sem recibo tipado de merge/delete",
        # The human-facing summary shows at most five paths; evidence keeps up to twenty.
        actual=", ".join(deleted_paths[:5]),
        message=(
            "O runtime observou deleção de nota Wiki durante process-chats sem recibo tipado que autorize essa mutação."
        ),
        next_action=(
            "Parar a rodada, restaurar pelo vault guard/version control e repetir somente pela rota oficial "
            "de canonical merge/delete com receipt validado."
        ),
        evidence={"deleted_paths": deleted_paths[:20]},
    )
    return [finding]
|
|
2926
|
+
|
|
2927
|
+
|
|
2928
|
+
def _runtime_specialist_model_policy_findings(
    payload: JsonObject,
    runtime_log_text: str,
    transcript: object | None,
) -> list[AgentRunReportFinding]:
    """Flag a Flash model selection during a specialist style-rewrite batch.

    A finding is produced only when all of the following hold: the batch
    phase is ``style_rewrite`` with a non-empty current batch, at least one
    batch item requires specialist handling, the last model label selected in
    the runtime log matches ``FLASH_MODEL_RE``, and the transcript shows a
    native specialist invocation.
    """
    batch = _specialist_runtime_batch_from_agent_directive(payload)
    if not (batch.phase == "style_rewrite" and batch.current_batch_items):
        return []

    def needs_specialist(item) -> bool:
        return (
            item.required_model_tier in {"specialist", "pro"}
            or item.preferred_model_tier == "pro"
            or item.model_policy == "medical_specialist_authoring.v1"
            or item.agent == "med-knowledge-architect"
        )

    specialist_items = [item for item in batch.current_batch_items if needs_specialist(item)]
    if not specialist_items:
        return []
    selected_model = _observed_agy_selected_model(runtime_log_text)
    flash_selected = bool(selected_model) and FLASH_MODEL_RE.search(selected_model) is not None
    if not flash_selected:
        return []
    if transcript is None or not _transcript_used_native_specialist_invocation(transcript):
        return []
    evidence = {
        "observed_model": selected_model,
        "transcript_specialist_invocation": "native",
        "work_ids": [item.work_id for item in specialist_items if item.work_id],
        "required_model_tiers": sorted({item.required_model_tier for item in specialist_items}),
        "model_policies": sorted({item.model_policy for item in specialist_items if item.model_policy}),
    }
    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.SPECIALIST_MODEL_POLICY_VIOLATION,
            severity="critical",
            source="runtime_log",
            source_field="runtime_log.selected_model+transcript.specialist_invocation",
            expected="tarefas médicas especializadas exigem modelo especialista/Pro sem fallback para Flash",
            actual=selected_model,
            message="O runtime selecionou Flash durante uma tarefa de reescrita médica especializada.",
            next_action=(
                "Não aplicar outputs desse lote; relançar a tarefa por runner oficial capaz de garantir "
                "modelo especialista/Pro e recibo atestado."
            ),
            evidence=evidence,
        )
    ]
|
|
2973
|
+
|
|
2974
|
+
|
|
2975
|
+
def _transcript_specialist_model_policy_findings(
    payload: JsonObject,
    transcript: object,
) -> list[AgentRunReportFinding]:
    """Inspect transcript ``tool_use`` events for specialist model policy violations.

    Two violations are detected while the batch phase is ``style_rewrite``
    and the batch holds at least one specialist item:

    * an OpenCode task whose metadata reports a missing model id or one that
      matches ``FLASH_MODEL_RE`` (reported once per distinct model id);
    * a command that uses the unverified-specialist-model escape variable
      (reported at most once).
    """
    batch = _specialist_runtime_batch_from_agent_directive(payload)
    if batch.phase != "style_rewrite":
        return []

    def needs_specialist(item) -> bool:
        return (
            item.required_model_tier in {"specialist", "pro"}
            or item.preferred_model_tier == "pro"
            or item.model_policy == "medical_specialist_authoring.v1"
            or item.agent == "med-knowledge-architect"
        )

    specialist_items = [item for item in batch.current_batch_items if needs_specialist(item)]
    if not specialist_items:
        return []

    def specialist_work_ids() -> list[str]:
        # A fresh list per finding so evidence payloads never share state.
        return [item.work_id for item in specialist_items if item.work_id]

    findings: list[AgentRunReportFinding] = []
    reported_keys: set[tuple[str, str]] = set()
    escape_key = ("unverified-specialist-model-escape", "public-workflow")
    for event in _iter_transcript_events(transcript):
        if event.event_type.casefold() != "tool_use":
            continue
        task_metadata = _opencode_task_metadata_from_event(event)
        if task_metadata is not None:
            task_model = task_metadata.model_id
            model_rejected = not task_model or FLASH_MODEL_RE.search(task_model)
            model_key = ("opencode-task-model", task_model)
            if model_rejected and model_key not in reported_keys:
                reported_keys.add(model_key)
                findings.append(
                    AgentRunReportFinding(
                        code=AgentRunReportFindingCode.SPECIALIST_MODEL_POLICY_VIOLATION,
                        severity="critical",
                        source="transcript",
                        source_field="transcript.tool_use.parameters.metadata.model_id",
                        expected=(
                            "OpenCode task especialista deve provar modelo especialista/Pro via "
                            "opencode_task_metadata, sem fallback para Flash/Lite/Nano"
                        ),
                        actual=task_model or "<missing>",
                        message=(
                            "A task OpenCode de autoria médica especializada registrou modelo ausente "
                            "ou proibido pela política de modelo."
                        ),
                        next_action=(
                            "Descartar outputs sem recibo valido e repetir a task OpenCode com modelo "
                            "especialista aceito antes de aplicar."
                        ),
                        evidence={
                            "harness": "opencode",
                            "observed_model": task_model,
                            "provider_id": task_metadata.provider_id,
                            "task_id": task_metadata.task_id,
                            "work_id": task_metadata.work_id,
                            "work_ids": specialist_work_ids(),
                        },
                    )
                )
        # The escape check runs for every tool_use event, including OpenCode tasks.
        command = _event_parameter_text(event, "command")
        if not _command_uses_unverified_specialist_model_escape(command):
            continue
        if escape_key in reported_keys:
            continue
        reported_keys.add(escape_key)
        findings.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.SPECIALIST_MODEL_POLICY_VIOLATION,
                severity="critical",
                source="transcript",
                source_field="transcript.tool_use.parameters.command.env",
                expected=(
                    "fluxo publico não deve usar dev-escape para aceitar modelo especialista "
                    "não verificado pelo Workbench"
                ),
                actual="MEDNOTES_ALLOW_UNVERIFIED_SPECIALIST_MODEL",
                message=(
                    "O agente tentou contornar a proveniência de modelo especialista com variável "
                    "de escape de desenvolvedor."
                ),
                next_action=(
                    "Descartar o output desse item, reportar a violação e retomar pela rota oficial "
                    "com recibo/proveniência validada pelo Workbench."
                ),
                evidence={
                    "work_ids": specialist_work_ids(),
                    "tool_name": event.tool_name,
                },
            )
        )
    return findings
|
|
3064
|
+
|
|
3065
|
+
|
|
3066
|
+
def _command_uses_unverified_specialist_model_escape(command: str) -> bool:
|
|
3067
|
+
if "MEDNOTES_ALLOW_UNVERIFIED_SPECIALIST_MODEL" not in command:
|
|
3068
|
+
return False
|
|
3069
|
+
return "finalize-style-rewrite-output" in command or "apply-specialist-style-rewrite" in command
|
|
3070
|
+
|
|
3071
|
+
|
|
3072
|
+
def _style_rewrite_batch_progress_checkpoint_findings(
    payload: JsonObject,
    transcript: object,
) -> list[AgentRunReportFinding]:
    """Detect a next-batch command issued before the applied batch was reported.

    Walks the transcript in order. After a real style-rewrite apply command,
    assistant/model messages are buffered. If a next-batch command arrives
    while the buffered messages do not yet look like a batch report, one
    ``BATCH_PROGRESS_REPORT_OMITTED`` finding is returned immediately.
    Nothing is checked unless the phase is ``style_rewrite`` and the report
    contract requests a report after each batch.
    """
    batch = _specialist_runtime_batch_from_agent_directive(payload)
    if batch.phase != "style_rewrite" or not batch.report_contract.after_each_batch:
        return []
    awaiting_report = False
    narrated: list[str] = []
    for event in _iter_transcript_events(transcript):
        kind = event.event_type.casefold()
        if kind == "message":
            role = (event.role or _event_parameter_text(event, "role")).casefold()
            if awaiting_report and role in {"", "assistant", "model"}:
                text = _transcript_message_text(event.content)
                if text.strip():
                    narrated.append(text)
            continue
        # Only tool_use events can carry commands; tool results and other kinds are ignored.
        if kind != "tool_use":
            continue
        command = _event_parameter_text(event, "command")
        if not command:
            continue
        # A satisfactory report written since the last apply clears the pending checkpoint.
        if awaiting_report and _looks_like_style_rewrite_batch_report("\n".join(narrated)):
            awaiting_report = False
            narrated = []
        if _is_real_style_rewrite_apply_command(command):
            awaiting_report = True
            narrated = []
            continue
        if not (awaiting_report and _is_next_style_rewrite_batch_command(command)):
            continue
        return [
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.BATCH_PROGRESS_REPORT_OMITTED,
                severity="high",
                source="transcript",
                source_field="transcript.tool_use.parameters.command",
                expected=(
                    "após aplicar um lote de style-rewrite, o agente deve emitir resumo humano "
                    "com qualidade, preservação e pendências antes de planejar/rodar a próxima leva"
                ),
                actual=command,
                message=(
                    "O agente continuou a próxima etapa de reescrita sem cumprir o checkpoint de relatório do lote."
                ),
                next_action=(
                    "Interromper a conclusão da rodada, reportar o lote aplicado em termos humanos e só então "
                    "retomar a próxima leva pela rota oficial."
                ),
                evidence={
                    "command": command,
                    "batch_work_ids": [item.work_id for item in batch.current_batch_items if item.work_id],
                },
            )
        ]
    return []
|
|
3132
|
+
|
|
3133
|
+
|
|
3134
|
+
def _specialist_rewrite_count_findings(transcript: object, final_text: str) -> list[AgentRunReportFinding]:
    """Compare the rewrite count claimed in the final report with observed applies.

    Returns one ``SPECIALIST_REWRITE_COUNT_MISMATCH`` finding when the report
    states a count that differs from the number of distinct work ids applied
    in the transcript. No finding is produced when nothing was applied or
    when the report states no count.
    """
    applied_ids = _applied_specialist_rewrite_work_ids(transcript)
    if not applied_ids:
        return []
    claimed = _reported_specialist_rewrite_count(final_text)
    observed = len(applied_ids)
    if claimed is None or claimed == observed:
        return []
    mismatch = AgentRunReportFinding(
        code=AgentRunReportFindingCode.SPECIALIST_REWRITE_COUNT_MISMATCH,
        severity="high",
        source="transcript",
        source_field="tool_result.output.style_rewrite_applied_count",
        expected=str(observed),
        actual=str(claimed),
        message="O relatório final declarou uma contagem de notas reescritas diferente dos applies oficiais observados.",
        next_action=(
            "Reescrever o relatório final usando a contagem real de applies oficiais e listar qualquer item aplicado, "
            "bloqueado ou pendente sem arredondar a evidência."
        ),
        evidence={"work_ids": applied_ids},
    )
    return [mismatch]
|
|
3157
|
+
|
|
3158
|
+
|
|
3159
|
+
def _applied_specialist_rewrite_work_ids(transcript: object) -> list[str]:
    """List the distinct work ids written by atomic style-rewrite applies.

    Only ``tool_result`` / ``run_command`` events whose JSON output carries
    one of the atomic-apply schemas are considered, and outputs with status
    ``blocked``, ``failed`` or ``waiting_external`` are skipped. Both the
    payload and its nested ``apply`` object are tried against
    ``StyleRewriteAtomicApplyResult``; candidates that fail validation are
    ignored. Ids are returned in first-seen order.
    """
    accepted_schemas = {
        "medical-notes-workbench.style-rewrite-atomic-apply-agent-stdout.v1",
        "medical-notes-workbench.style-rewrite-atomic-apply-result.v1",
    }
    skipped_statuses = {"blocked", "failed", "waiting_external"}
    # dict keys keep insertion order, which gives ordered de-duplication.
    ordered_ids: dict[str, None] = {}

    def remember(value: object) -> None:
        cleaned = str(value or "").strip()
        if cleaned:
            ordered_ids.setdefault(cleaned, None)

    for event in _iter_transcript_events(transcript):
        if event.event_type.casefold() not in {"tool_result", "run_command"}:
            continue
        payload = _json_payload_from_tool_output(_transcript_tool_output_text(event))
        if _optional_text(payload, "schema") not in accepted_schemas:
            continue
        if _optional_text(payload, "status").casefold() in skipped_statuses:
            continue
        nested_apply = _object_field(payload, "apply")
        candidates = [payload, nested_apply] if nested_apply else [payload]
        for candidate in candidates:
            try:
                apply_result = StyleRewriteAtomicApplyResult.model_validate(candidate)
            except ValidationError:
                continue
            fallback_id = (apply_result.work_id or _optional_text(payload, "work_id")).strip()
            for item in apply_result.items:
                if item.written:
                    remember(item.work_id or fallback_id)
            # A positive written count also credits the result-level work id.
            if apply_result.written_count > 0:
                remember(fallback_id)
    return list(ordered_ids)
|
|
3195
|
+
|
|
3196
|
+
|
|
3197
|
+
def _reported_specialist_rewrite_count(final_text: str) -> int | None:
    """Return the first rewrite count claimed in the final report, if any.

    The folded report text is scanned with
    ``SPECIALIST_REWRITE_COUNT_CLAIM_RE``; the ``count`` group of the first
    match is converted with ``_as_int``. ``None`` means no claim was found.
    """
    first_claim = next(SPECIALIST_REWRITE_COUNT_CLAIM_RE.finditer(_fold_text(final_text)), None)
    if first_claim is None:
        return None
    return _as_int(first_claim.group("count"))
|
|
3202
|
+
|
|
3203
|
+
|
|
3204
|
+
def _tool_result_has_style_rewrite_progress_checkpoint(output: str) -> bool:
    """Check whether tool output embeds a usable human progress checkpoint.

    The output's JSON payload is either the checkpoint itself or, for the
    atomic-apply agent stdout schema, a wrapper holding it under
    ``human_progress_checkpoint``. The checkpoint must be a dict with the
    checkpoint schema id, and its summary fields plus ``preserved`` entries
    must read like a batch report.
    """
    payload = _json_payload_from_tool_output(output)
    if not payload:
        return False
    is_wrapper = _optional_text(payload, "schema") == "medical-notes-workbench.style-rewrite-atomic-apply-agent-stdout.v1"
    checkpoint: object = payload.get("human_progress_checkpoint") if is_wrapper else payload
    if not isinstance(checkpoint, dict):
        return False
    if checkpoint.get("schema") != "medical-notes-workbench.style-rewrite-human-progress-checkpoint.v1":
        return False
    sections = [
        str(checkpoint.get(key) or "")
        for key in ("summary", "content_quality", "linker_summary", "remaining_summary")
    ]
    preserved = checkpoint.get("preserved")
    if isinstance(preserved, list):
        # Appended as one pre-joined section so an empty list still adds a trailing newline.
        sections.append("\n".join(str(entry) for entry in preserved))
    return _looks_like_style_rewrite_batch_report("\n".join(sections))
|
|
3228
|
+
|
|
3229
|
+
|
|
3230
|
+
def _is_real_style_rewrite_apply_command(command: str) -> bool:
    """Return True for commands that actually apply a style rewrite.

    ``apply-specialist-style-rewrite`` always counts. ``apply-style-rewrite``
    counts only when the folded command does not contain ``--dry-run``.
    """
    folded = _fold_text(command)
    specialist_apply = "apply-specialist-style-rewrite" in folded
    plain_apply = "apply-style-rewrite" in folded and "--dry-run" not in folded
    return specialist_apply or plain_apply
|
|
3235
|
+
|
|
3236
|
+
|
|
3237
|
+
def _is_next_style_rewrite_batch_command(command: str) -> bool:
    """Return True for commands that plan or start the next style-rewrite batch.

    Matches a folded command containing both ``plan-subagents`` and
    ``style-rewrite``, or both ``fix-wiki`` and ``--apply``.
    """
    folded = _fold_text(command)
    plans_next_batch = "plan-subagents" in folded and "style-rewrite" in folded
    reruns_fix_wiki = "fix-wiki" in folded and "--apply" in folded
    return plans_next_batch or reruns_fix_wiki
|
|
3242
|
+
|
|
3243
|
+
|
|
3244
|
+
def _looks_like_style_rewrite_batch_report(text: str) -> bool:
    """Heuristically decide whether text is a human batch report.

    After folding, the text must contain at least one marker from each of
    four groups: batch, quality, preservation and remaining work. Text that
    folds to empty is rejected.
    """
    required_groups = (
        ("lote", "batch"),
        ("qualidade", "quality"),
        (
            "yaml",
            "proveniencia",
            "proveniência",
            "links preserv",
            "preservou links",
            "preserved links",
        ),
        (
            "restam",
            "restante",
            "remaining",
            "pendente",
            "faltam",
            "continua",
        ),
    )
    folded = _fold_text(text)
    if not folded:
        return False
    return all(any(marker in folded for marker in group) for group in required_groups)
|
|
3273
|
+
|
|
3274
|
+
|
|
3275
|
+
def _command_argument(command: str, option: str) -> str:
|
|
3276
|
+
try:
|
|
3277
|
+
parts = shlex.split(command)
|
|
3278
|
+
except ValueError:
|
|
3279
|
+
pattern = re.compile(rf"{re.escape(option)}\s+(?P<value>\S+)")
|
|
3280
|
+
match = pattern.search(command)
|
|
3281
|
+
return match.group("value") if match else ""
|
|
3282
|
+
for index, part in enumerate(parts[:-1]):
|
|
3283
|
+
if part == option:
|
|
3284
|
+
return parts[index + 1]
|
|
3285
|
+
return ""
|
|
3286
|
+
|
|
3287
|
+
|
|
3288
|
+
def _observed_runtime_model(runtime_log_text: str) -> str:
    """Return the model observed in the runtime log, or "" when none is found.

    Prefers the last label matched by ``AGY_SELECTED_MODEL_RE``. If the log
    has no such match at all, falls back to the first ``FLASH_MODEL_RE``
    match in the raw text.
    """
    last_label = None
    for match in AGY_SELECTED_MODEL_RE.finditer(runtime_log_text):
        last_label = match.group("label").strip()
    if last_label is not None:
        return last_label
    fallback = FLASH_MODEL_RE.search(runtime_log_text)
    return "" if fallback is None else fallback.group(0)
|
|
3294
|
+
|
|
3295
|
+
|
|
3296
|
+
def _observed_agy_selected_model(runtime_log_text: str) -> str:
    """Return the last model label matched by ``AGY_SELECTED_MODEL_RE``, or ""."""
    selected = ""
    for match in AGY_SELECTED_MODEL_RE.finditer(runtime_log_text):
        # Later selections override earlier ones.
        selected = match.group("label").strip()
    return selected
|
|
3299
|
+
|
|
3300
|
+
|
|
3301
|
+
def _runtime_performance_findings(runtime_log_text: str) -> list[AgentRunReportFinding]:
    """Turn sustained high-CPU streaks in the runtime log into findings.

    CPU samples are processed in elapsed-time order and grouped into streaks
    per command family. A streak grows while its family's samples stay at or
    above ``HIGH_CPU_PERCENT_THRESHOLD``. It is closed and evaluated when a
    sample from another family arrives, when its own family drops below the
    threshold, or when the samples run out.
    """
    samples = _runtime_cpu_samples(runtime_log_text)
    sample_total = len(samples)
    findings: list[AgentRunReportFinding] = []
    open_streaks: dict[str, list[_RuntimeCpuSample]] = {}

    def close_streak(family: str) -> None:
        findings.extend(
            _runtime_performance_findings_for_family(
                family,
                open_streaks.pop(family),
                total_sample_count=sample_total,
            )
        )

    for sample in sorted(samples, key=lambda item: item.elapsed_seconds):
        family = _cpu_command_family(sample.max_cpu_command)
        # Any streak belonging to a different family ends as soon as this sample appears.
        for other_family in [name for name in open_streaks if name != family]:
            close_streak(other_family)
        is_hot = max(sample.total_cpu_percent, sample.max_cpu_percent) >= HIGH_CPU_PERCENT_THRESHOLD
        if is_hot:
            open_streaks.setdefault(family, []).append(sample)
        elif family in open_streaks:
            close_streak(family)
    # Streaks still open when the log ends are evaluated as well.
    for family in list(open_streaks):
        close_streak(family)
    return findings
|
|
3337
|
+
|
|
3338
|
+
|
|
3339
|
+
def _runtime_route_probe_findings(
    payload: JsonObject,
    runtime_log_text: str,
) -> list[AgentRunReportFinding]:
    """Flag exploratory search commands seen during a terminal process-chats run.

    Applies only when the payload describes a completed process-chats run
    with nothing pending. The top-CPU command of each CPU sample is tested
    with ``_is_route_probe_command``; distinct probe commands, in first-seen
    order, are reported in a single medium-severity finding.
    """
    if not _is_process_chats_terminal_no_pending(payload):
        return []
    # dict keys give ordered de-duplication of the probe commands.
    probes: dict[str, None] = {}
    for sample in _runtime_cpu_samples(runtime_log_text):
        if _is_route_probe_command(sample.max_cpu_command):
            probes.setdefault(sample.max_cpu_command, None)
    if not probes:
        return []
    unique_commands = list(probes)
    finding = AgentRunReportFinding(
        code=AgentRunReportFindingCode.RUNTIME_ROUTE_PROBE_OBSERVED,
        severity="medium",
        source="runtime_log",
        source_field="runtime_log.cpu_samples.max_cpu_command",
        expected=(
            "process-chats terminal sem chats novos deve executar a checagem oficial direta "
            "sem probes recursivos de descoberta"
        ),
        # Each command is clipped to 160 characters in the human-facing summary.
        actual="; ".join(command[:160] for command in unique_commands),
        message=(
            "O runtime registrou busca/probe recursivo durante um fluxo terminal simples; "
            "isso é atrito de rota e deve aparecer no relatório da rodada."
        ),
        next_action=(
            "Endurecer launcher/runbook ou harness para iniciar pela porta pública `list-pending --summary` "
            "sem busca exploratória, e repetir a rodada validando transcript/runtime log."
        ),
        evidence={
            "schema": CPU_SAMPLE_SCHEMA,
            "commands": unique_commands[:5],
        },
    )
    return [finding]
|
|
3378
|
+
|
|
3379
|
+
|
|
3380
|
+
def _is_process_chats_terminal_no_pending(payload: JsonObject) -> bool:
    """Return True when the payload is a completed process-chats run with no pending work.

    The workflow must be ``/mednotes:process-chats`` with status
    ``completed``. "No pending work" is then recognized by any one of: the
    terminal state ``no_pending``, the ``pending_backlog`` phase with backlog
    state ``no_pending_raws``, or both item counts being zero.
    """
    field_names = (
        "workflow",
        "status",
        "phase",
        "process_chats_terminal_state",
        "process_chats_backlog_state",
        "item_count",
        "total_available_count",
    )
    fields = _ProcessChatsTerminalFields.model_validate(_field_payload(payload, field_names))
    if fields.workflow != "/mednotes:process-chats" or fields.status != "completed":
        return False
    terminal_no_pending = fields.process_chats_terminal_state == "no_pending"
    backlog_empty = fields.phase == "pending_backlog" and fields.process_chats_backlog_state == "no_pending_raws"
    counts_zero = fields.item_count == 0 and fields.total_available_count == 0
    return terminal_no_pending or backlog_empty or counts_zero
|
|
3404
|
+
|
|
3405
|
+
|
|
3406
|
+
def _is_route_probe_command(command: str) -> bool:
|
|
3407
|
+
parts = shlex.split(command)
|
|
3408
|
+
if not parts:
|
|
3409
|
+
return False
|
|
3410
|
+
executable = Path(parts[0]).name
|
|
3411
|
+
if executable == "grep" and "-r" in parts:
|
|
3412
|
+
return True
|
|
3413
|
+
if executable in {"find", "mdfind"}:
|
|
3414
|
+
return True
|
|
3415
|
+
if executable in {"rg", "ripgrep"} and any(part in {"-g", "--glob", "--files"} for part in parts):
|
|
3416
|
+
return True
|
|
3417
|
+
return False
|
|
3418
|
+
|
|
3419
|
+
|
|
3420
|
+
def _runtime_performance_findings_for_family(
    command_family: str,
    high_samples: list[_RuntimeCpuSample],
    *,
    total_sample_count: int,
) -> list[AgentRunReportFinding]:
    """Evaluate one high-CPU streak and report it when it is long enough.

    A streak is reported only when it has at least
    ``HIGH_CPU_MIN_SAMPLE_COUNT`` samples and its estimated span reaches
    ``HIGH_CPU_MIN_SPAN_SECONDS``. The finding records the peak total and
    per-process CPU and the command of the hottest sample.
    """
    if len(high_samples) < HIGH_CPU_MIN_SAMPLE_COUNT:
        return []
    observed_span = _estimated_high_cpu_span_seconds(high_samples)
    if observed_span < HIGH_CPU_MIN_SPAN_SECONDS:
        return []

    def sample_peak(sample) -> float:
        return max(sample.total_cpu_percent, sample.max_cpu_percent)

    peak_total = max(sample.total_cpu_percent for sample in high_samples)
    peak_process = max(sample.max_cpu_percent for sample in high_samples)
    peak_overall = max(peak_total, peak_process)
    hottest = max(high_samples, key=sample_peak)
    evidence = {
        "schema": CPU_SAMPLE_SCHEMA,
        "command_family": command_family,
        "sample_count": len(high_samples),
        "total_sample_count": total_sample_count,
        "threshold_percent": HIGH_CPU_PERCENT_THRESHOLD,
        "observed_span_seconds": round(observed_span, 2),
        "max_cpu_percent": round(peak_overall, 2),
        "max_total_cpu_percent": round(peak_total, 2),
        "max_process_cpu_percent": round(peak_process, 2),
        # The command is clipped so a very long command line cannot bloat the report.
        "max_cpu_command": hottest.max_cpu_command[:500],
    }
    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.RUNTIME_PERFORMANCE_BUG,
            severity="medium",
            source="runtime_log",
            source_field="runtime_log.cpu_samples",
            expected="workflow longo deve manter CPU sob controle ou reportar progresso claro antes de monopolizar a sessão",
            actual=(
                f"{len(high_samples)} amostras acima de {HIGH_CPU_PERCENT_THRESHOLD:.0f}% "
                f"por {observed_span:.1f}s; pico={peak_overall:.1f}%"
            ),
            message="A execução registrou CPU alta sustentada; isso é bug de performance/UX do workflow.",
            next_action=(
                "Investigar a fase do workflow que monopolizou CPU, adicionar progresso/limites quando necessário "
                "e reportar o impacto na próxima rodada de experimento."
            ),
            evidence=evidence,
        )
    ]
|
|
3465
|
+
|
|
3466
|
+
|
|
3467
|
+
def _estimated_high_cpu_span_seconds(high_samples: list[_RuntimeCpuSample]) -> float:
    """Estimate how long a streak of high-CPU samples lasted, in seconds.

    The span is the distance between the earliest and latest sample plus one
    sampling window, where the window is taken as the smallest positive gap
    between consecutive samples (0.0 when all timestamps coincide). Fewer
    than two samples yield 0.0.
    """
    timeline = sorted(sample.elapsed_seconds for sample in high_samples)
    if len(timeline) < 2:
        return 0.0
    positive_gaps = [
        later - earlier
        for earlier, later in zip(timeline, timeline[1:], strict=False)
        if later > earlier
    ]
    sample_window = min(positive_gaps, default=0.0)
    # timeline is sorted, so its ends are the minimum and maximum.
    return timeline[-1] - timeline[0] + sample_window
|
|
3478
|
+
|
|
3479
|
+
|
|
3480
|
+
def _cpu_command_family(command: str) -> str:
|
|
3481
|
+
folded = command.casefold()
|
|
3482
|
+
if "mednotes/wiki/cli.py" in folded or "fix-wiki --apply" in folded:
|
|
3483
|
+
return "workbench_cli"
|
|
3484
|
+
if "/gemini" in folded or " gemini " in folded or folded.startswith("gemini "):
|
|
3485
|
+
return "external_model_runtime"
|
|
3486
|
+
return "other"
|
|
3487
|
+
|
|
3488
|
+
|
|
3489
|
+
def _runtime_cpu_samples(runtime_log_text: str) -> list[_RuntimeCpuSample]:
    """Extract the CPU sample records embedded in a runtime log.

    Each log line that starts with ``{`` (after stripping) is parsed as a
    ``_RuntimeCpuSample``. Lines that fail to parse are skipped, and only
    samples whose ``schema_id`` equals ``CPU_SAMPLE_SCHEMA`` are kept, in
    log order.
    """

    def parse(line: str):
        text = line.strip()
        if not text.startswith("{"):
            return None
        try:
            return _RuntimeCpuSample.model_validate_json(text)
        except ValueError:
            # Not a CPU sample (or not valid JSON at all): ignore the line.
            return None

    parsed = (parse(line) for line in runtime_log_text.splitlines())
    return [
        sample
        for sample in parsed
        if sample is not None and sample.schema_id == CPU_SAMPLE_SCHEMA
    ]
|
|
3502
|
+
|
|
3503
|
+
|
|
3504
|
+
def _runtime_error_labels(folded_log: str) -> list[str]:
|
|
3505
|
+
labels: list[str] = []
|
|
3506
|
+
if "resource_exhausted" in folded_log or "code 429" in folded_log or " 429 " in folded_log:
|
|
3507
|
+
labels.append("RESOURCE_EXHAUSTED/429 quota")
|
|
3508
|
+
if "etimedout" in folded_log or "read timed out" in folded_log:
|
|
3509
|
+
labels.append("specialist model runtime timeout")
|
|
3510
|
+
if "agent executor error" in folded_log:
|
|
3511
|
+
labels.append("agent executor error")
|
|
3512
|
+
recovered_antigravity_auth = (
|
|
3513
|
+
"you are not logged into antigravity" in folded_log
|
|
3514
|
+
and ("auth succeeded" in folded_log or "silent auth succeeded" in folded_log)
|
|
3515
|
+
and "authentication timed out" not in folded_log
|
|
3516
|
+
)
|
|
3517
|
+
if (
|
|
3518
|
+
"authentication timed out" in folded_log
|
|
3519
|
+
or ("you are not logged into antigravity" in folded_log and not recovered_antigravity_auth)
|
|
3520
|
+
):
|
|
3521
|
+
labels.append("antigravity authentication transient")
|
|
3522
|
+
return labels
|
|
3523
|
+
|
|
3524
|
+
|
|
3525
|
+
def _runtime_error_report_markers(label: str) -> tuple[str, ...]:
    """Map a runtime error label to the markers that count as reporting it.

    The label is folded with ``_fold_text`` and classified by keyword
    (quota, timeout, executor, authentication). A label that matches no
    family is returned unchanged as a one-element tuple.
    """
    folded = _fold_text(label)
    if any(keyword in folded for keyword in ("resource_exhausted", "429", "quota")):
        return ("resource_exhausted", "429", "quota", "cota", "cota 429")
    if "timeout" in folded:
        return (
            "etimedout",
            "read etimedout",
            "read timed out",
            "timeout",
            "tempo esgotado",
            "modelo especialista",
        )
    if "executor" in folded:
        return ("agent executor error", "executor", "erro de executor")
    if any(keyword in folded for keyword in ("authentication", "antigravity")):
        return (
            "not logged into antigravity",
            "authentication timed out",
            "auth timed out",
            "antigravity",
            "autenticacao",
            "autenticação",
        )
    return (label,)
|
|
3550
|
+
|
|
3551
|
+
|
|
3552
|
+
def _content_quality_audit_findings(
    payload: JsonObject,
    folded_final_text: str,
    *,
    final_report_incomplete: bool = False,
) -> list[AgentRunReportFinding]:
    """Report a missing content-quality audit in the final report.

    Returns an empty list when the report is flagged incomplete, when the
    report contract does not list ``content_quality_audit`` in
    ``must_include``, or when the folded final text either contains the
    audit or explains why it is not applicable. Otherwise returns a single
    ``CONTENT_QUALITY_AUDIT_OMITTED`` finding.
    """
    if final_report_incomplete:
        return []
    contract = _specialist_runtime_batch_from_agent_directive(payload).report_contract
    if "content_quality_audit" not in set(contract.must_include):
        return []
    audit_addressed = _mentions_content_quality_audit(
        folded_final_text
    ) or _mentions_content_quality_audit_not_applicable(folded_final_text)
    if audit_addressed:
        return []
    omission = AgentRunReportFinding(
        code=AgentRunReportFindingCode.CONTENT_QUALITY_AUDIT_OMITTED,
        severity="high",
        source="workflow_payload",
        source_field="agent_directive.control.effects[].payload.report_contract.must_include",
        expected="auditoria de conteúdo/qualidade antes-depois das notas reescritas",
        actual="omitted",
        message="O relatório final omitiu a auditoria de conteúdo exigida para notas reescritas.",
        next_action=(
            "Reescrever o relatório final com auditoria antes/depois por nota: preservação de YAML/proveniência/links, "
            "qualidade clínica/didática e classificação resolvida/parcial/não resolvida/piorou."
        ),
    )
    return [omission]
|
|
3583
|
+
|
|
3584
|
+
|
|
3585
|
+
def _mentions_content_quality_audit(folded_text: str) -> bool:
|
|
3586
|
+
has_audit = any(
|
|
3587
|
+
marker in folded_text
|
|
3588
|
+
for marker in ("auditoria de conteudo", "auditoria de qualidade", "content quality audit")
|
|
3589
|
+
)
|
|
3590
|
+
has_before_after = any(
|
|
3591
|
+
marker in folded_text
|
|
3592
|
+
for marker in ("antes/depois", "antes e depois", "before/after")
|
|
3593
|
+
)
|
|
3594
|
+
has_quality = any(
|
|
3595
|
+
marker in folded_text
|
|
3596
|
+
for marker in ("qualidade clinica", "qualidade de conteudo", "bug de conteudo", "bug de ux")
|
|
3597
|
+
)
|
|
3598
|
+
has_outcome_classification = any(
|
|
3599
|
+
marker in folded_text
|
|
3600
|
+
for marker in ("resolvid", "parcial", "nao resolvid", "não resolvid", "piorou")
|
|
3601
|
+
)
|
|
3602
|
+
return has_audit and has_before_after and has_quality and has_outcome_classification
|
|
3603
|
+
|
|
3604
|
+
|
|
3605
|
+
def _mentions_content_quality_audit_not_applicable(folded_text: str) -> bool:
|
|
3606
|
+
has_specialist_block = any(
|
|
3607
|
+
marker in folded_text
|
|
3608
|
+
for marker in (
|
|
3609
|
+
"specialist_model_quota_exhausted",
|
|
3610
|
+
"cota do modelo",
|
|
3611
|
+
"cota de uso do modelo",
|
|
3612
|
+
"cota no modelo",
|
|
3613
|
+
"quota do modelo",
|
|
3614
|
+
"capacidade do modelo",
|
|
3615
|
+
"capacidade externa do modelo",
|
|
3616
|
+
"limitacoes temporarias de cota",
|
|
3617
|
+
"limitações temporárias de cota",
|
|
3618
|
+
"modelo especialista",
|
|
3619
|
+
"modelo medico",
|
|
3620
|
+
"modelo médico",
|
|
3621
|
+
"modelo especializado",
|
|
3622
|
+
"modelo medico especializado",
|
|
3623
|
+
"modelo médico especializado",
|
|
3624
|
+
"modelo de ia especializado",
|
|
3625
|
+
"bloqueio imediato do modelo",
|
|
3626
|
+
"reescrita medica especializada",
|
|
3627
|
+
"reescrita médica especializada",
|
|
3628
|
+
"conteudo gerado",
|
|
3629
|
+
"conteúdo gerado",
|
|
3630
|
+
"criterios de estilo",
|
|
3631
|
+
"critérios de estilo",
|
|
3632
|
+
"visual didatico pendente",
|
|
3633
|
+
"visual didático pendente",
|
|
3634
|
+
)
|
|
3635
|
+
)
|
|
3636
|
+
has_rewrite_context = any(marker in folded_text for marker in ("reescrita", "rewrite", "conteudo clinico"))
|
|
3637
|
+
has_no_applied_output = any(
|
|
3638
|
+
marker in folded_text
|
|
3639
|
+
for marker in (
|
|
3640
|
+
"bloquead",
|
|
3641
|
+
"bloqueio",
|
|
3642
|
+
"interrompid",
|
|
3643
|
+
"nao foi aplicad",
|
|
3644
|
+
"não foi aplicad",
|
|
3645
|
+
"nenhuma nota",
|
|
3646
|
+
"nao avaliad",
|
|
3647
|
+
"não avaliad",
|
|
3648
|
+
"pendente",
|
|
3649
|
+
)
|
|
3650
|
+
)
|
|
3651
|
+
return has_specialist_block and has_rewrite_context and has_no_applied_output
|
|
3652
|
+
|
|
3653
|
+
|
|
3654
|
+
def _api_accounting_findings(payload: JsonObject, folded_final_text: str) -> list[AgentRunReportFinding]:
    """Flag a final report that denies API work despite generated embeddings.

    Reads ``embedded_count`` from
    ``diagnostic_context.related_notes_export_recovery.headless_export``.
    When that count is positive and the folded final text matches one of
    the "zero API calls" phrasings, a single ``API_ACCOUNTING_MISMATCH``
    finding is returned; otherwise the list is empty.
    """
    diagnostic_context = _object_field(payload, "diagnostic_context")
    export_recovery = _object_field(diagnostic_context, "related_notes_export_recovery")
    headless_export = _object_field(export_recovery, "headless_export")
    headless = _AgentReportHeadlessExportFields.model_validate(
        _field_payload(headless_export, ("embedded_count",))
    )
    embedded_count = _as_int(headless.embedded_count)
    if embedded_count <= 0:
        return []
    zero_api_claim = re.search(
        r"(api_calls\s*[:=]\s*0|0\s+chamadas?\s+(?:a|à|ao|de)?\s*api|"
        r"n[aã]o\s+houve\s+chamadas?|sem\s+chamadas?|no\s+api)",
        folded_final_text,
    )
    if zero_api_claim is None:
        return []
    mismatch = AgentRunReportFinding(
        code=AgentRunReportFindingCode.API_ACCOUNTING_MISMATCH,
        severity="medium",
        source="workflow_payload",
        source_field="diagnostic_context.related_notes_export_recovery.headless_export.embedded_count",
        expected="relatório deve reconciliar embedded_count antes de afirmar zero chamadas de API",
        actual=f"embedded_count={embedded_count}",
        message="O relatório final afirmou zero trabalho de API apesar de o payload indicar embeddings gerados.",
        next_action="Explicar a diferença entre api_calls do workflow e embedded_count do export, ou corrigir os contadores.",
    )
    return [mismatch]
|
|
3688
|
+
|
|
3689
|
+
|
|
3690
|
+
def _omitted_operational_warning_findings(
    diagnostic: JsonObject,
    folded_final_text: str,
) -> list[AgentRunReportFinding]:
    """Flag a final report that omits the ``catalog_missing`` graph warning.

    Returns one ``OPERATIONAL_WARNING_OMITTED`` finding when the graph
    audit warnings include ``catalog_missing`` and the folded final text
    never mentions ``catalog``; otherwise returns an empty list.
    """
    warning_codes = {
        str(entry.get("code") or "")
        for entry in _collect_graph_warnings(diagnostic)
        if isinstance(entry, dict)
    }
    catalog_warning_raised = "catalog_missing" in warning_codes
    if not catalog_warning_raised or "catalog" in folded_final_text:
        return []
    omission = AgentRunReportFinding(
        code=AgentRunReportFindingCode.OPERATIONAL_WARNING_OMITTED,
        severity="medium",
        source="workflow_payload",
        source_field="diagnostic_context.graph_audit_final.warnings",
        expected="warning catalog_missing deve aparecer no relatório de experimento",
        actual="omitted",
        message="O relatório final omitiu warning operacional catalog_missing.",
        next_action="Reportar o warning e decidir se CATALOGO_WIKI.json é legado ou artefato ainda obrigatório.",
    )
    return [omission]
|
|
3710
|
+
|
|
3711
|
+
|
|
3712
|
+
def _collect_agent_events(value: object) -> list[JsonObject]:
    """Collect the distinct ``agent_events`` found anywhere inside *value*.

    Walks nested dicts and lists depth-first. Events are de-duplicated by
    their ``(code, type, phase)`` triple; the first occurrence wins and
    discovery order is kept.
    """
    collected: list[JsonObject] = []
    known_keys: set[tuple[str, str, str]] = set()

    def walk(node: object) -> None:
        if isinstance(node, list):
            for element in node:
                walk(element)
            return
        if not isinstance(node, dict):
            return
        listed_events = node.get("agent_events")
        if isinstance(listed_events, list):
            for raw_event in listed_events:
                if not isinstance(raw_event, dict):
                    continue
                event_payload = _json_object(raw_event)
                identity = (
                    _optional_text(event_payload, "code"),
                    _optional_text(event_payload, "type"),
                    _optional_text(event_payload, "phase"),
                )
                if identity not in known_keys:
                    known_keys.add(identity)
                    collected.append(event_payload)
        # Descend into every container value, including the events list itself.
        for nested in node.values():
            if isinstance(nested, (dict, list)):
                walk(nested)

    walk(value)
    return collected
|
|
3743
|
+
|
|
3744
|
+
|
|
3745
|
+
def _collect_graph_warnings(diagnostic: JsonObject) -> list[JsonObject]:
    """Return the dict entries of ``graph_audit_final.warnings``.

    Yields an empty list when ``warnings`` is not a list; non-dict entries
    are dropped.
    """
    raw_warnings = _object_field(diagnostic, "graph_audit_final").get("warnings")
    if isinstance(raw_warnings, list):
        return [_json_object(entry) for entry in raw_warnings if isinstance(entry, dict)]
    return []
|
|
3751
|
+
|
|
3752
|
+
|
|
3753
|
+
def _folded_contains_any(folded_text: str, candidates: Iterable[object]) -> bool:
    """Tell whether any candidate, once folded, occurs in *folded_text*.

    Each candidate is stringified (falsy values become ``""``), folded
    with ``_fold_text`` and stripped; empty results never match.
    """
    for candidate in candidates:
        needle = _fold_text(str(candidate or "")).strip()
        if needle and needle in folded_text:
            return True
    return False
|
|
3759
|
+
|
|
3760
|
+
|
|
3761
|
+
def _as_int(value: object) -> int:
    """Convert *value* to an int by delegating to ``_safe_positive_int``."""
    converted = _safe_positive_int(value)
    return converted
|
|
3763
|
+
|
|
3764
|
+
|
|
3765
|
+
def _primary_objective_omission_findings(
    final_text: str,
    objective: PrimaryObjectiveSummary,
) -> list[AgentRunReportFinding]:
    """List the primary-objective questions the final report left unanswered.

    Process-chats and generic workflow objectives are delegated to their
    dedicated checkers. Any other objective is checked as a fix-wiki
    summary: wiki outcome, mutations, graph and related notes. One
    ``PRIMARY_OBJECTIVE_OMITTED`` finding is produced per missing answer.
    """
    if isinstance(objective, ProcessChatsPrimaryObjectiveSummary):
        return _process_chats_primary_objective_omission_findings(final_text, objective)
    if isinstance(objective, WorkflowPrimaryObjectiveSummary):
        return _generic_primary_objective_omission_findings(final_text, objective)

    # (source field, answered in the report?, expected summary)
    checks = (
        ("primary_objective.wiki_fixed", _mentions_wiki_outcome(final_text), objective.wiki_summary),
        ("primary_objective.mutation_summary", _mentions_mutation_outcome(final_text, objective), objective.mutation_summary),
        ("primary_objective.graph_summary", _mentions_graph_outcome(final_text, objective), objective.graph_summary),
        (
            "primary_objective.related_notes_summary",
            _mentions_related_notes_outcome(final_text, objective),
            objective.related_notes_summary,
        ),
    )
    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.PRIMARY_OBJECTIVE_OMITTED,
            severity="high",
            source="final_report",
            source_field=source_field,
            expected=expected_summary,
            actual="omitted",
            message="O relatório final não respondeu uma pergunta obrigatória do objetivo primário do fix-wiki.",
            next_action=(
                "Reescrever o relatório final respondendo: fixou a Wiki, o que mutou, "
                "se o grafo melhorou e se Notas Relacionadas foi atualizado ou ficou pendente."
            ),
        )
        for source_field, answered, expected_summary in checks
        if not answered
    ]
|
|
3804
|
+
|
|
3805
|
+
|
|
3806
|
+
def _primary_objective_success_claim_findings(
    final_text: str,
    objective: PrimaryObjectiveSummary,
) -> list[AgentRunReportFinding]:
    """Flag success language that the primary objective does not support.

    For a generic workflow objective, a finding is raised when the
    objective is not completed yet the final text claims success. For a
    process-chats objective, a finding is raised only when the status is
    ``completed_with_link_blockers`` and the text claims success. Any
    other objective type produces no finding.
    """
    if isinstance(objective, WorkflowPrimaryObjectiveSummary):
        claims_unfinished_success = (
            not objective.completed and _has_positive_success_claim(final_text)
        )
        if not claims_unfinished_success:
            return []
        return [
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.SUCCESS_CLAIM_MISMATCH,
                severity="medium",
                source="final_report",
                source_field="primary_objective.completed",
                expected="completed=false",
                actual="success_claim",
                message="O relatório final declarou sucesso para um objetivo primário que a FSM ainda não concluiu.",
                next_action="Trocar sucesso simples por prévia, espera, bloqueio ou etapa pendente conforme primary_objective_summary.",
            )
        ]

    if not isinstance(objective, ProcessChatsPrimaryObjectiveSummary):
        return []
    has_link_blockers = objective.process_status == "completed_with_link_blockers"
    if not (has_link_blockers and _has_positive_success_claim(final_text)):
        return []
    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.SUCCESS_CLAIM_MISMATCH,
            severity="medium",
            source="final_report",
            source_field="primary_objective.process_status",
            expected=objective.process_status,
            actual="success_claim",
            message="O relatório final usou linguagem de sucesso para process-chats com linker/grafo pendente.",
            next_action="Trocar sucesso simples por publicação concluída com pendência explícita de linker/grafo.",
        )
    ]
|
|
3843
|
+
|
|
3844
|
+
|
|
3845
|
+
def _generic_primary_objective_omission_findings(
    final_text: str,
    objective: WorkflowPrimaryObjectiveSummary,
) -> list[AgentRunReportFinding]:
    """List the generic workflow objective answers missing from the report.

    Checks the folded final text for the objective status and for
    fragments of the mutation, remaining-work and next-step summaries.
    One ``PRIMARY_OBJECTIVE_OMITTED`` finding is produced per missing item.
    """
    folded = _fold_text(final_text)
    # (source field, answered in the report?, expected summary)
    checks = (
        ("primary_objective.objective_status", _mentions_generic_objective_status(folded, objective), objective.status),
        (
            "primary_objective.mutation_summary",
            _mentions_summary_fragment(folded, objective.mutation_summary),
            objective.mutation_summary,
        ),
        (
            "primary_objective.remaining_work_summary",
            _mentions_summary_fragment(folded, objective.remaining_work_summary),
            objective.remaining_work_summary,
        ),
        (
            "primary_objective.next_step_summary",
            _mentions_summary_fragment(folded, objective.next_step_summary),
            objective.next_step_summary,
        ),
    )
    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.PRIMARY_OBJECTIVE_OMITTED,
            severity="high",
            source="final_report",
            source_field=source_field,
            expected=expected_summary,
            actual="omitted",
            message="O relatório final não respondeu uma pergunta obrigatória do objetivo primário do workflow.",
            next_action="Reescrever o relatório final usando reports.details.primary_objective_summary.",
        )
        for source_field, answered, expected_summary in checks
        if not answered
    ]
|
|
3885
|
+
|
|
3886
|
+
|
|
3887
|
+
def _mentions_generic_objective_status(
    folded_text: str,
    objective: WorkflowPrimaryObjectiveSummary,
) -> bool:
    """Tell whether the folded report text states the objective status.

    True when the raw status string occurs in the text, or when any of the
    human-readable markers registered for the objective's public answer
    in ``NON_SUCCESS_HUMAN_STATUS_MARKERS`` occurs in it.
    """
    if objective.status in folded_text:
        return True
    public_answer = _generic_public_objective_answer(objective)
    human_markers = NON_SUCCESS_HUMAN_STATUS_MARKERS.get(public_answer, ())
    return _folded_contains_any(folded_text, human_markers)
|
|
3896
|
+
|
|
3897
|
+
|
|
3898
|
+
def _mentions_summary_fragment(folded_text: str, summary: str) -> bool:
    """Tell whether *folded_text* echoes enough words of *summary*.

    Only folded words of five or more characters count. Of the first
    eight such words, at least two (or one, when the summary has a single
    long word) must occur in the text. A summary with no long word never
    matches.
    """
    long_words = [word for word in _fold_text(summary).split() if len(word) >= 5]
    if not long_words:
        return False
    required_hits = min(2, len(long_words))
    hits = sum(word in folded_text for word in long_words[:8])
    return hits >= required_hits
|
|
3903
|
+
|
|
3904
|
+
|
|
3905
|
+
def _process_chats_primary_objective_omission_findings(
    final_text: str,
    objective: ProcessChatsPrimaryObjectiveSummary,
) -> list[AgentRunReportFinding]:
    """List the process-chats objective answers missing from the report.

    Checks the final text for the process status, raw chats, wiki writes,
    coverage/manifest and linker state. One ``PRIMARY_OBJECTIVE_OMITTED``
    finding is produced per missing item.
    """
    # (source field, answered in the report?, expected summary)
    checks = (
        (
            "primary_objective.process_status",
            _mentions_process_chats_status(final_text, objective),
            objective.process_summary,
        ),
        ("primary_objective.raw_summary", _mentions_process_chats_raw(final_text, objective), objective.raw_summary),
        (
            "primary_objective.wiki_write_summary",
            _mentions_process_chats_wiki_write(final_text, objective),
            objective.wiki_write_summary,
        ),
        (
            "primary_objective.coverage_summary",
            _mentions_process_chats_coverage(final_text),
            objective.coverage_summary,
        ),
        (
            "primary_objective.linker_summary",
            _mentions_process_chats_linker(final_text, objective),
            objective.linker_summary,
        ),
    )
    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.PRIMARY_OBJECTIVE_OMITTED,
            severity="high",
            source="final_report",
            source_field=source_field,
            expected=expected_summary,
            actual="omitted",
            message="O relatório final não respondeu uma pergunta obrigatória do objetivo primário do process-chats.",
            next_action=(
                "Reescrever o relatório final respondendo: se publicou ou só preparou prévia, "
                "quais raws foram cobertos/processados, o que foi escrito na Wiki, "
                "se coverage/manifest bateram e qual foi o estado do linker/grafo."
            ),
        )
        for source_field, answered, expected_summary in checks
        if not answered
    ]
|
|
3953
|
+
|
|
3954
|
+
|
|
3955
|
+
def _mentions_wiki_outcome(final_text: str) -> bool:
    """Tell whether the report mentions the wiki together with an outcome marker."""
    folded = _fold_text(final_text)
    if "wiki" not in folded:
        return False
    outcome_markers = ("fixou", "corrig", "parcial", "pendente", "nao")
    return any(marker in folded for marker in outcome_markers)
|
|
3958
|
+
|
|
3959
|
+
|
|
3960
|
+
def _mentions_mutation_outcome(final_text: str, objective: FixWikiPrimaryObjectiveSummary) -> bool:
    """Tell whether the report states the mutation outcome of the objective.

    The folded text must contain a mutation word. With zero mutations it
    must also contain a "none" marker; otherwise it must contain the
    mutation count and, when a different non-zero written count exists,
    that count plus a "written" marker.
    """
    folded = _fold_text(final_text)

    def has_any(*markers: str) -> bool:
        return any(marker in folded for marker in markers)

    if not has_any("mutacao", "alterad", "modificad", "grav", "mudanca", "mudancas"):
        return False
    if objective.mutation_count == 0:
        return has_any(" 0 ", ": 0", "0 arquivo", "nenhum", "nada")
    if str(objective.mutation_count) not in folded:
        return False
    written_differs = bool(objective.written_count) and objective.written_count != objective.mutation_count
    if written_differs:
        return str(objective.written_count) in folded and has_any("grav", "salv", "escrit", "workflow")
    return True
|
|
3973
|
+
|
|
3974
|
+
|
|
3975
|
+
def _mentions_graph_outcome(final_text: str, objective: FixWikiPrimaryObjectiveSummary) -> bool:
|
|
3976
|
+
folded = _fold_text(final_text)
|
|
3977
|
+
if "grafo" not in folded and "graph" not in folded:
|
|
3978
|
+
return False
|
|
3979
|
+
match objective.graph_status:
|
|
3980
|
+
case "clean":
|
|
3981
|
+
return any(
|
|
3982
|
+
marker in folded
|
|
3983
|
+
for marker in (
|
|
3984
|
+
"limpo",
|
|
3985
|
+
"sem bloqueio",
|
|
3986
|
+
"sem blockers",
|
|
3987
|
+
"sem erro",
|
|
3988
|
+
"sem comparacao",
|
|
3989
|
+
"sem comparação",
|
|
3990
|
+
"grafo limpo",
|
|
3991
|
+
"graph clean",
|
|
3992
|
+
"terminou sem bloqueios",
|
|
3993
|
+
"terminou sem erros",
|
|
3994
|
+
)
|
|
3995
|
+
)
|
|
3996
|
+
case "improved":
|
|
3997
|
+
return any(marker in folded for marker in ("melhor", "reduz", "corrig"))
|
|
3998
|
+
case "blocked":
|
|
3999
|
+
return any(marker in folded for marker in ("bloque", "pendente", "erro"))
|
|
4000
|
+
case "unchanged":
|
|
4001
|
+
return any(marker in folded for marker in ("nao melhorou", "não melhorou", "permaneceu", "inalter"))
|
|
4002
|
+
case "worse":
|
|
4003
|
+
return any(marker in folded for marker in ("pior", "regred"))
|
|
4004
|
+
case "unknown":
|
|
4005
|
+
return any(marker in folded for marker in ("sem comparacao", "sem comparação", "nao confirmou", "não confirmou"))
|
|
4006
|
+
|
|
4007
|
+
|
|
4008
|
+
def _mentions_related_notes_outcome(final_text: str, objective: FixWikiPrimaryObjectiveSummary) -> bool:
    """Tell whether the report states the Related Notes outcome correctly.

    The folded text must mention "related notes" / "notas relacionadas".
    When the status is ``pending`` and the expected summary mentions
    ``cota``, the report must mention the quota too. When the status is
    ``updated``, a report that still describes the step as pending is
    rejected.
    """
    folded = _fold_text(final_text)
    if "related notes" not in folded and "notas relacionadas" not in folded:
        return False
    if objective.related_notes_status == "pending" and "cota" in _fold_text(objective.related_notes_summary):
        return "cota" in folded or "quota" in folded
    # Markers are compared against folded (accent-stripped) text, so they
    # must be written without diacritics: "está pendente" could never match.
    contradicting_pending_markers = (
        "convergencia total esta pendente",
        "convergencia pendente",
        "pendente da aplicacao",
        "pendente de aplicacao",
        "ficou pendente",
        "estao pendentes",
        "esta pendente",
    )
    if objective.related_notes_status == "updated" and any(
        marker in folded for marker in contradicting_pending_markers
    ):
        return False
    return True
|
|
4028
|
+
|
|
4029
|
+
|
|
4030
|
+
def _mentions_process_chats_status(final_text: str, objective: ProcessChatsPrimaryObjectiveSummary) -> bool:
    """Tell whether the report states the process-chats status.

    Preview-like statuses expect a preview/ready marker; every other
    status expects a publication marker.
    """
    folded = _fold_text(final_text)
    is_preview = objective.process_status in {"preview_ready", "ready_to_publish"}
    if is_preview:
        expected_markers = ("previa", "preview", "pronta", "ready_to_publish")
    else:
        expected_markers = ("publicacao", "publicou", "publicad", "process-chats")
    return any(marker in folded for marker in expected_markers)
|
|
4035
|
+
|
|
4036
|
+
|
|
4037
|
+
def _mentions_process_chats_raw(final_text: str, objective: ProcessChatsPrimaryObjectiveSummary) -> bool:
    """Tell whether the report states how many raw chats were handled.

    The folded text must mention raws/chats and then either a "none"
    marker (zero raws) or the raw count as a substring.
    """
    folded = _fold_text(final_text)
    mentions_raws = any(marker in folded for marker in ("raw", "chat", "chats"))
    if not mentions_raws:
        return False
    if objective.raw_count == 0:
        none_markers = ("0", "nenhum", "nao processad", "ainda nao")
        return any(marker in folded for marker in none_markers)
    return str(objective.raw_count) in folded
|
|
4044
|
+
|
|
4045
|
+
|
|
4046
|
+
def _mentions_process_chats_wiki_write(final_text: str, objective: ProcessChatsPrimaryObjectiveSummary) -> bool:
    """Tell whether the report states what was written to the wiki.

    The folded text must mention the wiki and a write-related word, then
    either a "none" marker (zero notes) or the note count as a substring.
    """
    folded = _fold_text(final_text)
    write_markers = ("arquivo", "nota", "escrit", "grav", "publicad")
    if "wiki" not in folded or not any(marker in folded for marker in write_markers):
        return False
    if objective.note_count == 0:
        none_markers = ("0", "nenhum", "nada", "ainda nao")
        return any(marker in folded for marker in none_markers)
    return str(objective.note_count) in folded
|
|
4055
|
+
|
|
4056
|
+
|
|
4057
|
+
def _mentions_process_chats_coverage(final_text: str) -> bool:
    """Tell whether the report mentions both coverage and the manifest."""
    folded = _fold_text(final_text)
    mentions_coverage = "coverage" in folded or "cobertura" in folded
    return mentions_coverage and "manifest" in folded
|
|
4060
|
+
|
|
4061
|
+
|
|
4062
|
+
def _mentions_process_chats_linker(final_text: str, objective: ProcessChatsPrimaryObjectiveSummary) -> bool:
    """Tell whether the report states the linker/graph state.

    The folded text must mention the linker, graph or related notes. For
    the ``blocked`` and ``not_run`` statuses it must also carry a matching
    pending / not-run marker; any other status needs nothing more.
    """
    folded = _fold_text(final_text)
    topic_markers = ("linker", "grafo", "related notes", "notas relacionadas")
    if not any(marker in folded for marker in topic_markers):
        return False
    status = objective.linker_status
    if status == "blocked":
        state_markers = ("pendente", "bloque", "blocker", "nao aplicado")
    elif status == "not_run":
        state_markers = ("nao rodou", "ainda nao", "nao foi confirmad", "publicacao nao")
    else:
        return True
    return any(marker in folded for marker in state_markers)
|
|
4071
|
+
|
|
4072
|
+
|
|
4073
|
+
def _fold_text(text: str) -> str:
|
|
4074
|
+
normalized = unicodedata.normalize("NFKD", str(text or ""))
|
|
4075
|
+
without_marks = "".join(ch for ch in normalized if not unicodedata.combining(ch))
|
|
4076
|
+
return f" {without_marks.casefold()} "
|
|
4077
|
+
|
|
4078
|
+
|
|
4079
|
+
def _omitted_tool_error_findings(transcript: object, final_text: str) -> list[AgentRunReportFinding]:
    """List the failed tool calls that the final report does not mention.

    Takes the ``TOOL_CALL_ERROR`` findings produced by
    ``validate_agent_tool_calls`` and emits one ``OMITTED_TOOL_ERROR``
    finding for each error the final text does not acknowledge.
    """
    unreported_errors = [
        issue
        for issue in validate_agent_tool_calls(transcript)
        if str(issue.get("code") or "") == TOOL_CALL_ERROR
        and not _final_report_mentions_tool_error(final_text, issue)
    ]
    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.OMITTED_TOOL_ERROR,
            severity=_finding_severity(issue.get("severity")),
            source="transcript",
            source_field="tool_error",
            tool_error_type=str(issue.get("error_type") or ""),
            message="O transcript contém tool call falha que o relatório final não reportou.",
            next_action="Reportar explicitamente a tool call falha e seu impacto, mesmo quando um retry posterior recuperar.",
            evidence={
                "tool_type": str(issue.get("tool_type") or ""),
                "tool_error_message": str(issue.get("message") or ""),
            },
        )
        for issue in unreported_errors
    ]
|
|
4107
|
+
|
|
4108
|
+
|
|
4109
|
+
def _finding_severity(value: object) -> AgentRunReportSeverity:
    """Normalise *value* to a known severity, defaulting to ``"medium"``.

    The value is stringified, stripped and lower-cased; anything other
    than low/medium/high/critical (including falsy input) yields
    ``"medium"``.
    """
    normalized = str(value or "medium").strip().lower()
    known_levels = {"low", "medium", "high", "critical"}
    return cast(AgentRunReportSeverity, normalized) if normalized in known_levels else "medium"
|
|
4114
|
+
|
|
4115
|
+
|
|
4116
|
+
def _final_report_mentions_tool_error(final_text: str, error: JsonObject) -> bool:
    """Tell whether the final report acknowledges a failed tool call.

    The lower-cased text must contain ``tool`` and either the error's
    ``error_type`` or one of the generic failure markers.
    """
    haystack = final_text.lower()
    if "tool" not in haystack:
        return False
    declared_type = str(error.get("error_type") or "").lower()
    if declared_type and declared_type in haystack:
        return True
    failure_markers = ("erro", "falh", "failed", "invalid tool", "invalid_tool")
    return any(marker in haystack for marker in failure_markers)
|
|
4124
|
+
|
|
4125
|
+
|
|
4126
|
+
def _omitted_tool_deviation_findings(transcript: object, final_text: str) -> list[AgentRunReportFinding]:
    """Flag off-script probes or tool calls the final report did not list.

    Returns an empty list when the transcript has no deviations or when
    the final text already mentions them. Otherwise returns one
    ``TOOL_DEVIATION_OMITTED`` finding whose ``actual`` and ``message``
    depend on whether the report explicitly claimed there were none.
    """
    deviations, finding_codes = _transcript_tool_deviation_context(transcript)
    if not deviations or _final_report_mentions_tool_deviations(final_text, deviations):
        return []
    no_deviation_claim = bool(final_text and NO_TOOL_DEVIATION_CLAIM_RE.search(final_text))
    if no_deviation_claim:
        actual = "no_deviations_claim"
        message = "O relatório final afirmou que não houve desvios, mas o transcript contém probes ou tool calls fora do roteiro."
    else:
        actual = ",".join(deviations)
        message = "O relatório final omitiu probes ou tool calls fora do roteiro presentes no transcript."
    omission = AgentRunReportFinding(
        code=AgentRunReportFindingCode.TOOL_DEVIATION_OMITTED,
        severity="high",
        source="transcript",
        source_field="final_report_text",
        expected="relatório final deve listar probes, permissões e comandos fora do roteiro quando ocorrerem",
        actual=actual,
        message=message,
        next_action=(
            "Reescrever a seção de avisos de execução listando os probes/tool calls observados "
            "e o impacto deles no experimento."
        ),
        evidence=_tool_deviation_evidence(deviations=deviations, finding_codes=finding_codes),
    )
    return [omission]
|
|
4153
|
+
|
|
4154
|
+
|
|
4155
|
+
def _tool_deviation_evidence(*, deviations: list[str], finding_codes: list[str]) -> JsonObject:
    """Build the evidence payload for a tool-deviation finding.

    ``tool_types`` is always present; ``finding_codes`` is added only when
    the list is non-empty.
    """
    payload: JsonObject = {"tool_types": deviations}
    if not finding_codes:
        return payload
    payload["finding_codes"] = finding_codes
    return payload
|
|
4160
|
+
|
|
4161
|
+
|
|
4162
|
+
def _update_topic_success_claim_findings(
    transcript: object,
    truth: _WorkflowTruth,
) -> list[AgentRunReportFinding]:
    """Flag ``update_topic`` calls that claim success on an unfinished workflow.

    The effective status is the first truthy value among workflow,
    progress and receipt status. When it is in ``NON_SUCCESS_STATUSES``,
    every ``update_topic`` tool use whose title/summary/strategic intent
    claims success without acknowledging partial state yields a
    ``SUCCESS_CLAIM_MISMATCH`` finding.
    """
    status = truth.workflow_status or truth.progress_status or truth.receipt_status
    if status not in NON_SUCCESS_STATUSES:
        return []
    findings: list[AgentRunReportFinding] = []
    for event in _iter_transcript_events(transcript):
        is_update_topic = (
            event.event_type.casefold() == "tool_use"
            and event.tool_name.casefold() == "update_topic"
        )
        if not is_update_topic:
            continue
        public_fields = [
            str(event.parameters.get(field) or "")
            for field in ("title", "summary", "strategic_intent")
        ]
        text = "\n".join(public_fields)
        if not _has_positive_success_claim(text):
            continue
        if _update_topic_acknowledges_partial_workflow(text):
            continue
        findings.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.SUCCESS_CLAIM_MISMATCH,
                severity="medium",
                source="transcript",
                source_field="transcript.tool_use.update_topic",
                expected=f"update_topic deve comunicar estado parcial/pendente quando workflow_status={status}",
                actual="success_claim",
                message="O update_topic usou linguagem de sucesso apesar de o workflow ainda estar parcial ou bloqueado.",
                next_action=(
                    "Atualizar a comunicação pública para dizer o que foi aplicado e o que ainda falta, "
                    "sem chamar o workflow parcial de sucesso."
                ),
                evidence={"workflow_status": status, "text": text},
            )
        )
    return findings
|
|
4198
|
+
|
|
4199
|
+
|
|
4200
|
+
def _update_topic_acknowledges_partial_workflow(text: str) -> bool:
    """Tell whether *text* acknowledges a partial, pending or blocked state."""
    partial_state_markers = (
        "parcial",
        "pendente",
        "aguard",
        "waiting",
        "bloque",
        "falta",
        "restam",
        "nao conclu",
        "não conclu",
        "nao fixou",
        "não fixou",
    )
    folded = _fold_text(text)
    for marker in partial_state_markers:
        if marker in folded:
            return True
    return False
|
|
4218
|
+
|
|
4219
|
+
|
|
4220
|
+
def _transcript_tool_deviation_context(transcript: object) -> tuple[list[str], list[str]]:
    """Collect off-script probe types and validator finding codes.

    Walks the transcript recursively, recording file/directory/grep probes
    and ``list_permissions`` calls. Then adds every finding from
    ``validate_agent_tool_calls`` except tool-call errors and public tool
    text contract violations.

    Returns:
        ``(probe_types, finding_codes)``, each de-duplicated in discovery
        order via ``_append_unique``.
    """
    probe_types: list[str] = []
    finding_codes: list[str] = []

    # NOTE(review): nesting below was reconstructed from control flow because
    # the original indentation is not visible — confirm against the source.
    def walk(node: object) -> None:
        if isinstance(node, list):
            for element in node:
                walk(element)
            return
        if not isinstance(node, dict):
            return
        event_type = str(node.get("type") or "").upper()
        tool_name = str(node.get("name") or node.get("tool_name") or "").strip()
        if event_type in {"VIEW_FILE", "LIST_DIRECTORY", "GREP_SEARCH"}:
            if event_type == "VIEW_FILE":
                # Expected reads are not deviations and end the walk of this node.
                if _is_expected_workflow_skill_read(node) or _is_expected_cpu_sample_read(node):
                    return
                if _is_agy_background_task_log_read(node):
                    _append_unique(probe_types, "AGY_BACKGROUND_TASK_LOG")
                    return
            _append_unique(probe_types, event_type)
        if event_type == "GENERIC" and tool_name == "list_permissions":
            _append_unique(probe_types, "GENERIC:list_permissions")
        for nested in _transcript_child_containers(node):
            walk(nested)

    walk(transcript)

    excluded_codes = (TOOL_CALL_ERROR, PUBLIC_TOOL_TEXT_CONTRACT_VIOLATION)
    for issue in validate_agent_tool_calls(transcript):
        code = str(issue.get("code") or "")
        if not code or code in excluded_codes:
            continue
        _append_unique(finding_codes, code)
        _append_unique(probe_types, str(issue.get("tool_name") or code))
    return probe_types, finding_codes
|
|
4257
|
+
|
|
4258
|
+
|
|
4259
|
+
def _is_expected_workflow_skill_read(event: JsonObject) -> bool:
    """Return True when the event's file path is one of the known SKILL.md files.

    The path comes from ``_transcript_event_file_path`` with backslashes
    converted to forward slashes. It must end with ``/SKILL.md`` and contain
    one of the listed skill locations.
    """
    skill_path = _transcript_event_file_path(event).replace("\\", "/")
    if not skill_path.endswith("/SKILL.md"):
        return False
    expected_locations = (
        "/mednotes-fix-wiki/SKILL.md",
        "/fix-medical-wiki/SKILL.md",
        "/obsidian-ops/SKILL.md",
        f"/{SKILLS_RELPATH}/fix-medical-wiki/SKILL.md",
    )
    for location in expected_locations:
        if location in skill_path:
            return True
    return False
|
|
4272
|
+
|
|
4273
|
+
|
|
4274
|
+
def _is_expected_cpu_sample_read(event: JsonObject) -> bool:
    """Return True when the event's file path ends with ``/cpu-samples.jsonl``.

    Backslashes in the path are treated as forward slashes before the check.
    """
    file_path = _transcript_event_file_path(event)
    return file_path.replace("\\", "/").endswith("/cpu-samples.jsonl")
|
|
4277
|
+
|
|
4278
|
+
|
|
4279
|
+
def _is_agy_background_task_log_read(event: JsonObject) -> bool:
    """Return True when the event's file path matches the background task log layout.

    After converting backslashes to forward slashes, the path must end with
    ``.log`` and contain both ``/.gemini/antigravity-cli/brain/`` and
    ``/.system_generated/tasks/task-``.
    """
    log_path = _transcript_event_file_path(event).replace("\\", "/")
    if not log_path.endswith(".log"):
        return False
    required_segments = (
        "/.gemini/antigravity-cli/brain/",
        "/.system_generated/tasks/task-",
    )
    return all(segment in log_path for segment in required_segments)
|
|
4282
|
+
|
|
4283
|
+
|
|
4284
|
+
def _transcript_event_file_path(event: JsonObject) -> str:
    """Extract the file path referenced by a transcript event.

    Sources are tried in this order, and the first non-empty one wins:
    ``event["path"]``, ``event["file_path"]``, ``path``/``file_path`` inside
    ``event["parameters"]["args"]``, and finally a path parsed out of
    ``event["content"]``. Returns ``""`` when none of them yields a value.
    """
    nested_path = ""
    parameters = event.get("parameters")
    args = parameters.get("args") if isinstance(parameters, dict) else None
    if isinstance(args, dict):
        nested_path = str(args.get("path") or args.get("file_path") or "")

    content_path = _tool_content_file_path(str(event.get("content") or ""))

    candidates = (
        event.get("path"),
        event.get("file_path"),
        nested_path,
        content_path,
    )
    for candidate in candidates:
        if candidate:
            return str(candidate)
    return ""
|
|
4299
|
+
|
|
4300
|
+
|
|
4301
|
+
def _tool_content_file_path(content: str) -> str:
    """Return the URL-unquoted ``path`` group found in ``content``.

    Uses the first match of ``TOOL_CONTENT_FILE_PATH_RE``. Returns ``""``
    when the pattern does not match.
    """
    found = TOOL_CONTENT_FILE_PATH_RE.search(content)
    return unquote(found.group("path")) if found is not None else ""
|
|
4306
|
+
|
|
4307
|
+
|
|
4308
|
+
def _final_report_mentions_tool_deviations(final_text: str, deviations: list[str]) -> bool:
    """Check that the final report text mentions every listed deviation.

    A deviation counts as mentioned when its folded name appears in the
    folded report, or when one of its known alias phrases does. Returns
    ``False`` when the folded report is empty, and also as soon as one
    deviation is not mentioned.
    """
    report = _fold_text(final_text)
    if not report:
        return False

    aliases_by_deviation = {
        "VIEW_FILE": ("view_file", "leu skill", "leitura de skill", "read file"),
        "AGY_BACKGROUND_TASK_LOG": (
            "agy background fallback",
            "task log",
            "background task",
            "fallback de background",
            "log indicado pela ferramenta",
            "log indicado pela propria ferramenta",
            "log indicado pela própria ferramenta",
            "registro indicado pela ferramenta",
            "registro da ferramenta",
            "execucao em segundo plano",
            "execução em segundo plano",
            "segundo plano",
            "registro temporario de progresso",
            "registro temporário de progresso",
        ),
        "LIST_DIRECTORY": ("list_directory", "listou diretorio", "listagem de diretorio"),
        "GREP_SEARCH": ("grep_search", "grep", "busca textual"),
    }

    def is_mentioned(deviation: str) -> bool:
        if _fold_text(deviation) in report:
            return True
        return any(alias in report for alias in aliases_by_deviation.get(deviation, ()))

    return all(is_mentioned(deviation) for deviation in deviations)
|
|
4344
|
+
|
|
4345
|
+
|
|
4346
|
+
def _append_unique(values: list[str], value: str) -> None:
|
|
4347
|
+
if value not in values:
|
|
4348
|
+
values.append(value)
|
|
4349
|
+
|
|
4350
|
+
|
|
4351
|
+
def _final_report_local_path_leak_findings(final_text: str) -> list[AgentRunReportFinding]:
    """Build one finding for each local machine path exposed in the final report.

    Paths come from ``_reported_absolute_paths`` and are kept only when
    ``_looks_like_local_path_leak`` accepts them. Findings follow the order
    in which the paths were extracted.
    """
    return [
        AgentRunReportFinding(
            code=AgentRunReportFindingCode.FINAL_REPORT_LOCAL_PATH_LEAK,
            severity="medium",
            source="final_report",
            source_field="final_report_text",
            path=leaked,
            # Last path component, with Windows separators treated as "/".
            artifact_name=leaked.replace("\\", "/").rsplit("/", 1)[-1],
            expected="resposta pública sem links file:// nem caminhos locais absolutos",
            actual=leaked,
            message="O relatório final expôs um caminho local da máquina no texto público.",
            next_action=(
                "Trocar o caminho local por uma descrição humana do item afetado ou por referência técnica "
                "apenas no log/JSON do experimento."
            ),
            evidence={"path": leaked},
        )
        for leaked in _reported_absolute_paths(final_text)
        if _looks_like_local_path_leak(leaked)
    ]
|
|
4375
|
+
|
|
4376
|
+
|
|
4377
|
+
def _looks_like_local_path_leak(path: str) -> bool:
|
|
4378
|
+
normalized = path.replace("\\", "/")
|
|
4379
|
+
if normalized.startswith(("/mednotes:", "/flashcards")):
|
|
4380
|
+
return False
|
|
4381
|
+
return normalized.startswith(("/Users/", "/tmp/", "/private/tmp/", "/private/var/"))
|
|
4382
|
+
|
|
4383
|
+
|
|
4384
|
+
def _invalid_reported_artifact_path_findings(final_text: str) -> list[AgentRunReportFinding]:
    """Build one finding for each artifact-like path in the report that does not exist.

    Paths come from ``_reported_absolute_paths`` and are considered only when
    ``_looks_like_reported_artifact_path`` accepts them. A path that cannot
    be probed on the filesystem is treated as missing.
    """
    findings: list[AgentRunReportFinding] = []
    for path in _reported_absolute_paths(final_text):
        if not _looks_like_reported_artifact_path(path):
            continue
        try:
            exists = Path(path).exists()
        except OSError:
            # The path was extracted from free text. On Python 3.8-3.13,
            # Path.exists() re-raises errors such as ENAMETOOLONG or EACCES;
            # an unprobeable path must not abort the whole validation.
            exists = False
        if exists:
            continue
        findings.append(
            AgentRunReportFinding(
                code=AgentRunReportFindingCode.REPORTED_ARTIFACT_PATH_INVALID,
                severity="medium",
                source="filesystem",
                source_field="final_report_text",
                path=path,
                # Last path component, with Windows separators treated as "/".
                artifact_name=path.replace("\\", "/").rsplit("/", 1)[-1],
                message="O relatório final citou caminho de artefato ou backup que não existe no filesystem.",
                next_action="Remover o caminho inventado ou substituir pelo caminho oficial existente antes de concluir a rodada.",
            )
        )
    return findings
|
|
4404
|
+
|
|
4405
|
+
|
|
4406
|
+
def _reported_absolute_paths(final_text: str) -> list[str]:
    """Collect the distinct absolute paths mentioned in ``final_text``.

    Three patterns are applied in turn (backtick-quoted paths, file URIs,
    plain paths). Each ``path`` group is URL-unquoted, stripped of trailing
    ``.``, ``,`` and ``;``, and normalised by
    ``_normalize_reported_path_candidate``. Empty results and duplicates are
    dropped; first-seen order is kept.
    """
    collected: list[str] = []
    patterns = (BACKTICK_ABSOLUTE_PATH_RE, FILE_URI_RE, PLAIN_ABSOLUTE_PATH_RE)
    for regex in patterns:
        for hit in regex.finditer(final_text):
            decoded = unquote(hit.group("path"))
            candidate = _normalize_reported_path_candidate(decoded.rstrip(".,;"))
            if not candidate or candidate in collected:
                continue
            collected.append(candidate)
    return collected
|
|
4414
|
+
|
|
4415
|
+
|
|
4416
|
+
def _normalize_reported_path_candidate(raw_path: str) -> str:
|
|
4417
|
+
stripped = raw_path.strip()
|
|
4418
|
+
for separator in ("\n", "\r"):
|
|
4419
|
+
if separator in stripped:
|
|
4420
|
+
stripped = stripped.split(separator, 1)[0].strip()
|
|
4421
|
+
if stripped.startswith(("/mednotes:", "/flashcards")):
|
|
4422
|
+
return stripped.split(maxsplit=1)[0]
|
|
4423
|
+
if stripped.endswith(")") and not Path(stripped).exists():
|
|
4424
|
+
markdown_link_candidate = stripped[:-1]
|
|
4425
|
+
if Path(markdown_link_candidate).exists() or _looks_like_reported_artifact_path(markdown_link_candidate):
|
|
4426
|
+
stripped = markdown_link_candidate
|
|
4427
|
+
return stripped
|
|
4428
|
+
|
|
4429
|
+
|
|
4430
|
+
def _looks_like_reported_artifact_path(path: str) -> bool:
|
|
4431
|
+
normalized = path.replace("\\", "/")
|
|
4432
|
+
if normalized.startswith(("/mednotes:", "/flashcards")):
|
|
4433
|
+
return False
|
|
4434
|
+
name = normalized.rsplit("/", 1)[-1]
|
|
4435
|
+
if name.endswith((".json", ".md", ".bak", ".log")):
|
|
4436
|
+
return True
|
|
4437
|
+
return any(
|
|
4438
|
+
marker in normalized
|
|
4439
|
+
for marker in (
|
|
4440
|
+
"/runs/",
|
|
4441
|
+
"/workflow-",
|
|
4442
|
+
"fix-wiki",
|
|
4443
|
+
"link-diagnosis",
|
|
4444
|
+
"run_state",
|
|
4445
|
+
"compact-report",
|
|
4446
|
+
"full-report",
|
|
4447
|
+
)
|
|
4448
|
+
)
|