devflow-engine 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. devflow_engine/__init__.py +3 -0
  2. devflow_engine/agentic_prompts.py +100 -0
  3. devflow_engine/agentic_runtime.py +398 -0
  4. devflow_engine/api_key_flow_harness.py +539 -0
  5. devflow_engine/api_keys.py +357 -0
  6. devflow_engine/bootstrap/__init__.py +2 -0
  7. devflow_engine/bootstrap/provision_from_template.py +84 -0
  8. devflow_engine/cli/__init__.py +0 -0
  9. devflow_engine/cli/app.py +7270 -0
  10. devflow_engine/core/__init__.py +0 -0
  11. devflow_engine/core/config.py +86 -0
  12. devflow_engine/core/logging.py +29 -0
  13. devflow_engine/core/paths.py +45 -0
  14. devflow_engine/core/toml_kv.py +33 -0
  15. devflow_engine/devflow_event_worker.py +1292 -0
  16. devflow_engine/devflow_state.py +201 -0
  17. devflow_engine/devin2/__init__.py +9 -0
  18. devflow_engine/devin2/agent_definition.py +120 -0
  19. devflow_engine/devin2/pi_runner.py +204 -0
  20. devflow_engine/devin_orchestration.py +69 -0
  21. devflow_engine/docs/prompts/anti-patterns.md +42 -0
  22. devflow_engine/docs/prompts/devin-agent-prompt.md +55 -0
  23. devflow_engine/docs/prompts/devin2-agent-prompt.md +81 -0
  24. devflow_engine/docs/prompts/examples/devin-vapi-clone-reference-exchange.json +85 -0
  25. devflow_engine/doctor/__init__.py +2 -0
  26. devflow_engine/doctor/triage.py +140 -0
  27. devflow_engine/error/__init__.py +0 -0
  28. devflow_engine/error/remediation.py +21 -0
  29. devflow_engine/errors/error_solver_dag.py +522 -0
  30. devflow_engine/errors/runtime_observability.py +67 -0
  31. devflow_engine/idea/__init__.py +4 -0
  32. devflow_engine/idea/actors.py +481 -0
  33. devflow_engine/idea/agentic.py +465 -0
  34. devflow_engine/idea/analyze.py +93 -0
  35. devflow_engine/idea/devin_chat_dag.py +1 -0
  36. devflow_engine/idea/diff.py +99 -0
  37. devflow_engine/idea/drafts.py +446 -0
  38. devflow_engine/idea/idea_creation_dag.py +643 -0
  39. devflow_engine/idea/ideation_enrichment.py +355 -0
  40. devflow_engine/idea/ideation_enrichment_worker.py +19 -0
  41. devflow_engine/idea/paths.py +28 -0
  42. devflow_engine/idea/promote.py +53 -0
  43. devflow_engine/idea/redaction.py +27 -0
  44. devflow_engine/idea/repo_tools.py +1277 -0
  45. devflow_engine/idea/response_mode.py +30 -0
  46. devflow_engine/idea/story_pipeline.py +1585 -0
  47. devflow_engine/idea/sufficiency.py +376 -0
  48. devflow_engine/idea/traditional_stories.py +1257 -0
  49. devflow_engine/implementation/__init__.py +0 -0
  50. devflow_engine/implementation/alembic_preflight.py +700 -0
  51. devflow_engine/implementation/dag.py +8450 -0
  52. devflow_engine/implementation/green_gate.py +93 -0
  53. devflow_engine/implementation/prompts.py +108 -0
  54. devflow_engine/implementation/test_runtime.py +623 -0
  55. devflow_engine/integration/__init__.py +19 -0
  56. devflow_engine/integration/agentic.py +66 -0
  57. devflow_engine/integration/dag.py +3539 -0
  58. devflow_engine/integration/prompts.py +114 -0
  59. devflow_engine/integration/supabase_schema.sql +31 -0
  60. devflow_engine/integration/supabase_sync.py +177 -0
  61. devflow_engine/llm/__init__.py +1 -0
  62. devflow_engine/llm/cli_one_shot.py +84 -0
  63. devflow_engine/llm/cli_stream.py +371 -0
  64. devflow_engine/llm/execution_context.py +26 -0
  65. devflow_engine/llm/invoke.py +1322 -0
  66. devflow_engine/llm/provider_api.py +304 -0
  67. devflow_engine/llm/repo_knowledge.py +588 -0
  68. devflow_engine/llm_primitives.py +315 -0
  69. devflow_engine/orchestration.py +62 -0
  70. devflow_engine/planning/__init__.py +0 -0
  71. devflow_engine/planning/analyze_repo.py +92 -0
  72. devflow_engine/planning/render_drafts.py +133 -0
  73. devflow_engine/playground/__init__.py +0 -0
  74. devflow_engine/playground/hooks.py +26 -0
  75. devflow_engine/playwright_workflow/__init__.py +5 -0
  76. devflow_engine/playwright_workflow/dag.py +1317 -0
  77. devflow_engine/process/__init__.py +5 -0
  78. devflow_engine/process/dag.py +59 -0
  79. devflow_engine/project_registration/__init__.py +3 -0
  80. devflow_engine/project_registration/dag.py +1581 -0
  81. devflow_engine/project_registry.py +109 -0
  82. devflow_engine/prompts/devin/generic/prompt.md +6 -0
  83. devflow_engine/prompts/devin/ideation/prompt.md +263 -0
  84. devflow_engine/prompts/devin/ideation/scenarios.md +5 -0
  85. devflow_engine/prompts/devin/ideation_loop/prompt.md +6 -0
  86. devflow_engine/prompts/devin/insight/prompt.md +11 -0
  87. devflow_engine/prompts/devin/insight/scenarios.md +5 -0
  88. devflow_engine/prompts/devin/intake/prompt.md +15 -0
  89. devflow_engine/prompts/devin/iterate/prompt.md +12 -0
  90. devflow_engine/prompts/devin/shared/eval_doctrine.md +9 -0
  91. devflow_engine/prompts/devin/shared/principles.md +246 -0
  92. devflow_engine/prompts/devin_eval/assessment/prompt.md +18 -0
  93. devflow_engine/prompts/idea/api_ideation_agent/prompt.md +8 -0
  94. devflow_engine/prompts/idea/api_insight_agent/prompt.md +8 -0
  95. devflow_engine/prompts/idea/response_doctrine/prompt.md +18 -0
  96. devflow_engine/prompts/implementation/dependency_assessment/prompt.md +12 -0
  97. devflow_engine/prompts/implementation/green/green/prompt.md +11 -0
  98. devflow_engine/prompts/implementation/green/node_config/prompt.md +3 -0
  99. devflow_engine/prompts/implementation/green_review/outcome_review/prompt.md +5 -0
  100. devflow_engine/prompts/implementation/green_review/prior_run_review/prompt.md +5 -0
  101. devflow_engine/prompts/implementation/red/prompt.md +27 -0
  102. devflow_engine/prompts/implementation/redreview/prompt.md +23 -0
  103. devflow_engine/prompts/implementation/redreview_repair/prompt.md +16 -0
  104. devflow_engine/prompts/implementation/setupdoc/prompt.md +10 -0
  105. devflow_engine/prompts/implementation/story_planning/prompt.md +13 -0
  106. devflow_engine/prompts/implementation/test_design/prompt.md +27 -0
  107. devflow_engine/prompts/integration/README.md +185 -0
  108. devflow_engine/prompts/integration/green/example.md +67 -0
  109. devflow_engine/prompts/integration/green/green/prompt.md +10 -0
  110. devflow_engine/prompts/integration/green/node_config/prompt.md +42 -0
  111. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/green/prompt.md +15 -0
  112. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/node_config/prompt.md +42 -0
  113. devflow_engine/prompts/integration/green_enrich/example.md +79 -0
  114. devflow_engine/prompts/integration/green_enrich/green_enrich/prompt.md +9 -0
  115. devflow_engine/prompts/integration/green_enrich/node_config/prompt.md +41 -0
  116. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/green_enrich/prompt.md +14 -0
  117. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  118. devflow_engine/prompts/integration/red/code_repair/prompt.md +12 -0
  119. devflow_engine/prompts/integration/red/example.md +152 -0
  120. devflow_engine/prompts/integration/red/node_config/prompt.md +86 -0
  121. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  122. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/node_config/prompt.md +84 -0
  123. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red/prompt.md +16 -0
  124. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red_repair/prompt.md +15 -0
  125. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  126. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/node_config/prompt.md +84 -0
  127. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/red_repair/prompt.md +11 -0
  128. devflow_engine/prompts/integration/red/red/prompt.md +11 -0
  129. devflow_engine/prompts/integration/red/red_repair/prompt.md +12 -0
  130. devflow_engine/prompts/integration/red_review/example.md +71 -0
  131. devflow_engine/prompts/integration/red_review/node_config/prompt.md +41 -0
  132. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  133. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/red_review/prompt.md +15 -0
  134. devflow_engine/prompts/integration/red_review/red_review/prompt.md +9 -0
  135. devflow_engine/prompts/integration/resolve/example.md +111 -0
  136. devflow_engine/prompts/integration/resolve/node_config/prompt.md +64 -0
  137. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/node_config/prompt.md +64 -0
  138. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_implicated_users/prompt.md +15 -0
  139. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_side_effects/prompt.md +15 -0
  140. devflow_engine/prompts/integration/resolve/resolve_implicated_users/prompt.md +10 -0
  141. devflow_engine/prompts/integration/resolve/resolve_side_effects/prompt.md +10 -0
  142. devflow_engine/prompts/integration/validate/build_idea_acceptance_coverage/prompt.md +12 -0
  143. devflow_engine/prompts/integration/validate/code_repair/prompt.md +13 -0
  144. devflow_engine/prompts/integration/validate/example.md +143 -0
  145. devflow_engine/prompts/integration/validate/node_config/prompt.md +87 -0
  146. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  147. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/node_config/prompt.md +67 -0
  148. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_enrich_gate/prompt.md +17 -0
  149. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_repair/prompt.md +16 -0
  150. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  151. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/node_config/prompt.md +67 -0
  152. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/validate_repair/prompt.md +9 -0
  153. devflow_engine/prompts/integration/validate/validate_enrich_gate/prompt.md +10 -0
  154. devflow_engine/prompts/integration/validate/validate_repair/prompt.md +20 -0
  155. devflow_engine/prompts/integration/write_workflows/example.md +100 -0
  156. devflow_engine/prompts/integration/write_workflows/node_config/prompt.md +44 -0
  157. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/node_config/prompt.md +44 -0
  158. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/write_workflows/prompt.md +17 -0
  159. devflow_engine/prompts/integration/write_workflows/write_workflows/prompt.md +11 -0
  160. devflow_engine/prompts/iterate/README.md +7 -0
  161. devflow_engine/prompts/iterate/coder/prompt.md +11 -0
  162. devflow_engine/prompts/iterate/framer/prompt.md +11 -0
  163. devflow_engine/prompts/iterate/iterator/prompt.md +13 -0
  164. devflow_engine/prompts/iterate/observer/prompt.md +11 -0
  165. devflow_engine/prompts/recovery/diagnosis/prompt.md +7 -0
  166. devflow_engine/prompts/recovery/execution/prompt.md +8 -0
  167. devflow_engine/prompts/recovery/execution_verification/prompt.md +7 -0
  168. devflow_engine/prompts/recovery/failure_investigation/prompt.md +10 -0
  169. devflow_engine/prompts/recovery/preflight_health_repo_repair/prompt.md +8 -0
  170. devflow_engine/prompts/recovery/remediation_execution/prompt.md +11 -0
  171. devflow_engine/prompts/recovery/root_cause_investigation/prompt.md +12 -0
  172. devflow_engine/prompts/scope_idea/doctrine/prompt.md +7 -0
  173. devflow_engine/prompts/source_doc_eval/document/prompt.md +6 -0
  174. devflow_engine/prompts/source_doc_eval/targeted_mutation/prompt.md +9 -0
  175. devflow_engine/prompts/source_doc_mutation/domain_entities/prompt.md +6 -0
  176. devflow_engine/prompts/source_doc_mutation/product_brief/prompt.md +6 -0
  177. devflow_engine/prompts/source_doc_mutation/project_doc_coherence/prompt.md +7 -0
  178. devflow_engine/prompts/source_doc_mutation/project_doc_render/prompt.md +9 -0
  179. devflow_engine/prompts/source_doc_mutation/source_doc_coherence/prompt.md +5 -0
  180. devflow_engine/prompts/source_doc_mutation/source_doc_enrichment_coherence/prompt.md +6 -0
  181. devflow_engine/prompts/source_doc_mutation/user_workflows/prompt.md +6 -0
  182. devflow_engine/prompts/source_scope/doctrine/prompt.md +10 -0
  183. devflow_engine/prompts/ui_grounding/doctrine/prompt.md +7 -0
  184. devflow_engine/recovery/__init__.py +3 -0
  185. devflow_engine/recovery/dag.py +2609 -0
  186. devflow_engine/recovery/models.py +220 -0
  187. devflow_engine/refactor.py +93 -0
  188. devflow_engine/registry/__init__.py +1 -0
  189. devflow_engine/registry/cards.py +238 -0
  190. devflow_engine/registry/domain_normalize.py +60 -0
  191. devflow_engine/registry/effects.py +65 -0
  192. devflow_engine/registry/enforce_report.py +150 -0
  193. devflow_engine/registry/module_cards_classify.py +164 -0
  194. devflow_engine/registry/module_cards_draft.py +184 -0
  195. devflow_engine/registry/module_cards_gate.py +59 -0
  196. devflow_engine/registry/packages.py +347 -0
  197. devflow_engine/registry/pathways.py +323 -0
  198. devflow_engine/review/__init__.py +11 -0
  199. devflow_engine/review/dag.py +588 -0
  200. devflow_engine/review/review_story.py +67 -0
  201. devflow_engine/scope_idea/__init__.py +3 -0
  202. devflow_engine/scope_idea/agentic.py +39 -0
  203. devflow_engine/scope_idea/dag.py +1069 -0
  204. devflow_engine/scope_idea/models.py +175 -0
  205. devflow_engine/skills/builtins/devflow/queue_failure_investigation/SKILL.md +112 -0
  206. devflow_engine/skills/builtins/devflow/queue_idea_to_story/SKILL.md +120 -0
  207. devflow_engine/skills/builtins/devflow/queue_integration/SKILL.md +105 -0
  208. devflow_engine/skills/builtins/devflow/queue_recovery/SKILL.md +108 -0
  209. devflow_engine/skills/builtins/devflow/queue_runtime_core/SKILL.md +155 -0
  210. devflow_engine/skills/builtins/devflow/queue_story_implementation/SKILL.md +122 -0
  211. devflow_engine/skills/builtins/devin/idea_to_story_handoff/SKILL.md +120 -0
  212. devflow_engine/skills/builtins/devin/ideation/SKILL.md +168 -0
  213. devflow_engine/skills/builtins/devin/ideation/state-and-phrasing-reference.md +18 -0
  214. devflow_engine/skills/builtins/devin/insight/SKILL.md +22 -0
  215. devflow_engine/skills/registry.example.yaml +42 -0
  216. devflow_engine/source_doc_assumptions.py +291 -0
  217. devflow_engine/source_doc_mutation_dag.py +1606 -0
  218. devflow_engine/source_doc_mutation_eval.py +417 -0
  219. devflow_engine/source_doc_mutation_worker.py +25 -0
  220. devflow_engine/source_docs_schema.py +207 -0
  221. devflow_engine/source_docs_updater.py +309 -0
  222. devflow_engine/source_scope/__init__.py +15 -0
  223. devflow_engine/source_scope/agentic.py +45 -0
  224. devflow_engine/source_scope/dag.py +1626 -0
  225. devflow_engine/source_scope/models.py +177 -0
  226. devflow_engine/stores/__init__.py +0 -0
  227. devflow_engine/stores/execution_store.py +3534 -0
  228. devflow_engine/story/__init__.py +0 -0
  229. devflow_engine/story/contracts.py +160 -0
  230. devflow_engine/story/discovery.py +47 -0
  231. devflow_engine/story/evidence.py +118 -0
  232. devflow_engine/story/hashing.py +27 -0
  233. devflow_engine/story/implemented_queue_purge.py +148 -0
  234. devflow_engine/story/indexer.py +105 -0
  235. devflow_engine/story/io.py +20 -0
  236. devflow_engine/story/markdown_contracts.py +298 -0
  237. devflow_engine/story/reconciliation.py +408 -0
  238. devflow_engine/story/validate_stories.py +149 -0
  239. devflow_engine/story/validate_tests_story.py +512 -0
  240. devflow_engine/story/validation.py +133 -0
  241. devflow_engine/ui_grounding/__init__.py +11 -0
  242. devflow_engine/ui_grounding/agentic.py +31 -0
  243. devflow_engine/ui_grounding/dag.py +874 -0
  244. devflow_engine/ui_grounding/models.py +224 -0
  245. devflow_engine/ui_grounding/pencil_bridge.py +247 -0
  246. devflow_engine/vendor/__init__.py +0 -0
  247. devflow_engine/vendor/datalumina_genai/__init__.py +11 -0
  248. devflow_engine/vendor/datalumina_genai/core/__init__.py +0 -0
  249. devflow_engine/vendor/datalumina_genai/core/exceptions.py +9 -0
  250. devflow_engine/vendor/datalumina_genai/core/nodes/__init__.py +0 -0
  251. devflow_engine/vendor/datalumina_genai/core/nodes/agent.py +48 -0
  252. devflow_engine/vendor/datalumina_genai/core/nodes/agent_streaming_node.py +26 -0
  253. devflow_engine/vendor/datalumina_genai/core/nodes/base.py +89 -0
  254. devflow_engine/vendor/datalumina_genai/core/nodes/concurrent.py +30 -0
  255. devflow_engine/vendor/datalumina_genai/core/nodes/router.py +69 -0
  256. devflow_engine/vendor/datalumina_genai/core/schema.py +72 -0
  257. devflow_engine/vendor/datalumina_genai/core/task.py +52 -0
  258. devflow_engine/vendor/datalumina_genai/core/validate.py +139 -0
  259. devflow_engine/vendor/datalumina_genai/core/workflow.py +200 -0
  260. devflow_engine/worker.py +1086 -0
  261. devflow_engine/worker_guard.py +233 -0
  262. devflow_engine-1.0.0.dist-info/METADATA +235 -0
  263. devflow_engine-1.0.0.dist-info/RECORD +393 -0
  264. devflow_engine-1.0.0.dist-info/WHEEL +4 -0
  265. devflow_engine-1.0.0.dist-info/entry_points.txt +3 -0
  266. devin/__init__.py +6 -0
  267. devin/dag.py +58 -0
  268. devin/dag_two_arm.py +138 -0
  269. devin/devin_chat_scenario_catalog.json +588 -0
  270. devin/devin_eval.py +677 -0
  271. devin/nodes/__init__.py +0 -0
  272. devin/nodes/ideation/__init__.py +0 -0
  273. devin/nodes/ideation/node.py +195 -0
  274. devin/nodes/ideation/playground.py +267 -0
  275. devin/nodes/ideation/prompt.md +65 -0
  276. devin/nodes/ideation/scenarios/continue_refinement.py +13 -0
  277. devin/nodes/ideation/scenarios/continue_refinement_evals.py +18 -0
  278. devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py +17 -0
  279. devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py +16 -0
  280. devin/nodes/ideation/scenarios/large_idea_split.py +4 -0
  281. devin/nodes/ideation/scenarios/large_idea_split_evals.py +17 -0
  282. devin/nodes/ideation/scenarios/source_documentation_added.py +4 -0
  283. devin/nodes/ideation/scenarios/source_documentation_added_evals.py +16 -0
  284. devin/nodes/ideation/scenarios/user_says_create_it.py +30 -0
  285. devin/nodes/ideation/scenarios/user_says_create_it_evals.py +23 -0
  286. devin/nodes/ideation/scenarios/vague_idea.py +16 -0
  287. devin/nodes/ideation/scenarios/vague_idea_evals.py +47 -0
  288. devin/nodes/ideation/tools.json +312 -0
  289. devin/nodes/insight/__init__.py +0 -0
  290. devin/nodes/insight/node.py +49 -0
  291. devin/nodes/insight/playground.py +154 -0
  292. devin/nodes/insight/prompt.md +61 -0
  293. devin/nodes/insight/scenarios/architecture_pattern_query.py +15 -0
  294. devin/nodes/insight/scenarios/architecture_pattern_query_evals.py +25 -0
  295. devin/nodes/insight/scenarios/codebase_exploration.py +15 -0
  296. devin/nodes/insight/scenarios/codebase_exploration_evals.py +23 -0
  297. devin/nodes/insight/scenarios/devin_ideation_routing.py +19 -0
  298. devin/nodes/insight/scenarios/devin_ideation_routing_evals.py +39 -0
  299. devin/nodes/insight/scenarios/devin_insight_routing.py +20 -0
  300. devin/nodes/insight/scenarios/devin_insight_routing_evals.py +40 -0
  301. devin/nodes/insight/scenarios/operational_debugging.py +15 -0
  302. devin/nodes/insight/scenarios/operational_debugging_evals.py +23 -0
  303. devin/nodes/insight/scenarios/operational_question.py +9 -0
  304. devin/nodes/insight/scenarios/operational_question_evals.py +8 -0
  305. devin/nodes/insight/scenarios/queue_status.py +15 -0
  306. devin/nodes/insight/scenarios/queue_status_evals.py +23 -0
  307. devin/nodes/insight/scenarios/source_doc_explanation.py +14 -0
  308. devin/nodes/insight/scenarios/source_doc_explanation_evals.py +21 -0
  309. devin/nodes/insight/scenarios/worker_state_check.py +15 -0
  310. devin/nodes/insight/scenarios/worker_state_check_evals.py +22 -0
  311. devin/nodes/insight/tools.json +126 -0
  312. devin/nodes/intake/__init__.py +0 -0
  313. devin/nodes/intake/node.py +27 -0
  314. devin/nodes/intake/playground.py +47 -0
  315. devin/nodes/intake/prompt.md +12 -0
  316. devin/nodes/intake/scenarios/ideation_routing.py +4 -0
  317. devin/nodes/intake/scenarios/ideation_routing_evals.py +5 -0
  318. devin/nodes/intake/scenarios/insight_routing.py +4 -0
  319. devin/nodes/intake/scenarios/insight_routing_evals.py +5 -0
  320. devin/nodes/iterate/README.md +44 -0
  321. devin/nodes/iterate/__init__.py +1 -0
  322. devin/nodes/iterate/_archived_design_stages/01-objectives-requirements.md +112 -0
  323. devin/nodes/iterate/_archived_design_stages/02-evals.md +131 -0
  324. devin/nodes/iterate/_archived_design_stages/03-tools-and-boundaries.md +110 -0
  325. devin/nodes/iterate/_archived_design_stages/04-harness-and-playground.md +32 -0
  326. devin/nodes/iterate/_archived_design_stages/05-prompt-deferred.md +11 -0
  327. devin/nodes/iterate/_archived_design_stages/coder_agent_design/01-objectives-requirements.md +20 -0
  328. devin/nodes/iterate/_archived_design_stages/coder_agent_design/02-evals.md +8 -0
  329. devin/nodes/iterate/_archived_design_stages/coder_agent_design/03-tools-and-boundaries.md +14 -0
  330. devin/nodes/iterate/_archived_design_stages/coder_agent_design/04-harness-and-playground.md +12 -0
  331. devin/nodes/iterate/_archived_design_stages/framer_agent_design/01-objectives-requirements.md +20 -0
  332. devin/nodes/iterate/_archived_design_stages/framer_agent_design/02-evals.md +8 -0
  333. devin/nodes/iterate/_archived_design_stages/framer_agent_design/03-tools-and-boundaries.md +13 -0
  334. devin/nodes/iterate/_archived_design_stages/framer_agent_design/04-harness-and-playground.md +12 -0
  335. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md +25 -0
  336. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/02-evals.md +9 -0
  337. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/03-tools-and-boundaries.md +14 -0
  338. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md +12 -0
  339. devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md +20 -0
  340. devin/nodes/iterate/_archived_design_stages/observer_agent_design/02-evals.md +8 -0
  341. devin/nodes/iterate/_archived_design_stages/observer_agent_design/03-tools-and-boundaries.md +14 -0
  342. devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md +13 -0
  343. devin/nodes/iterate/agent-roles.md +89 -0
  344. devin/nodes/iterate/agents/README.md +10 -0
  345. devin/nodes/iterate/artifacts.md +504 -0
  346. devin/nodes/iterate/contract.md +100 -0
  347. devin/nodes/iterate/eval-plan.md +74 -0
  348. devin/nodes/iterate/node.py +100 -0
  349. devin/nodes/iterate/pipeline/README.md +13 -0
  350. devin/nodes/iterate/playground-contract.md +76 -0
  351. devin/nodes/iterate/prompt.md +11 -0
  352. devin/nodes/iterate/scenarios/README.md +38 -0
  353. devin/nodes/iterate/scenarios/artifact-and-loop-scenarios.md +101 -0
  354. devin/nodes/iterate/scenarios/coder_artifact_alignment.py +32 -0
  355. devin/nodes/iterate/scenarios/coder_artifact_alignment_evals.py +45 -0
  356. devin/nodes/iterate/scenarios/coder_bounded_fix.py +27 -0
  357. devin/nodes/iterate/scenarios/coder_bounded_fix_evals.py +45 -0
  358. devin/nodes/iterate/scenarios/devin_iterate_routing.py +21 -0
  359. devin/nodes/iterate/scenarios/devin_iterate_routing_evals.py +36 -0
  360. devin/nodes/iterate/scenarios/framer_scope_boundary.py +25 -0
  361. devin/nodes/iterate/scenarios/framer_scope_boundary_evals.py +57 -0
  362. devin/nodes/iterate/scenarios/framer_task_framing.py +25 -0
  363. devin/nodes/iterate/scenarios/framer_task_framing_evals.py +58 -0
  364. devin/nodes/iterate/scenarios/iterate_error_fix.py +21 -0
  365. devin/nodes/iterate/scenarios/iterate_error_fix_evals.py +39 -0
  366. devin/nodes/iterate/scenarios/iterate_quick_change.py +21 -0
  367. devin/nodes/iterate/scenarios/iterate_quick_change_evals.py +35 -0
  368. devin/nodes/iterate/scenarios/iterate_to_idea_promotion.py +23 -0
  369. devin/nodes/iterate/scenarios/iterate_to_idea_promotion_evals.py +53 -0
  370. devin/nodes/iterate/scenarios/iterate_to_insight_reroute.py +23 -0
  371. devin/nodes/iterate/scenarios/iterate_to_insight_reroute_evals.py +53 -0
  372. devin/nodes/iterate/scenarios/observer_evidence_seam.py +28 -0
  373. devin/nodes/iterate/scenarios/observer_evidence_seam_evals.py +55 -0
  374. devin/nodes/iterate/scenarios/observer_repro_creation.py +28 -0
  375. devin/nodes/iterate/scenarios/observer_repro_creation_evals.py +45 -0
  376. devin/nodes/iterate/scenarios/routing-matrix.md +45 -0
  377. devin/nodes/shared/__init__.py +0 -0
  378. devin/nodes/shared/filemaker_expert.md +80 -0
  379. devin/nodes/shared/filemaker_expert.py +354 -0
  380. devin/nodes/shared/filemaker_expert_eval/runner.py +176 -0
  381. devin/nodes/shared/filemaker_expert_eval/scenarios.json +65 -0
  382. devin/nodes/shared/goldilocks_advisor_eval/runner.py +214 -0
  383. devin/nodes/shared/goldilocks_advisor_eval/scenarios.json +58 -0
  384. devin/nodes/shared/helpers.py +156 -0
  385. devin/nodes/shared/idea_compliance_advisor_eval/runner.py +252 -0
  386. devin/nodes/shared/idea_compliance_advisor_eval/scenarios.json +75 -0
  387. devin/nodes/shared/models.py +44 -0
  388. devin/nodes/shared/post.py +40 -0
  389. devin/nodes/shared/router.py +107 -0
  390. devin/nodes/shared/tools.py +191 -0
  391. devin/shared/devin-chat-rubric.md +237 -0
  392. devin/shared/devin-chat-scenario-suite.md +90 -0
  393. devin/shared/eval_doctrine.md +9 -0
@@ -0,0 +1,3 @@
1
# Version string of this devflow_engine release.
__version__ = "1.0.0"

# Restrict ``from devflow_engine import *`` to the version attribute.
__all__ = ["__version__"]
@@ -0,0 +1,100 @@
1
+ from __future__ import annotations
2
+
3
+ from importlib.resources import files
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ _REPO_ROOT = Path(__file__).resolve().parents[2]
8
+ _PROMPTS_ROOT = _REPO_ROOT / "prompts"
9
+
10
+ _PROMPT_LAYOUT: dict[str, tuple[str, str]] = {
11
+ "idea_response_doctrine": ("idea", "response_doctrine"),
12
+ "idea_api_ideation_agent": ("idea", "api_ideation_agent"),
13
+ "idea_api_insight_agent": ("idea", "api_insight_agent"),
14
+ "idea_generic_response_agent": ("devin", "generic"),
15
+ "idea_insight_response_agent": ("devin", "insight"),
16
+ "idea_ideation_response_agent": ("devin", "ideation"),
17
+ "idea_devin_ideation_agent_loop": ("devin", "ideation_loop"),
18
+ "source_scope_doctrine": ("source_scope", "doctrine"),
19
+ "scope_idea_doctrine": ("scope_idea", "doctrine"),
20
+ "ui_grounding_doctrine": ("ui_grounding", "doctrine"),
21
+ "implementation_dependency_assessment": ("implementation", "dependency_assessment"),
22
+ "implementation_story_planning": ("implementation", "story_planning"),
23
+ "implementation_test_design": ("implementation", "test_design"),
24
+ "implementation_setupdoc": ("implementation", "setupdoc"),
25
+ "implementation_red": ("implementation", "red"),
26
+ "implementation_redreview": ("implementation", "redreview"),
27
+ "implementation_redreview_repair": ("implementation", "redreview_repair"),
28
+ "recovery_diagnosis": ("recovery", "diagnosis"),
29
+ "recovery_preflight_health_repo_repair": ("recovery", "preflight_health_repo_repair"),
30
+ "recovery_failure_investigation": ("recovery", "failure_investigation"),
31
+ "recovery_root_cause_investigation": ("recovery", "root_cause_investigation"),
32
+ "recovery_remediation_execution": ("recovery", "remediation_execution"),
33
+ "recovery_execution": ("recovery", "execution"),
34
+ "recovery_execution_verification": ("recovery", "execution_verification"),
35
+ "source_doc_mutation_project_doc_render": ("source_doc_mutation", "project_doc_render"),
36
+ "source_doc_mutation_product_brief": ("source_doc_mutation", "product_brief"),
37
+ "source_doc_mutation_user_workflows": ("source_doc_mutation", "user_workflows"),
38
+ "source_doc_mutation_domain_entities": ("source_doc_mutation", "domain_entities"),
39
+ "source_doc_mutation_source_doc_coherence": ("source_doc_mutation", "source_doc_coherence"),
40
+ "source_doc_mutation_source_doc_enrichment_coherence": ("source_doc_mutation", "source_doc_enrichment_coherence"),
41
+ "source_doc_mutation_project_doc_coherence": ("source_doc_mutation", "project_doc_coherence"),
42
+ "devin_eval_assessment": ("devin_eval", "assessment"),
43
+ "source_doc_eval_document": ("source_doc_eval", "document"),
44
+ "source_doc_eval_targeted_mutation": ("source_doc_eval", "targeted_mutation"),
45
+ }
46
+
47
+
48
def agentic_prompt_path(prompt_name: str) -> Path:
    """Resolve *prompt_name* to the path of its prompt markdown file.

    A value ending in ``.md`` (case-insensitive) is treated as a literal path:
    ``~`` is expanded, an absolute path is returned as given, and a relative
    path is resolved against ``_REPO_ROOT``.

    Any other value is looked up in ``_PROMPT_LAYOUT``. The checkout-local file
    under ``_PROMPTS_ROOT`` is returned when it exists; otherwise the path of
    the copy shipped inside the package is returned.

    Raises:
        KeyError: if *prompt_name* is neither a ``.md`` path nor a known key.
    """
    candidate = Path(prompt_name).expanduser()
    if candidate.suffix.lower() == ".md":
        return candidate if candidate.is_absolute() else (_REPO_ROOT / candidate).resolve()

    try:
        group, agent_name = _PROMPT_LAYOUT[prompt_name]
    except KeyError as exc:
        raise KeyError(f"Unknown agentic prompt mapping: {prompt_name}") from exc

    relative_parts = (group, agent_name, "prompt.md")
    checkout_path = _PROMPTS_ROOT.joinpath(*relative_parts)
    if not checkout_path.exists():
        # No local prompt tree: point at the resource bundled with the package.
        return Path(str(_packaged_prompts_root().joinpath(*relative_parts)))
    return checkout_path
62
+
63
+
64
def _packaged_prompts_root() -> Any:
    """Return the ``prompts`` resource directory of the ``devflow_engine`` package."""
    package_root = files("devflow_engine")
    return package_root.joinpath("prompts")
66
+
67
+
68
+ def _prompt_text(path: Path) -> str:
69
+ if path.exists():
70
+ return path.read_text(encoding="utf-8").strip()
71
+ package_path = _packaged_prompts_root().joinpath(*path.parts[-3:])
72
+ return package_path.read_text(encoding="utf-8").strip()
73
+
74
+
75
def load_agentic_prompt_markdown(prompt_name: str) -> str:
    """Return the stripped markdown text of the prompt identified by *prompt_name*."""
    prompt_path = agentic_prompt_path(prompt_name)
    return _prompt_text(prompt_path)
77
+
78
+
79
def load_agentic_prompt_lines(prompt_name: str) -> list[str]:
    """Load a prompt and return its parsed content lines.

    Raises:
        ValueError: if parsing the prompt markdown yields no lines.
    """
    markdown = load_agentic_prompt_markdown(prompt_name)
    parsed = _parse_prompt_markdown(markdown)
    if parsed:
        return parsed
    raise ValueError(f"Agentic prompt markdown is empty: {prompt_name}")
84
+
85
+
86
def load_agentic_prompt_text(prompt_name: str) -> str:
    """Return the parsed prompt lines joined into one newline-separated string."""
    lines = load_agentic_prompt_lines(prompt_name)
    return "\n".join(lines)
88
+
89
+
90
+ def _parse_prompt_markdown(markdown: str) -> list[str]:
91
+ items: list[str] = []
92
+ for raw_line in markdown.splitlines():
93
+ line = raw_line.strip()
94
+ if not line or line.startswith("#"):
95
+ continue
96
+ if line.startswith("- "):
97
+ items.append(line[2:].strip())
98
+ continue
99
+ items.append(line)
100
+ return items
@@ -0,0 +1,398 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ from json import JSONDecodeError
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from pydantic import BaseModel, ValidationError
10
+
11
+ from .idea.traditional_stories import _extract_json, _global_devflow_dir, _read_toml
12
+ from .llm.invoke import (
13
+ LlmInvocationRequest,
14
+ _apply_model_flag,
15
+ _load_cli_config_from_cfg,
16
+ invoke_llm,
17
+ normalize_llm_cli_base,
18
+ )
19
+
20
+
21
+ def _resolve_strength_model(
22
+ cfg: dict[str, Any],
23
+ base_cmd: str,
24
+ *,
25
+ strength_override: str,
26
+ ) -> str | None:
27
+ """Resolve the CLI model for a given strength tier from the TOML config.
28
+
29
+ Looks for ``llm_cli_model_<strength>`` in cfg (e.g. llm_cli_model_strong).
30
+ Returns None if the key is absent or empty — caller must decide how to
31
+ handle the missing value (e.g. apply tier cliProfile separately).
32
+ Strips incidental whitespace from the resolved value.
33
+ """
34
+ key = f"llm_cli_model_{strength_override}"
35
+ raw = str(cfg.get(key) or "").strip()
36
+ return raw if raw else None
37
+
38
+ # ── Prompt size limits ───────────────────────────────────────────────────────
39
+ _MAX_PROMPT_CHARS = 120_000
40
+ _MAX_STRING_CHARS = 8_000
41
+ _MAX_LIST_ITEMS = 40
42
+ _MAX_DICT_ITEMS = 80
43
+
44
+
45
+ # ── Prompt shrinking ─────────────────────────────────────────────────────────
46
+
47
+ def _truncate_string(value: str, *, limit: int = _MAX_STRING_CHARS) -> str:
48
+ if len(value) <= limit:
49
+ return value
50
+ half = limit // 2
51
+ return value[:half] + f"...[{len(value) - limit} chars truncated]..." + value[-half:]
52
+
53
+
54
def _shrink_for_prompt(value: Any) -> Any:
    """Return a size-bounded copy of *value* for inclusion in a prompt.

    Strings go through ``_truncate_string``.  Dicts keep their first
    ``_MAX_DICT_ITEMS`` entries and lists their first ``_MAX_LIST_ITEMS``
    elements; the kept values are shrunk recursively.  Any other type is
    returned unchanged.  Entries beyond the caps are dropped without a marker.
    """
    if isinstance(value, str):
        return _truncate_string(value)
    if isinstance(value, dict):
        kept_entries = list(value.items())[:_MAX_DICT_ITEMS]
        return {key: _shrink_for_prompt(entry) for key, entry in kept_entries}
    if isinstance(value, list):
        return [_shrink_for_prompt(element) for element in value[:_MAX_LIST_ITEMS]]
    return value
68
+
69
+
70
+ # ── Prompt construction ──────────────────────────────────────────────────────
71
+
72
+ def _build_prompt(
73
+ *,
74
+ stage_name: str,
75
+ context_payload: dict[str, Any],
76
+ guidance: list[str],
77
+ output_model: type[BaseModel],
78
+ compact: bool = False,
79
+ ) -> dict[str, Any]:
80
+ ctx = _shrink_for_prompt(context_payload) if compact else context_payload
81
+ return {
82
+ "task": stage_name,
83
+ "instructions": guidance,
84
+ "context": ctx,
85
+ "output_schema": output_model.model_json_schema(),
86
+ "return_format": "json_only",
87
+ }
88
+
89
+
90
+ def _persist_large_context_artifact(
91
+ *,
92
+ repo_root: Path,
93
+ stage_name: str,
94
+ context_payload: dict[str, Any],
95
+ ) -> Path:
96
+ debug_root = repo_root / ".devflow" / "agent_debug" / "context_artifacts"
97
+ debug_root.mkdir(parents=True, exist_ok=True)
98
+ safe = re.sub(r"[^A-Za-z0-9_.-]+", "_", stage_name) or "stage"
99
+ artifact_path = debug_root / f"{safe}_context.json"
100
+ artifact_path.write_text(
101
+ json.dumps(context_payload, indent=2, sort_keys=True), encoding="utf-8"
102
+ )
103
+ return artifact_path
104
+
105
+
106
+ def _build_artifact_reference_prompt(
107
+ *,
108
+ stage_name: str,
109
+ output_model: type[BaseModel],
110
+ guidance: list[str],
111
+ artifact_path: Path,
112
+ ) -> dict[str, Any]:
113
+ return {
114
+ "task": stage_name,
115
+ "instructions": guidance,
116
+ "context_artifact_path": str(artifact_path),
117
+ "context_artifact_note": (
118
+ "The full context was persisted to disk because the prompt exceeded size limits. "
119
+ "Read the file at the path above for detailed context."
120
+ ),
121
+ "output_schema": output_model.model_json_schema(),
122
+ "return_format": "json_only",
123
+ }
124
+
125
+
126
+ # ── JSON extraction and repair ───────────────────────────────────────────────
127
+
128
def _extract_json_hardened(stdout: str) -> str | None:
    """Hardened JSON extraction with a brace-scanning fallback.

    The standard ``_extract_json`` extractor is tried first and its result is
    returned whenever it finds something.  Otherwise *stdout* is scanned left
    to right for ``{`` characters and each one is decoded with
    ``JSONDecoder.raw_decode``, which parses a single value and ignores
    whatever follows it.  Text after the object (a closing code fence, trailing
    prose, log lines) therefore no longer defeats the fallback.

    After a successful decode the scan resumes past the end of that object, so
    nested objects are not reported separately.

    Returns:
        The text of the last top-level JSON object found, or ``None`` when the
        output contains no decodable object.
    """
    result = _extract_json(stdout)
    if result is not None:
        return result

    decoder = json.JSONDecoder()
    last_object: str | None = None
    cursor = stdout.find("{")
    while cursor != -1:
        try:
            _, end = decoder.raw_decode(stdout, cursor)
        except JSONDecodeError:
            # Not the start of a valid object; try the next opening brace.
            cursor = stdout.find("{", cursor + 1)
            continue
        last_object = stdout[cursor:end]
        # Skip the object just decoded so its inner braces are not rescanned.
        cursor = stdout.find("{", end)
    return last_object
143
+
144
+
145
+ def _persist_non_json_response(
146
+ *,
147
+ repo_root: Path,
148
+ stage_name: str,
149
+ stdout: str,
150
+ stderr: str,
151
+ ) -> Path:
152
+ debug_root = repo_root / ".devflow" / "agent_debug" / "non_json_responses"
153
+ debug_root.mkdir(parents=True, exist_ok=True)
154
+ safe = re.sub(r"[^A-Za-z0-9_.-]+", "_", stage_name) or "stage"
155
+ artifact_path = debug_root / f"{safe}_response.txt"
156
+ artifact_path.write_text(
157
+ f"STDOUT:\n{stdout}\n\nSTDERR:\n{stderr}", encoding="utf-8"
158
+ )
159
+ return artifact_path
160
+
161
+
162
def _repair_json_decode_error(
    *,
    repo_root: Path,
    base_cmd: str,
    delivery: str,
    stage_name: str,
    output_model: type[BaseModel],
    raw_candidate: str,
    decode_error: JSONDecodeError,
    timeout_seconds: int | None,
) -> tuple[BaseModel, dict[str, Any], Any] | None:
    """Re-prompt the LLM to repair a malformed JSON response.

    A repair prompt is built from the first 4000 characters of *raw_candidate*,
    the first 500 characters of the decode error, and the output model's JSON
    schema.  It is sent once, at ``"medium"`` strength, through ``invoke_llm``
    using the given *base_cmd* and *delivery*.

    Returns:
        ``(validated_model, repair_prompt, llm_result)`` when the repaired
        output parses and validates against *output_model*; ``None`` when the
        call reports failure, no JSON can be extracted, or anything raises
        during the attempt.
    """
    repair_task = f"{stage_name}_json_repair"
    error_excerpt = str(decode_error)[:500]
    repair_prompt: dict[str, Any] = {
        "task": repair_task,
        "instructions": [
            "Return JSON only. No markdown. No prose outside JSON.",
            "The previous response contained malformed JSON. Repair it and return valid JSON only.",
            f"JSON parse error: {error_excerpt}",
        ],
        "malformed_json": raw_candidate[:4000],
        "output_schema": output_model.model_json_schema(),
        "return_format": "json_only",
    }
    try:
        request = LlmInvocationRequest(
            purpose=repair_task,
            repo_root=repo_root,
            prompt=json.dumps(repair_prompt, indent=2, sort_keys=True),
            delivery_model="streaming",
            interaction_model="agentic",
            timeout_seconds=timeout_seconds,
            base_cmd=base_cmd,
            delivery=delivery,
            strength="medium",
        )
        outcome = invoke_llm(request)
        repaired_text = _extract_json_hardened(outcome.stdout) if outcome.ok else None
        if repaired_text is None:
            return None
        repaired_model = output_model.model_validate(json.loads(repaired_text))
        return repaired_model, repair_prompt, outcome
    except Exception:
        # Repair is best-effort: any failure means "not repaired"; the caller
        # raises its own error in that case.
        return None
207
+
208
+
209
def _repair_validation_error(
    *,
    repo_root: Path,
    base_cmd: str,
    delivery: str,
    stage_name: str,
    output_model: type[BaseModel],
    original_prompt: dict[str, Any],
    raw_json: str,
    validation_error: ValidationError,
    timeout_seconds: int | None,
) -> tuple[BaseModel, dict[str, Any], Any] | None:
    """Re-prompt the LLM to repair a response that failed Pydantic validation.

    A repair prompt is built from the first 4000 characters of *raw_json*, the
    first 1000 characters of the validation error, and the output model's JSON
    schema.  It is sent once, at ``"medium"`` strength, through ``invoke_llm``
    using the given *base_cmd* and *delivery*.  *original_prompt* is accepted
    but not included in the repair prompt.

    Returns:
        ``(validated_model, repair_prompt, llm_result)`` when the repaired
        output parses and validates against *output_model*; ``None`` when the
        call reports failure, no JSON can be extracted, or anything raises
        during the attempt.
    """
    repair_task = f"{stage_name}_validation_repair"
    error_excerpt = str(validation_error)[:1000]
    repair_prompt: dict[str, Any] = {
        "task": repair_task,
        "instructions": [
            "Return JSON only. No markdown. No prose outside JSON.",
            "The previous response failed schema validation. Fix the JSON to match the required schema.",
            f"Validation error: {error_excerpt}",
        ],
        "invalid_json": raw_json[:4000],
        "output_schema": output_model.model_json_schema(),
        "return_format": "json_only",
    }
    try:
        request = LlmInvocationRequest(
            purpose=repair_task,
            repo_root=repo_root,
            prompt=json.dumps(repair_prompt, indent=2, sort_keys=True),
            delivery_model="streaming",
            interaction_model="agentic",
            timeout_seconds=timeout_seconds,
            base_cmd=base_cmd,
            delivery=delivery,
            strength="medium",
        )
        outcome = invoke_llm(request)
        repaired_text = _extract_json_hardened(outcome.stdout) if outcome.ok else None
        if repaired_text is None:
            return None
        repaired_model = output_model.model_validate(json.loads(repaired_text))
        return repaired_model, repair_prompt, outcome
    except Exception:
        # Repair is best-effort: any failure means "not repaired"; the caller
        # raises its own error in that case.
        return None
255
+
256
+
257
class AgentRunEnvelope(BaseModel):
    """Record of one agent exchange, returned by ``run_agent_step`` with the model."""

    # Prompt payload that produced the accepted response (the repair prompt
    # when a repair round was needed).
    prompt: dict[str, Any]
    # ``model_dump()`` of the validated output model.
    response: dict[str, Any]
    # Raw standard output of the LLM invocation that produced the response.
    raw_stdout: str
    # Raw standard error of that invocation; defaults to empty.
    raw_stderr: str = ""
262
+
263
+
264
def _load_llm_cli_config() -> tuple[str, str]:
    """Compatibility wrapper kept in agentic_runtime for existing callers/tests.

    Reads ``config.toml`` from the global devflow directory and returns the
    ``(base_cmd, delivery)`` pair from ``_load_cli_config_from_cfg``; the first
    element of that helper's result is discarded.
    """
    config_file = _global_devflow_dir() / "config.toml"
    cli_settings = _load_cli_config_from_cfg(_read_toml(config_file))
    _, base_cmd, delivery = cli_settings
    return base_cmd, delivery
269
+
270
+
271
+ # ── Core agent step ──────────────────────────────────────────────────────────
272
+
273
def run_agent_step(
    *,
    repo_root: Path,
    stage_name: str,
    output_model: type[BaseModel],
    context_payload: dict[str, Any],
    guidance: list[str],
    timeout_seconds: int | None = None,
    strength: str | None = None,
) -> tuple[BaseModel, AgentRunEnvelope]:
    """Execute one GenAI agent step with prompt-size management and output repair.

    Prompt sizing: the prompt is serialized exactly as it will be sent
    (``indent=2, sort_keys=True``) and that text is compared against
    ``_MAX_PROMPT_CHARS``.  If it is too large, a compact prompt with a shrunken
    context is tried; if that is still too large, the full context is written
    to disk and the prompt references the file instead.

    Output handling: JSON is extracted from the LLM's stdout, parsed, and
    validated against *output_model*.  A parse failure or a validation failure
    each triggers a single repair attempt.

    Args:
        repo_root: Repository root; passed to the LLM invocation and used as
            the base for debug artifacts.
        stage_name: Task name, also used as the invocation purpose.
        output_model: Pydantic model the response must validate against.
        context_payload: Context embedded in (or referenced by) the prompt.
        guidance: Instruction lines for the prompt.
        timeout_seconds: Forwarded to the LLM invocation and repair attempts.
        strength: Forwarded to the primary LLM invocation.

    Returns:
        ``(model, envelope)``.  After a successful repair the envelope carries
        the repair prompt and the repair call's raw output.

    Raises:
        RuntimeError: the invocation reports failure, no JSON can be located in
            the output (the raw output is persisted first), or parsing /
            validation fails and the repair attempt does not succeed.
    """

    def _render(payload: dict[str, Any]) -> str:
        # The exact text handed to the LLM. Size checks measure this form so
        # the indentation overhead counts against the limit.
        return json.dumps(payload, indent=2, sort_keys=True)

    def _from_repair(
        repaired: tuple[BaseModel, dict[str, Any], Any],
    ) -> tuple[BaseModel, AgentRunEnvelope]:
        # Package a successful repair round as the step's result.
        repaired_model, repair_prompt, repair_result = repaired
        return repaired_model, AgentRunEnvelope(
            prompt=repair_prompt,
            response=repaired_model.model_dump(),
            raw_stdout=repair_result.stdout,
            raw_stderr=repair_result.stderr,
        )

    # Build prompt — shrink if needed, fall back to artifact reference for huge contexts.
    prompt = _build_prompt(
        stage_name=stage_name,
        context_payload=context_payload,
        guidance=guidance,
        output_model=output_model,
    )
    prompt_text = _render(prompt)
    if len(prompt_text) > _MAX_PROMPT_CHARS:
        compact_prompt = _build_prompt(
            stage_name=stage_name,
            context_payload=context_payload,
            guidance=guidance,
            output_model=output_model,
            compact=True,
        )
        compact_text = _render(compact_prompt)
        if len(compact_text) <= _MAX_PROMPT_CHARS:
            prompt, prompt_text = compact_prompt, compact_text
        else:
            artifact_path = _persist_large_context_artifact(
                repo_root=repo_root,
                stage_name=stage_name,
                context_payload=context_payload,
            )
            prompt = _build_artifact_reference_prompt(
                stage_name=stage_name,
                output_model=output_model,
                guidance=guidance,
                artifact_path=artifact_path,
            )
            prompt_text = _render(prompt)

    result = invoke_llm(
        LlmInvocationRequest(
            purpose=stage_name,
            repo_root=repo_root,
            prompt=prompt_text,
            prompt_payload=prompt,
            delivery_model="streaming",
            interaction_model="agentic",
            response_contract="json_only",
            timeout_seconds=timeout_seconds,
            strength=strength,
        )
    )

    if not result.ok:
        raise RuntimeError(result.stderr or result.stdout or f"agent step failed: {stage_name}")

    raw_json = _extract_json_hardened(result.stdout)
    if raw_json is None:
        # Keep the raw output on disk so the failure can be diagnosed later.
        artifact_path = _persist_non_json_response(
            repo_root=repo_root,
            stage_name=stage_name,
            stdout=result.stdout,
            stderr=result.stderr,
        )
        raise RuntimeError(
            f"Failed to locate JSON in agent output for {stage_name}. "
            f"Non-JSON response persisted to {artifact_path}"
        )

    # Parse with repair fallback.
    try:
        parsed = json.loads(raw_json)
    except JSONDecodeError as exc:
        repaired = _repair_json_decode_error(
            repo_root=repo_root,
            base_cmd=result.base_cmd,
            delivery=result.delivery,
            stage_name=stage_name,
            output_model=output_model,
            raw_candidate=raw_json,
            decode_error=exc,
            timeout_seconds=timeout_seconds,
        )
        if repaired is not None:
            return _from_repair(repaired)
        raise RuntimeError(f"JSON decode error in {stage_name} and repair failed: {exc}") from exc

    # Validate with repair fallback.
    try:
        model = output_model.model_validate(parsed)
    except ValidationError as exc:
        repaired = _repair_validation_error(
            repo_root=repo_root,
            base_cmd=result.base_cmd,
            delivery=result.delivery,
            stage_name=stage_name,
            output_model=output_model,
            original_prompt=prompt,
            raw_json=raw_json,
            validation_error=exc,
            timeout_seconds=timeout_seconds,
        )
        if repaired is not None:
            return _from_repair(repaired)
        raise RuntimeError(f"Validation error in {stage_name} and repair failed: {exc}") from exc

    return model, AgentRunEnvelope(
        prompt=prompt,
        response=model.model_dump(),
        raw_stdout=result.stdout,
        raw_stderr=result.stderr,
    )