devflow-engine 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. devflow_engine/__init__.py +3 -0
  2. devflow_engine/agentic_prompts.py +100 -0
  3. devflow_engine/agentic_runtime.py +398 -0
  4. devflow_engine/api_key_flow_harness.py +539 -0
  5. devflow_engine/api_keys.py +357 -0
  6. devflow_engine/bootstrap/__init__.py +2 -0
  7. devflow_engine/bootstrap/provision_from_template.py +84 -0
  8. devflow_engine/cli/__init__.py +0 -0
  9. devflow_engine/cli/app.py +7270 -0
  10. devflow_engine/core/__init__.py +0 -0
  11. devflow_engine/core/config.py +86 -0
  12. devflow_engine/core/logging.py +29 -0
  13. devflow_engine/core/paths.py +45 -0
  14. devflow_engine/core/toml_kv.py +33 -0
  15. devflow_engine/devflow_event_worker.py +1292 -0
  16. devflow_engine/devflow_state.py +201 -0
  17. devflow_engine/devin2/__init__.py +9 -0
  18. devflow_engine/devin2/agent_definition.py +120 -0
  19. devflow_engine/devin2/pi_runner.py +204 -0
  20. devflow_engine/devin_orchestration.py +69 -0
  21. devflow_engine/docs/prompts/anti-patterns.md +42 -0
  22. devflow_engine/docs/prompts/devin-agent-prompt.md +55 -0
  23. devflow_engine/docs/prompts/devin2-agent-prompt.md +81 -0
  24. devflow_engine/docs/prompts/examples/devin-vapi-clone-reference-exchange.json +85 -0
  25. devflow_engine/doctor/__init__.py +2 -0
  26. devflow_engine/doctor/triage.py +140 -0
  27. devflow_engine/error/__init__.py +0 -0
  28. devflow_engine/error/remediation.py +21 -0
  29. devflow_engine/errors/error_solver_dag.py +522 -0
  30. devflow_engine/errors/runtime_observability.py +67 -0
  31. devflow_engine/idea/__init__.py +4 -0
  32. devflow_engine/idea/actors.py +481 -0
  33. devflow_engine/idea/agentic.py +465 -0
  34. devflow_engine/idea/analyze.py +93 -0
  35. devflow_engine/idea/devin_chat_dag.py +1 -0
  36. devflow_engine/idea/diff.py +99 -0
  37. devflow_engine/idea/drafts.py +446 -0
  38. devflow_engine/idea/idea_creation_dag.py +643 -0
  39. devflow_engine/idea/ideation_enrichment.py +355 -0
  40. devflow_engine/idea/ideation_enrichment_worker.py +19 -0
  41. devflow_engine/idea/paths.py +28 -0
  42. devflow_engine/idea/promote.py +53 -0
  43. devflow_engine/idea/redaction.py +27 -0
  44. devflow_engine/idea/repo_tools.py +1277 -0
  45. devflow_engine/idea/response_mode.py +30 -0
  46. devflow_engine/idea/story_pipeline.py +1585 -0
  47. devflow_engine/idea/sufficiency.py +376 -0
  48. devflow_engine/idea/traditional_stories.py +1257 -0
  49. devflow_engine/implementation/__init__.py +0 -0
  50. devflow_engine/implementation/alembic_preflight.py +700 -0
  51. devflow_engine/implementation/dag.py +8450 -0
  52. devflow_engine/implementation/green_gate.py +93 -0
  53. devflow_engine/implementation/prompts.py +108 -0
  54. devflow_engine/implementation/test_runtime.py +623 -0
  55. devflow_engine/integration/__init__.py +19 -0
  56. devflow_engine/integration/agentic.py +66 -0
  57. devflow_engine/integration/dag.py +3539 -0
  58. devflow_engine/integration/prompts.py +114 -0
  59. devflow_engine/integration/supabase_schema.sql +31 -0
  60. devflow_engine/integration/supabase_sync.py +177 -0
  61. devflow_engine/llm/__init__.py +1 -0
  62. devflow_engine/llm/cli_one_shot.py +84 -0
  63. devflow_engine/llm/cli_stream.py +371 -0
  64. devflow_engine/llm/execution_context.py +26 -0
  65. devflow_engine/llm/invoke.py +1322 -0
  66. devflow_engine/llm/provider_api.py +304 -0
  67. devflow_engine/llm/repo_knowledge.py +588 -0
  68. devflow_engine/llm_primitives.py +315 -0
  69. devflow_engine/orchestration.py +62 -0
  70. devflow_engine/planning/__init__.py +0 -0
  71. devflow_engine/planning/analyze_repo.py +92 -0
  72. devflow_engine/planning/render_drafts.py +133 -0
  73. devflow_engine/playground/__init__.py +0 -0
  74. devflow_engine/playground/hooks.py +26 -0
  75. devflow_engine/playwright_workflow/__init__.py +5 -0
  76. devflow_engine/playwright_workflow/dag.py +1317 -0
  77. devflow_engine/process/__init__.py +5 -0
  78. devflow_engine/process/dag.py +59 -0
  79. devflow_engine/project_registration/__init__.py +3 -0
  80. devflow_engine/project_registration/dag.py +1581 -0
  81. devflow_engine/project_registry.py +109 -0
  82. devflow_engine/prompts/devin/generic/prompt.md +6 -0
  83. devflow_engine/prompts/devin/ideation/prompt.md +263 -0
  84. devflow_engine/prompts/devin/ideation/scenarios.md +5 -0
  85. devflow_engine/prompts/devin/ideation_loop/prompt.md +6 -0
  86. devflow_engine/prompts/devin/insight/prompt.md +11 -0
  87. devflow_engine/prompts/devin/insight/scenarios.md +5 -0
  88. devflow_engine/prompts/devin/intake/prompt.md +15 -0
  89. devflow_engine/prompts/devin/iterate/prompt.md +12 -0
  90. devflow_engine/prompts/devin/shared/eval_doctrine.md +9 -0
  91. devflow_engine/prompts/devin/shared/principles.md +246 -0
  92. devflow_engine/prompts/devin_eval/assessment/prompt.md +18 -0
  93. devflow_engine/prompts/idea/api_ideation_agent/prompt.md +8 -0
  94. devflow_engine/prompts/idea/api_insight_agent/prompt.md +8 -0
  95. devflow_engine/prompts/idea/response_doctrine/prompt.md +18 -0
  96. devflow_engine/prompts/implementation/dependency_assessment/prompt.md +12 -0
  97. devflow_engine/prompts/implementation/green/green/prompt.md +11 -0
  98. devflow_engine/prompts/implementation/green/node_config/prompt.md +3 -0
  99. devflow_engine/prompts/implementation/green_review/outcome_review/prompt.md +5 -0
  100. devflow_engine/prompts/implementation/green_review/prior_run_review/prompt.md +5 -0
  101. devflow_engine/prompts/implementation/red/prompt.md +27 -0
  102. devflow_engine/prompts/implementation/redreview/prompt.md +23 -0
  103. devflow_engine/prompts/implementation/redreview_repair/prompt.md +16 -0
  104. devflow_engine/prompts/implementation/setupdoc/prompt.md +10 -0
  105. devflow_engine/prompts/implementation/story_planning/prompt.md +13 -0
  106. devflow_engine/prompts/implementation/test_design/prompt.md +27 -0
  107. devflow_engine/prompts/integration/README.md +185 -0
  108. devflow_engine/prompts/integration/green/example.md +67 -0
  109. devflow_engine/prompts/integration/green/green/prompt.md +10 -0
  110. devflow_engine/prompts/integration/green/node_config/prompt.md +42 -0
  111. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/green/prompt.md +15 -0
  112. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/node_config/prompt.md +42 -0
  113. devflow_engine/prompts/integration/green_enrich/example.md +79 -0
  114. devflow_engine/prompts/integration/green_enrich/green_enrich/prompt.md +9 -0
  115. devflow_engine/prompts/integration/green_enrich/node_config/prompt.md +41 -0
  116. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/green_enrich/prompt.md +14 -0
  117. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  118. devflow_engine/prompts/integration/red/code_repair/prompt.md +12 -0
  119. devflow_engine/prompts/integration/red/example.md +152 -0
  120. devflow_engine/prompts/integration/red/node_config/prompt.md +86 -0
  121. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  122. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/node_config/prompt.md +84 -0
  123. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red/prompt.md +16 -0
  124. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red_repair/prompt.md +15 -0
  125. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  126. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/node_config/prompt.md +84 -0
  127. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/red_repair/prompt.md +11 -0
  128. devflow_engine/prompts/integration/red/red/prompt.md +11 -0
  129. devflow_engine/prompts/integration/red/red_repair/prompt.md +12 -0
  130. devflow_engine/prompts/integration/red_review/example.md +71 -0
  131. devflow_engine/prompts/integration/red_review/node_config/prompt.md +41 -0
  132. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  133. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/red_review/prompt.md +15 -0
  134. devflow_engine/prompts/integration/red_review/red_review/prompt.md +9 -0
  135. devflow_engine/prompts/integration/resolve/example.md +111 -0
  136. devflow_engine/prompts/integration/resolve/node_config/prompt.md +64 -0
  137. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/node_config/prompt.md +64 -0
  138. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_implicated_users/prompt.md +15 -0
  139. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_side_effects/prompt.md +15 -0
  140. devflow_engine/prompts/integration/resolve/resolve_implicated_users/prompt.md +10 -0
  141. devflow_engine/prompts/integration/resolve/resolve_side_effects/prompt.md +10 -0
  142. devflow_engine/prompts/integration/validate/build_idea_acceptance_coverage/prompt.md +12 -0
  143. devflow_engine/prompts/integration/validate/code_repair/prompt.md +13 -0
  144. devflow_engine/prompts/integration/validate/example.md +143 -0
  145. devflow_engine/prompts/integration/validate/node_config/prompt.md +87 -0
  146. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  147. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/node_config/prompt.md +67 -0
  148. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_enrich_gate/prompt.md +17 -0
  149. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_repair/prompt.md +16 -0
  150. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  151. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/node_config/prompt.md +67 -0
  152. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/validate_repair/prompt.md +9 -0
  153. devflow_engine/prompts/integration/validate/validate_enrich_gate/prompt.md +10 -0
  154. devflow_engine/prompts/integration/validate/validate_repair/prompt.md +20 -0
  155. devflow_engine/prompts/integration/write_workflows/example.md +100 -0
  156. devflow_engine/prompts/integration/write_workflows/node_config/prompt.md +44 -0
  157. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/node_config/prompt.md +44 -0
  158. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/write_workflows/prompt.md +17 -0
  159. devflow_engine/prompts/integration/write_workflows/write_workflows/prompt.md +11 -0
  160. devflow_engine/prompts/iterate/README.md +7 -0
  161. devflow_engine/prompts/iterate/coder/prompt.md +11 -0
  162. devflow_engine/prompts/iterate/framer/prompt.md +11 -0
  163. devflow_engine/prompts/iterate/iterator/prompt.md +13 -0
  164. devflow_engine/prompts/iterate/observer/prompt.md +11 -0
  165. devflow_engine/prompts/recovery/diagnosis/prompt.md +7 -0
  166. devflow_engine/prompts/recovery/execution/prompt.md +8 -0
  167. devflow_engine/prompts/recovery/execution_verification/prompt.md +7 -0
  168. devflow_engine/prompts/recovery/failure_investigation/prompt.md +10 -0
  169. devflow_engine/prompts/recovery/preflight_health_repo_repair/prompt.md +8 -0
  170. devflow_engine/prompts/recovery/remediation_execution/prompt.md +11 -0
  171. devflow_engine/prompts/recovery/root_cause_investigation/prompt.md +12 -0
  172. devflow_engine/prompts/scope_idea/doctrine/prompt.md +7 -0
  173. devflow_engine/prompts/source_doc_eval/document/prompt.md +6 -0
  174. devflow_engine/prompts/source_doc_eval/targeted_mutation/prompt.md +9 -0
  175. devflow_engine/prompts/source_doc_mutation/domain_entities/prompt.md +6 -0
  176. devflow_engine/prompts/source_doc_mutation/product_brief/prompt.md +6 -0
  177. devflow_engine/prompts/source_doc_mutation/project_doc_coherence/prompt.md +7 -0
  178. devflow_engine/prompts/source_doc_mutation/project_doc_render/prompt.md +9 -0
  179. devflow_engine/prompts/source_doc_mutation/source_doc_coherence/prompt.md +5 -0
  180. devflow_engine/prompts/source_doc_mutation/source_doc_enrichment_coherence/prompt.md +6 -0
  181. devflow_engine/prompts/source_doc_mutation/user_workflows/prompt.md +6 -0
  182. devflow_engine/prompts/source_scope/doctrine/prompt.md +10 -0
  183. devflow_engine/prompts/ui_grounding/doctrine/prompt.md +7 -0
  184. devflow_engine/recovery/__init__.py +3 -0
  185. devflow_engine/recovery/dag.py +2609 -0
  186. devflow_engine/recovery/models.py +220 -0
  187. devflow_engine/refactor.py +93 -0
  188. devflow_engine/registry/__init__.py +1 -0
  189. devflow_engine/registry/cards.py +238 -0
  190. devflow_engine/registry/domain_normalize.py +60 -0
  191. devflow_engine/registry/effects.py +65 -0
  192. devflow_engine/registry/enforce_report.py +150 -0
  193. devflow_engine/registry/module_cards_classify.py +164 -0
  194. devflow_engine/registry/module_cards_draft.py +184 -0
  195. devflow_engine/registry/module_cards_gate.py +59 -0
  196. devflow_engine/registry/packages.py +347 -0
  197. devflow_engine/registry/pathways.py +323 -0
  198. devflow_engine/review/__init__.py +11 -0
  199. devflow_engine/review/dag.py +588 -0
  200. devflow_engine/review/review_story.py +67 -0
  201. devflow_engine/scope_idea/__init__.py +3 -0
  202. devflow_engine/scope_idea/agentic.py +39 -0
  203. devflow_engine/scope_idea/dag.py +1069 -0
  204. devflow_engine/scope_idea/models.py +175 -0
  205. devflow_engine/skills/builtins/devflow/queue_failure_investigation/SKILL.md +112 -0
  206. devflow_engine/skills/builtins/devflow/queue_idea_to_story/SKILL.md +120 -0
  207. devflow_engine/skills/builtins/devflow/queue_integration/SKILL.md +105 -0
  208. devflow_engine/skills/builtins/devflow/queue_recovery/SKILL.md +108 -0
  209. devflow_engine/skills/builtins/devflow/queue_runtime_core/SKILL.md +155 -0
  210. devflow_engine/skills/builtins/devflow/queue_story_implementation/SKILL.md +122 -0
  211. devflow_engine/skills/builtins/devin/idea_to_story_handoff/SKILL.md +120 -0
  212. devflow_engine/skills/builtins/devin/ideation/SKILL.md +168 -0
  213. devflow_engine/skills/builtins/devin/ideation/state-and-phrasing-reference.md +18 -0
  214. devflow_engine/skills/builtins/devin/insight/SKILL.md +22 -0
  215. devflow_engine/skills/registry.example.yaml +42 -0
  216. devflow_engine/source_doc_assumptions.py +291 -0
  217. devflow_engine/source_doc_mutation_dag.py +1606 -0
  218. devflow_engine/source_doc_mutation_eval.py +417 -0
  219. devflow_engine/source_doc_mutation_worker.py +25 -0
  220. devflow_engine/source_docs_schema.py +207 -0
  221. devflow_engine/source_docs_updater.py +309 -0
  222. devflow_engine/source_scope/__init__.py +15 -0
  223. devflow_engine/source_scope/agentic.py +45 -0
  224. devflow_engine/source_scope/dag.py +1626 -0
  225. devflow_engine/source_scope/models.py +177 -0
  226. devflow_engine/stores/__init__.py +0 -0
  227. devflow_engine/stores/execution_store.py +3534 -0
  228. devflow_engine/story/__init__.py +0 -0
  229. devflow_engine/story/contracts.py +160 -0
  230. devflow_engine/story/discovery.py +47 -0
  231. devflow_engine/story/evidence.py +118 -0
  232. devflow_engine/story/hashing.py +27 -0
  233. devflow_engine/story/implemented_queue_purge.py +148 -0
  234. devflow_engine/story/indexer.py +105 -0
  235. devflow_engine/story/io.py +20 -0
  236. devflow_engine/story/markdown_contracts.py +298 -0
  237. devflow_engine/story/reconciliation.py +408 -0
  238. devflow_engine/story/validate_stories.py +149 -0
  239. devflow_engine/story/validate_tests_story.py +512 -0
  240. devflow_engine/story/validation.py +133 -0
  241. devflow_engine/ui_grounding/__init__.py +11 -0
  242. devflow_engine/ui_grounding/agentic.py +31 -0
  243. devflow_engine/ui_grounding/dag.py +874 -0
  244. devflow_engine/ui_grounding/models.py +224 -0
  245. devflow_engine/ui_grounding/pencil_bridge.py +247 -0
  246. devflow_engine/vendor/__init__.py +0 -0
  247. devflow_engine/vendor/datalumina_genai/__init__.py +11 -0
  248. devflow_engine/vendor/datalumina_genai/core/__init__.py +0 -0
  249. devflow_engine/vendor/datalumina_genai/core/exceptions.py +9 -0
  250. devflow_engine/vendor/datalumina_genai/core/nodes/__init__.py +0 -0
  251. devflow_engine/vendor/datalumina_genai/core/nodes/agent.py +48 -0
  252. devflow_engine/vendor/datalumina_genai/core/nodes/agent_streaming_node.py +26 -0
  253. devflow_engine/vendor/datalumina_genai/core/nodes/base.py +89 -0
  254. devflow_engine/vendor/datalumina_genai/core/nodes/concurrent.py +30 -0
  255. devflow_engine/vendor/datalumina_genai/core/nodes/router.py +69 -0
  256. devflow_engine/vendor/datalumina_genai/core/schema.py +72 -0
  257. devflow_engine/vendor/datalumina_genai/core/task.py +52 -0
  258. devflow_engine/vendor/datalumina_genai/core/validate.py +139 -0
  259. devflow_engine/vendor/datalumina_genai/core/workflow.py +200 -0
  260. devflow_engine/worker.py +1086 -0
  261. devflow_engine/worker_guard.py +233 -0
  262. devflow_engine-1.0.0.dist-info/METADATA +235 -0
  263. devflow_engine-1.0.0.dist-info/RECORD +393 -0
  264. devflow_engine-1.0.0.dist-info/WHEEL +4 -0
  265. devflow_engine-1.0.0.dist-info/entry_points.txt +3 -0
  266. devin/__init__.py +6 -0
  267. devin/dag.py +58 -0
  268. devin/dag_two_arm.py +138 -0
  269. devin/devin_chat_scenario_catalog.json +588 -0
  270. devin/devin_eval.py +677 -0
  271. devin/nodes/__init__.py +0 -0
  272. devin/nodes/ideation/__init__.py +0 -0
  273. devin/nodes/ideation/node.py +195 -0
  274. devin/nodes/ideation/playground.py +267 -0
  275. devin/nodes/ideation/prompt.md +65 -0
  276. devin/nodes/ideation/scenarios/continue_refinement.py +13 -0
  277. devin/nodes/ideation/scenarios/continue_refinement_evals.py +18 -0
  278. devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py +17 -0
  279. devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py +16 -0
  280. devin/nodes/ideation/scenarios/large_idea_split.py +4 -0
  281. devin/nodes/ideation/scenarios/large_idea_split_evals.py +17 -0
  282. devin/nodes/ideation/scenarios/source_documentation_added.py +4 -0
  283. devin/nodes/ideation/scenarios/source_documentation_added_evals.py +16 -0
  284. devin/nodes/ideation/scenarios/user_says_create_it.py +30 -0
  285. devin/nodes/ideation/scenarios/user_says_create_it_evals.py +23 -0
  286. devin/nodes/ideation/scenarios/vague_idea.py +16 -0
  287. devin/nodes/ideation/scenarios/vague_idea_evals.py +47 -0
  288. devin/nodes/ideation/tools.json +312 -0
  289. devin/nodes/insight/__init__.py +0 -0
  290. devin/nodes/insight/node.py +49 -0
  291. devin/nodes/insight/playground.py +154 -0
  292. devin/nodes/insight/prompt.md +61 -0
  293. devin/nodes/insight/scenarios/architecture_pattern_query.py +15 -0
  294. devin/nodes/insight/scenarios/architecture_pattern_query_evals.py +25 -0
  295. devin/nodes/insight/scenarios/codebase_exploration.py +15 -0
  296. devin/nodes/insight/scenarios/codebase_exploration_evals.py +23 -0
  297. devin/nodes/insight/scenarios/devin_ideation_routing.py +19 -0
  298. devin/nodes/insight/scenarios/devin_ideation_routing_evals.py +39 -0
  299. devin/nodes/insight/scenarios/devin_insight_routing.py +20 -0
  300. devin/nodes/insight/scenarios/devin_insight_routing_evals.py +40 -0
  301. devin/nodes/insight/scenarios/operational_debugging.py +15 -0
  302. devin/nodes/insight/scenarios/operational_debugging_evals.py +23 -0
  303. devin/nodes/insight/scenarios/operational_question.py +9 -0
  304. devin/nodes/insight/scenarios/operational_question_evals.py +8 -0
  305. devin/nodes/insight/scenarios/queue_status.py +15 -0
  306. devin/nodes/insight/scenarios/queue_status_evals.py +23 -0
  307. devin/nodes/insight/scenarios/source_doc_explanation.py +14 -0
  308. devin/nodes/insight/scenarios/source_doc_explanation_evals.py +21 -0
  309. devin/nodes/insight/scenarios/worker_state_check.py +15 -0
  310. devin/nodes/insight/scenarios/worker_state_check_evals.py +22 -0
  311. devin/nodes/insight/tools.json +126 -0
  312. devin/nodes/intake/__init__.py +0 -0
  313. devin/nodes/intake/node.py +27 -0
  314. devin/nodes/intake/playground.py +47 -0
  315. devin/nodes/intake/prompt.md +12 -0
  316. devin/nodes/intake/scenarios/ideation_routing.py +4 -0
  317. devin/nodes/intake/scenarios/ideation_routing_evals.py +5 -0
  318. devin/nodes/intake/scenarios/insight_routing.py +4 -0
  319. devin/nodes/intake/scenarios/insight_routing_evals.py +5 -0
  320. devin/nodes/iterate/README.md +44 -0
  321. devin/nodes/iterate/__init__.py +1 -0
  322. devin/nodes/iterate/_archived_design_stages/01-objectives-requirements.md +112 -0
  323. devin/nodes/iterate/_archived_design_stages/02-evals.md +131 -0
  324. devin/nodes/iterate/_archived_design_stages/03-tools-and-boundaries.md +110 -0
  325. devin/nodes/iterate/_archived_design_stages/04-harness-and-playground.md +32 -0
  326. devin/nodes/iterate/_archived_design_stages/05-prompt-deferred.md +11 -0
  327. devin/nodes/iterate/_archived_design_stages/coder_agent_design/01-objectives-requirements.md +20 -0
  328. devin/nodes/iterate/_archived_design_stages/coder_agent_design/02-evals.md +8 -0
  329. devin/nodes/iterate/_archived_design_stages/coder_agent_design/03-tools-and-boundaries.md +14 -0
  330. devin/nodes/iterate/_archived_design_stages/coder_agent_design/04-harness-and-playground.md +12 -0
  331. devin/nodes/iterate/_archived_design_stages/framer_agent_design/01-objectives-requirements.md +20 -0
  332. devin/nodes/iterate/_archived_design_stages/framer_agent_design/02-evals.md +8 -0
  333. devin/nodes/iterate/_archived_design_stages/framer_agent_design/03-tools-and-boundaries.md +13 -0
  334. devin/nodes/iterate/_archived_design_stages/framer_agent_design/04-harness-and-playground.md +12 -0
  335. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md +25 -0
  336. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/02-evals.md +9 -0
  337. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/03-tools-and-boundaries.md +14 -0
  338. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md +12 -0
  339. devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md +20 -0
  340. devin/nodes/iterate/_archived_design_stages/observer_agent_design/02-evals.md +8 -0
  341. devin/nodes/iterate/_archived_design_stages/observer_agent_design/03-tools-and-boundaries.md +14 -0
  342. devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md +13 -0
  343. devin/nodes/iterate/agent-roles.md +89 -0
  344. devin/nodes/iterate/agents/README.md +10 -0
  345. devin/nodes/iterate/artifacts.md +504 -0
  346. devin/nodes/iterate/contract.md +100 -0
  347. devin/nodes/iterate/eval-plan.md +74 -0
  348. devin/nodes/iterate/node.py +100 -0
  349. devin/nodes/iterate/pipeline/README.md +13 -0
  350. devin/nodes/iterate/playground-contract.md +76 -0
  351. devin/nodes/iterate/prompt.md +11 -0
  352. devin/nodes/iterate/scenarios/README.md +38 -0
  353. devin/nodes/iterate/scenarios/artifact-and-loop-scenarios.md +101 -0
  354. devin/nodes/iterate/scenarios/coder_artifact_alignment.py +32 -0
  355. devin/nodes/iterate/scenarios/coder_artifact_alignment_evals.py +45 -0
  356. devin/nodes/iterate/scenarios/coder_bounded_fix.py +27 -0
  357. devin/nodes/iterate/scenarios/coder_bounded_fix_evals.py +45 -0
  358. devin/nodes/iterate/scenarios/devin_iterate_routing.py +21 -0
  359. devin/nodes/iterate/scenarios/devin_iterate_routing_evals.py +36 -0
  360. devin/nodes/iterate/scenarios/framer_scope_boundary.py +25 -0
  361. devin/nodes/iterate/scenarios/framer_scope_boundary_evals.py +57 -0
  362. devin/nodes/iterate/scenarios/framer_task_framing.py +25 -0
  363. devin/nodes/iterate/scenarios/framer_task_framing_evals.py +58 -0
  364. devin/nodes/iterate/scenarios/iterate_error_fix.py +21 -0
  365. devin/nodes/iterate/scenarios/iterate_error_fix_evals.py +39 -0
  366. devin/nodes/iterate/scenarios/iterate_quick_change.py +21 -0
  367. devin/nodes/iterate/scenarios/iterate_quick_change_evals.py +35 -0
  368. devin/nodes/iterate/scenarios/iterate_to_idea_promotion.py +23 -0
  369. devin/nodes/iterate/scenarios/iterate_to_idea_promotion_evals.py +53 -0
  370. devin/nodes/iterate/scenarios/iterate_to_insight_reroute.py +23 -0
  371. devin/nodes/iterate/scenarios/iterate_to_insight_reroute_evals.py +53 -0
  372. devin/nodes/iterate/scenarios/observer_evidence_seam.py +28 -0
  373. devin/nodes/iterate/scenarios/observer_evidence_seam_evals.py +55 -0
  374. devin/nodes/iterate/scenarios/observer_repro_creation.py +28 -0
  375. devin/nodes/iterate/scenarios/observer_repro_creation_evals.py +45 -0
  376. devin/nodes/iterate/scenarios/routing-matrix.md +45 -0
  377. devin/nodes/shared/__init__.py +0 -0
  378. devin/nodes/shared/filemaker_expert.md +80 -0
  379. devin/nodes/shared/filemaker_expert.py +354 -0
  380. devin/nodes/shared/filemaker_expert_eval/runner.py +176 -0
  381. devin/nodes/shared/filemaker_expert_eval/scenarios.json +65 -0
  382. devin/nodes/shared/goldilocks_advisor_eval/runner.py +214 -0
  383. devin/nodes/shared/goldilocks_advisor_eval/scenarios.json +58 -0
  384. devin/nodes/shared/helpers.py +156 -0
  385. devin/nodes/shared/idea_compliance_advisor_eval/runner.py +252 -0
  386. devin/nodes/shared/idea_compliance_advisor_eval/scenarios.json +75 -0
  387. devin/nodes/shared/models.py +44 -0
  388. devin/nodes/shared/post.py +40 -0
  389. devin/nodes/shared/router.py +107 -0
  390. devin/nodes/shared/tools.py +191 -0
  391. devin/shared/devin-chat-rubric.md +237 -0
  392. devin/shared/devin-chat-scenario-suite.md +90 -0
  393. devin/shared/eval_doctrine.md +9 -0
@@ -0,0 +1,1322 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ import shlex
6
+ import tempfile
7
+ import warnings
8
+ from dataclasses import dataclass, field
9
+ from pathlib import Path
10
+ from typing import Any, Literal
11
+
12
+ from ..devflow_state import _postgrest_request, _resolve_supabase_rest_config
13
+ from ..idea.traditional_stories import _global_devflow_dir, _read_toml
14
+ from .cli_one_shot import run_one_shot
15
+ from .cli_stream import run_streaming
16
+ from .provider_api import (
17
+ ANTHROPIC_MODEL_DEFAULT,
18
+ GOOGLE_MODEL_DEFAULT,
19
+ OLLAMA_MODEL_DEFAULT,
20
+ OPENAI_MODEL_DEFAULT,
21
+ anthropic_messages_create,
22
+ anthropic_response_text,
23
+ canonical_provider_api_shape,
24
+ google_generate_content,
25
+ google_response_text,
26
+ load_anthropic_api_settings,
27
+ load_google_api_settings,
28
+ load_openai_api_settings,
29
+ openai_response_text,
30
+ openai_responses_create,
31
+ )
32
+
33
# Literal type aliases for the option values this module accepts.
DeliveryModel = Literal["final_only", "streaming"]
InteractionModel = Literal["request_response", "agentic"]
LlmTransport = Literal["cli", "api"]
ResponseContract = Literal["text", "json_only"]

# Runtime sets of accepted option values; the delivery-model, interaction-model
# and response-contract sets mirror the Literal aliases above.
_SUPPORTED_DELIVERY_MODELS = {"final_only", "streaming"}
_SUPPORTED_INTERACTION_MODELS = {"request_response", "agentic"}
# presumably how a prompt is handed to a CLI process (argv vs. stdin) — confirm against callers
_SUPPORTED_DELIVERIES = {"argument", "stdin"}
_SUPPORTED_RESPONSE_CONTRACTS = {"text", "json_only"}
_SUPPORTED_STRENGTHS = {"ultra_light", "light", "medium", "strong"}
# Shorthand -> canonical strength name; every value is a member of _SUPPORTED_STRENGTHS.
# NOTE(review): "s" resolves to "strong" and "l" to "light" (initial letters), while
# "xs" resolves to "ultra_light" — confirm this mixed reading is intended.
_STRENGTH_SHORTHAND = {"xs": "ultra_light", "s": "strong", "m": "medium", "l": "light", "ul": "ultra_light"}
44
+
45
+
46
def _normalize_strength(value: str | None) -> str | None:
    """Expand a strength shorthand into its canonical strength name.

    The input is stringified, stripped and lower-cased before the lookup in
    ``_STRENGTH_SHORTHAND``. A cleaned value with no shorthand entry is
    returned as-is (still stripped and lower-cased); ``None`` passes through
    untouched.
    """
    if value is None:
        return None
    cleaned = str(value).strip().lower()
    if cleaned in _STRENGTH_SHORTHAND:
        return _STRENGTH_SHORTHAND[cleaned]
    return cleaned
52
# Accepted transport names; mirrors the LlmTransport literal.
_SUPPORTED_TRANSPORTS = {"cli", "api"}
# Canonical provider family names.
_SUPPORTED_PROVIDER_FAMILIES = {"anthropic", "openai", "google", "minimax", "ollama"}
# Provider / tool name -> canonical provider family. Every value is a member
# of _SUPPORTED_PROVIDER_FAMILIES.
_PROVIDER_ALIASES = {
"anthropic": "anthropic",
"claude": "anthropic",
"openai": "openai",
"codex": "openai",
"google": "google",
"gemini": "google",
"gemini-cli": "google",
"pi": "minimax",
"minimax": "minimax",
"ollama": "ollama",
}
# Interaction-model spelling -> canonical name ("delegated" is treated as "agentic").
_INTERACTION_MODEL_ALIASES = {
"request_response": "request_response",
"request/response": "request_response",
"agentic": "agentic",
"delegated": "agentic",
}
# Tier spelling variants -> canonical strength name.
_API_TIER_ALIASES = {
"ultra_light": "ultra_light",
"ultra-light": "ultra_light",
"ultralight": "ultra_light",
"light": "light",
"medium": "medium",
"mid": "medium",
"strong": "strong",
}
# NOTE(review): what "autoloading" means is not visible in this section —
# presumably CLIs that load repo context on their own; confirm at the use sites.
_AUTOLOADING_CLI_PROVIDERS = {"anthropic"}
_AUTOLOADING_CLI_BINARIES = {"claude", "codex"}
# presumably size thresholds (in bytes) above which prompt content is written to
# artifact files instead of being inlined — confirm against the code that reads them
_PROMPT_INLINE_BYTES_LIMIT = 120_000
_PROMPT_SECTION_INLINE_BYTES_LIMIT = 48_000
_PROMPT_ARTIFACT_DIRNAME = "llm_prompt_artifacts"
_PROMPT_ARTIFACT_MAX_FILENAME_LENGTH = 48
87
+
88
+
89
def _load_global_llm_cfg() -> dict[str, Any]:
    """Load the global DevFlow ``config.toml`` and guarantee a ``tiers`` key.

    The file is read from ``_global_devflow_dir() / "config.toml"`` via
    ``_read_toml``. When the parsed mapping already has a ``tiers`` entry it
    is returned unchanged. Otherwise a shallow copy is returned with
    ``tiers`` set to ``_parse_tiers(copy)``; because the copy has no
    ``tiers`` key at that point, this amounts to defaulting ``tiers`` to an
    empty mapping.
    """
    parsed_cfg = _read_toml(_global_devflow_dir() / "config.toml")
    if "tiers" in parsed_cfg:
        return parsed_cfg
    # Copy before mutating so the mapping handed back by _read_toml stays untouched.
    with_tiers = dict(parsed_cfg)
    with_tiers["tiers"] = _parse_tiers(with_tiers)
    return with_tiers
98
+
99
+
100
+ def _nested_dict(mapping: dict[str, Any], *path: str) -> dict[str, Any] | None:
101
+ current: Any = mapping
102
+ for key in path:
103
+ if not isinstance(current, dict):
104
+ return None
105
+ current = current.get(key)
106
+ return current if isinstance(current, dict) else None
107
+
108
+
109
+ def _parse_tiers(cfg: dict[str, Any]) -> dict[str, dict[str, Any]]:
110
+ """Parse the tiers dict from global config, handling both TOML native and
111
+ JSON-string formats (e.g. tiers stored as a Python literal string)."""
112
+ raw = cfg.get("tiers")
113
+ if isinstance(raw, dict):
114
+ return raw
115
+ if isinstance(raw, str):
116
+ raw = raw.strip()
117
+ if not raw:
118
+ return {}
119
+ # Try TOML inline table first.
120
+ try:
121
+ import tomllib
122
+ # Re-parse the full config as TOML to get inline table parsed.
123
+ # Fast path: if it looks like Python literal, use ast.literal_eval.
124
+ if raw.startswith("{") or raw.startswith("'"):
125
+ import ast
126
+ try:
127
+ parsed = ast.literal_eval(raw)
128
+ if isinstance(parsed, dict):
129
+ return parsed
130
+ except Exception:
131
+ pass
132
+ except Exception:
133
+ pass
134
+ return {}
135
+
136
+
137
def _tier_config(strength: str, cfg: dict[str, Any]) -> dict[str, Any] | None:
    """Look up the tier entry for ``strength`` in the parsed tiers of ``cfg``.

    Returns the entry only when it is a dict; a missing or non-dict entry
    yields ``None``.
    """
    entry = _parse_tiers(cfg).get(strength)
    return entry if isinstance(entry, dict) else None
144
+
145
+
146
def _canonical_light_tier_from_supabase() -> dict[str, Any]:
    """Fetch the ``settings.tiers.light`` payload from ``devflow_settings``.

    Requests the single most recently updated ``devflow_settings`` row through
    the Supabase REST endpoint and extracts ``settings -> tiers -> light``.

    Best-effort: returns ``{}`` when no Supabase REST config is resolved, when
    the request raises, when the response is not a non-empty list whose first
    item is a dict, when ``settings`` is not a dict, or when the light tier is
    missing or empty.
    """
    rest_config = _resolve_supabase_rest_config()
    if rest_config is None:
        return {}
    base_url, api_key = rest_config
    try:
        response_rows = _postgrest_request(
            method="GET",
            url=f"{base_url}/rest/v1/devflow_settings?select=settings&order=updated_at.desc.nullslast&limit=1",
            key=api_key,
        )
    except Exception:
        # Deliberate best-effort: any request failure means "no remote payload".
        return {}
    has_first_row = (
        isinstance(response_rows, list)
        and bool(response_rows)
        and isinstance(response_rows[0], dict)
    )
    if not has_first_row:
        return {}
    settings = response_rows[0].get("settings")
    if not isinstance(settings, dict):
        return {}
    light_tier = _nested_dict(settings, "tiers", "light")
    return light_tier or {}
171
+
172
+
173
def _canonical_light_tier_from_global_cfg(cfg: dict[str, Any]) -> dict[str, Any]:
    """Find the light-tier payload inside the global config.

    Three nesting layouts are probed in order, from most to least qualified;
    the first one that resolves to a dict wins (even an empty one). ``{}`` is
    returned when none of them resolves.
    """
    search_paths = (
        ("devflow_settings", "settings", "tiers", "light"),
        ("settings", "tiers", "light"),
        ("tiers", "light"),
    )
    # Lazy generator: later paths are only probed when earlier ones miss.
    hits = (_nested_dict(cfg, *keys) for keys in search_paths)
    return next((hit for hit in hits if hit is not None), {})
183
+
184
+
185
def _canonical_light_tier_payload(cfg: dict[str, Any] | None = None) -> dict[str, Any]:
    """Return the light-tier payload, preferring Supabase over the global config.

    The global config (``cfg`` when given, otherwise freshly loaded) is only
    consulted when the Supabase lookup comes back empty.
    """
    remote_payload = _canonical_light_tier_from_supabase()
    if not remote_payload:
        local_cfg = _load_global_llm_cfg() if cfg is None else cfg
        return _canonical_light_tier_from_global_cfg(local_cfg)
    return remote_payload
191
+
192
+
193
def _canonical_light_tier_provider(payload: dict[str, Any]) -> str | None:
    """Extract and normalise the provider named by a light-tier payload.

    The first truthy value among ``apiProvider``, ``api_provider``,
    ``llm_provider`` and ``provider`` is used; an empty string is normalised
    when none is set.
    """
    chosen: Any = ""
    for field_name in ("apiProvider", "api_provider", "llm_provider", "provider"):
        value = payload.get(field_name)
        if value:
            chosen = value
            break
    return _normalize_provider_name(str(chosen).strip())
203
+
204
+
205
+ def _canonical_light_tier_model(payload: dict[str, Any]) -> str | None:
206
+ model = str(
207
+ payload.get("apiModel")
208
+ or payload.get("api_model")
209
+ or payload.get("llm_model")
210
+ or payload.get("model")
211
+ or payload.get("model_default")
212
+ or ""
213
+ ).strip()
214
+ return model or None
215
+
216
+
217
def resolve_canonical_light_tier_model(*, cfg: dict[str, Any] | None = None) -> str | None:
    """Resolve the light-tier model name for Devin 2.0 / PI launch paths.

    Resolution order:
      1. the canonical light-tier payload (Supabase row, then the global
         config view);
      2. ``llm_cli_model_weak`` from the global config;
      3. ``llm_cli_model_default`` from the global config.

    Returns ``None`` when every source is empty.
    """
    canonical = _canonical_light_tier_model(_canonical_light_tier_payload(cfg))
    if canonical:
        return canonical
    source_cfg = _load_global_llm_cfg() if cfg is None else cfg
    weak_model = source_cfg.get("llm_cli_model_weak")
    legacy = str(weak_model or source_cfg.get("llm_cli_model_default") or "").strip()
    return legacy if legacy else None
231
+
232
+
233
def _normalize_api_tier(value: str | None) -> str | None:
    """Map a free-form tier label onto its canonical name.

    The label is trimmed, lower-cased, and has hyphens and spaces turned into
    underscores before the alias-table lookup. ``None`` is returned for
    ``None``, blank, or unknown labels.
    """
    if value is None:
        return None
    slug = str(value).strip().lower()
    slug = slug.replace("-", "_").replace(" ", "_")
    return _API_TIER_ALIASES.get(slug) if slug else None
240
+
241
+
242
def resolve_api_tier_request_overrides(
    *,
    tier: str,
    cfg: dict[str, Any] | None = None,
    strength_override: str | None = None,
) -> dict[str, Any]:
    """Build the API-call override dict for one rung of the DevFlow tier ladder.

    The result always carries ``transport="api"`` and then, per tier:

    * ``light``: provider/model from the canonical light-tier payload
      (Supabase row first, global config view as fallback).
    * ``medium``: only ``strength``, set to ``strength_override`` or
      ``"strong"``.
    * ``strong`` / ``ultra_light``: provider/model from that tier's config
      when one exists; otherwise ``strength`` set to ``strength_override`` or
      the tier name.

    Raises:
        RuntimeError: if ``tier`` does not normalise to a supported tier.
    """
    tier_name = _normalize_api_tier(tier)
    if tier_name not in {"ultra_light", "light", "medium", "strong"}:
        raise RuntimeError(f"Unsupported API tier: {tier!r}")
    effective_cfg = _load_global_llm_cfg() if cfg is None else cfg
    result: dict[str, Any] = {"transport": "api"}

    def _record(provider: str | None, model: str | None) -> None:
        # Only non-empty values are written so callers can detect "unset".
        if provider:
            result["provider"] = provider
        if model:
            result["model"] = model

    if tier_name == "light":
        payload = _canonical_light_tier_payload(effective_cfg)
        _record(
            _canonical_light_tier_provider(payload),
            _canonical_light_tier_model(payload),
        )
        return result

    if tier_name == "medium":
        # NOTE(review): medium falls back to strength "strong" and never reads
        # a medium tier config -- confirm this mapping is intentional.
        result["strength"] = strength_override or "strong"
        return result

    # Remaining tiers ("strong", "ultra_light") share one resolution recipe.
    tier_settings = _tier_config(tier_name, effective_cfg)
    if tier_settings:
        tier_provider = _normalize_provider_name(
            str(tier_settings.get("apiProvider") or tier_settings.get("provider") or "").strip()
        )
        tier_model = str(tier_settings.get("apiModel") or tier_settings.get("model") or "").strip() or None
        _record(tier_provider, tier_model)
        return result
    result["strength"] = strength_override or tier_name
    return result
311
+
312
+
313
def normalize_llm_cli_base(base_cmd: str) -> str:
    """Normalise a CLI base command and add the flags each known CLI needs.

    Adjustments, keyed on the first token(s):

    * ``ollama``: ensure the ``run`` subcommand follows.
    * ``claude``: append ``--dangerously-skip-permissions``.
    * ``codex exec``: append ``--dangerously-bypass-approvals-and-sandbox``.
    * ``pi``: append ``-p`` unless ``-p`` / ``--print`` is already present.

    Args:
        base_cmd: Shell-style command string.

    Returns:
        The adjusted command, re-quoted so that ``shlex.split`` on the result
        yields exactly the adjusted token list.

    Raises:
        RuntimeError: if ``base_cmd`` contains no tokens.
    """
    tokens = shlex.split(base_cmd)
    if not tokens:
        raise RuntimeError("LLM CLI base command not set.")
    if tokens[0] == "ollama" and (len(tokens) == 1 or tokens[1] != "run"):
        tokens.insert(1, "run")
    if tokens[0] == "claude" and "--dangerously-skip-permissions" not in tokens:
        tokens.append("--dangerously-skip-permissions")
    if tokens[:2] == ["codex", "exec"] and "--dangerously-bypass-approvals-and-sandbox" not in tokens:
        tokens.append("--dangerously-bypass-approvals-and-sandbox")
    if tokens[0] == "pi" and "-p" not in tokens and "--print" not in tokens:
        tokens.append("-p")
    # shlex.join re-quotes tokens that need it; a plain " ".join would split a
    # quoted argument such as "be brief" into two arguments on the next parse.
    return shlex.join(tokens)
327
+
328
+
329
+ def _apply_model_flag(base_cmd: str, model: str | None) -> str:
330
+ """Append --model <model> to a CLI base command (provider-aware).
331
+
332
+ - ollama: appends model as positional arg after ``run`` (ollama run <model>)
333
+ - codex / claude / minimax / pi / other: appends ``--model <model>``
334
+ - If model is None or already present as a --model flag, returns base_cmd unchanged.
335
+ """
336
+ if not model:
337
+ return base_cmd
338
+ parts = shlex.split(base_cmd)
339
+ if not parts:
340
+ return base_cmd
341
+ # Don't double-apply
342
+ if "--model" in parts:
343
+ return base_cmd
344
+ provider = _normalize_provider_name(parts[0])
345
+ if provider == "ollama":
346
+ # ollama run <model> — model is a positional arg after the subcommand
347
+ return f"{base_cmd} {model}"
348
+ return f"{base_cmd} --model {model}"
349
+
350
+
351
+ def _extract_model_flag(base_cmd: str) -> str | None:
352
+ """Pull the model value from a base_cmd that may contain --model <value>."""
353
+ parts = shlex.split(base_cmd)
354
+ for i, part in enumerate(parts):
355
+ if part == "--model" and i + 1 < len(parts):
356
+ return parts[i + 1]
357
+ return None
358
+
359
+
360
def _normalize_provider_name(provider: str | None) -> str | None:
    """Canonicalise a provider label or executable path.

    Two spellings are tried in order: the whole lower-cased text, then its
    final path component (both ``/`` and ``\\`` count as separators). A
    spelling is accepted when it is a known alias, or when it already names a
    supported provider family. If neither matches, the lower-cased text
    (alias-mapped if possible) is returned as-is.

    Returns ``None`` for ``None`` or blank input.
    """
    if provider is None:
        return None
    text = str(provider).strip()
    if not text:
        return None
    lowered = text.lower()
    tail = text.replace("\\", "/").rsplit("/", 1)[-1].strip().lower()
    for name in (lowered, tail):
        if not name:
            continue
        is_alias = name in _PROVIDER_ALIASES
        resolved = _PROVIDER_ALIASES[name] if is_alias else name
        if is_alias or resolved in _SUPPORTED_PROVIDER_FAMILIES:
            return resolved
    return _PROVIDER_ALIASES.get(lowered, lowered)
375
+
376
+
377
def _normalize_interaction_model(value: str | None) -> InteractionModel | None:
    """Translate an interaction-model label through the alias table.

    The label is trimmed and lower-cased first. ``None`` is returned for
    ``None``, blank, or unrecognised labels.
    """
    if value is None:
        return None
    label = str(value).strip().lower()
    if label:
        return _INTERACTION_MODEL_ALIASES.get(label)  # type: ignore[return-value]
    return None
384
+
385
+
386
def _infer_provider_from_base_cmd(base_cmd: str) -> str | None:
    """Derive the provider from the first token of a CLI command.

    Returns ``None`` when the command has no tokens.
    """
    tokens = shlex.split(base_cmd)
    return _normalize_provider_name(tokens[0]) if tokens else None
391
+
392
+
393
+ def _cli_binary_name(base_cmd: str) -> str | None:
394
+ parts = shlex.split(base_cmd)
395
+ if not parts:
396
+ return None
397
+ binary = Path(parts[0]).name.strip().lower()
398
+ return binary or None
399
+
400
+
401
+ def _normalize_string_set(value: Any) -> set[str]:
402
+ if isinstance(value, str):
403
+ candidates = [part.strip().lower() for part in value.split(",")]
404
+ return {candidate for candidate in candidates if candidate}
405
+ if isinstance(value, (list, tuple, set)):
406
+ normalized: set[str] = set()
407
+ for entry in value:
408
+ text = str(entry).strip().lower()
409
+ if text:
410
+ normalized.add(text)
411
+ return normalized
412
+ return set()
413
+
414
+
415
def _autoloading_cli_settings() -> tuple[set[str], set[str]]:
    """Return the ``(providers, binaries)`` sets read from the global config.

    Configured entries come from ``llm_autoloading_cli_providers`` and
    ``llm_autoloading_cli_binaries``. The built-in defaults are merged in
    unless ``llm_autoloading_cli_use_defaults`` is exactly ``False``.
    """
    settings = _load_global_llm_cfg()
    defaults_enabled = settings.get("llm_autoloading_cli_use_defaults", True) is not False
    extra_providers = _normalize_string_set(settings.get("llm_autoloading_cli_providers"))
    extra_binaries = _normalize_string_set(settings.get("llm_autoloading_cli_binaries"))
    if not defaults_enabled:
        return extra_providers, extra_binaries
    return (
        set(_AUTOLOADING_CLI_PROVIDERS) | extra_providers,
        set(_AUTOLOADING_CLI_BINARIES) | extra_binaries,
    )
424
+
425
+
426
def _provider_requires_sanitized_cli_cwd(*, provider: str | None, base_cmd: str) -> bool:
    """Tell whether this CLI must run from a scratch working directory.

    True when either the normalised provider or the executable's file name is
    in the configured autoloading sets.
    """
    provider_name = _normalize_provider_name(provider)
    binary = _cli_binary_name(base_cmd)
    flagged_providers, flagged_binaries = _autoloading_cli_settings()
    if provider_name in flagged_providers:
        return True
    return binary in flagged_binaries
431
+
432
+
433
def _resolve_cli_cwd(*, repo_root: Path, provider: str | None, base_cmd: str) -> Path:
    """Choose the working directory for a CLI invocation.

    CLIs flagged by ``_provider_requires_sanitized_cli_cwd`` get a fresh
    temporary directory containing a ``DEVFLOW_REPO_ROOT.txt`` file that
    records the real repo root; everything else runs in ``repo_root``.

    The temporary directory is not removed here.
    """
    if _provider_requires_sanitized_cli_cwd(provider=provider, base_cmd=base_cmd):
        scratch = Path(tempfile.mkdtemp(prefix="devflow-llm-cwd-"))
        marker = scratch / "DEVFLOW_REPO_ROOT.txt"
        try:
            marker.write_text(str(repo_root), encoding="utf-8")
        except OSError:
            # The marker is a convenience; failing to write it is not fatal.
            pass
        return scratch
    return repo_root
442
+
443
+
444
+ def _prompt_inline_byte_limit(request: "LlmInvocationRequest") -> int:
445
+ configured = request.artifact_policy.get("prompt_inline_bytes_limit")
446
+ if configured is None:
447
+ return _PROMPT_INLINE_BYTES_LIMIT
448
+ try:
449
+ parsed = int(configured)
450
+ except (TypeError, ValueError):
451
+ return _PROMPT_INLINE_BYTES_LIMIT
452
+ return parsed if parsed > 0 else _PROMPT_INLINE_BYTES_LIMIT
453
+
454
+
455
+ def _prompt_section_inline_byte_limit(request: "LlmInvocationRequest") -> int:
456
+ configured = request.artifact_policy.get("prompt_section_inline_bytes_limit")
457
+ if configured is None:
458
+ return _PROMPT_SECTION_INLINE_BYTES_LIMIT
459
+ try:
460
+ parsed = int(configured)
461
+ except (TypeError, ValueError):
462
+ return _PROMPT_SECTION_INLINE_BYTES_LIMIT
463
+ return parsed if parsed > 0 else _PROMPT_SECTION_INLINE_BYTES_LIMIT
464
+
465
+
466
+ def _prompt_artifact_root(request: "LlmInvocationRequest") -> Path:
467
+ configured = request.artifact_policy.get("prompt_artifact_root")
468
+ if configured:
469
+ return Path(str(configured)).expanduser()
470
+ return request.repo_root / ".devflow" / _PROMPT_ARTIFACT_DIRNAME
471
+
472
+
473
def _prompt_artifact_directory(request: "LlmInvocationRequest") -> Path:
    """Create and return a fresh artifact directory for this request.

    The artifact root is created if needed. The new directory's name starts
    with a slug of ``request.purpose`` (lower-cased, runs of characters
    outside ``a-z0-9`` collapsed to ``-``, length-capped) or ``llm-prompt``
    when the slug is empty.
    """
    base_dir = _prompt_artifact_root(request)
    base_dir.mkdir(parents=True, exist_ok=True)
    slug = re.sub(r"[^a-z0-9]+", "-", request.purpose.strip().lower()).strip("-") or "llm-prompt"
    stem = slug[:_PROMPT_ARTIFACT_MAX_FILENAME_LENGTH]
    return Path(tempfile.mkdtemp(prefix=f"{stem}-", dir=base_dir))
481
+
482
+
483
+ def _json_byte_size(value: Any) -> int:
484
+ return len(json.dumps(value, indent=2, sort_keys=True).encode("utf-8"))
485
+
486
+
487
def _artifact_reference(*, path: Path, key_path: str, value: Any) -> dict[str, Any]:
    """Build the placeholder that replaces a value written to an artifact file.

    The placeholder records where the file lives, its format, the dotted key
    path it came from, and the JSON byte size of the original value.
    """
    descriptor = {
        "path": str(path),
        "format": "json",
        "key_path": key_path,
        "bytes": _json_byte_size(value),
    }
    return {"artifact_ref": descriptor}
496
+
497
+
498
def _write_prompt_artifact(*, artifact_dir: Path, key_path: str, value: Any) -> Path:
    """Write ``value`` as JSON into ``artifact_dir`` and return the file path.

    The file name is built from the alphanumeric runs of ``key_path`` joined
    with ``-`` and length-capped (``payload`` when nothing is left). If that
    name is taken, ``-1``, ``-2``, ... is appended until a free name is found.
    """
    stem = "-".join(filter(None, re.split(r"[^a-zA-Z0-9]+", key_path)))
    stem = stem[:_PROMPT_ARTIFACT_MAX_FILENAME_LENGTH] or "payload"
    target = artifact_dir / f"{stem}.json"
    attempt = 0
    while target.exists():
        attempt += 1
        target = artifact_dir / f"{stem}-{attempt}.json"
    target.write_text(json.dumps(value, indent=2, sort_keys=True), encoding="utf-8")
    return target
508
+
509
+
510
def _compact_prompt_mapping(
    *,
    mapping: dict[str, Any],
    artifact_dir: Path,
    inline_limit: int,
    section_limit: int,
    parent_key_path: str = "",
) -> tuple[dict[str, Any], int]:
    """Replace oversized entries of ``mapping`` with artifact references.

    Args:
        mapping: The prompt mapping to compact; it is not modified.
        artifact_dir: Directory that receives the spilled JSON files.
        inline_limit: Maximum JSON byte size for a compacted nested dict to be
            kept in place of the original.
        section_limit: JSON byte size above which a dict/list/str value is
            written to a file and replaced by a reference.
        parent_key_path: Dotted path of the enclosing keys (empty at the top).

    Returns:
        ``(compacted, artifact_count)``: a new dict plus the number of
        artifact references it contains.
    """
    compacted: dict[str, Any] = {}
    artifact_count = 0
    for key, value in mapping.items():
        key_path = f"{parent_key_path}.{key}" if parent_key_path else key
        # Oversized section: spill the whole value and keep only a reference.
        if isinstance(value, (dict, list, str)) and _json_byte_size(value) > section_limit:
            artifact_path = _write_prompt_artifact(
                artifact_dir=artifact_dir,
                key_path=key_path,
                value=value,
            )
            compacted[key] = _artifact_reference(path=artifact_path, key_path=key_path, value=value)
            artifact_count += 1
            continue
        if isinstance(value, dict):
            # NOTE(review): this point is only reached when the dict itself is
            # within section_limit, and each child serialises smaller than its
            # parent, so the recursion appears unable to spill anything
            # (nested_count stays 0) -- confirm whether nested compaction was
            # meant to run before the whole-section spill above.
            nested, nested_count = _compact_prompt_mapping(
                mapping=value,
                artifact_dir=artifact_dir,
                inline_limit=inline_limit,
                section_limit=section_limit,
                parent_key_path=key_path,
            )
            # Use the compacted child only if it spilled something and fits.
            if nested_count > 0 and _json_byte_size(nested) <= inline_limit:
                compacted[key] = nested
                artifact_count += nested_count
                continue
        # Everything else is carried over unchanged.
        compacted[key] = value
    return compacted, artifact_count
545
+
546
+
547
def _load_cli_config_from_cfg(
    cfg: dict[str, Any],
    *,
    provider_override: str | None = None,
    model_override: str | None = None,
) -> tuple[str, str, str]:
    """Resolve ``(provider, base_cmd, delivery)`` from the global CLI config.

    The provider comes from ``provider_override``, then ``llm_provider``, then
    the literal ``"cli"``. The base command is normalised and has
    ``model_override`` applied. Delivery defaults to ``"argument"``.

    Raises:
        RuntimeError: if ``llm_mode`` is not ``"cli"`` or ``llm_cli_base`` is
            empty.
    """
    mode = str(cfg.get("llm_mode") or "").strip().lower()
    if mode != "cli":
        raise RuntimeError("LLM not configured for CLI mode.")
    provider = _normalize_provider_name(provider_override)
    if not provider:
        provider = _normalize_provider_name(str(cfg.get("llm_provider") or "").strip()) or "cli"
    configured_base = str(cfg.get("llm_cli_base") or "").strip()
    if not configured_base:
        raise RuntimeError("LLM CLI base command not set.")
    delivery = str(cfg.get("llm_cli_delivery") or "argument").strip().lower()
    command = _apply_model_flag(normalize_llm_cli_base(configured_base), model_override)
    return provider, command, delivery
568
+
569
+
570
def _load_llm_cli_config() -> tuple[str, str]:
    """Return ``(base_cmd, delivery)`` from the global CLI config.

    The provider resolved alongside them is discarded.
    """
    resolved = _load_cli_config_from_cfg(_load_global_llm_cfg())
    return resolved[1], resolved[2]
574
+
575
+
576
+ _ANSI_CONTROL_SEQUENCE_RE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
577
+
578
+
579
+ def _strip_terminal_control_sequences(raw_text: str) -> str:
580
+ stripped = _ANSI_CONTROL_SEQUENCE_RE.sub("", raw_text)
581
+ return "".join(ch if ord(ch) >= 32 else " " for ch in stripped)
582
+
583
+
584
def _extract_json_payload(raw_text: str) -> Any | None:
    """Find and decode the first JSON object or array inside CLI output.

    Terminal control sequences are stripped first. If the text contains
    triple-backtick fences, the first fenced chunk that starts with ``{`` or
    ``[`` (after an optional ``json`` tag) replaces the text to scan. Decoding
    is then attempted at every ``{`` / ``[`` position, left to right, and the
    first successful parse is returned.

    Returns ``None`` when the text is empty or nothing decodes.
    """
    text = _strip_terminal_control_sequences(raw_text).strip()
    if not text:
        return None
    if "```" in text:
        for fragment in text.split("```"):
            body = fragment.strip()
            if body.startswith("json"):
                body = body[4:].strip()
            if body.startswith(("{", "[")):
                text = body
                break
    decoder = json.JSONDecoder()
    for offset, char in enumerate(text):
        if char not in "[{":
            continue
        try:
            value, _ = decoder.raw_decode(text[offset:])
        except json.JSONDecodeError:
            continue
        return value
    return None
610
+
611
+
612
@dataclass(frozen=True)
class _PreparedPrompt:
    """Frozen record of the prompt text to send plus size/compaction details."""

    # Final prompt text (the raw prompt, or the JSON-rendered payload).
    text: str
    # UTF-8 byte size of the prompt/payload before any compaction.
    original_bytes: int
    # UTF-8 byte size of ``text``.
    effective_bytes: int
    # True when the payload was replaced by artifact references.
    was_compacted: bool = False
    # Directory holding the spilled artifact files, when compaction ran.
    artifact_dir: Path | None = None
    # Number of artifact references contained in ``text``.
    artifact_count: int = 0
620
+
621
+
622
def _prepare_request_prompt(request: "LlmInvocationRequest") -> _PreparedPrompt:
    """Turn a request into the prompt text that will actually be sent.

    * A non-empty ``request.prompt`` is used verbatim.
    * Otherwise ``request.prompt_payload`` is rendered as JSON. When it fits
      the inline byte limit it is sent whole.
    * When it does not fit, oversized sections are written to artifact files
      and replaced with references. If no section qualifies, the entire
      payload is written as one artifact. A
      ``_devflow_prompt_materialization`` note is added in both cases.

    Raises:
        RuntimeError: if neither a prompt nor a payload is present.
    """
    raw_prompt = request.prompt
    if raw_prompt:
        size = len(raw_prompt.encode("utf-8"))
        return _PreparedPrompt(text=raw_prompt, original_bytes=size, effective_bytes=size)

    payload = request.prompt_payload
    if payload is None:
        raise RuntimeError("LLM invocation prompt is required.")

    payload_bytes = _json_byte_size(payload)
    budget = _prompt_inline_byte_limit(request)
    if payload_bytes <= budget:
        inline_text = json.dumps(payload, indent=2, sort_keys=True)
        return _PreparedPrompt(
            text=inline_text,
            original_bytes=payload_bytes,
            effective_bytes=len(inline_text.encode("utf-8")),
        )

    workdir = _prompt_artifact_directory(request)
    slimmed, spilled = _compact_prompt_mapping(
        mapping=dict(payload),
        artifact_dir=workdir,
        inline_limit=budget,
        section_limit=_prompt_section_inline_byte_limit(request),
    )
    if spilled != 0:
        # Some sections were spilled: annotate the compacted payload.
        slimmed["_devflow_prompt_materialization"] = {
            "instructions": (
                "Large prompt sections were replaced with artifact references. "
                "Read any referenced files from the workspace before completing the task."
            ),
            "original_prompt_bytes": payload_bytes,
            "inline_prompt_limit_bytes": budget,
        }
    else:
        # No single section was large enough to spill: ship the whole payload
        # as one artifact and send only a pointer to it.
        whole_payload_file = _write_prompt_artifact(
            artifact_dir=workdir,
            key_path="prompt_payload",
            value=payload,
        )
        slimmed = {
            "_devflow_prompt_materialization": {
                "instructions": (
                    "The original prompt payload exceeded inline prompt limits. "
                    "Read the referenced artifact file before completing the task."
                ),
                "original_prompt_bytes": payload_bytes,
                "inline_prompt_limit_bytes": budget,
            },
            "prompt_payload": _artifact_reference(
                path=whole_payload_file,
                key_path="prompt_payload",
                value=payload,
            ),
        }
        spilled = 1

    compact_text = json.dumps(slimmed, indent=2, sort_keys=True)
    return _PreparedPrompt(
        text=compact_text,
        original_bytes=payload_bytes,
        effective_bytes=len(compact_text.encode("utf-8")),
        was_compacted=True,
        artifact_dir=workdir,
        artifact_count=spilled,
    )
692
+
693
+
694
@dataclass(frozen=True)
class LlmInvocationRequest:
    """Frozen description of one LLM call: what to send and how to send it.

    Optional fields left as ``None`` are filled in by the resolution helpers
    in this module.
    """

    # Label for the call; validation rejects a blank value.
    purpose: str
    # Repository root; validation requires a pathlib.Path.
    repo_root: Path
    # Prompt text; when empty, ``prompt_payload`` is rendered instead.
    prompt: str
    # None resolves to "final_only".
    delivery_model: DeliveryModel | None = None
    # None resolves to "request_response"; string aliases are normalised.
    interaction_model: InteractionModel | str | None = None
    # None means "derive from the tier config, else cli".
    transport: LlmTransport | None = None
    response_contract: ResponseContract | None = None
    # Must be positive when provided.
    timeout_seconds: int | None = None
    # Tier/strength selector; "medium" is assumed when unset.
    strength: str | None = None
    # Structured prompt, JSON-rendered when ``prompt`` is empty.
    prompt_payload: dict[str, Any] | None = None
    # NOTE(review): not read in this part of the file -- purpose to confirm.
    event_context: dict[str, Any] = field(default_factory=dict)
    # NOTE(review): not read in this part of the file -- purpose to confirm.
    journal_context: dict[str, Any] = field(default_factory=dict)
    # Recognised keys here: prompt_inline_bytes_limit,
    # prompt_section_inline_bytes_limit, prompt_artifact_root.
    artifact_policy: dict[str, Any] = field(default_factory=dict)
    # Explicit CLI command; must be supplied together with ``delivery``.
    base_cmd: str | None = None
    # Explicit CLI delivery mode; must be supplied together with ``base_cmd``.
    delivery: str | None = None
    # Explicit provider override; must be a supported provider family.
    provider: str | None = None
    # Explicit model override; must be non-blank when provided.
    model: str | None = None
713
+
714
+
715
@dataclass(frozen=True)
class LlmInvocationResult:
    """Frozen record of the outcome of one LLM invocation.

    NOTE(review): instances are built outside this part of the file, so the
    per-field notes below are limited to what the declarations show.
    """

    # --- required outcome fields ---
    ok: bool
    status: Literal["ok", "error"]
    returncode: int
    stdout: str
    stderr: str
    purpose: str
    repo_root: Path
    model: str | None
    # --- how the call was made (defaults describe a plain CLI call) ---
    transport: LlmTransport = "cli"
    delivery_model: DeliveryModel = "final_only"
    interaction_model: InteractionModel = "request_response"
    provider: str = "cli"
    base_cmd: str = ""
    delivery: str = ""
    # --- response-contract evaluation (None when not applicable) ---
    response_contract: ResponseContract | None = None
    parsed_json: Any | None = None
    contract_ok: bool | None = None
    contract_error: str | None = None
    # --- bookkeeping ---
    session_id: str | None = None
    log_path: Path | None = None
    # --- prompt size and compaction details ---
    prompt_characters: int = 0
    prompt_bytes: int = 0
    effective_prompt_bytes: int = 0
    prompt_was_compacted: bool = False
    prompt_artifact_dir: Path | None = None
    prompt_artifact_count: int = 0
    # --- working directory used for a CLI call ---
    cli_cwd: Path | None = None
    used_sanitized_cli_cwd: bool = False
745
+
746
+
747
@dataclass(frozen=True)
class _ResolvedInvocation:
    """Frozen, fully-resolved settings for one invocation (no ``None`` modes)."""

    transport: LlmTransport
    delivery_model: DeliveryModel
    interaction_model: InteractionModel
    # Normalised provider name, or a placeholder such as "cli".
    provider: str
    # CLI command string; the API resolution paths set it to "".
    base_cmd: str
    # CLI delivery mode; the API resolution paths set it to "".
    delivery: str
    # Selected model, when one could be determined.
    model: str | None
756
+
757
+
758
def _resolve_requested_transport(request: LlmInvocationRequest) -> LlmTransport:
    """Decide whether the request goes over the CLI or the API.

    An explicit ``request.transport`` always wins. Otherwise the tier selected
    by ``request.strength`` (default ``"medium"``) decides:

    * tier missing, or its ``mode`` is anything but ``"api"``: ``"cli"``;
    * tier ``mode`` is ``"api"`` and the global ``llm_mode`` is ``"api"``:
      ``"api"``;
    * tier ``mode`` is ``"api"`` but the global mode is not: ``"cli"`` when
      the tier also carries CLI settings (``base_cmd``, ``cliProfile`` or
      ``cliFamily``), else ``"api"``.
    """
    explicit = request.transport
    if explicit is not None:
        return explicit
    global_cfg = _load_global_llm_cfg()
    # Honour tier config when strength is provided (or default to "medium").
    tier_name = _normalize_strength(request.strength) or "medium"
    tier_settings = _tier_config(tier_name, global_cfg)
    if not tier_settings:
        return "cli"
    tier_mode = str(tier_settings.get("mode") or "").strip().lower()
    if tier_mode != "api":
        return "cli"
    global_mode = str(global_cfg.get("llm_mode") or "").strip().lower()
    if global_mode == "api":
        return "api"
    has_cli_settings = any(
        str(tier_settings.get(key) or "").strip() for key in ("base_cmd", "cliProfile", "cliFamily")
    )
    return "cli" if has_cli_settings else "api"
777
+
778
+
779
+ def _resolve_requested_delivery_model(request: LlmInvocationRequest) -> DeliveryModel:
780
+ if request.delivery_model is not None:
781
+ return request.delivery_model
782
+ return "final_only"
783
+
784
+
785
def _resolve_requested_interaction_model(request: LlmInvocationRequest) -> InteractionModel:
    """Return the normalised interaction model, defaulting to ``"request_response"``."""
    resolved = _normalize_interaction_model(request.interaction_model)
    return "request_response" if resolved is None else resolved
790
+
791
+
792
+
793
def _resolve_api_model_from_cfg(
    cfg: dict[str, Any],
    *,
    strength_override: str | None,
) -> str | None:
    """Read the API model configured for a tier.

    The tier is ``strength_override`` or ``"medium"``; within it ``apiModel``
    is preferred over ``model``. ``None`` is returned when the tier is not
    configured or names no model -- there is no default on this path.
    """
    tier_settings = _tier_config(strength_override or "medium", cfg)
    if not tier_settings:
        return None
    candidate = str(tier_settings.get("apiModel") or tier_settings.get("model") or "").strip()
    return candidate or None
808
+
809
+
810
def _resolve_default_api_model(provider: str | None) -> str | None:
    """Return the built-in default model for a provider family.

    Known families are anthropic, openai, google and ollama; anything else
    (including ``None``) yields ``None``.
    """
    family = _normalize_provider_name(provider)
    defaults_by_family = {
        "anthropic": ANTHROPIC_MODEL_DEFAULT,
        "openai": OPENAI_MODEL_DEFAULT,
        "google": GOOGLE_MODEL_DEFAULT,
        "ollama": OLLAMA_MODEL_DEFAULT,
    }
    return defaults_by_family.get(family)
821
+
822
+
823
def _resolve_api_invocation_from_global_cfg(
    cfg: dict[str, Any],
    *,
    request: "LlmInvocationRequest",
    transport: LlmTransport,
    delivery_model: DeliveryModel,
    interaction_model: InteractionModel,
) -> _ResolvedInvocation | None:
    """Resolve an API invocation from the global config, if it is in API mode.

    Returns ``None`` unless ``llm_mode`` is ``"api"``. Otherwise:

    * provider: the request's provider, else ``llm_provider``;
    * model: the request's model, else the tier's API model;
    * for the ``light`` tier, the canonical light-tier provider/model take
      precedence over the config values (request overrides still win);
    * ``"streaming"`` delivery is downgraded to ``"final_only"`` for providers
      other than openai, anthropic and google.
    """
    if str(cfg.get("llm_mode") or "").strip().lower() != "api":
        return None

    requested_provider = _normalize_provider_name(request.provider)
    provider = requested_provider or _normalize_provider_name(str(cfg.get("llm_provider") or ""))
    model = request.model or _resolve_api_model_from_cfg(cfg, strength_override=request.strength)

    if _normalize_api_tier(request.strength) == "light":
        light = resolve_api_tier_request_overrides(tier="light", cfg=cfg)
        light_provider = requested_provider or _normalize_provider_name(str(light.get("provider") or ""))
        provider = light_provider or provider
        model = request.model or str(light.get("model") or "").strip() or model

    # Only providers that support streaming can use that delivery model via
    # the API path; everything else falls back to final_only.
    streaming_capable = {"openai", "anthropic", "google"}
    if delivery_model == "streaming" and provider not in streaming_capable:
        delivery_model = "final_only"

    return _ResolvedInvocation(
        transport=transport,
        delivery_model=delivery_model,
        interaction_model=interaction_model,
        provider=provider or "provider_api",
        base_cmd="",
        delivery="",
        model=model,
    )
861
+
862
+
863
def _validate_request(request: "LlmInvocationRequest") -> None:
    """Reject malformed invocation requests before any work is done.

    Checks, in order: non-blank purpose; ``repo_root`` is a ``Path``;
    supported delivery model, interaction model, transport, provider and
    response contract; positive timeout; supported strength; non-blank model;
    ``base_cmd`` and ``delivery`` supplied together with a supported delivery;
    and finally that a non-blank prompt (or a prompt payload) is present.

    Raises:
        RuntimeError: on the first check that fails.
    """
    if not str(request.purpose).strip():
        raise RuntimeError("LLM invocation purpose is required.")
    if not isinstance(request.repo_root, Path):
        raise RuntimeError("LLM invocation repo_root must be a pathlib.Path.")
    if request.delivery_model is not None and request.delivery_model not in _SUPPORTED_DELIVERY_MODELS:
        raise RuntimeError(f"Unsupported LLM delivery_model: {request.delivery_model!r}")
    normalized_interaction_model = _normalize_interaction_model(request.interaction_model)
    if request.interaction_model is not None and normalized_interaction_model is None:
        raise RuntimeError(
            f"Unsupported LLM interaction_model: {request.interaction_model!r}. "
            "Supported values: request_response, agentic."
        )
    if request.transport is not None and request.transport not in _SUPPORTED_TRANSPORTS:
        raise RuntimeError(f"Unsupported LLM transport: {request.transport!r}")
    normalized_provider = _normalize_provider_name(request.provider)
    if normalized_provider is not None and normalized_provider not in _SUPPORTED_PROVIDER_FAMILIES:
        raise RuntimeError(
            f"Unsupported LLM provider: {request.provider!r}. "
            "Supported explicit providers: anthropic, openai, google, minimax, ollama."
        )
    if request.response_contract is not None and request.response_contract not in _SUPPORTED_RESPONSE_CONTRACTS:
        raise RuntimeError(f"Unsupported LLM response_contract: {request.response_contract!r}")
    if request.timeout_seconds is not None and request.timeout_seconds <= 0:
        raise RuntimeError("LLM invocation timeout_seconds must be positive when provided.")
    if request.strength is not None and _normalize_strength(request.strength) not in _SUPPORTED_STRENGTHS:
        raise RuntimeError(f"Unsupported LLM strength override: {request.strength!r}")
    if request.model is not None and not str(request.model).strip():
        raise RuntimeError("LLM invocation model must be non-empty when provided.")
    if request.base_cmd is not None or request.delivery is not None:
        # An explicit CLI invocation is all-or-nothing.
        if not request.base_cmd or not request.delivery:
            raise RuntimeError("Explicit CLI invocation requires both base_cmd and delivery.")
        if request.delivery not in _SUPPORTED_DELIVERIES:
            raise RuntimeError(f"Unsupported LLM delivery: {request.delivery!r}")
    normalized_prompt = request.prompt
    if not normalized_prompt and request.prompt_payload is not None:
        normalized_prompt = json.dumps(request.prompt_payload, indent=2, sort_keys=True)
    # The falsy check comes first so a None prompt with no payload produces
    # the intended RuntimeError instead of an AttributeError on .strip().
    if not normalized_prompt or not str(normalized_prompt).strip():
        raise RuntimeError("LLM invocation prompt is required.")
902
+
903
+
904
+ def _provider_to_harness(provider: str) -> str:
905
+ """Derive the CLI harness from the provider for CLI-mode tiers."""
906
+ return {
907
+ "anthropic": "claude",
908
+ "openai": "codex exec",
909
+ "codex": "codex exec",
910
+ "minimax": "pi",
911
+ "google": "pi",
912
+ "ollama": "ollama",
913
+ }.get(provider, provider)
914
+
915
+
916
+ def _cli_family_to_provider_and_harness(cli_family: str | None) -> tuple[str | None, str | None]:
917
+ normalized = str(cli_family or "").strip().lower().replace("-", "_")
918
+ if not normalized:
919
+ return None, None
920
+ mapping = {
921
+ "claude": ("anthropic", "claude"),
922
+ "claude_code": ("anthropic", "claude"),
923
+ "codex": ("openai", "codex exec"),
924
+ "openai": ("openai", "codex exec"),
925
+ "pi": ("google", "pi"),
926
+ "gemini": ("google", "pi"),
927
+ "gemini_cli": ("google", "pi"),
928
+ "ollama": ("ollama", "ollama"),
929
+ }
930
+ return mapping.get(normalized, (None, None))
931
+
932
+
933
def _resolve_cli_from_tier(
    tier_cfg: dict[str, Any],
    provider_override: str | None,
    model_override: str | None,
) -> tuple[str, str, str]:
    """Derive ``(provider, base_cmd, delivery)`` for a CLI call from a tier dict.

    Tier keys consulted: ``cliFamily``, ``cliProfile``, ``apiProvider``,
    ``provider``, ``model``, ``base_cmd``, ``delivery``.

    Provider precedence: explicit override, then the provider implied by
    ``cliFamily``, then ``apiProvider`` / ``provider``, then ``"cli"``.
    Model precedence: explicit override, then ``cliProfile``, then ``model``.
    A configured ``base_cmd`` is normalised and gets the model applied; an
    empty one is returned as ``""``. Delivery defaults to ``"argument"``.
    """
    family_label = str(tier_cfg.get("cliFamily") or "").strip()
    family_provider = _cli_family_to_provider_and_harness(family_label)[0]

    provider = _normalize_provider_name(provider_override) or family_provider
    if not provider:
        tier_provider = str(tier_cfg.get("apiProvider") or tier_cfg.get("provider") or "").strip()
        provider = _normalize_provider_name(tier_provider) or "cli"

    model = model_override
    if not model:
        model = str(tier_cfg.get("cliProfile") or tier_cfg.get("model") or "").strip() or None

    command = str(tier_cfg.get("base_cmd") or "").strip()
    delivery = str(tier_cfg.get("delivery") or "argument").strip().lower()
    if command:
        command = _apply_model_flag(normalize_llm_cli_base(command), model)
    return provider, command, delivery
955
+
956
+
957
def _resolve_invocation(request: LlmInvocationRequest) -> _ResolvedInvocation:
    """Turn a request into the concrete transport, provider and command to run.

    Resolution order, as implemented below:

    1. If the request carries an explicit ``base_cmd`` or ``delivery``, those
       are used directly and no configuration is loaded.
    2. Otherwise the global LLM config is loaded and the tier for
       ``request.strength`` (default ``"medium"``) is looked up. When there is
       no tier entry, CLI settings come from ``_load_cli_config_from_cfg``.
    3. When a tier exists and the requested transport is ``"api"``, the tier
       itself must be in API mode; provider and model come from the tier.
    4. Otherwise the CLI command comes from ``_resolve_cli_from_tier``; if that
       yields no ``base_cmd``, one is constructed from the tier's
       ``cliFamily`` / ``cliProfile``.

    Raises:
        RuntimeError: if API transport is requested for a tier that is not in
            API mode, or if a CLI tier has neither ``base_cmd`` nor
            ``cliProfile``.
    """
    if request.strength is not None:
        # Surface explicit strength selection so it is visible for auditing.
        audit_message = f"[invoke_llm] purpose={request.purpose!r} strength={request.strength!r}"
        warnings.warn(audit_message, UserWarning, stacklevel=2)

    transport = _resolve_requested_transport(request)
    delivery_model = _resolve_requested_delivery_model(request)
    interaction_model = _resolve_requested_interaction_model(request)

    def _cli_result(transport_name, provider_name, command, delivery_mode):
        # Every CLI-shaped result derives its model the same way: the request's
        # model wins, otherwise whatever model flag the command already carries.
        return _ResolvedInvocation(
            transport=transport_name,
            delivery_model=delivery_model,
            interaction_model=interaction_model,
            provider=provider_name,
            base_cmd=command,
            delivery=delivery_mode,
            model=request.model or _extract_model_flag(command),
        )

    # 1. Explicit command on the request: bypass configuration entirely.
    has_explicit_cli = not (request.base_cmd is None and request.delivery is None)
    if has_explicit_cli:
        explicit_cmd = normalize_llm_cli_base(request.base_cmd)
        # The provider is inferred from the command *before* the model flag is
        # applied; the last resort is the executable name itself.
        explicit_provider = (
            _normalize_provider_name(request.provider)
            or _infer_provider_from_base_cmd(explicit_cmd)
            or shlex.split(explicit_cmd)[0].strip().lower()
        )
        return _cli_result(
            transport,
            explicit_provider,
            _apply_model_flag(explicit_cmd, request.model),
            request.delivery,
        )

    # 2. Configuration-driven resolution.
    cfg = _load_global_llm_cfg()
    strength = request.strength or "medium"
    tier_cfg = _tier_config(strength, cfg)
    if tier_cfg is None:
        provider, base_cmd, delivery = _load_cli_config_from_cfg(
            cfg,
            provider_override=request.provider,
            model_override=request.model,
        )
        return _cli_result("cli", provider, base_cmd, delivery)

    tier_mode = str(tier_cfg.get("mode") or tier_cfg.get("transport") or "cli").strip().lower()

    # 3. API transport: the tier must be configured for it.
    if transport == "api":
        if tier_mode != "api":
            raise RuntimeError(
                f"transport='api' requested but no tier configuration provides apiMode for strength={strength!r}. "
                f"Set tier.{strength}.mode='api' and configure apiProvider + apiModel in the tier config."
            )
        api_provider = _normalize_provider_name(
            str(tier_cfg.get("apiProvider") or tier_cfg.get("provider") or "").strip()
        )
        api_model = str(tier_cfg.get("apiModel") or tier_cfg.get("model") or "").strip() or None
        return _ResolvedInvocation(
            transport="api",
            delivery_model=delivery_model,
            interaction_model=interaction_model,
            provider=api_provider or "cli",
            base_cmd="",
            delivery="",
            model=request.model or api_model,
        )

    # 4. CLI transport driven by the tier.
    provider, base_cmd, delivery = _resolve_cli_from_tier(
        tier_cfg,
        provider_override=request.provider,
        model_override=request.model,
    )
    if not base_cmd:
        cli_profile = str(tier_cfg.get("cliProfile") or "").strip()
        cli_family = str(tier_cfg.get("cliFamily") or "").strip()
        family_provider, family_harness = _cli_family_to_provider_and_harness(cli_family)
        harness_provider = (
            _normalize_provider_name(request.provider)
            or family_provider
            or _normalize_provider_name(str(tier_cfg.get("apiProvider") or provider or "").strip())
            or provider
        )
        harness = family_harness or _provider_to_harness(harness_provider)
        if not cli_profile:
            raise RuntimeError(
                f"Tier {strength!r} is configured but specifies neither base_cmd nor cliProfile "
                f"— cannot construct a CLI invocation. Configure the tier's cliFamily/cliProfile."
            )
        # Tier has a cliProfile but no explicit base_cmd: build the command
        # from the configured CLI family. Only the seed command differs
        # between the "pi" harness and every other harness.
        if harness == "pi":
            seed_cmd = f"pi --provider {harness_provider}"
        else:
            seed_cmd = normalize_llm_cli_base(harness)
        base_cmd = _apply_model_flag(seed_cmd, request.model or cli_profile)
        delivery = "argument"

    return _cli_result("cli", provider, base_cmd, delivery)
1069
+
1070
+
1071
def _evaluate_response_contract(
    *,
    response_contract: ResponseContract | None,
    stdout: str,
) -> tuple[Any | None, bool | None, str | None]:
    """Check ``stdout`` against the requested response contract.

    Returns a ``(parsed_json, contract_ok, contract_error)`` triple:

    * ``"json_only"``: ``parsed_json`` is whatever ``_extract_json_payload``
      returns; ``contract_ok`` is True when that is not ``None``; otherwise
      ``contract_error`` carries a message.
    * ``"text"``: ``(None, True, None)``; no parsing is attempted.
    * any other value, including ``None``: ``(None, None, None)``, i.e. no
      contract was evaluated.
    """
    if response_contract == "text":
        return None, True, None
    if response_contract != "json_only":
        # No recognised contract requested: nothing to evaluate.
        return None, None, None

    payload = _extract_json_payload(stdout)
    if payload is None:
        return None, False, "Expected a JSON object or array response."
    return payload, True, None
1087
+
1088
+
1089
def _invoke_provider_api(
    *,
    resolved: _ResolvedInvocation,
    prompt: str,
    repo_root: Path,
    timeout_seconds: int | None,
    response_contract: ResponseContract | None,
) -> tuple[bool, int, str, str]:
    """Execute a one-shot prompt over a provider's HTTP API.

    The provider is mapped to an API shape via
    ``canonical_provider_api_shape`` and dispatched to the Anthropic, OpenAI
    or Google helper accordingly.

    Returns:
        ``(ok, returncode, stdout, stderr)``. On success this is
        ``(True, 0, text, "")``, where ``text`` falls back to the JSON dump of
        the raw response when the extracted text is empty. On a
        ``RuntimeError`` inside the dispatch it is ``(False, 1, "", message)``.

    Raises:
        NotImplementedError: when ``resolved.delivery_model`` is
            ``"streaming"``. This check runs before the ``try`` block, so it
            propagates to the caller.

    Note:
        ``NotImplementedError`` is a subclass of ``RuntimeError``, so the
        "not implemented" errors raised *inside* the ``try`` block are caught
        and reported as a failed result rather than propagated.
    """
    if resolved.delivery_model == "streaming":
        raise NotImplementedError(
            "invoke_llm API transport does not support delivery_model='streaming' yet. "
            "Implemented API subset: Anthropic-style, OpenAI, and Google one-shot execution only."
        )

    shape = canonical_provider_api_shape(resolved.provider)
    try:
        if shape == "anthropic":
            # Only the first-party Anthropic endpoint is wired up; other
            # providers sharing the shape are recognised but unsupported.
            if resolved.provider != "anthropic":
                raise NotImplementedError(
                    f"Provider {resolved.provider!r} is recognized as using the Anthropic-style API shape, "
                    "but its provider-specific HTTP wiring is not implemented yet."
                )
            api_settings = load_anthropic_api_settings(repo_root=repo_root, model=resolved.model)
            payload = anthropic_messages_create(
                settings=api_settings,
                system_prompt="",
                messages=[{"role": "user", "content": prompt}],
                tools=[],
                timeout_seconds=timeout_seconds,
            )
            text = anthropic_response_text(payload)
        elif shape == "openai":
            api_settings = load_openai_api_settings(
                repo_root=repo_root,
                provider=resolved.provider,
                model=resolved.model,
            )
            # Of the three helpers, only the OpenAI one is handed the response contract.
            payload = openai_responses_create(
                settings=api_settings,
                prompt=prompt,
                timeout_seconds=timeout_seconds,
                response_contract=response_contract,
            )
            text = openai_response_text(payload)
        elif shape == "google":
            api_settings = load_google_api_settings(repo_root=repo_root, model=resolved.model)
            payload = google_generate_content(
                settings=api_settings,
                prompt=prompt,
                timeout_seconds=timeout_seconds,
            )
            text = google_response_text(payload)
        else:
            raise NotImplementedError(
                "invoke_llm API transport one-shot mode is implemented for providers "
                "using the Anthropic, OpenAI, and Google canonical API shapes. "
                f"Received provider={resolved.provider!r}."
            )
    except RuntimeError as exc:
        return False, 1, "", str(exc)

    # No extractable text: hand back the raw response so the caller still has
    # something to inspect.
    return True, 0, text or json.dumps(payload, indent=2, sort_keys=True), ""
1157
+
1158
+
1159
# Patterns that indicate a retryable rate-limit / resource-exhausted error.
# Order: most specific first. Matching is case-insensitive; all-digit entries
# (HTTP status codes) are matched as standalone numbers, not raw substrings.
_RATE_LIMIT_PATTERNS = (
    "RESOURCE_EXHAUSTED",
    "429",
    "rate limit",
    "too many requests",
    "service unavailable",
    "overloaded",
)


def _is_rate_limit_failure(stderr: str, stdout: str) -> bool:
    """Return True when the combined output looks like a rate-limit failure.

    ``stderr`` and ``stdout`` are joined and lower-cased, then checked against
    every entry of ``_RATE_LIMIT_PATTERNS``. Textual patterns match as plain
    substrings. Numeric patterns such as ``"429"`` only match when they are not
    adjacent to other digits, so unrelated numbers that merely contain the
    code (``"14290 bytes"``, ``"line 1429"``) do not trigger a retry.

    ``None`` for either stream is treated as an empty string.
    """
    import re as _re

    combined = f"{stderr or ''}\n{stdout or ''}".lower()
    for pattern in _RATE_LIMIT_PATTERNS:
        needle = pattern.lower()
        if needle.isdigit():
            # Digit-only lookarounds (rather than \b) keep forms like
            # "HTTP429" or "code=429" matching while rejecting "1429"/"4290".
            if _re.search(rf"(?<!\d){_re.escape(needle)}(?!\d)", combined):
                return True
        elif needle in combined:
            return True
    return False
1174
+
1175
+
1176
def _sleep_for(attempt: int) -> None:
    """Sleep for the backoff delay belonging to retry ``attempt``.

    Delays are 1, 2, 4 and 10 seconds for attempts 0 through 3; any attempt
    index past the end of the schedule reuses the final 10-second delay.
    """
    import time as _time

    schedule = (1, 2, 4, 10)
    last_slot = len(schedule) - 1
    slot = attempt if attempt < last_slot else last_slot
    _time.sleep(schedule[slot])
1181
+
1182
+
1183
def _run_cli_with_rate_limit_retry(run_once: Any) -> tuple[Any, bool, str]:
    """Run one CLI attempt, retrying with backoff on rate-limit failures.

    ``run_once`` is a zero-argument callable that performs a single attempt
    and returns a result object exposing ``ok``, ``returncode``, ``stdout``
    and ``stderr``.

    Returns ``(outcome, ok, stderr)``:

    * ``outcome`` is the first attempt's result, unless a retry succeeded, in
      which case it is that retry's result.
    * ``ok`` is the first attempt's ``ok`` value, or ``True`` when a retry
      succeeded.
    * ``stderr`` is ``outcome.stderr``, except when every retry failed: then
      it is the retry log followed by the *first* attempt's stderr.

    Retries happen only when the first attempt failed and
    ``_is_rate_limit_failure`` recognises its output; up to four retries are
    made, each preceded by ``_sleep_for(attempt)``.
    """
    first = run_once()
    if first.ok or not _is_rate_limit_failure(first.stderr, first.stdout):
        return first, first.ok, first.stderr

    # Labels are for the log only; the actual delays live in _sleep_for.
    backoff_labels = ("1s", "2s", "4s", "10s")
    max_retries = len(backoff_labels)
    retry_messages: list[str] = []
    for attempt, label in enumerate(backoff_labels):
        _sleep_for(attempt)
        retry_messages.append(f"[rate-limit retry {attempt + 1}/{max_retries} after {label}]")
        retry = run_once()
        if retry.ok:
            return retry, True, retry.stderr
        retry_messages.append(f" still failing (rc={retry.returncode}): {retry.stderr[:120]!r}")

    # All retries failed: report the first attempt, prefixed by the retry log.
    retry_detail = "\n".join(retry_messages)
    return first, first.ok, f"{retry_detail}\n{first.stderr}"


def invoke_llm(request: LlmInvocationRequest) -> LlmInvocationResult:
    """Validate, resolve and execute an LLM invocation request.

    Steps, in order:

    1. Validate the request and prepare its prompt.
    2. Resolve transport / provider / command via ``_resolve_invocation``.
    3. Execute over one of three paths: the provider HTTP API when the
       resolved transport is ``"api"``; otherwise the streaming CLI runner
       when the delivery model is ``"streaming"``; otherwise the one-shot CLI
       runner. Both CLI paths retry on rate-limit failures; the API path does
       not.
    4. Evaluate the response contract against stdout and package everything
       into an ``LlmInvocationResult``.

    ``session_id`` and ``log_path`` are only populated on the streaming path.
    ``cli_cwd`` is only reported for the CLI transport.
    """
    _validate_request(request)
    prepared_prompt = _prepare_request_prompt(request)
    prompt = prepared_prompt.text
    resolved = _resolve_invocation(request)

    session_id: str | None = None
    log_path: Path | None = None
    prompt_characters = len(prompt)
    prompt_bytes = prepared_prompt.original_bytes

    is_cli = resolved.transport == "cli"
    if is_cli:
        cli_cwd = _resolve_cli_cwd(
            repo_root=request.repo_root,
            provider=resolved.provider,
            base_cmd=resolved.base_cmd,
        )
    else:
        cli_cwd = request.repo_root
    # True when the CLI runs somewhere other than the requested repo root.
    used_sanitized_cli_cwd = is_cli and cli_cwd != request.repo_root

    if resolved.transport == "api":
        ok, returncode, stdout, stderr = _invoke_provider_api(
            resolved=resolved,
            prompt=prompt,
            repo_root=request.repo_root,
            timeout_seconds=request.timeout_seconds,
            response_contract=request.response_contract,
        )
    elif resolved.delivery_model == "streaming":
        outcome, ok, stderr = _run_cli_with_rate_limit_retry(
            lambda: run_streaming(
                provider=resolved.provider,
                base_cmd=resolved.base_cmd,
                delivery=resolved.delivery,
                prompt=prompt,
                cwd=cli_cwd,
                timeout_s=request.timeout_seconds,
            )
        )
        returncode = outcome.returncode
        stdout = outcome.stdout
        session_id = outcome.session_id
        log_path = outcome.log_path
    else:
        outcome, ok, stderr = _run_cli_with_rate_limit_retry(
            lambda: run_one_shot(
                base_cmd=resolved.base_cmd,
                delivery=resolved.delivery,
                prompt=prompt,
                cwd=cli_cwd,
                timeout_seconds=request.timeout_seconds,
                provider=resolved.provider,
            )
        )
        returncode = outcome.returncode
        stdout = outcome.stdout

    parsed_json, contract_ok, contract_error = _evaluate_response_contract(
        response_contract=request.response_contract,
        stdout=stdout,
    )
    return LlmInvocationResult(
        ok=ok,
        status="ok" if ok else "error",
        returncode=returncode,
        stdout=stdout,
        stderr=stderr,
        transport=resolved.transport,
        delivery_model=resolved.delivery_model,
        interaction_model=resolved.interaction_model,
        provider=resolved.provider,
        purpose=request.purpose,
        repo_root=request.repo_root,
        base_cmd=resolved.base_cmd,
        delivery=resolved.delivery,
        model=resolved.model,
        response_contract=request.response_contract,
        parsed_json=parsed_json,
        contract_ok=contract_ok,
        contract_error=contract_error,
        session_id=session_id,
        log_path=log_path,
        prompt_characters=prompt_characters,
        prompt_bytes=prompt_bytes,
        effective_prompt_bytes=prepared_prompt.effective_bytes,
        prompt_was_compacted=prepared_prompt.was_compacted,
        prompt_artifact_dir=prepared_prompt.artifact_dir,
        prompt_artifact_count=prepared_prompt.artifact_count,
        cli_cwd=cli_cwd if is_cli else None,
        used_sanitized_cli_cwd=used_sanitized_cli_cwd,
    )