devflow-engine 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. devflow_engine/__init__.py +3 -0
  2. devflow_engine/agentic_prompts.py +100 -0
  3. devflow_engine/agentic_runtime.py +398 -0
  4. devflow_engine/api_key_flow_harness.py +539 -0
  5. devflow_engine/api_keys.py +357 -0
  6. devflow_engine/bootstrap/__init__.py +2 -0
  7. devflow_engine/bootstrap/provision_from_template.py +84 -0
  8. devflow_engine/cli/__init__.py +0 -0
  9. devflow_engine/cli/app.py +7270 -0
  10. devflow_engine/core/__init__.py +0 -0
  11. devflow_engine/core/config.py +86 -0
  12. devflow_engine/core/logging.py +29 -0
  13. devflow_engine/core/paths.py +45 -0
  14. devflow_engine/core/toml_kv.py +33 -0
  15. devflow_engine/devflow_event_worker.py +1292 -0
  16. devflow_engine/devflow_state.py +201 -0
  17. devflow_engine/devin2/__init__.py +9 -0
  18. devflow_engine/devin2/agent_definition.py +120 -0
  19. devflow_engine/devin2/pi_runner.py +204 -0
  20. devflow_engine/devin_orchestration.py +69 -0
  21. devflow_engine/docs/prompts/anti-patterns.md +42 -0
  22. devflow_engine/docs/prompts/devin-agent-prompt.md +55 -0
  23. devflow_engine/docs/prompts/devin2-agent-prompt.md +81 -0
  24. devflow_engine/docs/prompts/examples/devin-vapi-clone-reference-exchange.json +85 -0
  25. devflow_engine/doctor/__init__.py +2 -0
  26. devflow_engine/doctor/triage.py +140 -0
  27. devflow_engine/error/__init__.py +0 -0
  28. devflow_engine/error/remediation.py +21 -0
  29. devflow_engine/errors/error_solver_dag.py +522 -0
  30. devflow_engine/errors/runtime_observability.py +67 -0
  31. devflow_engine/idea/__init__.py +4 -0
  32. devflow_engine/idea/actors.py +481 -0
  33. devflow_engine/idea/agentic.py +465 -0
  34. devflow_engine/idea/analyze.py +93 -0
  35. devflow_engine/idea/devin_chat_dag.py +1 -0
  36. devflow_engine/idea/diff.py +99 -0
  37. devflow_engine/idea/drafts.py +446 -0
  38. devflow_engine/idea/idea_creation_dag.py +643 -0
  39. devflow_engine/idea/ideation_enrichment.py +355 -0
  40. devflow_engine/idea/ideation_enrichment_worker.py +19 -0
  41. devflow_engine/idea/paths.py +28 -0
  42. devflow_engine/idea/promote.py +53 -0
  43. devflow_engine/idea/redaction.py +27 -0
  44. devflow_engine/idea/repo_tools.py +1277 -0
  45. devflow_engine/idea/response_mode.py +30 -0
  46. devflow_engine/idea/story_pipeline.py +1585 -0
  47. devflow_engine/idea/sufficiency.py +376 -0
  48. devflow_engine/idea/traditional_stories.py +1257 -0
  49. devflow_engine/implementation/__init__.py +0 -0
  50. devflow_engine/implementation/alembic_preflight.py +700 -0
  51. devflow_engine/implementation/dag.py +8450 -0
  52. devflow_engine/implementation/green_gate.py +93 -0
  53. devflow_engine/implementation/prompts.py +108 -0
  54. devflow_engine/implementation/test_runtime.py +623 -0
  55. devflow_engine/integration/__init__.py +19 -0
  56. devflow_engine/integration/agentic.py +66 -0
  57. devflow_engine/integration/dag.py +3539 -0
  58. devflow_engine/integration/prompts.py +114 -0
  59. devflow_engine/integration/supabase_schema.sql +31 -0
  60. devflow_engine/integration/supabase_sync.py +177 -0
  61. devflow_engine/llm/__init__.py +1 -0
  62. devflow_engine/llm/cli_one_shot.py +84 -0
  63. devflow_engine/llm/cli_stream.py +371 -0
  64. devflow_engine/llm/execution_context.py +26 -0
  65. devflow_engine/llm/invoke.py +1322 -0
  66. devflow_engine/llm/provider_api.py +304 -0
  67. devflow_engine/llm/repo_knowledge.py +588 -0
  68. devflow_engine/llm_primitives.py +315 -0
  69. devflow_engine/orchestration.py +62 -0
  70. devflow_engine/planning/__init__.py +0 -0
  71. devflow_engine/planning/analyze_repo.py +92 -0
  72. devflow_engine/planning/render_drafts.py +133 -0
  73. devflow_engine/playground/__init__.py +0 -0
  74. devflow_engine/playground/hooks.py +26 -0
  75. devflow_engine/playwright_workflow/__init__.py +5 -0
  76. devflow_engine/playwright_workflow/dag.py +1317 -0
  77. devflow_engine/process/__init__.py +5 -0
  78. devflow_engine/process/dag.py +59 -0
  79. devflow_engine/project_registration/__init__.py +3 -0
  80. devflow_engine/project_registration/dag.py +1581 -0
  81. devflow_engine/project_registry.py +109 -0
  82. devflow_engine/prompts/devin/generic/prompt.md +6 -0
  83. devflow_engine/prompts/devin/ideation/prompt.md +263 -0
  84. devflow_engine/prompts/devin/ideation/scenarios.md +5 -0
  85. devflow_engine/prompts/devin/ideation_loop/prompt.md +6 -0
  86. devflow_engine/prompts/devin/insight/prompt.md +11 -0
  87. devflow_engine/prompts/devin/insight/scenarios.md +5 -0
  88. devflow_engine/prompts/devin/intake/prompt.md +15 -0
  89. devflow_engine/prompts/devin/iterate/prompt.md +12 -0
  90. devflow_engine/prompts/devin/shared/eval_doctrine.md +9 -0
  91. devflow_engine/prompts/devin/shared/principles.md +246 -0
  92. devflow_engine/prompts/devin_eval/assessment/prompt.md +18 -0
  93. devflow_engine/prompts/idea/api_ideation_agent/prompt.md +8 -0
  94. devflow_engine/prompts/idea/api_insight_agent/prompt.md +8 -0
  95. devflow_engine/prompts/idea/response_doctrine/prompt.md +18 -0
  96. devflow_engine/prompts/implementation/dependency_assessment/prompt.md +12 -0
  97. devflow_engine/prompts/implementation/green/green/prompt.md +11 -0
  98. devflow_engine/prompts/implementation/green/node_config/prompt.md +3 -0
  99. devflow_engine/prompts/implementation/green_review/outcome_review/prompt.md +5 -0
  100. devflow_engine/prompts/implementation/green_review/prior_run_review/prompt.md +5 -0
  101. devflow_engine/prompts/implementation/red/prompt.md +27 -0
  102. devflow_engine/prompts/implementation/redreview/prompt.md +23 -0
  103. devflow_engine/prompts/implementation/redreview_repair/prompt.md +16 -0
  104. devflow_engine/prompts/implementation/setupdoc/prompt.md +10 -0
  105. devflow_engine/prompts/implementation/story_planning/prompt.md +13 -0
  106. devflow_engine/prompts/implementation/test_design/prompt.md +27 -0
  107. devflow_engine/prompts/integration/README.md +185 -0
  108. devflow_engine/prompts/integration/green/example.md +67 -0
  109. devflow_engine/prompts/integration/green/green/prompt.md +10 -0
  110. devflow_engine/prompts/integration/green/node_config/prompt.md +42 -0
  111. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/green/prompt.md +15 -0
  112. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/node_config/prompt.md +42 -0
  113. devflow_engine/prompts/integration/green_enrich/example.md +79 -0
  114. devflow_engine/prompts/integration/green_enrich/green_enrich/prompt.md +9 -0
  115. devflow_engine/prompts/integration/green_enrich/node_config/prompt.md +41 -0
  116. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/green_enrich/prompt.md +14 -0
  117. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  118. devflow_engine/prompts/integration/red/code_repair/prompt.md +12 -0
  119. devflow_engine/prompts/integration/red/example.md +152 -0
  120. devflow_engine/prompts/integration/red/node_config/prompt.md +86 -0
  121. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  122. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/node_config/prompt.md +84 -0
  123. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red/prompt.md +16 -0
  124. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red_repair/prompt.md +15 -0
  125. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  126. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/node_config/prompt.md +84 -0
  127. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/red_repair/prompt.md +11 -0
  128. devflow_engine/prompts/integration/red/red/prompt.md +11 -0
  129. devflow_engine/prompts/integration/red/red_repair/prompt.md +12 -0
  130. devflow_engine/prompts/integration/red_review/example.md +71 -0
  131. devflow_engine/prompts/integration/red_review/node_config/prompt.md +41 -0
  132. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  133. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/red_review/prompt.md +15 -0
  134. devflow_engine/prompts/integration/red_review/red_review/prompt.md +9 -0
  135. devflow_engine/prompts/integration/resolve/example.md +111 -0
  136. devflow_engine/prompts/integration/resolve/node_config/prompt.md +64 -0
  137. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/node_config/prompt.md +64 -0
  138. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_implicated_users/prompt.md +15 -0
  139. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_side_effects/prompt.md +15 -0
  140. devflow_engine/prompts/integration/resolve/resolve_implicated_users/prompt.md +10 -0
  141. devflow_engine/prompts/integration/resolve/resolve_side_effects/prompt.md +10 -0
  142. devflow_engine/prompts/integration/validate/build_idea_acceptance_coverage/prompt.md +12 -0
  143. devflow_engine/prompts/integration/validate/code_repair/prompt.md +13 -0
  144. devflow_engine/prompts/integration/validate/example.md +143 -0
  145. devflow_engine/prompts/integration/validate/node_config/prompt.md +87 -0
  146. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  147. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/node_config/prompt.md +67 -0
  148. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_enrich_gate/prompt.md +17 -0
  149. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_repair/prompt.md +16 -0
  150. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  151. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/node_config/prompt.md +67 -0
  152. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/validate_repair/prompt.md +9 -0
  153. devflow_engine/prompts/integration/validate/validate_enrich_gate/prompt.md +10 -0
  154. devflow_engine/prompts/integration/validate/validate_repair/prompt.md +20 -0
  155. devflow_engine/prompts/integration/write_workflows/example.md +100 -0
  156. devflow_engine/prompts/integration/write_workflows/node_config/prompt.md +44 -0
  157. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/node_config/prompt.md +44 -0
  158. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/write_workflows/prompt.md +17 -0
  159. devflow_engine/prompts/integration/write_workflows/write_workflows/prompt.md +11 -0
  160. devflow_engine/prompts/iterate/README.md +7 -0
  161. devflow_engine/prompts/iterate/coder/prompt.md +11 -0
  162. devflow_engine/prompts/iterate/framer/prompt.md +11 -0
  163. devflow_engine/prompts/iterate/iterator/prompt.md +13 -0
  164. devflow_engine/prompts/iterate/observer/prompt.md +11 -0
  165. devflow_engine/prompts/recovery/diagnosis/prompt.md +7 -0
  166. devflow_engine/prompts/recovery/execution/prompt.md +8 -0
  167. devflow_engine/prompts/recovery/execution_verification/prompt.md +7 -0
  168. devflow_engine/prompts/recovery/failure_investigation/prompt.md +10 -0
  169. devflow_engine/prompts/recovery/preflight_health_repo_repair/prompt.md +8 -0
  170. devflow_engine/prompts/recovery/remediation_execution/prompt.md +11 -0
  171. devflow_engine/prompts/recovery/root_cause_investigation/prompt.md +12 -0
  172. devflow_engine/prompts/scope_idea/doctrine/prompt.md +7 -0
  173. devflow_engine/prompts/source_doc_eval/document/prompt.md +6 -0
  174. devflow_engine/prompts/source_doc_eval/targeted_mutation/prompt.md +9 -0
  175. devflow_engine/prompts/source_doc_mutation/domain_entities/prompt.md +6 -0
  176. devflow_engine/prompts/source_doc_mutation/product_brief/prompt.md +6 -0
  177. devflow_engine/prompts/source_doc_mutation/project_doc_coherence/prompt.md +7 -0
  178. devflow_engine/prompts/source_doc_mutation/project_doc_render/prompt.md +9 -0
  179. devflow_engine/prompts/source_doc_mutation/source_doc_coherence/prompt.md +5 -0
  180. devflow_engine/prompts/source_doc_mutation/source_doc_enrichment_coherence/prompt.md +6 -0
  181. devflow_engine/prompts/source_doc_mutation/user_workflows/prompt.md +6 -0
  182. devflow_engine/prompts/source_scope/doctrine/prompt.md +10 -0
  183. devflow_engine/prompts/ui_grounding/doctrine/prompt.md +7 -0
  184. devflow_engine/recovery/__init__.py +3 -0
  185. devflow_engine/recovery/dag.py +2609 -0
  186. devflow_engine/recovery/models.py +220 -0
  187. devflow_engine/refactor.py +93 -0
  188. devflow_engine/registry/__init__.py +1 -0
  189. devflow_engine/registry/cards.py +238 -0
  190. devflow_engine/registry/domain_normalize.py +60 -0
  191. devflow_engine/registry/effects.py +65 -0
  192. devflow_engine/registry/enforce_report.py +150 -0
  193. devflow_engine/registry/module_cards_classify.py +164 -0
  194. devflow_engine/registry/module_cards_draft.py +184 -0
  195. devflow_engine/registry/module_cards_gate.py +59 -0
  196. devflow_engine/registry/packages.py +347 -0
  197. devflow_engine/registry/pathways.py +323 -0
  198. devflow_engine/review/__init__.py +11 -0
  199. devflow_engine/review/dag.py +588 -0
  200. devflow_engine/review/review_story.py +67 -0
  201. devflow_engine/scope_idea/__init__.py +3 -0
  202. devflow_engine/scope_idea/agentic.py +39 -0
  203. devflow_engine/scope_idea/dag.py +1069 -0
  204. devflow_engine/scope_idea/models.py +175 -0
  205. devflow_engine/skills/builtins/devflow/queue_failure_investigation/SKILL.md +112 -0
  206. devflow_engine/skills/builtins/devflow/queue_idea_to_story/SKILL.md +120 -0
  207. devflow_engine/skills/builtins/devflow/queue_integration/SKILL.md +105 -0
  208. devflow_engine/skills/builtins/devflow/queue_recovery/SKILL.md +108 -0
  209. devflow_engine/skills/builtins/devflow/queue_runtime_core/SKILL.md +155 -0
  210. devflow_engine/skills/builtins/devflow/queue_story_implementation/SKILL.md +122 -0
  211. devflow_engine/skills/builtins/devin/idea_to_story_handoff/SKILL.md +120 -0
  212. devflow_engine/skills/builtins/devin/ideation/SKILL.md +168 -0
  213. devflow_engine/skills/builtins/devin/ideation/state-and-phrasing-reference.md +18 -0
  214. devflow_engine/skills/builtins/devin/insight/SKILL.md +22 -0
  215. devflow_engine/skills/registry.example.yaml +42 -0
  216. devflow_engine/source_doc_assumptions.py +291 -0
  217. devflow_engine/source_doc_mutation_dag.py +1606 -0
  218. devflow_engine/source_doc_mutation_eval.py +417 -0
  219. devflow_engine/source_doc_mutation_worker.py +25 -0
  220. devflow_engine/source_docs_schema.py +207 -0
  221. devflow_engine/source_docs_updater.py +309 -0
  222. devflow_engine/source_scope/__init__.py +15 -0
  223. devflow_engine/source_scope/agentic.py +45 -0
  224. devflow_engine/source_scope/dag.py +1626 -0
  225. devflow_engine/source_scope/models.py +177 -0
  226. devflow_engine/stores/__init__.py +0 -0
  227. devflow_engine/stores/execution_store.py +3534 -0
  228. devflow_engine/story/__init__.py +0 -0
  229. devflow_engine/story/contracts.py +160 -0
  230. devflow_engine/story/discovery.py +47 -0
  231. devflow_engine/story/evidence.py +118 -0
  232. devflow_engine/story/hashing.py +27 -0
  233. devflow_engine/story/implemented_queue_purge.py +148 -0
  234. devflow_engine/story/indexer.py +105 -0
  235. devflow_engine/story/io.py +20 -0
  236. devflow_engine/story/markdown_contracts.py +298 -0
  237. devflow_engine/story/reconciliation.py +408 -0
  238. devflow_engine/story/validate_stories.py +149 -0
  239. devflow_engine/story/validate_tests_story.py +512 -0
  240. devflow_engine/story/validation.py +133 -0
  241. devflow_engine/ui_grounding/__init__.py +11 -0
  242. devflow_engine/ui_grounding/agentic.py +31 -0
  243. devflow_engine/ui_grounding/dag.py +874 -0
  244. devflow_engine/ui_grounding/models.py +224 -0
  245. devflow_engine/ui_grounding/pencil_bridge.py +247 -0
  246. devflow_engine/vendor/__init__.py +0 -0
  247. devflow_engine/vendor/datalumina_genai/__init__.py +11 -0
  248. devflow_engine/vendor/datalumina_genai/core/__init__.py +0 -0
  249. devflow_engine/vendor/datalumina_genai/core/exceptions.py +9 -0
  250. devflow_engine/vendor/datalumina_genai/core/nodes/__init__.py +0 -0
  251. devflow_engine/vendor/datalumina_genai/core/nodes/agent.py +48 -0
  252. devflow_engine/vendor/datalumina_genai/core/nodes/agent_streaming_node.py +26 -0
  253. devflow_engine/vendor/datalumina_genai/core/nodes/base.py +89 -0
  254. devflow_engine/vendor/datalumina_genai/core/nodes/concurrent.py +30 -0
  255. devflow_engine/vendor/datalumina_genai/core/nodes/router.py +69 -0
  256. devflow_engine/vendor/datalumina_genai/core/schema.py +72 -0
  257. devflow_engine/vendor/datalumina_genai/core/task.py +52 -0
  258. devflow_engine/vendor/datalumina_genai/core/validate.py +139 -0
  259. devflow_engine/vendor/datalumina_genai/core/workflow.py +200 -0
  260. devflow_engine/worker.py +1086 -0
  261. devflow_engine/worker_guard.py +233 -0
  262. devflow_engine-1.0.0.dist-info/METADATA +235 -0
  263. devflow_engine-1.0.0.dist-info/RECORD +393 -0
  264. devflow_engine-1.0.0.dist-info/WHEEL +4 -0
  265. devflow_engine-1.0.0.dist-info/entry_points.txt +3 -0
  266. devin/__init__.py +6 -0
  267. devin/dag.py +58 -0
  268. devin/dag_two_arm.py +138 -0
  269. devin/devin_chat_scenario_catalog.json +588 -0
  270. devin/devin_eval.py +677 -0
  271. devin/nodes/__init__.py +0 -0
  272. devin/nodes/ideation/__init__.py +0 -0
  273. devin/nodes/ideation/node.py +195 -0
  274. devin/nodes/ideation/playground.py +267 -0
  275. devin/nodes/ideation/prompt.md +65 -0
  276. devin/nodes/ideation/scenarios/continue_refinement.py +13 -0
  277. devin/nodes/ideation/scenarios/continue_refinement_evals.py +18 -0
  278. devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py +17 -0
  279. devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py +16 -0
  280. devin/nodes/ideation/scenarios/large_idea_split.py +4 -0
  281. devin/nodes/ideation/scenarios/large_idea_split_evals.py +17 -0
  282. devin/nodes/ideation/scenarios/source_documentation_added.py +4 -0
  283. devin/nodes/ideation/scenarios/source_documentation_added_evals.py +16 -0
  284. devin/nodes/ideation/scenarios/user_says_create_it.py +30 -0
  285. devin/nodes/ideation/scenarios/user_says_create_it_evals.py +23 -0
  286. devin/nodes/ideation/scenarios/vague_idea.py +16 -0
  287. devin/nodes/ideation/scenarios/vague_idea_evals.py +47 -0
  288. devin/nodes/ideation/tools.json +312 -0
  289. devin/nodes/insight/__init__.py +0 -0
  290. devin/nodes/insight/node.py +49 -0
  291. devin/nodes/insight/playground.py +154 -0
  292. devin/nodes/insight/prompt.md +61 -0
  293. devin/nodes/insight/scenarios/architecture_pattern_query.py +15 -0
  294. devin/nodes/insight/scenarios/architecture_pattern_query_evals.py +25 -0
  295. devin/nodes/insight/scenarios/codebase_exploration.py +15 -0
  296. devin/nodes/insight/scenarios/codebase_exploration_evals.py +23 -0
  297. devin/nodes/insight/scenarios/devin_ideation_routing.py +19 -0
  298. devin/nodes/insight/scenarios/devin_ideation_routing_evals.py +39 -0
  299. devin/nodes/insight/scenarios/devin_insight_routing.py +20 -0
  300. devin/nodes/insight/scenarios/devin_insight_routing_evals.py +40 -0
  301. devin/nodes/insight/scenarios/operational_debugging.py +15 -0
  302. devin/nodes/insight/scenarios/operational_debugging_evals.py +23 -0
  303. devin/nodes/insight/scenarios/operational_question.py +9 -0
  304. devin/nodes/insight/scenarios/operational_question_evals.py +8 -0
  305. devin/nodes/insight/scenarios/queue_status.py +15 -0
  306. devin/nodes/insight/scenarios/queue_status_evals.py +23 -0
  307. devin/nodes/insight/scenarios/source_doc_explanation.py +14 -0
  308. devin/nodes/insight/scenarios/source_doc_explanation_evals.py +21 -0
  309. devin/nodes/insight/scenarios/worker_state_check.py +15 -0
  310. devin/nodes/insight/scenarios/worker_state_check_evals.py +22 -0
  311. devin/nodes/insight/tools.json +126 -0
  312. devin/nodes/intake/__init__.py +0 -0
  313. devin/nodes/intake/node.py +27 -0
  314. devin/nodes/intake/playground.py +47 -0
  315. devin/nodes/intake/prompt.md +12 -0
  316. devin/nodes/intake/scenarios/ideation_routing.py +4 -0
  317. devin/nodes/intake/scenarios/ideation_routing_evals.py +5 -0
  318. devin/nodes/intake/scenarios/insight_routing.py +4 -0
  319. devin/nodes/intake/scenarios/insight_routing_evals.py +5 -0
  320. devin/nodes/iterate/README.md +44 -0
  321. devin/nodes/iterate/__init__.py +1 -0
  322. devin/nodes/iterate/_archived_design_stages/01-objectives-requirements.md +112 -0
  323. devin/nodes/iterate/_archived_design_stages/02-evals.md +131 -0
  324. devin/nodes/iterate/_archived_design_stages/03-tools-and-boundaries.md +110 -0
  325. devin/nodes/iterate/_archived_design_stages/04-harness-and-playground.md +32 -0
  326. devin/nodes/iterate/_archived_design_stages/05-prompt-deferred.md +11 -0
  327. devin/nodes/iterate/_archived_design_stages/coder_agent_design/01-objectives-requirements.md +20 -0
  328. devin/nodes/iterate/_archived_design_stages/coder_agent_design/02-evals.md +8 -0
  329. devin/nodes/iterate/_archived_design_stages/coder_agent_design/03-tools-and-boundaries.md +14 -0
  330. devin/nodes/iterate/_archived_design_stages/coder_agent_design/04-harness-and-playground.md +12 -0
  331. devin/nodes/iterate/_archived_design_stages/framer_agent_design/01-objectives-requirements.md +20 -0
  332. devin/nodes/iterate/_archived_design_stages/framer_agent_design/02-evals.md +8 -0
  333. devin/nodes/iterate/_archived_design_stages/framer_agent_design/03-tools-and-boundaries.md +13 -0
  334. devin/nodes/iterate/_archived_design_stages/framer_agent_design/04-harness-and-playground.md +12 -0
  335. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md +25 -0
  336. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/02-evals.md +9 -0
  337. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/03-tools-and-boundaries.md +14 -0
  338. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md +12 -0
  339. devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md +20 -0
  340. devin/nodes/iterate/_archived_design_stages/observer_agent_design/02-evals.md +8 -0
  341. devin/nodes/iterate/_archived_design_stages/observer_agent_design/03-tools-and-boundaries.md +14 -0
  342. devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md +13 -0
  343. devin/nodes/iterate/agent-roles.md +89 -0
  344. devin/nodes/iterate/agents/README.md +10 -0
  345. devin/nodes/iterate/artifacts.md +504 -0
  346. devin/nodes/iterate/contract.md +100 -0
  347. devin/nodes/iterate/eval-plan.md +74 -0
  348. devin/nodes/iterate/node.py +100 -0
  349. devin/nodes/iterate/pipeline/README.md +13 -0
  350. devin/nodes/iterate/playground-contract.md +76 -0
  351. devin/nodes/iterate/prompt.md +11 -0
  352. devin/nodes/iterate/scenarios/README.md +38 -0
  353. devin/nodes/iterate/scenarios/artifact-and-loop-scenarios.md +101 -0
  354. devin/nodes/iterate/scenarios/coder_artifact_alignment.py +32 -0
  355. devin/nodes/iterate/scenarios/coder_artifact_alignment_evals.py +45 -0
  356. devin/nodes/iterate/scenarios/coder_bounded_fix.py +27 -0
  357. devin/nodes/iterate/scenarios/coder_bounded_fix_evals.py +45 -0
  358. devin/nodes/iterate/scenarios/devin_iterate_routing.py +21 -0
  359. devin/nodes/iterate/scenarios/devin_iterate_routing_evals.py +36 -0
  360. devin/nodes/iterate/scenarios/framer_scope_boundary.py +25 -0
  361. devin/nodes/iterate/scenarios/framer_scope_boundary_evals.py +57 -0
  362. devin/nodes/iterate/scenarios/framer_task_framing.py +25 -0
  363. devin/nodes/iterate/scenarios/framer_task_framing_evals.py +58 -0
  364. devin/nodes/iterate/scenarios/iterate_error_fix.py +21 -0
  365. devin/nodes/iterate/scenarios/iterate_error_fix_evals.py +39 -0
  366. devin/nodes/iterate/scenarios/iterate_quick_change.py +21 -0
  367. devin/nodes/iterate/scenarios/iterate_quick_change_evals.py +35 -0
  368. devin/nodes/iterate/scenarios/iterate_to_idea_promotion.py +23 -0
  369. devin/nodes/iterate/scenarios/iterate_to_idea_promotion_evals.py +53 -0
  370. devin/nodes/iterate/scenarios/iterate_to_insight_reroute.py +23 -0
  371. devin/nodes/iterate/scenarios/iterate_to_insight_reroute_evals.py +53 -0
  372. devin/nodes/iterate/scenarios/observer_evidence_seam.py +28 -0
  373. devin/nodes/iterate/scenarios/observer_evidence_seam_evals.py +55 -0
  374. devin/nodes/iterate/scenarios/observer_repro_creation.py +28 -0
  375. devin/nodes/iterate/scenarios/observer_repro_creation_evals.py +45 -0
  376. devin/nodes/iterate/scenarios/routing-matrix.md +45 -0
  377. devin/nodes/shared/__init__.py +0 -0
  378. devin/nodes/shared/filemaker_expert.md +80 -0
  379. devin/nodes/shared/filemaker_expert.py +354 -0
  380. devin/nodes/shared/filemaker_expert_eval/runner.py +176 -0
  381. devin/nodes/shared/filemaker_expert_eval/scenarios.json +65 -0
  382. devin/nodes/shared/goldilocks_advisor_eval/runner.py +214 -0
  383. devin/nodes/shared/goldilocks_advisor_eval/scenarios.json +58 -0
  384. devin/nodes/shared/helpers.py +156 -0
  385. devin/nodes/shared/idea_compliance_advisor_eval/runner.py +252 -0
  386. devin/nodes/shared/idea_compliance_advisor_eval/scenarios.json +75 -0
  387. devin/nodes/shared/models.py +44 -0
  388. devin/nodes/shared/post.py +40 -0
  389. devin/nodes/shared/router.py +107 -0
  390. devin/nodes/shared/tools.py +191 -0
  391. devin/shared/devin-chat-rubric.md +237 -0
  392. devin/shared/devin-chat-scenario-suite.md +90 -0
  393. devin/shared/eval_doctrine.md +9 -0
@@ -0,0 +1,588 @@
1
+ {
2
+ "schema_version": "2026-04-12",
3
+ "doctrine_sources": [
4
+ "docs/devin-chat-principles.md",
5
+ "docs/evals/devin-chat-rubric.md"
6
+ ],
7
+ "design_notes": [
8
+ "The suite is intentionally weighted toward multi-turn evaluation because Marcus clarified that Devin should be judged mainly on trajectory and decision points across turns.",
9
+ "Single-turn scenarios are kept as fast screens for first-turn posture, approach ownership, and current-request discipline.",
10
+ "Multi-turn scenarios should usually be judged on whether Devin keeps momentum, owns the implementation approach, asks only outcome-shaping clarification, avoids fake progress, and activates downstream work only at the correct decision point."
11
+ ],
12
+ "single_turn": [
13
+ {
14
+ "scenario_id": "sally_forward_ready_screen",
15
+ "scenario_family": "basic_new_idea",
16
+ "persona": "Sally ForwardReady",
17
+ "mode": "single_turn",
18
+ "doctrine_tags": [
19
+ "approach_ownership",
20
+ "forward_readiness",
21
+ "momentum",
22
+ "outcome_focused_clarification"
23
+ ],
24
+ "expected_judgment_focus": [
25
+ "Devin frames a plausible first-cut product shape immediately.",
26
+ "Any clarification is about workflow, user, or business constraint rather than stack choice.",
27
+ "The reply feels underway from the first turn."
28
+ ],
29
+ "idea_id": "devin_eval_sally_forward_ready_screen",
30
+ "user_input": "I want to build a new app for salon owners to handle same-day staffing gaps and shift swaps.",
31
+ "contract": {
32
+ "must_mention": [
33
+ "app"
34
+ ],
35
+ "must_not": [
36
+ "React or Vue",
37
+ "Postgres or MySQL",
38
+ "already implemented",
39
+ "stories generated"
40
+ ]
41
+ }
42
+ },
43
+ {
44
+ "scenario_id": "jimmy_repo_extension_screen",
45
+ "scenario_family": "extend_existing_codebase",
46
+ "persona": "Jimmy RepoExtension",
47
+ "mode": "single_turn",
48
+ "doctrine_tags": [
49
+ "approach_ownership",
50
+ "momentum",
51
+ "no_form_filler_behavior"
52
+ ],
53
+ "expected_judgment_focus": [
54
+ "Devin treats the request as an addition to an existing repo, not a fresh generic intake form.",
55
+ "The response preserves momentum without forcing Jimmy to choose implementation details Devin should own."
56
+ ],
57
+ "idea_id": "devin_eval_jimmy_repo_extension_screen",
58
+ "user_input": "Add a new idea to the existing repo: a manager approval inbox for refund exceptions.",
59
+ "contract": {
60
+ "must_mention": [
61
+ "approval",
62
+ "existing"
63
+ ],
64
+ "must_not": [
65
+ "What framework",
66
+ "Which database",
67
+ "already implemented",
68
+ "stories generated"
69
+ ]
70
+ }
71
+ },
72
+ {
73
+ "scenario_id": "nora_current_request_screen",
74
+ "scenario_family": "diagnose_devflow_issue",
75
+ "persona": "Nora CurrentRequest",
76
+ "mode": "single_turn",
77
+ "doctrine_tags": [
78
+ "attention_to_current_request",
79
+ "no_stale_context_hijack"
80
+ ],
81
+ "expected_judgment_focus": [
82
+ "Devin answers the current ops/status question directly instead of continuing an older ideation thread.",
83
+ "The response stays concise and avoids dragging stale planning context into the turn."
84
+ ],
85
+ "idea_id": "devin_eval_nora_current_request_screen",
86
+ "user_input": "Separate question: what is the source-doc mutation queue status right now?",
87
+ "preloaded_history": [
88
+ {
89
+ "role": "user",
90
+ "content": "Earlier I wanted help shaping an internal approvals workflow idea."
91
+ },
92
+ {
93
+ "role": "assistant",
94
+ "content": "I started refining the workflow shape and approval path."
95
+ }
96
+ ],
97
+ "contract": {
98
+ "must_mention": [
99
+ "queue"
100
+ ],
101
+ "must_not": [
102
+ "approval workflow",
103
+ "primary workflow",
104
+ "target persona"
105
+ ]
106
+ }
107
+ }
108
+ ],
109
+ "multi_turn": [
110
+ {
111
+ "scenario_id": "sally_explicit_approval_new_app",
112
+ "persona": "Sally ExplicitApproval",
113
+ "mode": "multi_turn",
114
+ "doctrine_tags": [
115
+ "implementation_partner",
116
+ "user_owns_outcome_devin_owns_approach",
117
+ "forward_ready_from_first_prompt",
118
+ "aggressive_grounded_assumptions",
119
+ "abstracted_orchestration_visibility",
120
+ "momentum_first",
121
+ "avoid_form_filler_fake_progress_unsafe_overreach",
122
+ "attention_to_current_request",
123
+ "multi_turn_trajectory_over_single_turn_perfection",
124
+ "no_fake_progress"
125
+ ],
126
+ "expected_judgment_focus": [
127
+ "First turn feels like Devin already has a sensible product frame in mind.",
128
+ "Clarification stays outcome-focused.",
129
+ "Downstream activation waits for the explicit go-ahead.",
130
+ "Judge the multi-turn trajectory and decision points more than isolated phrasing.",
131
+ "Devin should feel forward-ready from the first turn and keep ownership of the approach."
132
+ ],
133
+ "description": "Sally wants to build a new app. Devin should own the approach, feel forward-ready from turn one, use grounded assumptions, and only activate downstream after explicit approval.",
134
+ "idea_id_seed": "devin_chat_sally_explicit_approval",
135
+ "turns": [
136
+ {
137
+ "turn_id": "turn1_initial_request",
138
+ "user_input": "I want to build a new app for salon owners to handle same-day staffing gaps and shift swaps.",
139
+ "expect_route_arm": "ideation",
140
+ "expect_status": "ideation_contract_response",
141
+ "expect_response_kind": "ideation_contract_response",
142
+ "expect_activation": false,
143
+ "must_not": [
144
+ "React or Vue",
145
+ "Postgres or MySQL",
146
+ "what framework",
147
+ "what database",
148
+ "already implemented"
149
+ ],
150
+ "judgment_focus": [
151
+ "Own the approach on turn one.",
152
+ "Do not turn the turn into a stack questionnaire."
153
+ ],
154
+ "max_follow_up_questions": 1,
155
+ "require_suggested_next_step": true,
156
+ "forbid_internal_orchestration_terms": true
157
+ },
158
+ {
159
+ "turn_id": "turn2_business_shape",
160
+ "user_input": "First release is internal for dispatch leads. Speed matters more than payroll integration.",
161
+ "expect_route_arm": "ideation",
162
+ "expect_status": "ideation_contract_response",
163
+ "expect_response_kind": "ideation_contract_response",
164
+ "expect_activation": false,
165
+ "must_mention": [
166
+ "internal"
167
+ ],
168
+ "must_not": [
169
+ "stories generated",
170
+ "queued",
171
+ "what framework",
172
+ "what database"
173
+ ],
174
+ "judgment_focus": [
175
+ "Carry forward the prior framing without losing momentum."
176
+ ],
177
+ "max_follow_up_questions": 1,
178
+ "require_suggested_next_step": true,
179
+ "forbid_internal_orchestration_terms": true
180
+ },
181
+ {
182
+ "turn_id": "turn3_explicit_approval",
183
+ "user_input": "That direction works. Proceed to downstream generation.",
184
+ "expect_route_arm": "ideation",
185
+ "expect_status": "queued",
186
+ "expect_response_kind": "ready_for_downstream",
187
+ "expect_activation": true,
188
+ "must_mention": [
189
+ "downstream"
190
+ ],
191
+ "decision_point_label": "explicit_approval",
192
+ "judgment_focus": [
193
+ "Activation should happen here and not earlier."
194
+ ],
195
+ "max_follow_up_questions": 0,
196
+ "require_suggested_next_step": true,
197
+ "forbid_internal_orchestration_terms": true,
198
+ "must_not": []
199
+ }
200
+ ]
201
+ },
202
+ {
203
+ "scenario_id": "jimmy_existing_repo_handoff",
204
+ "persona": "Jimmy ExistingRepo",
205
+ "mode": "multi_turn",
206
+ "doctrine_tags": [
207
+ "implementation_partner",
208
+ "user_owns_outcome_devin_owns_approach",
209
+ "forward_ready_from_first_prompt",
210
+ "aggressive_grounded_assumptions",
211
+ "abstracted_orchestration_visibility",
212
+ "momentum_first",
213
+ "avoid_form_filler_fake_progress_unsafe_overreach",
214
+ "attention_to_current_request",
215
+ "multi_turn_trajectory_over_single_turn_perfection",
216
+ "no_fake_progress"
217
+ ],
218
+ "expected_judgment_focus": [
219
+ "Devin treats the task as an addition to an existing repo and preserves continuity.",
220
+ "Implementation ownership stays with Devin while Jimmy owns product intent and UX constraints.",
221
+ "Judge the multi-turn trajectory and decision points more than isolated phrasing.",
222
+ "Devin should feel forward-ready from the first turn and keep ownership of the approach."
223
+ ],
224
+ "description": "Jimmy needs a new idea added to an existing repo. Devin should adapt to the existing codebase, assume repo-grounded defaults, and keep ownership of the implementation approach.",
225
+ "idea_id_seed": "devin_chat_jimmy_existing_repo",
226
+ "turns": [
227
+ {
228
+ "turn_id": "turn1_existing_repo_idea",
229
+ "user_input": "Add a new idea to the existing repo: a manager approval inbox for refund exceptions.",
230
+ "expect_route_arm": "ideation",
231
+ "expect_status": "ideation_contract_response",
232
+ "expect_response_kind": "ideation_contract_response",
233
+ "expect_activation": false,
234
+ "must_mention": [
235
+ "approval",
236
+ "repo"
237
+ ],
238
+ "must_not": [
239
+ "What framework",
240
+ "Which database",
241
+ "REST or GraphQL",
242
+ "already implemented"
243
+ ],
244
+ "judgment_focus": [
245
+ "Ground the response in the fact this extends an existing product."
246
+ ],
247
+ "max_follow_up_questions": 1,
248
+ "require_suggested_next_step": true,
249
+ "forbid_internal_orchestration_terms": true
250
+ },
251
+ {
252
+ "turn_id": "turn2_ux_constraints",
253
+ "user_input": "Keep the current design language. Managers need an inbox, detail view, and approve/reject with notes.",
254
+ "expect_route_arm": "ideation",
255
+ "expect_status": "ideation_contract_response",
256
+ "expect_response_kind": "ideation_contract_response",
257
+ "expect_activation": false,
258
+ "must_mention": [
259
+ "notes"
260
+ ],
261
+ "must_not": [
262
+ "already implemented",
263
+ "stories generated",
264
+ "what framework",
265
+ "what database"
266
+ ],
267
+ "judgment_focus": [
268
+ "Use the new UX constraints to sharpen the concept rather than re-asking old questions."
269
+ ],
270
+ "max_follow_up_questions": 1,
271
+ "require_suggested_next_step": true,
272
+ "forbid_internal_orchestration_terms": true
273
+ },
274
+ {
275
+ "turn_id": "turn3_explicit_approval",
276
+ "user_input": "Yes, push that into downstream generation.",
277
+ "expect_route_arm": "ideation",
278
+ "expect_status": "queued",
279
+ "expect_response_kind": "ready_for_downstream",
280
+ "expect_activation": true,
281
+ "decision_point_label": "explicit_approval",
282
+ "max_follow_up_questions": 0,
283
+ "require_suggested_next_step": true,
284
+ "forbid_internal_orchestration_terms": true,
285
+ "must_not": []
286
+ }
287
+ ]
288
+ },
289
+ {
290
+ "scenario_id": "jeff_sparse_brief_fast_assumptions",
291
+ "persona": "Jeff SparseBrief",
292
+ "mode": "multi_turn",
293
+ "doctrine_tags": [
294
+ "implementation_partner",
295
+ "user_owns_outcome_devin_owns_approach",
296
+ "forward_ready_from_first_prompt",
297
+ "aggressive_grounded_assumptions",
298
+ "abstracted_orchestration_visibility",
299
+ "momentum_first",
300
+ "avoid_form_filler_fake_progress_unsafe_overreach",
301
+ "attention_to_current_request",
302
+ "multi_turn_trajectory_over_single_turn_perfection",
303
+ "no_fake_progress"
304
+ ],
305
+ "expected_judgment_focus": [
306
+ "Jeff gives very little information, so Devin should infer a plausible first cut without pretending the work is already built.",
307
+ "Any clarification should be sharp and outcome-shaping, not a broad intake form.",
308
+ "Judge the multi-turn trajectory and decision points more than isolated phrasing.",
309
+ "Devin should feel forward-ready from the first turn and keep ownership of the approach."
310
+ ],
311
+ "description": "Jeff gives very little information on a greenfield idea and is eager to get coding. Devin should assume aggressively from preferences/example repos/best practices/default stack, avoid form-filler questions, and move forward fast without fake progress.",
312
+ "idea_id_seed": "devin_chat_jeff_sparse_brief",
313
+ "turns": [
314
+ {
315
+ "turn_id": "turn1_sparse_push",
316
+ "user_input": "Let's just build something for contractor daily reports. I don't care about the details yet, just start coding.",
317
+ "expect_route_arm": "ideation",
318
+ "expect_status": "ideation_contract_response",
319
+ "expect_response_kind": "ideation_contract_response",
320
+ "expect_activation": false,
321
+ "must_not": [
322
+ "What framework",
323
+ "How many screens",
324
+ "Which database",
325
+ "what auth provider",
326
+ "what architecture",
327
+ "already implemented"
328
+ ],
329
+ "judgment_focus": [
330
+ "Respond with forward-ready framing, not a questionnaire or fake progress."
331
+ ],
332
+ "max_follow_up_questions": 1,
333
+ "require_suggested_next_step": true,
334
+ "forbid_internal_orchestration_terms": true
335
+ },
336
+ {
337
+ "turn_id": "turn2_minimal_constraints",
338
+ "user_input": "Internal first. Phone-first. Photos and signatures matter.",
339
+ "expect_route_arm": "ideation",
340
+ "expect_status": "ideation_contract_response",
341
+ "expect_response_kind": "ideation_contract_response",
342
+ "expect_activation": false,
343
+ "must_mention": [
344
+ "internal"
345
+ ],
346
+ "must_not": [
347
+ "queued",
348
+ "stories generated",
349
+ "what framework",
350
+ "what database"
351
+ ],
352
+ "max_follow_up_questions": 1,
353
+ "require_suggested_next_step": true,
354
+ "forbid_internal_orchestration_terms": true
355
+ },
356
+ {
357
+ "turn_id": "turn3_explicit_approval",
358
+ "user_input": "Good enough. Proceed.",
359
+ "expect_route_arm": "ideation",
360
+ "expect_status": "queued",
361
+ "expect_response_kind": "ready_for_downstream",
362
+ "expect_activation": true,
363
+ "decision_point_label": "explicit_approval",
364
+ "max_follow_up_questions": 0,
365
+ "require_suggested_next_step": true,
366
+ "forbid_internal_orchestration_terms": true,
367
+ "must_not": []
368
+ }
369
+ ]
370
+ },
371
+ {
372
+ "scenario_id": "cleo_review_first_preactivation",
373
+ "persona": "Cleo ReviewFirst",
374
+ "mode": "multi_turn",
375
+ "doctrine_tags": [
376
+ "implementation_partner",
377
+ "user_owns_outcome_devin_owns_approach",
378
+ "forward_ready_from_first_prompt",
379
+ "aggressive_grounded_assumptions",
380
+ "abstracted_orchestration_visibility",
381
+ "momentum_first",
382
+ "avoid_form_filler_fake_progress_unsafe_overreach",
383
+ "attention_to_current_request",
384
+ "multi_turn_trajectory_over_single_turn_perfection",
385
+ "no_fake_progress"
386
+ ],
387
+ "expected_judgment_focus": [
388
+ "Cleo explicitly wants to plan and review first, so Devin must not claim downstream generation happened before approval.",
389
+ "The thread should still feel productive while staying pre-activation.",
390
+ "Judge the multi-turn trajectory and decision points more than isolated phrasing.",
391
+ "Devin should feel forward-ready from the first turn and keep ownership of the approach."
392
+ ],
393
+ "description": "Cleo plans and reviews patiently. Devin should answer the current planning request directly, keep momentum visible, and avoid premature downstream handoff until Cleo explicitly approves.",
394
+ "idea_id_seed": "devin_chat_cleo_review_first",
395
+ "turns": [
396
+ {
397
+ "turn_id": "turn1_review_first",
398
+ "user_input": "I want a customer onboarding workspace, but let's think it through carefully before generating anything.",
399
+ "expect_route_arm": "ideation",
400
+ "expect_status": "ideation_contract_response",
401
+ "expect_response_kind": "ideation_contract_response",
402
+ "expect_activation": false,
403
+ "must_not": [
404
+ "ready for downstream",
405
+ "stories generated",
406
+ "already implemented"
407
+ ],
408
+ "max_follow_up_questions": 1,
409
+ "require_suggested_next_step": true,
410
+ "forbid_internal_orchestration_terms": true
411
+ },
412
+ {
413
+ "turn_id": "turn2_review_question",
414
+ "user_input": "What is the riskiest workflow assumption to validate first?",
415
+ "expect_route_arm": "ideation",
416
+ "expect_status": "ideation_contract_response",
417
+ "expect_response_kind": "ideation_contract_response",
418
+ "expect_activation": false,
419
+ "must_mention": [
420
+ "assumption"
421
+ ],
422
+ "judgment_focus": [
423
+ "Answer the current planning question directly instead of prematurely activating the idea."
424
+ ],
425
+ "max_follow_up_questions": 1,
426
+ "require_suggested_next_step": true,
427
+ "forbid_internal_orchestration_terms": true,
428
+ "must_not": [
429
+ "already implemented",
430
+ "stories generated",
431
+ "ready for downstream"
432
+ ]
433
+ },
434
+ {
435
+ "turn_id": "turn3_hold_preactivation",
436
+ "user_input": "Good. Keep it pre-generation for now and refine the operator flow and approval boundary.",
437
+ "expect_route_arm": "ideation",
438
+ "expect_status": "ideation_contract_response",
439
+ "expect_response_kind": "ideation_contract_response",
440
+ "expect_activation": false,
441
+ "must_not": [
442
+ "queued",
443
+ "downstream generated",
444
+ "already implemented"
445
+ ],
446
+ "decision_point_label": "explicit_hold",
447
+ "max_follow_up_questions": 1,
448
+ "require_suggested_next_step": true,
449
+ "forbid_internal_orchestration_terms": true
450
+ },
451
+ {
452
+ "turn_id": "turn4_explicit_approval",
453
+ "user_input": "Okay, now proceed to downstream generation.",
454
+ "expect_route_arm": "ideation",
455
+ "expect_status": "queued",
456
+ "expect_response_kind": "ready_for_downstream",
457
+ "expect_activation": true,
458
+ "decision_point_label": "explicit_approval",
459
+ "max_follow_up_questions": 0,
460
+ "require_suggested_next_step": true,
461
+ "forbid_internal_orchestration_terms": true,
462
+ "must_not": []
463
+ }
464
+ ]
465
+ },
466
+ {
467
+ "scenario_id": "priya_risk_boundary_clarification",
468
+ "persona": "Priya RiskBoundary",
469
+ "mode": "multi_turn",
470
+ "doctrine_tags": [
471
+ "outcome_focused_clarification",
472
+ "no_unsafe_overreach",
473
+ "approach_ownership"
474
+ ],
475
+ "expected_judgment_focus": [
476
+ "Because the workflow is high-risk, Devin should ask about the constraint that changes correctness or safety instead of choosing recklessly.",
477
+ "The clarification should still stay at the outcome/approval boundary level, not tech-stack level."
478
+ ],
479
+ "description": "Priya introduces legal-review and audit-trail concerns. Devin should own the solution shape while using one sharp clarification around approval boundaries and risk.",
480
+ "idea_id_seed": "devin_chat_priya_risk_boundary",
481
+ "turns": [
482
+ {
483
+ "turn_id": "turn1_high_risk_request",
484
+ "user_input": "We need an internal HR incident intake tool with legal review before anything can leave the team.",
485
+ "expect_route_arm": "ideation",
486
+ "expect_status": "ideation_contract_response",
487
+ "expect_response_kind": "ideation_contract_response",
488
+ "expect_activation": false,
489
+ "must_mention": [
490
+ "legal",
491
+ "internal"
492
+ ],
493
+ "must_not": [
494
+ "React or Vue",
495
+ "Postgres or MySQL",
496
+ "already implemented"
497
+ ]
498
+ },
499
+ {
500
+ "turn_id": "turn2_constraint_answer",
501
+ "user_input": "Single legal approver first, but we need a full audit trail and no external access in v1.",
502
+ "expect_route_arm": "ideation",
503
+ "expect_status": "ideation_contract_response",
504
+ "expect_response_kind": "ideation_contract_response",
505
+ "expect_activation": false,
506
+ "must_mention": [
507
+ "audit"
508
+ ],
509
+ "must_not": [
510
+ "queued",
511
+ "stories generated"
512
+ ]
513
+ },
514
+ {
515
+ "turn_id": "turn3_explicit_approval",
516
+ "user_input": "That is the right boundary. Proceed.",
517
+ "expect_route_arm": "ideation",
518
+ "expect_status": "queued",
519
+ "expect_response_kind": "ready_for_downstream",
520
+ "expect_activation": true,
521
+ "decision_point_label": "explicit_approval"
522
+ }
523
+ ]
524
+ },
525
+ {
526
+ "scenario_id": "omar_status_detour_return_to_ideation",
527
+ "persona": "Omar ContextSwitch",
528
+ "mode": "multi_turn",
529
+ "doctrine_tags": [
530
+ "attention_to_current_request",
531
+ "decision_point_handling",
532
+ "trajectory_over_single_turn"
533
+ ],
534
+ "expected_judgment_focus": [
535
+ "Devin answers the current request directly even when the thread already contains an ideation arc.",
536
+ "After the detour, Devin should return to ideation cleanly without stale confusion."
537
+ ],
538
+ "description": "Exercise cross-turn attention discipline: ideation, status detour, return to ideation, then approval.",
539
+ "idea_id_seed": "devin_chat_omar_context_switch",
540
+ "turns": [
541
+ {
542
+ "turn_id": "turn1_initial_idea",
543
+ "user_input": "Build an internal approvals workspace for vendor exception requests.",
544
+ "expect_route_arm": "ideation",
545
+ "expect_status": "ideation_contract_response",
546
+ "expect_response_kind": "ideation_contract_response",
547
+ "expect_activation": false
548
+ },
549
+ {
550
+ "turn_id": "turn2_status_detour",
551
+ "user_input": "Separate question: what is the source-doc mutation queue status right now?",
552
+ "expect_route_arm": "insight",
553
+ "expect_status": "redirect",
554
+ "expect_response_kind": "redirect",
555
+ "expect_activation": false,
556
+ "must_mention": [
557
+ "queue"
558
+ ],
559
+ "must_not": [
560
+ "vendor exception",
561
+ "approval boundary"
562
+ ]
563
+ },
564
+ {
565
+ "turn_id": "turn3_back_to_ideation",
566
+ "user_input": "Back to the idea: keep reviewers internal and make the first workflow approval-only.",
567
+ "expect_route_arm": "ideation",
568
+ "expect_status": "ideation_contract_response",
569
+ "expect_response_kind": "ideation_contract_response",
570
+ "expect_activation": false,
571
+ "must_not": [
572
+ "outside software delivery scope",
573
+ "outside the ideation handoff path"
574
+ ]
575
+ },
576
+ {
577
+ "turn_id": "turn4_explicit_approval",
578
+ "user_input": "Proceed to downstream generation.",
579
+ "expect_route_arm": "ideation",
580
+ "expect_status": "queued",
581
+ "expect_response_kind": "ready_for_downstream",
582
+ "expect_activation": true,
583
+ "decision_point_label": "explicit_approval"
584
+ }
585
+ ]
586
+ }
587
+ ]
588
+ }