devflow-engine 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. devflow_engine/__init__.py +3 -0
  2. devflow_engine/agentic_prompts.py +100 -0
  3. devflow_engine/agentic_runtime.py +398 -0
  4. devflow_engine/api_key_flow_harness.py +539 -0
  5. devflow_engine/api_keys.py +357 -0
  6. devflow_engine/bootstrap/__init__.py +2 -0
  7. devflow_engine/bootstrap/provision_from_template.py +84 -0
  8. devflow_engine/cli/__init__.py +0 -0
  9. devflow_engine/cli/app.py +7270 -0
  10. devflow_engine/core/__init__.py +0 -0
  11. devflow_engine/core/config.py +86 -0
  12. devflow_engine/core/logging.py +29 -0
  13. devflow_engine/core/paths.py +45 -0
  14. devflow_engine/core/toml_kv.py +33 -0
  15. devflow_engine/devflow_event_worker.py +1292 -0
  16. devflow_engine/devflow_state.py +201 -0
  17. devflow_engine/devin2/__init__.py +9 -0
  18. devflow_engine/devin2/agent_definition.py +120 -0
  19. devflow_engine/devin2/pi_runner.py +204 -0
  20. devflow_engine/devin_orchestration.py +69 -0
  21. devflow_engine/docs/prompts/anti-patterns.md +42 -0
  22. devflow_engine/docs/prompts/devin-agent-prompt.md +55 -0
  23. devflow_engine/docs/prompts/devin2-agent-prompt.md +81 -0
  24. devflow_engine/docs/prompts/examples/devin-vapi-clone-reference-exchange.json +85 -0
  25. devflow_engine/doctor/__init__.py +2 -0
  26. devflow_engine/doctor/triage.py +140 -0
  27. devflow_engine/error/__init__.py +0 -0
  28. devflow_engine/error/remediation.py +21 -0
  29. devflow_engine/errors/error_solver_dag.py +522 -0
  30. devflow_engine/errors/runtime_observability.py +67 -0
  31. devflow_engine/idea/__init__.py +4 -0
  32. devflow_engine/idea/actors.py +481 -0
  33. devflow_engine/idea/agentic.py +465 -0
  34. devflow_engine/idea/analyze.py +93 -0
  35. devflow_engine/idea/devin_chat_dag.py +1 -0
  36. devflow_engine/idea/diff.py +99 -0
  37. devflow_engine/idea/drafts.py +446 -0
  38. devflow_engine/idea/idea_creation_dag.py +643 -0
  39. devflow_engine/idea/ideation_enrichment.py +355 -0
  40. devflow_engine/idea/ideation_enrichment_worker.py +19 -0
  41. devflow_engine/idea/paths.py +28 -0
  42. devflow_engine/idea/promote.py +53 -0
  43. devflow_engine/idea/redaction.py +27 -0
  44. devflow_engine/idea/repo_tools.py +1277 -0
  45. devflow_engine/idea/response_mode.py +30 -0
  46. devflow_engine/idea/story_pipeline.py +1585 -0
  47. devflow_engine/idea/sufficiency.py +376 -0
  48. devflow_engine/idea/traditional_stories.py +1257 -0
  49. devflow_engine/implementation/__init__.py +0 -0
  50. devflow_engine/implementation/alembic_preflight.py +700 -0
  51. devflow_engine/implementation/dag.py +8450 -0
  52. devflow_engine/implementation/green_gate.py +93 -0
  53. devflow_engine/implementation/prompts.py +108 -0
  54. devflow_engine/implementation/test_runtime.py +623 -0
  55. devflow_engine/integration/__init__.py +19 -0
  56. devflow_engine/integration/agentic.py +66 -0
  57. devflow_engine/integration/dag.py +3539 -0
  58. devflow_engine/integration/prompts.py +114 -0
  59. devflow_engine/integration/supabase_schema.sql +31 -0
  60. devflow_engine/integration/supabase_sync.py +177 -0
  61. devflow_engine/llm/__init__.py +1 -0
  62. devflow_engine/llm/cli_one_shot.py +84 -0
  63. devflow_engine/llm/cli_stream.py +371 -0
  64. devflow_engine/llm/execution_context.py +26 -0
  65. devflow_engine/llm/invoke.py +1322 -0
  66. devflow_engine/llm/provider_api.py +304 -0
  67. devflow_engine/llm/repo_knowledge.py +588 -0
  68. devflow_engine/llm_primitives.py +315 -0
  69. devflow_engine/orchestration.py +62 -0
  70. devflow_engine/planning/__init__.py +0 -0
  71. devflow_engine/planning/analyze_repo.py +92 -0
  72. devflow_engine/planning/render_drafts.py +133 -0
  73. devflow_engine/playground/__init__.py +0 -0
  74. devflow_engine/playground/hooks.py +26 -0
  75. devflow_engine/playwright_workflow/__init__.py +5 -0
  76. devflow_engine/playwright_workflow/dag.py +1317 -0
  77. devflow_engine/process/__init__.py +5 -0
  78. devflow_engine/process/dag.py +59 -0
  79. devflow_engine/project_registration/__init__.py +3 -0
  80. devflow_engine/project_registration/dag.py +1581 -0
  81. devflow_engine/project_registry.py +109 -0
  82. devflow_engine/prompts/devin/generic/prompt.md +6 -0
  83. devflow_engine/prompts/devin/ideation/prompt.md +263 -0
  84. devflow_engine/prompts/devin/ideation/scenarios.md +5 -0
  85. devflow_engine/prompts/devin/ideation_loop/prompt.md +6 -0
  86. devflow_engine/prompts/devin/insight/prompt.md +11 -0
  87. devflow_engine/prompts/devin/insight/scenarios.md +5 -0
  88. devflow_engine/prompts/devin/intake/prompt.md +15 -0
  89. devflow_engine/prompts/devin/iterate/prompt.md +12 -0
  90. devflow_engine/prompts/devin/shared/eval_doctrine.md +9 -0
  91. devflow_engine/prompts/devin/shared/principles.md +246 -0
  92. devflow_engine/prompts/devin_eval/assessment/prompt.md +18 -0
  93. devflow_engine/prompts/idea/api_ideation_agent/prompt.md +8 -0
  94. devflow_engine/prompts/idea/api_insight_agent/prompt.md +8 -0
  95. devflow_engine/prompts/idea/response_doctrine/prompt.md +18 -0
  96. devflow_engine/prompts/implementation/dependency_assessment/prompt.md +12 -0
  97. devflow_engine/prompts/implementation/green/green/prompt.md +11 -0
  98. devflow_engine/prompts/implementation/green/node_config/prompt.md +3 -0
  99. devflow_engine/prompts/implementation/green_review/outcome_review/prompt.md +5 -0
  100. devflow_engine/prompts/implementation/green_review/prior_run_review/prompt.md +5 -0
  101. devflow_engine/prompts/implementation/red/prompt.md +27 -0
  102. devflow_engine/prompts/implementation/redreview/prompt.md +23 -0
  103. devflow_engine/prompts/implementation/redreview_repair/prompt.md +16 -0
  104. devflow_engine/prompts/implementation/setupdoc/prompt.md +10 -0
  105. devflow_engine/prompts/implementation/story_planning/prompt.md +13 -0
  106. devflow_engine/prompts/implementation/test_design/prompt.md +27 -0
  107. devflow_engine/prompts/integration/README.md +185 -0
  108. devflow_engine/prompts/integration/green/example.md +67 -0
  109. devflow_engine/prompts/integration/green/green/prompt.md +10 -0
  110. devflow_engine/prompts/integration/green/node_config/prompt.md +42 -0
  111. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/green/prompt.md +15 -0
  112. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/node_config/prompt.md +42 -0
  113. devflow_engine/prompts/integration/green_enrich/example.md +79 -0
  114. devflow_engine/prompts/integration/green_enrich/green_enrich/prompt.md +9 -0
  115. devflow_engine/prompts/integration/green_enrich/node_config/prompt.md +41 -0
  116. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/green_enrich/prompt.md +14 -0
  117. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  118. devflow_engine/prompts/integration/red/code_repair/prompt.md +12 -0
  119. devflow_engine/prompts/integration/red/example.md +152 -0
  120. devflow_engine/prompts/integration/red/node_config/prompt.md +86 -0
  121. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  122. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/node_config/prompt.md +84 -0
  123. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red/prompt.md +16 -0
  124. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red_repair/prompt.md +15 -0
  125. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  126. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/node_config/prompt.md +84 -0
  127. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/red_repair/prompt.md +11 -0
  128. devflow_engine/prompts/integration/red/red/prompt.md +11 -0
  129. devflow_engine/prompts/integration/red/red_repair/prompt.md +12 -0
  130. devflow_engine/prompts/integration/red_review/example.md +71 -0
  131. devflow_engine/prompts/integration/red_review/node_config/prompt.md +41 -0
  132. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  133. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/red_review/prompt.md +15 -0
  134. devflow_engine/prompts/integration/red_review/red_review/prompt.md +9 -0
  135. devflow_engine/prompts/integration/resolve/example.md +111 -0
  136. devflow_engine/prompts/integration/resolve/node_config/prompt.md +64 -0
  137. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/node_config/prompt.md +64 -0
  138. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_implicated_users/prompt.md +15 -0
  139. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_side_effects/prompt.md +15 -0
  140. devflow_engine/prompts/integration/resolve/resolve_implicated_users/prompt.md +10 -0
  141. devflow_engine/prompts/integration/resolve/resolve_side_effects/prompt.md +10 -0
  142. devflow_engine/prompts/integration/validate/build_idea_acceptance_coverage/prompt.md +12 -0
  143. devflow_engine/prompts/integration/validate/code_repair/prompt.md +13 -0
  144. devflow_engine/prompts/integration/validate/example.md +143 -0
  145. devflow_engine/prompts/integration/validate/node_config/prompt.md +87 -0
  146. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  147. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/node_config/prompt.md +67 -0
  148. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_enrich_gate/prompt.md +17 -0
  149. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_repair/prompt.md +16 -0
  150. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  151. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/node_config/prompt.md +67 -0
  152. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/validate_repair/prompt.md +9 -0
  153. devflow_engine/prompts/integration/validate/validate_enrich_gate/prompt.md +10 -0
  154. devflow_engine/prompts/integration/validate/validate_repair/prompt.md +20 -0
  155. devflow_engine/prompts/integration/write_workflows/example.md +100 -0
  156. devflow_engine/prompts/integration/write_workflows/node_config/prompt.md +44 -0
  157. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/node_config/prompt.md +44 -0
  158. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/write_workflows/prompt.md +17 -0
  159. devflow_engine/prompts/integration/write_workflows/write_workflows/prompt.md +11 -0
  160. devflow_engine/prompts/iterate/README.md +7 -0
  161. devflow_engine/prompts/iterate/coder/prompt.md +11 -0
  162. devflow_engine/prompts/iterate/framer/prompt.md +11 -0
  163. devflow_engine/prompts/iterate/iterator/prompt.md +13 -0
  164. devflow_engine/prompts/iterate/observer/prompt.md +11 -0
  165. devflow_engine/prompts/recovery/diagnosis/prompt.md +7 -0
  166. devflow_engine/prompts/recovery/execution/prompt.md +8 -0
  167. devflow_engine/prompts/recovery/execution_verification/prompt.md +7 -0
  168. devflow_engine/prompts/recovery/failure_investigation/prompt.md +10 -0
  169. devflow_engine/prompts/recovery/preflight_health_repo_repair/prompt.md +8 -0
  170. devflow_engine/prompts/recovery/remediation_execution/prompt.md +11 -0
  171. devflow_engine/prompts/recovery/root_cause_investigation/prompt.md +12 -0
  172. devflow_engine/prompts/scope_idea/doctrine/prompt.md +7 -0
  173. devflow_engine/prompts/source_doc_eval/document/prompt.md +6 -0
  174. devflow_engine/prompts/source_doc_eval/targeted_mutation/prompt.md +9 -0
  175. devflow_engine/prompts/source_doc_mutation/domain_entities/prompt.md +6 -0
  176. devflow_engine/prompts/source_doc_mutation/product_brief/prompt.md +6 -0
  177. devflow_engine/prompts/source_doc_mutation/project_doc_coherence/prompt.md +7 -0
  178. devflow_engine/prompts/source_doc_mutation/project_doc_render/prompt.md +9 -0
  179. devflow_engine/prompts/source_doc_mutation/source_doc_coherence/prompt.md +5 -0
  180. devflow_engine/prompts/source_doc_mutation/source_doc_enrichment_coherence/prompt.md +6 -0
  181. devflow_engine/prompts/source_doc_mutation/user_workflows/prompt.md +6 -0
  182. devflow_engine/prompts/source_scope/doctrine/prompt.md +10 -0
  183. devflow_engine/prompts/ui_grounding/doctrine/prompt.md +7 -0
  184. devflow_engine/recovery/__init__.py +3 -0
  185. devflow_engine/recovery/dag.py +2609 -0
  186. devflow_engine/recovery/models.py +220 -0
  187. devflow_engine/refactor.py +93 -0
  188. devflow_engine/registry/__init__.py +1 -0
  189. devflow_engine/registry/cards.py +238 -0
  190. devflow_engine/registry/domain_normalize.py +60 -0
  191. devflow_engine/registry/effects.py +65 -0
  192. devflow_engine/registry/enforce_report.py +150 -0
  193. devflow_engine/registry/module_cards_classify.py +164 -0
  194. devflow_engine/registry/module_cards_draft.py +184 -0
  195. devflow_engine/registry/module_cards_gate.py +59 -0
  196. devflow_engine/registry/packages.py +347 -0
  197. devflow_engine/registry/pathways.py +323 -0
  198. devflow_engine/review/__init__.py +11 -0
  199. devflow_engine/review/dag.py +588 -0
  200. devflow_engine/review/review_story.py +67 -0
  201. devflow_engine/scope_idea/__init__.py +3 -0
  202. devflow_engine/scope_idea/agentic.py +39 -0
  203. devflow_engine/scope_idea/dag.py +1069 -0
  204. devflow_engine/scope_idea/models.py +175 -0
  205. devflow_engine/skills/builtins/devflow/queue_failure_investigation/SKILL.md +112 -0
  206. devflow_engine/skills/builtins/devflow/queue_idea_to_story/SKILL.md +120 -0
  207. devflow_engine/skills/builtins/devflow/queue_integration/SKILL.md +105 -0
  208. devflow_engine/skills/builtins/devflow/queue_recovery/SKILL.md +108 -0
  209. devflow_engine/skills/builtins/devflow/queue_runtime_core/SKILL.md +155 -0
  210. devflow_engine/skills/builtins/devflow/queue_story_implementation/SKILL.md +122 -0
  211. devflow_engine/skills/builtins/devin/idea_to_story_handoff/SKILL.md +120 -0
  212. devflow_engine/skills/builtins/devin/ideation/SKILL.md +168 -0
  213. devflow_engine/skills/builtins/devin/ideation/state-and-phrasing-reference.md +18 -0
  214. devflow_engine/skills/builtins/devin/insight/SKILL.md +22 -0
  215. devflow_engine/skills/registry.example.yaml +42 -0
  216. devflow_engine/source_doc_assumptions.py +291 -0
  217. devflow_engine/source_doc_mutation_dag.py +1606 -0
  218. devflow_engine/source_doc_mutation_eval.py +417 -0
  219. devflow_engine/source_doc_mutation_worker.py +25 -0
  220. devflow_engine/source_docs_schema.py +207 -0
  221. devflow_engine/source_docs_updater.py +309 -0
  222. devflow_engine/source_scope/__init__.py +15 -0
  223. devflow_engine/source_scope/agentic.py +45 -0
  224. devflow_engine/source_scope/dag.py +1626 -0
  225. devflow_engine/source_scope/models.py +177 -0
  226. devflow_engine/stores/__init__.py +0 -0
  227. devflow_engine/stores/execution_store.py +3534 -0
  228. devflow_engine/story/__init__.py +0 -0
  229. devflow_engine/story/contracts.py +160 -0
  230. devflow_engine/story/discovery.py +47 -0
  231. devflow_engine/story/evidence.py +118 -0
  232. devflow_engine/story/hashing.py +27 -0
  233. devflow_engine/story/implemented_queue_purge.py +148 -0
  234. devflow_engine/story/indexer.py +105 -0
  235. devflow_engine/story/io.py +20 -0
  236. devflow_engine/story/markdown_contracts.py +298 -0
  237. devflow_engine/story/reconciliation.py +408 -0
  238. devflow_engine/story/validate_stories.py +149 -0
  239. devflow_engine/story/validate_tests_story.py +512 -0
  240. devflow_engine/story/validation.py +133 -0
  241. devflow_engine/ui_grounding/__init__.py +11 -0
  242. devflow_engine/ui_grounding/agentic.py +31 -0
  243. devflow_engine/ui_grounding/dag.py +874 -0
  244. devflow_engine/ui_grounding/models.py +224 -0
  245. devflow_engine/ui_grounding/pencil_bridge.py +247 -0
  246. devflow_engine/vendor/__init__.py +0 -0
  247. devflow_engine/vendor/datalumina_genai/__init__.py +11 -0
  248. devflow_engine/vendor/datalumina_genai/core/__init__.py +0 -0
  249. devflow_engine/vendor/datalumina_genai/core/exceptions.py +9 -0
  250. devflow_engine/vendor/datalumina_genai/core/nodes/__init__.py +0 -0
  251. devflow_engine/vendor/datalumina_genai/core/nodes/agent.py +48 -0
  252. devflow_engine/vendor/datalumina_genai/core/nodes/agent_streaming_node.py +26 -0
  253. devflow_engine/vendor/datalumina_genai/core/nodes/base.py +89 -0
  254. devflow_engine/vendor/datalumina_genai/core/nodes/concurrent.py +30 -0
  255. devflow_engine/vendor/datalumina_genai/core/nodes/router.py +69 -0
  256. devflow_engine/vendor/datalumina_genai/core/schema.py +72 -0
  257. devflow_engine/vendor/datalumina_genai/core/task.py +52 -0
  258. devflow_engine/vendor/datalumina_genai/core/validate.py +139 -0
  259. devflow_engine/vendor/datalumina_genai/core/workflow.py +200 -0
  260. devflow_engine/worker.py +1086 -0
  261. devflow_engine/worker_guard.py +233 -0
  262. devflow_engine-1.0.0.dist-info/METADATA +235 -0
  263. devflow_engine-1.0.0.dist-info/RECORD +393 -0
  264. devflow_engine-1.0.0.dist-info/WHEEL +4 -0
  265. devflow_engine-1.0.0.dist-info/entry_points.txt +3 -0
  266. devin/__init__.py +6 -0
  267. devin/dag.py +58 -0
  268. devin/dag_two_arm.py +138 -0
  269. devin/devin_chat_scenario_catalog.json +588 -0
  270. devin/devin_eval.py +677 -0
  271. devin/nodes/__init__.py +0 -0
  272. devin/nodes/ideation/__init__.py +0 -0
  273. devin/nodes/ideation/node.py +195 -0
  274. devin/nodes/ideation/playground.py +267 -0
  275. devin/nodes/ideation/prompt.md +65 -0
  276. devin/nodes/ideation/scenarios/continue_refinement.py +13 -0
  277. devin/nodes/ideation/scenarios/continue_refinement_evals.py +18 -0
  278. devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py +17 -0
  279. devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py +16 -0
  280. devin/nodes/ideation/scenarios/large_idea_split.py +4 -0
  281. devin/nodes/ideation/scenarios/large_idea_split_evals.py +17 -0
  282. devin/nodes/ideation/scenarios/source_documentation_added.py +4 -0
  283. devin/nodes/ideation/scenarios/source_documentation_added_evals.py +16 -0
  284. devin/nodes/ideation/scenarios/user_says_create_it.py +30 -0
  285. devin/nodes/ideation/scenarios/user_says_create_it_evals.py +23 -0
  286. devin/nodes/ideation/scenarios/vague_idea.py +16 -0
  287. devin/nodes/ideation/scenarios/vague_idea_evals.py +47 -0
  288. devin/nodes/ideation/tools.json +312 -0
  289. devin/nodes/insight/__init__.py +0 -0
  290. devin/nodes/insight/node.py +49 -0
  291. devin/nodes/insight/playground.py +154 -0
  292. devin/nodes/insight/prompt.md +61 -0
  293. devin/nodes/insight/scenarios/architecture_pattern_query.py +15 -0
  294. devin/nodes/insight/scenarios/architecture_pattern_query_evals.py +25 -0
  295. devin/nodes/insight/scenarios/codebase_exploration.py +15 -0
  296. devin/nodes/insight/scenarios/codebase_exploration_evals.py +23 -0
  297. devin/nodes/insight/scenarios/devin_ideation_routing.py +19 -0
  298. devin/nodes/insight/scenarios/devin_ideation_routing_evals.py +39 -0
  299. devin/nodes/insight/scenarios/devin_insight_routing.py +20 -0
  300. devin/nodes/insight/scenarios/devin_insight_routing_evals.py +40 -0
  301. devin/nodes/insight/scenarios/operational_debugging.py +15 -0
  302. devin/nodes/insight/scenarios/operational_debugging_evals.py +23 -0
  303. devin/nodes/insight/scenarios/operational_question.py +9 -0
  304. devin/nodes/insight/scenarios/operational_question_evals.py +8 -0
  305. devin/nodes/insight/scenarios/queue_status.py +15 -0
  306. devin/nodes/insight/scenarios/queue_status_evals.py +23 -0
  307. devin/nodes/insight/scenarios/source_doc_explanation.py +14 -0
  308. devin/nodes/insight/scenarios/source_doc_explanation_evals.py +21 -0
  309. devin/nodes/insight/scenarios/worker_state_check.py +15 -0
  310. devin/nodes/insight/scenarios/worker_state_check_evals.py +22 -0
  311. devin/nodes/insight/tools.json +126 -0
  312. devin/nodes/intake/__init__.py +0 -0
  313. devin/nodes/intake/node.py +27 -0
  314. devin/nodes/intake/playground.py +47 -0
  315. devin/nodes/intake/prompt.md +12 -0
  316. devin/nodes/intake/scenarios/ideation_routing.py +4 -0
  317. devin/nodes/intake/scenarios/ideation_routing_evals.py +5 -0
  318. devin/nodes/intake/scenarios/insight_routing.py +4 -0
  319. devin/nodes/intake/scenarios/insight_routing_evals.py +5 -0
  320. devin/nodes/iterate/README.md +44 -0
  321. devin/nodes/iterate/__init__.py +1 -0
  322. devin/nodes/iterate/_archived_design_stages/01-objectives-requirements.md +112 -0
  323. devin/nodes/iterate/_archived_design_stages/02-evals.md +131 -0
  324. devin/nodes/iterate/_archived_design_stages/03-tools-and-boundaries.md +110 -0
  325. devin/nodes/iterate/_archived_design_stages/04-harness-and-playground.md +32 -0
  326. devin/nodes/iterate/_archived_design_stages/05-prompt-deferred.md +11 -0
  327. devin/nodes/iterate/_archived_design_stages/coder_agent_design/01-objectives-requirements.md +20 -0
  328. devin/nodes/iterate/_archived_design_stages/coder_agent_design/02-evals.md +8 -0
  329. devin/nodes/iterate/_archived_design_stages/coder_agent_design/03-tools-and-boundaries.md +14 -0
  330. devin/nodes/iterate/_archived_design_stages/coder_agent_design/04-harness-and-playground.md +12 -0
  331. devin/nodes/iterate/_archived_design_stages/framer_agent_design/01-objectives-requirements.md +20 -0
  332. devin/nodes/iterate/_archived_design_stages/framer_agent_design/02-evals.md +8 -0
  333. devin/nodes/iterate/_archived_design_stages/framer_agent_design/03-tools-and-boundaries.md +13 -0
  334. devin/nodes/iterate/_archived_design_stages/framer_agent_design/04-harness-and-playground.md +12 -0
  335. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md +25 -0
  336. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/02-evals.md +9 -0
  337. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/03-tools-and-boundaries.md +14 -0
  338. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md +12 -0
  339. devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md +20 -0
  340. devin/nodes/iterate/_archived_design_stages/observer_agent_design/02-evals.md +8 -0
  341. devin/nodes/iterate/_archived_design_stages/observer_agent_design/03-tools-and-boundaries.md +14 -0
  342. devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md +13 -0
  343. devin/nodes/iterate/agent-roles.md +89 -0
  344. devin/nodes/iterate/agents/README.md +10 -0
  345. devin/nodes/iterate/artifacts.md +504 -0
  346. devin/nodes/iterate/contract.md +100 -0
  347. devin/nodes/iterate/eval-plan.md +74 -0
  348. devin/nodes/iterate/node.py +100 -0
  349. devin/nodes/iterate/pipeline/README.md +13 -0
  350. devin/nodes/iterate/playground-contract.md +76 -0
  351. devin/nodes/iterate/prompt.md +11 -0
  352. devin/nodes/iterate/scenarios/README.md +38 -0
  353. devin/nodes/iterate/scenarios/artifact-and-loop-scenarios.md +101 -0
  354. devin/nodes/iterate/scenarios/coder_artifact_alignment.py +32 -0
  355. devin/nodes/iterate/scenarios/coder_artifact_alignment_evals.py +45 -0
  356. devin/nodes/iterate/scenarios/coder_bounded_fix.py +27 -0
  357. devin/nodes/iterate/scenarios/coder_bounded_fix_evals.py +45 -0
  358. devin/nodes/iterate/scenarios/devin_iterate_routing.py +21 -0
  359. devin/nodes/iterate/scenarios/devin_iterate_routing_evals.py +36 -0
  360. devin/nodes/iterate/scenarios/framer_scope_boundary.py +25 -0
  361. devin/nodes/iterate/scenarios/framer_scope_boundary_evals.py +57 -0
  362. devin/nodes/iterate/scenarios/framer_task_framing.py +25 -0
  363. devin/nodes/iterate/scenarios/framer_task_framing_evals.py +58 -0
  364. devin/nodes/iterate/scenarios/iterate_error_fix.py +21 -0
  365. devin/nodes/iterate/scenarios/iterate_error_fix_evals.py +39 -0
  366. devin/nodes/iterate/scenarios/iterate_quick_change.py +21 -0
  367. devin/nodes/iterate/scenarios/iterate_quick_change_evals.py +35 -0
  368. devin/nodes/iterate/scenarios/iterate_to_idea_promotion.py +23 -0
  369. devin/nodes/iterate/scenarios/iterate_to_idea_promotion_evals.py +53 -0
  370. devin/nodes/iterate/scenarios/iterate_to_insight_reroute.py +23 -0
  371. devin/nodes/iterate/scenarios/iterate_to_insight_reroute_evals.py +53 -0
  372. devin/nodes/iterate/scenarios/observer_evidence_seam.py +28 -0
  373. devin/nodes/iterate/scenarios/observer_evidence_seam_evals.py +55 -0
  374. devin/nodes/iterate/scenarios/observer_repro_creation.py +28 -0
  375. devin/nodes/iterate/scenarios/observer_repro_creation_evals.py +45 -0
  376. devin/nodes/iterate/scenarios/routing-matrix.md +45 -0
  377. devin/nodes/shared/__init__.py +0 -0
  378. devin/nodes/shared/filemaker_expert.md +80 -0
  379. devin/nodes/shared/filemaker_expert.py +354 -0
  380. devin/nodes/shared/filemaker_expert_eval/runner.py +176 -0
  381. devin/nodes/shared/filemaker_expert_eval/scenarios.json +65 -0
  382. devin/nodes/shared/goldilocks_advisor_eval/runner.py +214 -0
  383. devin/nodes/shared/goldilocks_advisor_eval/scenarios.json +58 -0
  384. devin/nodes/shared/helpers.py +156 -0
  385. devin/nodes/shared/idea_compliance_advisor_eval/runner.py +252 -0
  386. devin/nodes/shared/idea_compliance_advisor_eval/scenarios.json +75 -0
  387. devin/nodes/shared/models.py +44 -0
  388. devin/nodes/shared/post.py +40 -0
  389. devin/nodes/shared/router.py +107 -0
  390. devin/nodes/shared/tools.py +191 -0
  391. devin/shared/devin-chat-rubric.md +237 -0
  392. devin/shared/devin-chat-scenario-suite.md +90 -0
  393. devin/shared/eval_doctrine.md +9 -0
@@ -0,0 +1,195 @@
1
+ """IdeationAgent — active planning arm of Devin chat DAG."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ from devflow_engine.devin2.pi_runner import run_devin2_pi_agent
8
+ from devflow_engine.idea.sufficiency import extract_sufficient_idea
9
+ from devflow_engine.vendor.datalumina_genai.core.nodes.base import Node
10
+ from devflow_engine.vendor.datalumina_genai.core.task import TaskContext
11
+
12
+ from devin.nodes.shared.helpers import (
13
+ dfs_node_running,
14
+ load_node_prompt_lines,
15
+ pipeline_root,
16
+ resolve_project_id,
17
+ store_run,
18
+ write_json,
19
+ )
20
+ from devin.nodes.shared.models import DevinAgentResponse
21
+
22
+
23
+ def _prior_messages_as_text(prior_messages: list) -> str:
24
+ """Format prior conversation messages into a readable text for sufficiency extraction."""
25
+ if not prior_messages:
26
+ return ''
27
+ lines = []
28
+ for msg in prior_messages:
29
+ role = msg.get('from') or msg.get('role', 'unknown')
30
+ content = msg.get('text') or msg.get('content', '')
31
+ if isinstance(content, list):
32
+ content = ' '.join(c.get('text', '') for c in content if isinstance(c, dict))
33
+ lines.append(f'{role}: {content}')
34
+ return '\n'.join(lines)
35
+
36
+
37
+ def _build_context_for_ideation(input_payload: dict, project_id: str) -> dict:
38
+ """Build context_payload for the IdeationAgent from scenario input.
39
+
40
+ In real multi-turn usage, prior_messages carry context from earlier turns.
41
+ When _precreate_artifact is set, an idea artifact already exists and the
42
+ agent should commit it when the user says "create it" / "go ahead" etc.
43
+ """
44
+ sufficient_idea = input_payload.get('_sufficiency') or {}
45
+ has_contract = bool(sufficient_idea) or {'problem', 'target_users', 'user_outcomes', 'scope'}.issubset(
46
+ set(sufficient_idea.keys())
47
+ )
48
+ expected_status = 'ready_for_downstream' if has_contract else 'ideation_contract_response'
49
+ return {
50
+ 'expected_status': expected_status,
51
+ 'precreated_artifact': input_payload.get('_precreate_artifact', False),
52
+ 'prior_messages': input_payload.get('prior_messages', []),
53
+ }
54
+
55
+
56
class IdeationAgentNode(Node):
    """Run the Devin ideation agent for one chat turn and record its response.

    ``process`` gathers the turn's context (current message, prior messages and
    any persisted ``idea.json``), invokes the PI agent on the ``ideation`` route
    arm, writes ``ideation_response.json`` into the pipeline directory,
    registers that file as a run artifact, and publishes the payload on
    ``task_context.metadata``.
    """

    async def process(self, task_context: TaskContext) -> TaskContext:
        """Execute the ideation turn described by ``task_context``.

        Args:
            task_context: Provides ``event`` (``repo_root``, ``idea_id``,
                ``pipeline_key`` and ``raw_text`` are read) and ``metadata``
                (optional ``project_id``, ``prior_messages``, ``raw_text``,
                ``route`` and ``insight_context``).

        Returns:
            The same ``task_context`` with ``metadata['response_guidance']``
            and ``metadata['agent_loop_terminal']`` set.
        """
        event = task_context.event
        repo_root = Path(event.repo_root)
        store, run_id = store_run()
        node_exec_id = store.create_node_attempt(
            run_id=run_id, node_id='ideation_agent', node_name='IdeationAgent', attempt=1
        )
        project_id = str(
            task_context.metadata.get('project_id')
            or resolve_project_id(repo_root, idea_id=event.idea_id)
        )
        dfs_node_running(
            project_id=project_id,
            run_id=run_id,
            node_id='ideation_agent',
            summary='Running Devin ideation agent',
            idea_id=event.idea_id,
        )

        # Build sufficient_idea from the current turn AND prior_messages so
        # multi-turn context is available.
        prior_messages = task_context.metadata.get('prior_messages') or []
        prior_text = _prior_messages_as_text(prior_messages)
        raw_text = str(task_context.metadata.get('raw_text') or event.raw_text or '')
        sufficient_idea = extract_sufficient_idea(
            raw_text, prior_text=prior_text if prior_text else None
        )

        # Check if the idea artifact already exists (persisted in a prior turn
        # via devflow_init_idea). If it exists, the agent can commit it
        # regardless of text-extraction completeness: the artifact IS the
        # contract. Load it so its content can be injected into the context.
        idea_json_path = repo_root / '.devflow' / 'ideas' / event.idea_id / 'idea.json'
        idea_exists = idea_json_path.exists()
        persisted_idea: dict[str, object] = {}
        if idea_exists:
            import json  # function-scoped, as in the original code

            try:
                loaded = json.loads(idea_json_path.read_text(encoding='utf-8'))
            except (OSError, ValueError):
                # Best effort: an unreadable or malformed idea.json (JSON and
                # decoding errors are ValueError subclasses) leaves
                # persisted_idea empty instead of failing the turn.
                loaded = None
            if isinstance(loaded, dict):
                persisted_idea = loaded

        # Determine if we have enough to commit.
        has_contract = {'problem', 'target_users', 'user_outcomes', 'scope'}.issubset(
            set(sufficient_idea.keys())
        )
        if idea_exists:
            has_contract = True
        expected_status = 'ready_for_downstream' if has_contract else 'ideation_contract_response'

        # Build session_id for emit tools.
        session_id = f"idea:{project_id}:{event.idea_id}"

        # Load node prompt from prompt.md (matches Insight pattern).
        prompt_lines = load_node_prompt_lines(__file__)

        # Turn-specific operational guidance (kept minimal in node.py; the rest
        # is in prompt.md).
        guidance = prompt_lines + [
            f"Return response_kind='{expected_status}'.",
            'Treat inferred details as provisional when not explicitly provided.',
            'Keep momentum — do not re-ask answered questions.',
        ]

        context_payload = {
            'idea_id': event.idea_id,
            'current_user_message': raw_text,
            'route': task_context.metadata.get('route') or {},
            'expected_status': expected_status,
            'project_id': project_id,
            'repo_root': str(repo_root),
            'session_id': session_id,
            # Additional context from scenario input (e.g., pre-fetched insight for eval).
            'insight_context': task_context.metadata.get('insight_context') or '',
            # When the idea artifact already exists, inject its actual content
            # so the agent operates on the real persisted idea, not garbled
            # text-extracted fragments.
            'persisted_idea': persisted_idea if persisted_idea else None,
            'idea_artifact_exists': idea_exists,
            'idea_artifact_path': str(idea_json_path) if idea_exists else '',
        }

        result = run_devin2_pi_agent(
            repo_root=repo_root,
            stage_name='devin_ideation_response',
            route_arm='ideation',
            context_payload=context_payload,
            operational_guidance=guidance,
            output_model=DevinAgentResponse,
            timeout_seconds=180,
        )

        model = DevinAgentResponse.model_validate(result.response_model.model_dump())
        invocation_log_path = str(result.invocation.log_path) if result.invocation.log_path else None

        # Re-extract sufficient_idea from the agent's response so fabricated
        # content appears in the output.
        fabricated_idea = extract_sufficient_idea(model.response_message or '', prior_text=None)
        # Merge: prefer agent-fabricated fields, fall back to the pre-computed
        # input-based extraction.
        merged_sufficient = {**sufficient_idea}
        for key in ['problem', 'target_users', 'user_outcomes', 'scope', 'assumptions']:
            if fabricated_idea.get(key):
                merged_sufficient[key] = fabricated_idea[key]

        # Existence is re-evaluated here, after the agent run, so the payload
        # reports the artifact's current on-disk state.
        idea_exists = idea_json_path.exists()

        # Resolved once and reused for both the payload and the output file path.
        pipeline_dir = pipeline_root(
            repo_root, idea_id=event.idea_id, pipeline_key=event.pipeline_key
        )

        response_payload = {
            'idea_id': event.idea_id,
            'pipeline_dir': str(pipeline_dir),
            'sufficient_idea': merged_sufficient,
            'idea_artifact_exists': idea_exists,
            'response_message': model.response_message,
            'response_kind': model.response_kind,
            'suggested_next_step': model.suggested_next_step,
            'follow_up_questions': model.follow_up_questions,
            'response_style_notes': model.style_notes,
            'invocation_log_path': invocation_log_path,
        }

        out_path = pipeline_dir / 'ideation_response.json'
        write_json(out_path, response_payload)

        store.add_artifact(
            run_id=run_id,
            node_exec_id=node_exec_id,
            kind='devin_ideation_response',
            uri=str(out_path),
            metadata={'response_kind': model.response_kind},
        )
        store.mark_node_finished(
            node_exec_id=node_exec_id, status='succeeded', output=response_payload
        )

        task_context.metadata['response_guidance'] = response_payload
        task_context.metadata['agent_loop_terminal'] = {
            'status': model.response_kind,
            **response_payload,
        }
        return task_context
@@ -0,0 +1,267 @@
1
+ """IdeationAgent playground — run scenarios against real Spicy-Server project with real PI.
2
+
3
+ Usage:
4
+ python3 -m devin.nodes.ideation.playground --real-pi --project proj_75f63d30 --repo-root /Users/devflow/repos/Spicy-Server
5
+
6
+ This runs the real IdeationAgentNode against the Spicy-Server DevFlow project,
7
+ exercising the full PI harness with devflow-tools.ts extension enabled.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import asyncio
14
+ import importlib.util
15
+ import json
16
+ from pathlib import Path
17
+
18
+ from devin.nodes.ideation.node import IdeationAgentNode
19
+ from devin.nodes.shared.models import DevinAgentResponse, DevinChatDagEvent, ScenarioResult
20
+ from devflow_engine.vendor.datalumina_genai.core.task import TaskContext
21
+
22
# Directory holding the scenario modules and their `*_evals.py` companions;
# it sits next to this file.
SCENARIO_DIR = Path(__file__).parent / 'scenarios'
23
+
24
+
25
def _parse_tool_calls_from_log(log_path: str | None) -> list[str]:
    """Return the known tool names mentioned in a PI JSONL log file.

    Each log line is expected to be a JSON object shaped like
    ``{"line": ..., "stream": "stderr"|"stdout", "ts": ...}``. The ``line``
    text is searched for each known tool name with a plain, case-sensitive
    substring match, so this reports *mentions* of a tool rather than
    verified invocations.

    Args:
        log_path: Path to the JSONL log, or ``None``/empty when no log exists.

    Returns:
        Tool names in first-seen order without duplicates. Parsing is
        best-effort: a missing or unreadable file yields whatever was
        collected so far (usually an empty list), and malformed lines are
        skipped individually.
    """
    if not log_path:
        return []

    # Built once instead of on every log line.
    known_tools = (
        'devflow_init_idea', 'devflow_amend_idea', 'devflow_commit_idea',
        'idea_compliance_check', 'goldilocks_check', 'devin_insight',
        'devflow_read_project_config', 'devflow_read_queue_summary',
        'devflow_read_story_queue', 'devflow_read_worker_state',
        'emit_start_working', 'emit_stop_working', 'emit_response',
    )
    tool_names: list[str] = []
    seen: set[str] = set()
    try:
        with open(log_path, encoding='utf-8') as fh:
            for raw_line in fh:
                raw_line = raw_line.strip()
                if not raw_line:
                    continue
                try:
                    entry = json.loads(raw_line)
                except (ValueError, RecursionError):
                    # Not valid JSON: skip this line only.
                    continue
                # A JSON scalar/array or a non-string "line" value used to
                # raise here and silently abort the whole scan; skip just
                # the offending entry instead.
                if not isinstance(entry, dict):
                    continue
                text = entry.get('line', '')
                if not isinstance(text, str):
                    continue
                for tool in known_tools:
                    if tool in text and tool not in seen:
                        seen.add(tool)
                        tool_names.append(tool)
    except (OSError, ValueError):
        # Unreadable file or undecodable bytes: keep what was gathered.
        pass
    return tool_names
64
+
65
+
66
def _load_module(path: Path):
    """Import the Python source file at *path* and return the module object.

    The module is named after the file stem and is executed immediately; it
    is not registered in ``sys.modules``.

    Args:
        path: Location of the ``.py`` file to load.

    Returns:
        The freshly executed module.

    Raises:
        ImportError: If no import spec or loader can be built for *path*
            (for example, an unsupported file extension).
    """
    spec = importlib.util.spec_from_file_location(path.stem, path)
    # Validate before use: the spec must exist before module_from_spec()
    # touches it, and an assert would vanish under `python -O`.
    if spec is None or spec.loader is None:
        raise ImportError(f'Cannot build an import spec for {path}')
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
72
+
73
+
74
def _collect_scenarios():
    """Load every scenario module that has a matching ``<name>_evals.py``.

    Files in ``SCENARIO_DIR`` are visited in sorted order. Eval files are
    never treated as scenarios, and a scenario without an eval companion is
    ignored.

    Returns:
        A list of ``(scenario_module, eval_module)`` tuples.
    """
    scenario_files = [
        candidate
        for candidate in sorted(SCENARIO_DIR.glob('*.py'))
        if not candidate.name.endswith('_evals.py')
    ]
    loaded = []
    for scenario_file in scenario_files:
        evals_file = scenario_file.with_name(f'{scenario_file.stem}_evals.py')
        if not evals_file.exists():
            continue
        loaded.append((_load_module(scenario_file), _load_module(evals_file)))
    return loaded
83
+
84
+
85
def _precreate_idea_artifact(repo_root: Path, idea_id: str) -> None:
    """Write a fixed, already-shaped idea artifact plus its draft manifest.

    Creates ``<repo_root>/.devflow/ideas/<idea_id>/idea.json`` and
    ``drafts/current/manifest.json`` beneath it, overwriting existing files.
    The content is a canned "Client Onboarding Workflow" idea.
    """
    idea_root = repo_root / '.devflow' / 'ideas' / idea_id
    idea_file = idea_root / 'idea.json'
    current_drafts = idea_root / 'drafts' / 'current'

    def _dump(payload: dict) -> str:
        # Pretty-printed JSON terminated by a newline.
        return json.dumps(payload, indent=2) + '\n'

    idea_root.mkdir(parents=True, exist_ok=True)
    idea_file.write_text(
        _dump({
            'idea_id': idea_id,
            'title': 'Client Onboarding Workflow',
            'problem': 'Small professional services firms lose revenue and reputation when client onboarding is slow and inconsistent.',
            'target_users': ['Internal client-facing staff (paralegals, assistants, admin) who manage the onboarding workflow'],
            'user_outcomes': [
                'Clients feel welcomed promptly',
                'Staff spend less time chasing documents',
                'No new client starts with confusion',
            ],
            'scope': 'Onboarding workflow system with document collection, task tracking, client portal, and status notifications.',
            'assumptions': ['Small professional services firms with 15-20 new clients per month'],
            'status': 'shaped',
        }),
        encoding='utf-8',
    )

    # The manifest is what lets `devflow idea promote --draft-set current` work.
    current_drafts.mkdir(parents=True, exist_ok=True)
    (current_drafts / 'manifest.json').write_text(
        _dump({
            'idea_id': idea_id,
            'draft_set_id': 'current',
            'created_at': '2026-04-22T00:00:00Z',
            'artifact_path': str(idea_file),
            'status': 'shaped',
        }),
        encoding='utf-8',
    )
116
+
117
+
118
async def _run_scenario_real_pi(scenario_module, eval_module, repo_root: Path, project_id: str) -> ScenarioResult:
    """Run one scenario through the real IdeationAgentNode and score it.

    Args:
        scenario_module: Loaded scenario module exposing ``SCENARIO_NAME``,
            ``INPUT_PAYLOAD`` and ``EXPECTED_BEHAVIOR``.
        eval_module: Loaded companion module exposing
            ``evaluate(actual_output) -> (passed, notes)``.
        repo_root: Repository whose ``.devflow`` directory holds the idea
            artifacts and the ``execution.sqlite`` database.
        project_id: DevFlow project ID; also used to derive a default idea ID
            when the scenario payload does not supply one.

    Returns:
        A ``ScenarioResult``. An exception raised by the node or by the
        evaluator is reported as a failed result rather than propagated.
    """
    import uuid

    from devin.nodes.shared.helpers import set_runtime_store
    from devflow_engine.stores.execution_store import ExecutionStore

    scenario_name = scenario_module.SCENARIO_NAME
    input_payload = dict(scenario_module.INPUT_PAYLOAD)
    expected = dict(scenario_module.EXPECTED_BEHAVIOR)

    idea_id = input_payload.get('idea_id', f'{project_id}_scenario_{scenario_name}')
    raw_text = input_payload.get('current_user_message', '')

    # Pre-create idea artifact if scenario requires it (mirrors real multi-turn init)
    if input_payload.get('_precreate_artifact'):
        _precreate_idea_artifact(repo_root, idea_id)

    event = DevinChatDagEvent(
        repo_root=str(repo_root),
        idea_id=idea_id,
        raw_text=raw_text,
        pipeline_key=f'scenario-{scenario_name}',
    )

    metadata = {
        'raw_text': raw_text,
        'route': {'route_arm': 'ideation'},
        'project_id': project_id,
        'prior_messages': input_payload.get('prior_messages', []),
    }

    task_context = TaskContext(event=event, metadata=metadata)

    # Set up runtime store for this scenario
    db_path = repo_root / '.devflow' / 'execution.sqlite'
    store = ExecutionStore(db_path=db_path)
    run_id = store.create_run(
        dag_id='devin_chat_dag',
        dag_version='1.0',
        root_correlation_id=str(uuid.uuid4()),
        config={'project_id': project_id, 'scenario': scenario_name},
    )
    set_runtime_store(store, run_id)

    # The finally clause guarantees the process-wide runtime store is cleared
    # on every exit path, including cancellation (a BaseException) and a
    # failure while constructing the node.
    try:
        node = IdeationAgentNode(task_context=None)
        try:
            result_ctx = await node.process(task_context)
            response_guidance = result_ctx.metadata.get('response_guidance', {})
            agent_terminal = result_ctx.metadata.get('agent_loop_terminal', {})
        except Exception as exc:
            return ScenarioResult(
                scenario_name=scenario_name,
                passed=False,
                actual_output={'error': str(exc)},
                expected_behavior=expected,
                notes=[f'Node raised exception: {exc}'],
            )
    finally:
        set_runtime_store(None, None)

    actual_output = {
        'response_message': response_guidance.get('response_message', ''),
        'response_kind': response_guidance.get('response_kind', agent_terminal.get('status', '')),
        'suggested_next_step': response_guidance.get('suggested_next_step', ''),
        'follow_up_questions': response_guidance.get('follow_up_questions', []),
        'style_notes': response_guidance.get('response_style_notes', []),
        'sufficient_idea': response_guidance.get('sufficient_idea', {}),
        'idea_artifact_exists': response_guidance.get('idea_artifact_exists', False),
        'tool_calls': _parse_tool_calls_from_log(response_guidance.get('invocation_log_path')),
    }

    # A crashing evaluator counts as a scenario failure, not a harness crash.
    try:
        passed, notes = eval_module.evaluate(actual_output)
    except Exception as exc:
        passed = False
        notes = [f'Eval raised exception: {exc}']

    return ScenarioResult(
        scenario_name=scenario_name,
        passed=passed,
        actual_output=actual_output,
        expected_behavior=expected,
        notes=notes,
    )
202
+
203
+
204
def run_all_real_pi(repo_root: Path, project_id: str) -> tuple[list[ScenarioResult], list[str]]:
    """Run every collected scenario with real PI against the given project.

    Scenarios run sequentially, each in its own event loop via
    ``asyncio.run``. Progress is printed to stdout as each one finishes.

    Args:
        repo_root: Repository root passed through to each scenario run.
        project_id: DevFlow project ID; also used to derive default idea IDs.

    Returns:
        A ``(results, created_ideas)`` pair: the per-scenario results and the
        idea IDs the scenarios were run with, so the caller can clean up.
    """
    results: list[ScenarioResult] = []
    created_ideas: list[str] = []
    for scenario_mod, eval_mod in _collect_scenarios():
        print(f"Running scenario: {scenario_mod.SCENARIO_NAME} ...", flush=True)
        # Same idea-ID derivation as _run_scenario_real_pi, recorded here so
        # the artifacts can be removed afterwards.
        input_payload = dict(scenario_mod.INPUT_PAYLOAD)
        idea_id = input_payload.get('idea_id', f'{project_id}_scenario_{scenario_mod.SCENARIO_NAME}')
        created_ideas.append(idea_id)
        result = asyncio.run(_run_scenario_real_pi(scenario_mod, eval_mod, repo_root, project_id))
        results.append(result)
        status = "PASS" if result.passed else "FAIL"
        print(f" → {status}", flush=True)
    return results, created_ideas
218
+
219
+
220
def cleanup_test_artifacts(repo_root: Path, created_ideas: list[str]) -> None:
    """Delete the idea directories created during an eval run.

    For each ID in *created_ideas*, removes
    ``<repo_root>/.devflow/ideas/<idea_id>`` recursively when it exists, then
    prints a summary of what was removed.
    """
    import shutil

    ideas_root = repo_root / '.devflow' / 'ideas'
    removed = []
    for idea_id in created_ideas:
        target = ideas_root / idea_id
        if not target.exists():
            continue
        shutil.rmtree(target)
        removed.append(idea_id)

    report = [f"\nCleanup: removed {len(removed)} test idea artifacts"]
    report.extend(f" removed: {idea_id}" for idea_id in removed)
    print("\n".join(report))
233
+
234
+
235
if __name__ == '__main__':
    # Script entry point: run every scenario against the real PI harness,
    # print a summary, remove the idea artifacts, then dump results as JSON.
    import sys

    parser = argparse.ArgumentParser(description='IdeationAgent playground — real PI against Spicy-Server')
    parser.add_argument('--real-pi', action='store_true', help='Run with real PI harness')
    parser.add_argument('--project', type=str, default='proj_75f63d30', help='DevFlow project ID')
    parser.add_argument('--repo-root', type=str, default='/Users/devflow/repos/Spicy-Server', help='Repo root path')
    args = parser.parse_args()

    if not args.real_pi:
        print("Use --real-pi to run against actual PI harness", file=sys.stderr)
        # SystemExit instead of the builtin exit(): the latter is injected by
        # the `site` module and is not guaranteed to exist (e.g. `python -S`).
        raise SystemExit(1)

    repo_root = Path(args.repo_root)
    project_id = args.project

    print(f"Running IdeationAgent eval against Spicy-Server ({project_id})")
    print(f"Repo root: {repo_root}")
    print("-" * 60)

    results, created_ideas = run_all_real_pi(repo_root, project_id)

    print("-" * 60)
    print(f"\nResults: {sum(1 for r in results if r.passed)}/{len(results)} passed\n")

    for result in results:
        status = "PASS" if result.passed else "FAIL"
        print(f" [{status}] {result.scenario_name}")
        for note in result.notes:
            print(f" note: {note}")

    # Clean up test artifacts once the run has completed. Note this is not
    # reached if run_all_real_pi raises.
    cleanup_test_artifacts(repo_root, created_ideas)

    print("\n" + json.dumps([item.model_dump() for item in results], indent=2, sort_keys=True))
@@ -0,0 +1,65 @@
1
+ # Devin Ideation Agent
2
+
3
+ You help users define and refine development work, then commit approved ideas to the pipeline.
4
+
5
+ ## Job
6
+
7
+ 1. **Map the existing codebase first** — call `devin_insight` to investigate existing patterns before shaping any idea for a project with code. Do not guess at conventions.
8
+ 2. **Shape the idea** — extract or fabricate problem, target_users, user_outcomes, scope, and assumptions.
9
+ 3. **Commit when the user approves** — "create it", "do it", "go ahead", "ship it" means return `ready_for_downstream` immediately using the idea already shaped in the conversation. Do not re-ask.
10
+ 4. **Ask one targeted question only** when ambiguity materially changes the solution and cannot be assumed.
11
+
12
+ ## Codebase reading
13
+
14
+ All filesystem inspection goes through `devin_insight`. Do not use read, grep, find, or cat on the codebase.
15
+
16
+ ## Fabricating from vague input (no code/source references)
17
+
18
+ - Produce a complete idea artifact: problem, target_users, user_outcomes, scope, assumptions
19
+ - Persist it via `Devflow_Init_Idea` — not just text
20
+ - Trace your assumptions in the assumptions field
21
+ - Return `ideation_contract_response` with one follow-up question validating a key assumption
22
+
23
+ **Exception**: if the user's message mentions code files, repo paths, function names, or explicit codebase references — investigate with `devin_insight` first, then shape around what you find. Do not fabricate.
24
+
25
+ ## Multi-turn context
26
+
27
+ When `prior_messages` is present, use it to understand what has already been agreed upon. If the user says "create it" and the idea was already shaped across prior turns, commit it immediately.
28
+
29
+ ## Committing an existing artifact
30
+
31
+ When `persisted_idea` is provided in the context, the idea artifact already exists on disk at `idea_artifact_path`. Use it directly — do not re-ask about it. Call `Devflow_Commit_Idea` with the `idea_id` from the context and `draft_set='current'` to commit it.
32
+
33
+ ## Ambiguous "create it" with multiple in-flight ideas
34
+
35
+ If the user says "create it" but there are multiple uncommitted ideas and it's unclear which one is meant:
36
+ 1. Call `devflow_read_project_config` or `devflow_read_queue_summary` to list active ideas in flight.
37
+ 2. Ask one clarifying question naming the ideas to let the user pick.
38
+
39
+ Do not guess.
40
+
41
+ ## Devflow tools
42
+
43
+ - `Devflow_Init_Idea` — persist a new idea artifact
44
+ - `Devflow_Amend_Idea` — refine an existing idea
45
+ - `Devflow_Commit_Idea` — promote an approved idea
46
+ - `devflow_read_project_config` / `devflow_read_queue_summary` / `devflow_read_worker_state` — inspect state
47
+
48
+ ## Emit tools
49
+
50
+ - `Emit_Start_Working` before a devflow call
51
+ - `Emit_Stop_Working` after it completes
52
+ - `Emit_Response` for mid-turn progress
53
+
54
+ Use `session_id` from context exactly as provided.
55
+
56
+ ## Output
57
+
58
+ Return JSON with:
59
+ - `response_message`: compact reply
60
+ - `response_kind`: `ideation_contract_response` | `ready_for_downstream` | `needs_clarification`
61
+ - `suggested_next_step`: what to do next
62
+ - `follow_up_questions`: at most one question
63
+ - `style_notes`: optional
64
+
65
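+ `ideation_contract_response` = idea not complete; `needs_clarification` = a blocking ambiguity must be resolved before the idea can be shaped or committed; `ready_for_downstream` = user approved and idea is committed.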
@@ -0,0 +1,13 @@
1
# Scenario: the user changes two earlier decisions mid-conversation.
SCENARIO_NAME = 'continue_refinement'
SCENARIO_DESCRIPTION = 'User refines an existing idea — agent explains conversational shape change.'

# Payload read by the playground runner; `prior_messages` replays earlier turns.
INPUT_PAYLOAD = dict(
    current_user_message='Actually, let us make it real-time instead of batch processing. Also we need mobile access.',
    idea_id='proj_75f63d30_ideas_notify_001',
    project_id='proj_75f63d30',
    prior_messages=[
        dict(role='user', content='Add a notification system to the client portal.'),
        dict(
            role='assistant',
            content='{"response_kind": "ideation_contract_response", "response_message": "Great — adding a notification system to the client portal. I need one clarifying question before shaping this: should notifications be real-time (push/live) or batch (daily/periodic digest)?", "follow_up_questions": ["Should notifications be real-time (push/live) or batch (daily/periodic digest)?"], "sufficiency_quotient": 0.4}',
        ),
        dict(role='user', content='Batch is fine for now, just needs to be reliable.'),
    ],
)

# Behaviour the scenario is meant to demonstrate.
EXPECTED_BEHAVIOR = dict(
    response_kind='ideation_contract_response',
    conversationally_explains_delta=True,
    incorporates_all_changes=True,
    mentions_ddr_persists=True,
    no_re_asking=True,
)
@@ -0,0 +1,18 @@
1
# Declared criteria for the continue_refinement scenario. Only the
# "explains the delta" criterion is checked by evaluate() below.
EVAL_CRITERIA = {
    'response_kind_must_be': 'ideation_contract_response',
    'conversationally_explains_delta': True,
    'incorporates_all_changes': True,
    'mentions_ddr_persists': True,
    'no_re_asking': True,
}


def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Score the agent output for the continue_refinement scenario.

    Args:
        actual_output: Output dict built by the playground; only
            ``response_message`` is inspected.

    Returns:
        ``(passed, notes)``. The scenario passes when the reply mentions at
        least one change-related keyword (case-insensitive substring match);
        otherwise it fails and ``notes`` explains why.
    """
    notes: list[str] = []
    msg = str(actual_output.get('response_message') or '').lower()
    # Should explain how the changes reshape the idea - check it mentions the changes
    if not any(w in msg for w in ('real-time', 'mobile', 'instead', 'batch', 'overhaul')):
        notes.append('does not conversationally explain the delta changes')
    # TODO: not yet checked - should not re-ask questions already answered.
    # TODO: not yet checked - should mention DDR persists if applicable.
    # A recorded note is a failure; previously the result stayed True even
    # when the check above did not match.
    return not notes, notes
@@ -0,0 +1,17 @@
1
# Scenario: a new idea should be shaped around conventions already in the repo.
SCENARIO_NAME = 'idea_fits_existing_patterns'
SCENARIO_DESCRIPTION = 'User provides an idea and agent fits it into existing codebase patterns.'

# Inline insight context so the agent doesn't need to call devin_insight (which may fail
# in eval due to --no-tools constraints on the PI subprocess)
_INSIGHT_CONTEXT = (
    'Spicy-Server uses FastAPI + Pydantic for APIs, JWT auth, PostgreSQL via Supabase. '
    'Existing notification patterns: none yet — this would be the first real-time system. '
    'Auth pattern: JWT Bearer tokens. DB: Supabase Postgres. '
    'API conventions: routers in app/routers/, schemas in app/schemas/, '
    'service layer in app/services/. No existing WebSocket infrastructure.'
)

INPUT_PAYLOAD = dict(
    current_user_message='Add a notification system to the client portal.',
    idea_id='proj_75f63d30_ideas_notify_002',
    project_id='proj_75f63d30',
    repo_root='/Users/devflow/repos/Spicy-Server',
    codebase_patterns_exist=True,
    insight_context=_INSIGHT_CONTEXT,
)

# Behaviour the scenario is meant to demonstrate.
EXPECTED_BEHAVIOR = dict(
    response_kind='ideation_contract_response',
    maps_to_existing_patterns=True,
    references_codebase_conventions=True,
    avoids_reinventing=True,
)
@@ -0,0 +1,16 @@
1
# Declared criteria for the idea_fits_existing_patterns scenario. Only the
# "references existing patterns" criterion is checked by evaluate() below.
EVAL_CRITERIA = {
    'response_kind_must_be': 'ideation_contract_response',
    'maps_to_existing_patterns': True,
    'references_codebase_conventions': True,
    'avoids_reinventing': True,
}


def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Score the agent output for the idea_fits_existing_patterns scenario.

    Args:
        actual_output: Output dict built by the playground; only
            ``response_message`` is inspected.

    Returns:
        ``(passed, notes)``. The scenario passes when the reply contains at
        least one pattern-related keyword (case-insensitive substring match);
        otherwise it fails and ``notes`` explains why.
    """
    notes: list[str] = []
    msg = str(actual_output.get('response_message') or '').lower()
    # Must reference existing patterns in the codebase. The keyword list no
    # longer repeats 'already'; matching behaviour is unchanged.
    keywords = ('existing', 'pattern', 'already', 'convention', 'structure', 'similar', 'current')
    if not any(w in msg for w in keywords):
        notes.append('does not reference existing codebase patterns')
    return not notes, notes
@@ -0,0 +1,4 @@
1
# Scenario: an oversized request that should be broken into several sub-ideas.
SCENARIO_NAME = 'large_idea_split'
SCENARIO_DESCRIPTION = 'User gives a large idea — agent splits into manageable shapes tracked separately.'

# No idea_id is given, so the playground derives one from the project and scenario name.
INPUT_PAYLOAD = dict(
    current_user_message='We need a complete overhaul of how we manage projects end-to-end from first contact through delivery.',
)

# Behaviour the scenario is meant to demonstrate.
EXPECTED_BEHAVIOR = dict(
    response_kind='ideation_contract_response',
    splits_into_manageable_shapes=True,
    tracks_each_separately=True,
    maintains_global_integrated_conversation=True,
    number_of_sub_ideas=3,
)
@@ -0,0 +1,17 @@
1
# Declared criteria for the large_idea_split scenario. Only the "splits into
# manageable shapes" criterion is checked by evaluate() below.
EVAL_CRITERIA = {
    'response_kind_must_be': 'ideation_contract_response',
    'splits_into_manageable_shapes': True,
    'tracks_each_separately': True,
    'maintains_global_integrated_conversation': True,
    'number_of_sub_ideas': 3,
}


def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Score the agent output for the large_idea_split scenario.

    Args:
        actual_output: Output dict built by the playground; only
            ``response_message`` is inspected.

    Returns:
        ``(passed, notes)``. The scenario passes when the reply contains at
        least one splitting-related keyword (case-insensitive substring
        match); otherwise it fails and ``notes`` explains why.
    """
    notes: list[str] = []
    # Should produce sub-idea tracking: look for an indication of splitting
    # in the response.
    msg = str(actual_output.get('response_message') or '').lower()
    if not any(w in msg for w in ('split', 'break', 'part', 'phase', 'stage', 'first', 'second')):
        notes.append('does not indicate splitting the idea into manageable shapes')
    # A recorded note is a failure; previously the result stayed True even
    # when the check above did not match.
    return not notes, notes
@@ -0,0 +1,4 @@
1
# Scenario: the user supplies source documentation to reshape the idea.
SCENARIO_NAME = 'source_documentation_added'
SCENARIO_DESCRIPTION = 'User adds source docs and agent reshapes the idea based on that documentation.'

# No idea_id is given, so the playground derives one from the project and scenario name.
INPUT_PAYLOAD = dict(
    current_user_message='Here is the current database schema. Use it to shape the onboarding workflow idea.',
    has_ddr_artifacts=True,
)

# Behaviour the scenario is meant to demonstrate.
EXPECTED_BEHAVIOR = dict(
    response_kind='ideation_contract_response',
    uses_documentation_to_reshape=True,
    ddr_persists=True,
    references_specific_doc_elements=True,
)
@@ -0,0 +1,16 @@
1
# Declared criteria for the source_documentation_added scenario. Only the
# "references specific doc elements" criterion is checked by evaluate() below.
EVAL_CRITERIA = {
    'response_kind_must_be': 'ideation_contract_response',
    'uses_documentation_to_reshape': True,
    'ddr_persists': True,
    'references_specific_doc_elements': True,
}


def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Score the agent output for the source_documentation_added scenario.

    Args:
        actual_output: Output dict built by the playground; only
            ``response_message`` is inspected.

    Returns:
        ``(passed, notes)``. The scenario passes when the reply contains at
        least one documentation-related keyword (case-insensitive substring
        match); otherwise it fails and ``notes`` explains why.
    """
    notes: list[str] = []
    msg = str(actual_output.get('response_message') or '').lower()
    # Should reference the schema or documentation specifically
    # DDR should persist (mentioned in output)
    if not any(w in msg for w in ('schema', 'database', 'document', 'ddr', 'structure')):
        notes.append('does not reference specific doc elements')
    # A recorded note is a failure; previously the result stayed True even
    # when the check above did not match.
    return not notes, notes