devflow-engine 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. devflow_engine/__init__.py +3 -0
  2. devflow_engine/agentic_prompts.py +100 -0
  3. devflow_engine/agentic_runtime.py +398 -0
  4. devflow_engine/api_key_flow_harness.py +539 -0
  5. devflow_engine/api_keys.py +357 -0
  6. devflow_engine/bootstrap/__init__.py +2 -0
  7. devflow_engine/bootstrap/provision_from_template.py +84 -0
  8. devflow_engine/cli/__init__.py +0 -0
  9. devflow_engine/cli/app.py +7270 -0
  10. devflow_engine/core/__init__.py +0 -0
  11. devflow_engine/core/config.py +86 -0
  12. devflow_engine/core/logging.py +29 -0
  13. devflow_engine/core/paths.py +45 -0
  14. devflow_engine/core/toml_kv.py +33 -0
  15. devflow_engine/devflow_event_worker.py +1292 -0
  16. devflow_engine/devflow_state.py +201 -0
  17. devflow_engine/devin2/__init__.py +9 -0
  18. devflow_engine/devin2/agent_definition.py +120 -0
  19. devflow_engine/devin2/pi_runner.py +204 -0
  20. devflow_engine/devin_orchestration.py +69 -0
  21. devflow_engine/docs/prompts/anti-patterns.md +42 -0
  22. devflow_engine/docs/prompts/devin-agent-prompt.md +55 -0
  23. devflow_engine/docs/prompts/devin2-agent-prompt.md +81 -0
  24. devflow_engine/docs/prompts/examples/devin-vapi-clone-reference-exchange.json +85 -0
  25. devflow_engine/doctor/__init__.py +2 -0
  26. devflow_engine/doctor/triage.py +140 -0
  27. devflow_engine/error/__init__.py +0 -0
  28. devflow_engine/error/remediation.py +21 -0
  29. devflow_engine/errors/error_solver_dag.py +522 -0
  30. devflow_engine/errors/runtime_observability.py +67 -0
  31. devflow_engine/idea/__init__.py +4 -0
  32. devflow_engine/idea/actors.py +481 -0
  33. devflow_engine/idea/agentic.py +465 -0
  34. devflow_engine/idea/analyze.py +93 -0
  35. devflow_engine/idea/devin_chat_dag.py +1 -0
  36. devflow_engine/idea/diff.py +99 -0
  37. devflow_engine/idea/drafts.py +446 -0
  38. devflow_engine/idea/idea_creation_dag.py +643 -0
  39. devflow_engine/idea/ideation_enrichment.py +355 -0
  40. devflow_engine/idea/ideation_enrichment_worker.py +19 -0
  41. devflow_engine/idea/paths.py +28 -0
  42. devflow_engine/idea/promote.py +53 -0
  43. devflow_engine/idea/redaction.py +27 -0
  44. devflow_engine/idea/repo_tools.py +1277 -0
  45. devflow_engine/idea/response_mode.py +30 -0
  46. devflow_engine/idea/story_pipeline.py +1585 -0
  47. devflow_engine/idea/sufficiency.py +376 -0
  48. devflow_engine/idea/traditional_stories.py +1257 -0
  49. devflow_engine/implementation/__init__.py +0 -0
  50. devflow_engine/implementation/alembic_preflight.py +700 -0
  51. devflow_engine/implementation/dag.py +8450 -0
  52. devflow_engine/implementation/green_gate.py +93 -0
  53. devflow_engine/implementation/prompts.py +108 -0
  54. devflow_engine/implementation/test_runtime.py +623 -0
  55. devflow_engine/integration/__init__.py +19 -0
  56. devflow_engine/integration/agentic.py +66 -0
  57. devflow_engine/integration/dag.py +3539 -0
  58. devflow_engine/integration/prompts.py +114 -0
  59. devflow_engine/integration/supabase_schema.sql +31 -0
  60. devflow_engine/integration/supabase_sync.py +177 -0
  61. devflow_engine/llm/__init__.py +1 -0
  62. devflow_engine/llm/cli_one_shot.py +84 -0
  63. devflow_engine/llm/cli_stream.py +371 -0
  64. devflow_engine/llm/execution_context.py +26 -0
  65. devflow_engine/llm/invoke.py +1322 -0
  66. devflow_engine/llm/provider_api.py +304 -0
  67. devflow_engine/llm/repo_knowledge.py +588 -0
  68. devflow_engine/llm_primitives.py +315 -0
  69. devflow_engine/orchestration.py +62 -0
  70. devflow_engine/planning/__init__.py +0 -0
  71. devflow_engine/planning/analyze_repo.py +92 -0
  72. devflow_engine/planning/render_drafts.py +133 -0
  73. devflow_engine/playground/__init__.py +0 -0
  74. devflow_engine/playground/hooks.py +26 -0
  75. devflow_engine/playwright_workflow/__init__.py +5 -0
  76. devflow_engine/playwright_workflow/dag.py +1317 -0
  77. devflow_engine/process/__init__.py +5 -0
  78. devflow_engine/process/dag.py +59 -0
  79. devflow_engine/project_registration/__init__.py +3 -0
  80. devflow_engine/project_registration/dag.py +1581 -0
  81. devflow_engine/project_registry.py +109 -0
  82. devflow_engine/prompts/devin/generic/prompt.md +6 -0
  83. devflow_engine/prompts/devin/ideation/prompt.md +263 -0
  84. devflow_engine/prompts/devin/ideation/scenarios.md +5 -0
  85. devflow_engine/prompts/devin/ideation_loop/prompt.md +6 -0
  86. devflow_engine/prompts/devin/insight/prompt.md +11 -0
  87. devflow_engine/prompts/devin/insight/scenarios.md +5 -0
  88. devflow_engine/prompts/devin/intake/prompt.md +15 -0
  89. devflow_engine/prompts/devin/iterate/prompt.md +12 -0
  90. devflow_engine/prompts/devin/shared/eval_doctrine.md +9 -0
  91. devflow_engine/prompts/devin/shared/principles.md +246 -0
  92. devflow_engine/prompts/devin_eval/assessment/prompt.md +18 -0
  93. devflow_engine/prompts/idea/api_ideation_agent/prompt.md +8 -0
  94. devflow_engine/prompts/idea/api_insight_agent/prompt.md +8 -0
  95. devflow_engine/prompts/idea/response_doctrine/prompt.md +18 -0
  96. devflow_engine/prompts/implementation/dependency_assessment/prompt.md +12 -0
  97. devflow_engine/prompts/implementation/green/green/prompt.md +11 -0
  98. devflow_engine/prompts/implementation/green/node_config/prompt.md +3 -0
  99. devflow_engine/prompts/implementation/green_review/outcome_review/prompt.md +5 -0
  100. devflow_engine/prompts/implementation/green_review/prior_run_review/prompt.md +5 -0
  101. devflow_engine/prompts/implementation/red/prompt.md +27 -0
  102. devflow_engine/prompts/implementation/redreview/prompt.md +23 -0
  103. devflow_engine/prompts/implementation/redreview_repair/prompt.md +16 -0
  104. devflow_engine/prompts/implementation/setupdoc/prompt.md +10 -0
  105. devflow_engine/prompts/implementation/story_planning/prompt.md +13 -0
  106. devflow_engine/prompts/implementation/test_design/prompt.md +27 -0
  107. devflow_engine/prompts/integration/README.md +185 -0
  108. devflow_engine/prompts/integration/green/example.md +67 -0
  109. devflow_engine/prompts/integration/green/green/prompt.md +10 -0
  110. devflow_engine/prompts/integration/green/node_config/prompt.md +42 -0
  111. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/green/prompt.md +15 -0
  112. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/node_config/prompt.md +42 -0
  113. devflow_engine/prompts/integration/green_enrich/example.md +79 -0
  114. devflow_engine/prompts/integration/green_enrich/green_enrich/prompt.md +9 -0
  115. devflow_engine/prompts/integration/green_enrich/node_config/prompt.md +41 -0
  116. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/green_enrich/prompt.md +14 -0
  117. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  118. devflow_engine/prompts/integration/red/code_repair/prompt.md +12 -0
  119. devflow_engine/prompts/integration/red/example.md +152 -0
  120. devflow_engine/prompts/integration/red/node_config/prompt.md +86 -0
  121. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  122. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/node_config/prompt.md +84 -0
  123. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red/prompt.md +16 -0
  124. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red_repair/prompt.md +15 -0
  125. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  126. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/node_config/prompt.md +84 -0
  127. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/red_repair/prompt.md +11 -0
  128. devflow_engine/prompts/integration/red/red/prompt.md +11 -0
  129. devflow_engine/prompts/integration/red/red_repair/prompt.md +12 -0
  130. devflow_engine/prompts/integration/red_review/example.md +71 -0
  131. devflow_engine/prompts/integration/red_review/node_config/prompt.md +41 -0
  132. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  133. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/red_review/prompt.md +15 -0
  134. devflow_engine/prompts/integration/red_review/red_review/prompt.md +9 -0
  135. devflow_engine/prompts/integration/resolve/example.md +111 -0
  136. devflow_engine/prompts/integration/resolve/node_config/prompt.md +64 -0
  137. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/node_config/prompt.md +64 -0
  138. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_implicated_users/prompt.md +15 -0
  139. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_side_effects/prompt.md +15 -0
  140. devflow_engine/prompts/integration/resolve/resolve_implicated_users/prompt.md +10 -0
  141. devflow_engine/prompts/integration/resolve/resolve_side_effects/prompt.md +10 -0
  142. devflow_engine/prompts/integration/validate/build_idea_acceptance_coverage/prompt.md +12 -0
  143. devflow_engine/prompts/integration/validate/code_repair/prompt.md +13 -0
  144. devflow_engine/prompts/integration/validate/example.md +143 -0
  145. devflow_engine/prompts/integration/validate/node_config/prompt.md +87 -0
  146. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  147. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/node_config/prompt.md +67 -0
  148. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_enrich_gate/prompt.md +17 -0
  149. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_repair/prompt.md +16 -0
  150. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  151. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/node_config/prompt.md +67 -0
  152. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/validate_repair/prompt.md +9 -0
  153. devflow_engine/prompts/integration/validate/validate_enrich_gate/prompt.md +10 -0
  154. devflow_engine/prompts/integration/validate/validate_repair/prompt.md +20 -0
  155. devflow_engine/prompts/integration/write_workflows/example.md +100 -0
  156. devflow_engine/prompts/integration/write_workflows/node_config/prompt.md +44 -0
  157. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/node_config/prompt.md +44 -0
  158. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/write_workflows/prompt.md +17 -0
  159. devflow_engine/prompts/integration/write_workflows/write_workflows/prompt.md +11 -0
  160. devflow_engine/prompts/iterate/README.md +7 -0
  161. devflow_engine/prompts/iterate/coder/prompt.md +11 -0
  162. devflow_engine/prompts/iterate/framer/prompt.md +11 -0
  163. devflow_engine/prompts/iterate/iterator/prompt.md +13 -0
  164. devflow_engine/prompts/iterate/observer/prompt.md +11 -0
  165. devflow_engine/prompts/recovery/diagnosis/prompt.md +7 -0
  166. devflow_engine/prompts/recovery/execution/prompt.md +8 -0
  167. devflow_engine/prompts/recovery/execution_verification/prompt.md +7 -0
  168. devflow_engine/prompts/recovery/failure_investigation/prompt.md +10 -0
  169. devflow_engine/prompts/recovery/preflight_health_repo_repair/prompt.md +8 -0
  170. devflow_engine/prompts/recovery/remediation_execution/prompt.md +11 -0
  171. devflow_engine/prompts/recovery/root_cause_investigation/prompt.md +12 -0
  172. devflow_engine/prompts/scope_idea/doctrine/prompt.md +7 -0
  173. devflow_engine/prompts/source_doc_eval/document/prompt.md +6 -0
  174. devflow_engine/prompts/source_doc_eval/targeted_mutation/prompt.md +9 -0
  175. devflow_engine/prompts/source_doc_mutation/domain_entities/prompt.md +6 -0
  176. devflow_engine/prompts/source_doc_mutation/product_brief/prompt.md +6 -0
  177. devflow_engine/prompts/source_doc_mutation/project_doc_coherence/prompt.md +7 -0
  178. devflow_engine/prompts/source_doc_mutation/project_doc_render/prompt.md +9 -0
  179. devflow_engine/prompts/source_doc_mutation/source_doc_coherence/prompt.md +5 -0
  180. devflow_engine/prompts/source_doc_mutation/source_doc_enrichment_coherence/prompt.md +6 -0
  181. devflow_engine/prompts/source_doc_mutation/user_workflows/prompt.md +6 -0
  182. devflow_engine/prompts/source_scope/doctrine/prompt.md +10 -0
  183. devflow_engine/prompts/ui_grounding/doctrine/prompt.md +7 -0
  184. devflow_engine/recovery/__init__.py +3 -0
  185. devflow_engine/recovery/dag.py +2609 -0
  186. devflow_engine/recovery/models.py +220 -0
  187. devflow_engine/refactor.py +93 -0
  188. devflow_engine/registry/__init__.py +1 -0
  189. devflow_engine/registry/cards.py +238 -0
  190. devflow_engine/registry/domain_normalize.py +60 -0
  191. devflow_engine/registry/effects.py +65 -0
  192. devflow_engine/registry/enforce_report.py +150 -0
  193. devflow_engine/registry/module_cards_classify.py +164 -0
  194. devflow_engine/registry/module_cards_draft.py +184 -0
  195. devflow_engine/registry/module_cards_gate.py +59 -0
  196. devflow_engine/registry/packages.py +347 -0
  197. devflow_engine/registry/pathways.py +323 -0
  198. devflow_engine/review/__init__.py +11 -0
  199. devflow_engine/review/dag.py +588 -0
  200. devflow_engine/review/review_story.py +67 -0
  201. devflow_engine/scope_idea/__init__.py +3 -0
  202. devflow_engine/scope_idea/agentic.py +39 -0
  203. devflow_engine/scope_idea/dag.py +1069 -0
  204. devflow_engine/scope_idea/models.py +175 -0
  205. devflow_engine/skills/builtins/devflow/queue_failure_investigation/SKILL.md +112 -0
  206. devflow_engine/skills/builtins/devflow/queue_idea_to_story/SKILL.md +120 -0
  207. devflow_engine/skills/builtins/devflow/queue_integration/SKILL.md +105 -0
  208. devflow_engine/skills/builtins/devflow/queue_recovery/SKILL.md +108 -0
  209. devflow_engine/skills/builtins/devflow/queue_runtime_core/SKILL.md +155 -0
  210. devflow_engine/skills/builtins/devflow/queue_story_implementation/SKILL.md +122 -0
  211. devflow_engine/skills/builtins/devin/idea_to_story_handoff/SKILL.md +120 -0
  212. devflow_engine/skills/builtins/devin/ideation/SKILL.md +168 -0
  213. devflow_engine/skills/builtins/devin/ideation/state-and-phrasing-reference.md +18 -0
  214. devflow_engine/skills/builtins/devin/insight/SKILL.md +22 -0
  215. devflow_engine/skills/registry.example.yaml +42 -0
  216. devflow_engine/source_doc_assumptions.py +291 -0
  217. devflow_engine/source_doc_mutation_dag.py +1606 -0
  218. devflow_engine/source_doc_mutation_eval.py +417 -0
  219. devflow_engine/source_doc_mutation_worker.py +25 -0
  220. devflow_engine/source_docs_schema.py +207 -0
  221. devflow_engine/source_docs_updater.py +309 -0
  222. devflow_engine/source_scope/__init__.py +15 -0
  223. devflow_engine/source_scope/agentic.py +45 -0
  224. devflow_engine/source_scope/dag.py +1626 -0
  225. devflow_engine/source_scope/models.py +177 -0
  226. devflow_engine/stores/__init__.py +0 -0
  227. devflow_engine/stores/execution_store.py +3534 -0
  228. devflow_engine/story/__init__.py +0 -0
  229. devflow_engine/story/contracts.py +160 -0
  230. devflow_engine/story/discovery.py +47 -0
  231. devflow_engine/story/evidence.py +118 -0
  232. devflow_engine/story/hashing.py +27 -0
  233. devflow_engine/story/implemented_queue_purge.py +148 -0
  234. devflow_engine/story/indexer.py +105 -0
  235. devflow_engine/story/io.py +20 -0
  236. devflow_engine/story/markdown_contracts.py +298 -0
  237. devflow_engine/story/reconciliation.py +408 -0
  238. devflow_engine/story/validate_stories.py +149 -0
  239. devflow_engine/story/validate_tests_story.py +512 -0
  240. devflow_engine/story/validation.py +133 -0
  241. devflow_engine/ui_grounding/__init__.py +11 -0
  242. devflow_engine/ui_grounding/agentic.py +31 -0
  243. devflow_engine/ui_grounding/dag.py +874 -0
  244. devflow_engine/ui_grounding/models.py +224 -0
  245. devflow_engine/ui_grounding/pencil_bridge.py +247 -0
  246. devflow_engine/vendor/__init__.py +0 -0
  247. devflow_engine/vendor/datalumina_genai/__init__.py +11 -0
  248. devflow_engine/vendor/datalumina_genai/core/__init__.py +0 -0
  249. devflow_engine/vendor/datalumina_genai/core/exceptions.py +9 -0
  250. devflow_engine/vendor/datalumina_genai/core/nodes/__init__.py +0 -0
  251. devflow_engine/vendor/datalumina_genai/core/nodes/agent.py +48 -0
  252. devflow_engine/vendor/datalumina_genai/core/nodes/agent_streaming_node.py +26 -0
  253. devflow_engine/vendor/datalumina_genai/core/nodes/base.py +89 -0
  254. devflow_engine/vendor/datalumina_genai/core/nodes/concurrent.py +30 -0
  255. devflow_engine/vendor/datalumina_genai/core/nodes/router.py +69 -0
  256. devflow_engine/vendor/datalumina_genai/core/schema.py +72 -0
  257. devflow_engine/vendor/datalumina_genai/core/task.py +52 -0
  258. devflow_engine/vendor/datalumina_genai/core/validate.py +139 -0
  259. devflow_engine/vendor/datalumina_genai/core/workflow.py +200 -0
  260. devflow_engine/worker.py +1086 -0
  261. devflow_engine/worker_guard.py +233 -0
  262. devflow_engine-1.0.0.dist-info/METADATA +235 -0
  263. devflow_engine-1.0.0.dist-info/RECORD +393 -0
  264. devflow_engine-1.0.0.dist-info/WHEEL +4 -0
  265. devflow_engine-1.0.0.dist-info/entry_points.txt +3 -0
  266. devin/__init__.py +6 -0
  267. devin/dag.py +58 -0
  268. devin/dag_two_arm.py +138 -0
  269. devin/devin_chat_scenario_catalog.json +588 -0
  270. devin/devin_eval.py +677 -0
  271. devin/nodes/__init__.py +0 -0
  272. devin/nodes/ideation/__init__.py +0 -0
  273. devin/nodes/ideation/node.py +195 -0
  274. devin/nodes/ideation/playground.py +267 -0
  275. devin/nodes/ideation/prompt.md +65 -0
  276. devin/nodes/ideation/scenarios/continue_refinement.py +13 -0
  277. devin/nodes/ideation/scenarios/continue_refinement_evals.py +18 -0
  278. devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py +17 -0
  279. devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py +16 -0
  280. devin/nodes/ideation/scenarios/large_idea_split.py +4 -0
  281. devin/nodes/ideation/scenarios/large_idea_split_evals.py +17 -0
  282. devin/nodes/ideation/scenarios/source_documentation_added.py +4 -0
  283. devin/nodes/ideation/scenarios/source_documentation_added_evals.py +16 -0
  284. devin/nodes/ideation/scenarios/user_says_create_it.py +30 -0
  285. devin/nodes/ideation/scenarios/user_says_create_it_evals.py +23 -0
  286. devin/nodes/ideation/scenarios/vague_idea.py +16 -0
  287. devin/nodes/ideation/scenarios/vague_idea_evals.py +47 -0
  288. devin/nodes/ideation/tools.json +312 -0
  289. devin/nodes/insight/__init__.py +0 -0
  290. devin/nodes/insight/node.py +49 -0
  291. devin/nodes/insight/playground.py +154 -0
  292. devin/nodes/insight/prompt.md +61 -0
  293. devin/nodes/insight/scenarios/architecture_pattern_query.py +15 -0
  294. devin/nodes/insight/scenarios/architecture_pattern_query_evals.py +25 -0
  295. devin/nodes/insight/scenarios/codebase_exploration.py +15 -0
  296. devin/nodes/insight/scenarios/codebase_exploration_evals.py +23 -0
  297. devin/nodes/insight/scenarios/devin_ideation_routing.py +19 -0
  298. devin/nodes/insight/scenarios/devin_ideation_routing_evals.py +39 -0
  299. devin/nodes/insight/scenarios/devin_insight_routing.py +20 -0
  300. devin/nodes/insight/scenarios/devin_insight_routing_evals.py +40 -0
  301. devin/nodes/insight/scenarios/operational_debugging.py +15 -0
  302. devin/nodes/insight/scenarios/operational_debugging_evals.py +23 -0
  303. devin/nodes/insight/scenarios/operational_question.py +9 -0
  304. devin/nodes/insight/scenarios/operational_question_evals.py +8 -0
  305. devin/nodes/insight/scenarios/queue_status.py +15 -0
  306. devin/nodes/insight/scenarios/queue_status_evals.py +23 -0
  307. devin/nodes/insight/scenarios/source_doc_explanation.py +14 -0
  308. devin/nodes/insight/scenarios/source_doc_explanation_evals.py +21 -0
  309. devin/nodes/insight/scenarios/worker_state_check.py +15 -0
  310. devin/nodes/insight/scenarios/worker_state_check_evals.py +22 -0
  311. devin/nodes/insight/tools.json +126 -0
  312. devin/nodes/intake/__init__.py +0 -0
  313. devin/nodes/intake/node.py +27 -0
  314. devin/nodes/intake/playground.py +47 -0
  315. devin/nodes/intake/prompt.md +12 -0
  316. devin/nodes/intake/scenarios/ideation_routing.py +4 -0
  317. devin/nodes/intake/scenarios/ideation_routing_evals.py +5 -0
  318. devin/nodes/intake/scenarios/insight_routing.py +4 -0
  319. devin/nodes/intake/scenarios/insight_routing_evals.py +5 -0
  320. devin/nodes/iterate/README.md +44 -0
  321. devin/nodes/iterate/__init__.py +1 -0
  322. devin/nodes/iterate/_archived_design_stages/01-objectives-requirements.md +112 -0
  323. devin/nodes/iterate/_archived_design_stages/02-evals.md +131 -0
  324. devin/nodes/iterate/_archived_design_stages/03-tools-and-boundaries.md +110 -0
  325. devin/nodes/iterate/_archived_design_stages/04-harness-and-playground.md +32 -0
  326. devin/nodes/iterate/_archived_design_stages/05-prompt-deferred.md +11 -0
  327. devin/nodes/iterate/_archived_design_stages/coder_agent_design/01-objectives-requirements.md +20 -0
  328. devin/nodes/iterate/_archived_design_stages/coder_agent_design/02-evals.md +8 -0
  329. devin/nodes/iterate/_archived_design_stages/coder_agent_design/03-tools-and-boundaries.md +14 -0
  330. devin/nodes/iterate/_archived_design_stages/coder_agent_design/04-harness-and-playground.md +12 -0
  331. devin/nodes/iterate/_archived_design_stages/framer_agent_design/01-objectives-requirements.md +20 -0
  332. devin/nodes/iterate/_archived_design_stages/framer_agent_design/02-evals.md +8 -0
  333. devin/nodes/iterate/_archived_design_stages/framer_agent_design/03-tools-and-boundaries.md +13 -0
  334. devin/nodes/iterate/_archived_design_stages/framer_agent_design/04-harness-and-playground.md +12 -0
  335. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md +25 -0
  336. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/02-evals.md +9 -0
  337. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/03-tools-and-boundaries.md +14 -0
  338. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md +12 -0
  339. devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md +20 -0
  340. devin/nodes/iterate/_archived_design_stages/observer_agent_design/02-evals.md +8 -0
  341. devin/nodes/iterate/_archived_design_stages/observer_agent_design/03-tools-and-boundaries.md +14 -0
  342. devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md +13 -0
  343. devin/nodes/iterate/agent-roles.md +89 -0
  344. devin/nodes/iterate/agents/README.md +10 -0
  345. devin/nodes/iterate/artifacts.md +504 -0
  346. devin/nodes/iterate/contract.md +100 -0
  347. devin/nodes/iterate/eval-plan.md +74 -0
  348. devin/nodes/iterate/node.py +100 -0
  349. devin/nodes/iterate/pipeline/README.md +13 -0
  350. devin/nodes/iterate/playground-contract.md +76 -0
  351. devin/nodes/iterate/prompt.md +11 -0
  352. devin/nodes/iterate/scenarios/README.md +38 -0
  353. devin/nodes/iterate/scenarios/artifact-and-loop-scenarios.md +101 -0
  354. devin/nodes/iterate/scenarios/coder_artifact_alignment.py +32 -0
  355. devin/nodes/iterate/scenarios/coder_artifact_alignment_evals.py +45 -0
  356. devin/nodes/iterate/scenarios/coder_bounded_fix.py +27 -0
  357. devin/nodes/iterate/scenarios/coder_bounded_fix_evals.py +45 -0
  358. devin/nodes/iterate/scenarios/devin_iterate_routing.py +21 -0
  359. devin/nodes/iterate/scenarios/devin_iterate_routing_evals.py +36 -0
  360. devin/nodes/iterate/scenarios/framer_scope_boundary.py +25 -0
  361. devin/nodes/iterate/scenarios/framer_scope_boundary_evals.py +57 -0
  362. devin/nodes/iterate/scenarios/framer_task_framing.py +25 -0
  363. devin/nodes/iterate/scenarios/framer_task_framing_evals.py +58 -0
  364. devin/nodes/iterate/scenarios/iterate_error_fix.py +21 -0
  365. devin/nodes/iterate/scenarios/iterate_error_fix_evals.py +39 -0
  366. devin/nodes/iterate/scenarios/iterate_quick_change.py +21 -0
  367. devin/nodes/iterate/scenarios/iterate_quick_change_evals.py +35 -0
  368. devin/nodes/iterate/scenarios/iterate_to_idea_promotion.py +23 -0
  369. devin/nodes/iterate/scenarios/iterate_to_idea_promotion_evals.py +53 -0
  370. devin/nodes/iterate/scenarios/iterate_to_insight_reroute.py +23 -0
  371. devin/nodes/iterate/scenarios/iterate_to_insight_reroute_evals.py +53 -0
  372. devin/nodes/iterate/scenarios/observer_evidence_seam.py +28 -0
  373. devin/nodes/iterate/scenarios/observer_evidence_seam_evals.py +55 -0
  374. devin/nodes/iterate/scenarios/observer_repro_creation.py +28 -0
  375. devin/nodes/iterate/scenarios/observer_repro_creation_evals.py +45 -0
  376. devin/nodes/iterate/scenarios/routing-matrix.md +45 -0
  377. devin/nodes/shared/__init__.py +0 -0
  378. devin/nodes/shared/filemaker_expert.md +80 -0
  379. devin/nodes/shared/filemaker_expert.py +354 -0
  380. devin/nodes/shared/filemaker_expert_eval/runner.py +176 -0
  381. devin/nodes/shared/filemaker_expert_eval/scenarios.json +65 -0
  382. devin/nodes/shared/goldilocks_advisor_eval/runner.py +214 -0
  383. devin/nodes/shared/goldilocks_advisor_eval/scenarios.json +58 -0
  384. devin/nodes/shared/helpers.py +156 -0
  385. devin/nodes/shared/idea_compliance_advisor_eval/runner.py +252 -0
  386. devin/nodes/shared/idea_compliance_advisor_eval/scenarios.json +75 -0
  387. devin/nodes/shared/models.py +44 -0
  388. devin/nodes/shared/post.py +40 -0
  389. devin/nodes/shared/router.py +107 -0
  390. devin/nodes/shared/tools.py +191 -0
  391. devin/shared/devin-chat-rubric.md +237 -0
  392. devin/shared/devin-chat-scenario-suite.md +90 -0
  393. devin/shared/eval_doctrine.md +9 -0
@@ -0,0 +1,61 @@
1
+ # Devin Insight Agent
2
+
3
+ You are Devin, a senior software engineer working on Devflow, and you fulfill the role of the **System Investigator** — the read-only arm of the pipeline. You answer questions about the codebase, inspect devflow system state, and explain how things work.
4
+
5
+ ## Your role
6
+
7
+ You do not shape ideas or move work forward. You observe, inspect, and explain. When asked about queues, workers, code, or architecture, you use DevFlow tools to get real data and return grounded answers.
8
+
9
+ ## Repository access
10
+
11
+ When asked about the codebase, project, or anything requiring file-level knowledge:
12
+ 1. Call `devflow_read_project_config` with the `project_id` from the context to get the repo root path
13
+ 2. Read files from the repo directly — the repo root is the canonical location for all project source and documentation
14
+ 3. Project documentation lives under `ai_docs/` in the repo root (source docs in `ai_docs/context/source_docs/`, derived docs in `ai_docs/context/project_docs/`)
15
+
16
+ ## FileMaker database questions
17
+
18
+ When the user asks about FileMaker database structure, layouts, scripts, entities, or workflows:
19
+
20
+ 1. Call `filemaker_expert(question, repo_root, database_name?)` — this spawns a read-only PI subprocess (the **Advisor** in the Pi-Pi pattern) that reads pre-existing DDR analysis artifacts and returns a grounded report
21
+ 2. Pass `repo_root` as `context.repo_root` from the context payload; pass `database_name` if the user names a specific FileMaker file
22
+ 3. The expert does NOT run ddr-docs — it only reads artifacts that already exist in `ai_docs/context/source_docs/ddr/<database>/`
23
+ 4. Synthesize the expert's response into your answer — do not just hand it raw to the user
24
+
25
+ ## Turn behavior
26
+
27
+ - `current_user_message` is authoritative — answer what is asked, not what was previously top-of-mind
28
+ - Use devflow tools to inspect actual state — do not speculate about what you think is happening
29
+ - If you cannot determine the answer from available data, say so directly — do not invent a plausible-sounding explanation
30
+ - Give specific, grounded answers: cite file paths, queue names, worker states, error messages
31
+ - Answer in proportion to the question: a short question gets a concise answer; a complex one gets a thorough one
32
+
33
+ ## Real-time feedback (emit tools)
34
+
35
+ The following tools write real-time UI feedback to the session. Use them around devflow inspection calls:
36
+
37
+ - `Emit_Start_Working(activity?, session_id)` — call before starting a devflow call to show activity
38
+ - `Emit_Stop_Working(activity?, session_id)` — call after the call completes
39
+ - `Emit_Response(message, emit_type, session_id)` — mid-turn progress or info messages
40
+
41
+ The `session_id` is provided in the context payload as `context.session_id`. Use it exactly as provided.
42
+
43
+ ## Response contract
44
+
45
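+ Return a JSON object with these exact keys (the `//` annotations in the example below are explanatory only — do not include comments in your output):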
46
+
47
+ ```json
48
+ {
49
+ "response_message": "...", // concise, grounded reply
50
+ "response_kind": "...", // insight_response | operational_alert | needs_context
51
+ "suggested_next_step": "...", // what to do next if applicable
52
+ "follow_up_questions": [], // at most ONE if something is genuinely unclear
53
+ "style_notes": []
54
+ }
55
+ ```
56
+
57
+ ## Response kind rules
58
+
59
+ - `insight_response` — answered the question with grounded information
60
+ - `operational_alert` — something needs immediate attention (e.g., worker stuck, queue blocked)
61
+ - `needs_context` — question cannot be answered without more context; ask one targeted question
@@ -0,0 +1,15 @@
1
+ SCENARIO_NAME = 'architecture_pattern_query'
2
+ SCENARIO_DESCRIPTION = 'User asks how the system handles a specific architectural pattern (e.g., retry, backoff, caching).'
3
+ INPUT_PAYLOAD = {
4
+ 'current_user_message': 'How does the worker system handle retry and backoff when a job fails?',
5
+ 'idea_id': 'proj_75f63d30_insight_arch_001',
6
+ 'project_id': 'proj_75f63d30',
7
+ 'repo_root': '/Users/devflow/repos/Spicy-Server',
8
+ }
9
+ EXPECTED_BEHAVIOR = {
10
+ 'response_kind': 'insight_response',
11
+ 'explains_architecture': True,
12
+ 'references_specific_implementation': True,
13
+ 'uses_devflow_tools': True,
14
+ 'no_speculation': True,
15
+ }
@@ -0,0 +1,25 @@
1
+ EVAL_CRITERIA = {
2
+ 'response_kind_must_be': 'insight_response',
3
+ 'explains_architecture': True,
4
+ 'references_specific_implementation': True,
5
+ 'uses_devflow_tools': True,
6
+ 'no_speculation': True,
7
+ }
8
+
9
+
10
def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Score an agent reply for the architecture-pattern-query scenario.

    Args:
        actual_output: Response payload produced by the agent. Only the
            ``response_kind`` and ``response_message`` keys are read.

    Returns:
        A ``(ok, notes)`` pair. ``ok`` is False only when ``response_kind``
        is not ``'insight_response'``. ``notes`` collects every finding; the
        note about missing retry/backoff specifics is advisory and does not
        change ``ok``.
    """
    ok = True
    notes = []
    msg = str(actual_output.get('response_message', '')).lower()

    kind = actual_output.get('response_kind')
    if kind != 'insight_response':
        ok = False
        notes.append(f"expected insight_response, got {kind}")

    # Should explain the architecture, not generic "it retries".
    # 'retry' itself is intentionally NOT one of the specifics: merely
    # repeating the word from the question is not an explanation, and
    # including it made the previous check impossible to satisfy.
    specifics = ('backoff', 'attempt', 'max', 'delay', 'exponential', 'circuit')
    if not any(word in msg for word in specifics):
        notes.append('does not explain the retry/backoff pattern specifically')

    return ok, notes
@@ -0,0 +1,15 @@
1
# Scenario fixture: the user asks how a specific part of the codebase works.
SCENARIO_NAME = "codebase_exploration"
SCENARIO_DESCRIPTION = "User asks how a specific part of the codebase works."

# Input payload for this scenario.
INPUT_PAYLOAD = dict(
    current_user_message="How does the auth middleware handle token validation?",
    idea_id="proj_75f63d30_insight_codebase_001",
    project_id="proj_75f63d30",
    repo_root="/Users/devflow/repos/Spicy-Server",
)

# Properties a good response is expected to exhibit.
EXPECTED_BEHAVIOR = dict(
    response_kind="insight_response",
    references_specific_files=True,
    explains_flow=True,
    uses_devflow_tools=True,
    no_speculation=True,
)
@@ -0,0 +1,23 @@
1
# Declarative summary of the criteria for this scenario.
# NOTE(review): evaluate() in this module does not read this mapping.
EVAL_CRITERIA = dict(
    response_kind_must_be="insight_response",
    references_specific_files=True,
    explains_flow=True,
    uses_devflow_tools=True,
    no_speculation=True,
)
8
+
9
+
10
def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Score a response for the codebase-exploration scenario.

    The scenario fails only when ``response_kind`` is not
    ``'insight_response'``. A message that mentions none of the codebase
    markers below adds an advisory note without failing.

    Returns:
        ``(passed, findings)``
    """
    findings = []

    kind = actual_output.get('response_kind')
    passed = kind == 'insight_response'
    if not passed:
        findings.append(f"expected insight_response, got {kind}")

    # Substring markers suggesting the answer points at concrete code.
    text = str(actual_output.get('response_message', '')).lower()
    markers = ('file', 'middleware', 'auth', 'src/', 'handler', 'function')
    for marker in markers:
        if marker in text:
            break
    else:
        findings.append('does not reference specific codebase elements')

    return passed, findings
@@ -0,0 +1,19 @@
1
# Scenario fixture: a forward-looking feature request that should take the ideation arm.
SCENARIO_NAME = 'devin_ideation_routing'
SCENARIO_DESCRIPTION = ' '.join([
    'Devin two-arm DAG: forward-looking feature request routes to ideation arm',
    'and the agent produces an ideation-contract response.',
])

# Input payload for this scenario.
INPUT_PAYLOAD = dict(
    current_user_message='Build a client portal that lets staff triage support requests and track approvals.',
    idea_id='devin_eval_ideation_routing',
    project_id='proj_75f63d30',
    repo_root='/Users/devflow/repos/Spicy-Server',
)

# Expected routing decision and response shape.
EXPECTED_BEHAVIOR = dict(
    route_arm='ideation',
    response_kind_in=['ideation_contract_response', 'ready_for_downstream'],
    response_message_nonempty=True,
    no_speculation=True,
)
@@ -0,0 +1,39 @@
1
+ from devin.nodes.insight.scenarios.devin_ideation_routing import (
2
+ EXPECTED_BEHAVIOR,
3
+ INPUT_PAYLOAD,
4
+ SCENARIO_NAME,
5
+ )
6
+
7
# Criteria summary derived from the scenario's EXPECTED_BEHAVIOR, so the
# routing arm and the allowed response kinds are defined in one place.
EVAL_CRITERIA = dict(
    route_arm_must_equal=EXPECTED_BEHAVIOR["route_arm"],
    response_kind_in=EXPECTED_BEHAVIOR["response_kind_in"],
    response_message_nonempty=True,
    no_speculation=True,
)
13
+
14
+
15
def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Check an actual output against this scenario's ``EXPECTED_BEHAVIOR``.

    A wrong ``route_arm``, an empty ``response_message`` or a
    ``response_kind`` outside the allowed list fails the scenario.
    Speculative wording only adds a note.

    Returns:
        ``(passed, notes)``
    """
    expected = EXPECTED_BEHAVIOR
    passed = True
    notes = []

    arm = actual_output.get("route_arm")
    if arm != expected["route_arm"]:
        passed = False
        notes.append(f"expected route_arm={expected['route_arm']}, got {arm}")

    text = str(actual_output.get("response_message") or "").strip()
    if text:
        # Advisory only: hedging language does not flip the pass flag.
        if expected.get("no_speculation"):
            lowered = text.lower()
            hedges = ("i think", "likely", "probably", "might be")
            if any(hedge in lowered for hedge in hedges):
                notes.append("response contains speculative language")
    else:
        passed = False
        notes.append("response_message is empty")

    kind = actual_output.get("response_kind", "")
    if kind not in expected["response_kind_in"]:
        passed = False
        notes.append(f"expected response_kind in {expected['response_kind_in']}, got {kind}")

    return passed, notes
@@ -0,0 +1,20 @@
1
# Scenario fixture: an operational/project question that should take the insight arm.
SCENARIO_NAME = 'devin_insight_routing'
SCENARIO_DESCRIPTION = ' '.join([
    'Devin two-arm DAG: operational/project question routes to insight arm',
    'and the agent produces a grounded insight response.',
])

# Input payload for this scenario.
INPUT_PAYLOAD = dict(
    current_user_message='What stories are currently queued for this project?',
    idea_id='devin_eval_insight_routing',
    project_id='proj_75f63d30',
    repo_root='/Users/devflow/repos/Spicy-Server',
)

# Expected routing decision and response shape.
EXPECTED_BEHAVIOR = dict(
    route_arm='insight',
    response_kind_in=['insight_response', 'redirect'],
    response_message_nonempty=True,
    references_queue_counts=True,
    no_speculation=True,
)
@@ -0,0 +1,40 @@
1
+ from devin.nodes.insight.scenarios.devin_insight_routing import (
2
+ EXPECTED_BEHAVIOR,
3
+ INPUT_PAYLOAD,
4
+ SCENARIO_NAME,
5
+ )
6
+
7
# Criteria summary derived from the scenario's EXPECTED_BEHAVIOR, so the
# routing arm and the allowed response kinds are defined in one place.
EVAL_CRITERIA = dict(
    route_arm_must_equal=EXPECTED_BEHAVIOR["route_arm"],
    response_kind_in=EXPECTED_BEHAVIOR["response_kind_in"],
    response_message_nonempty=True,
    references_queue_counts=True,
    no_speculation=True,
)
14
+
15
+
16
def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Check an actual output against this scenario's ``EXPECTED_BEHAVIOR``.

    Args:
        actual_output: Mapping produced by the scenario run. ``route_arm``,
            ``response_message`` and ``response_kind`` are read.

    Returns:
        ``(ok, notes)``. ``ok`` becomes False on a wrong ``route_arm``, an
        empty ``response_message`` or a ``response_kind`` outside the
        allowed list. Speculative wording only adds a note.
    """
    ok = True
    notes: list[str] = []
    exp = EXPECTED_BEHAVIOR

    route_arm = actual_output.get("route_arm")
    if route_arm != exp["route_arm"]:
        ok = False
        notes.append(f"expected route_arm={exp['route_arm']}, got {route_arm}")

    msg = str(actual_output.get("response_message") or "").strip()
    if not msg:
        ok = False
        notes.append("response_message is empty")
    elif exp.get("no_speculation"):
        # Tokens must be lowercase because they are matched against the
        # lowercased message; the previous "I think" could never match.
        lowered = msg.lower()
        if any(tok in lowered for tok in ("i think", "likely", "probably", "might be")):
            notes.append("response contains speculative language")

    kind = actual_output.get("response_kind", "")
    if kind not in exp["response_kind_in"]:
        ok = False
        notes.append(f"expected response_kind in {exp['response_kind_in']}, got {kind}")

    return ok, notes
@@ -0,0 +1,15 @@
1
# Scenario fixture: the user asks why a recent operation failed.
SCENARIO_NAME = "operational_debugging"
SCENARIO_DESCRIPTION = (
    "User asks why something happened or what went wrong with a recent operation."
)

# Input payload for this scenario.
INPUT_PAYLOAD = dict(
    current_user_message="Why did the last story fail? What went wrong?",
    idea_id="proj_75f63d30_insight_debug_001",
    project_id="proj_75f63d30",
    repo_root="/Users/devflow/repos/Spicy-Server",
)

# Properties a good response is expected to exhibit.
EXPECTED_BEHAVIOR = dict(
    response_kind="insight_response",
    cites_specific_failure_reason=True,
    uses_run_logs=True,
    gives_actionable_guidance=True,
    no_speculation=True,
)
@@ -0,0 +1,23 @@
1
# Declarative summary of the criteria for this scenario.
# NOTE(review): evaluate() in this module does not read this mapping, and it
# also accepts 'redirect' and 'needs_context' as response kinds.
EVAL_CRITERIA = dict(
    response_kind_must_be="insight_response",
    cites_specific_failure_reason=True,
    uses_run_logs=True,
    gives_actionable_guidance=True,
    no_speculation=True,
)
8
+
9
+
10
def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Score a response for the operational-debugging scenario.

    The scenario fails only when ``response_kind`` is outside the accepted
    kinds. A message containing an "I can't tell" style phrase adds an
    advisory note without failing.

    Returns:
        ``(passed, remarks)``
    """
    accepted_kinds = ('insight_response', 'redirect', 'needs_context')
    # Phrases that signal the answer avoided naming a failure reason.
    evasions = ("don't know", 'unable to', 'cannot determine')

    remarks = []
    text = str(actual_output.get('response_message', '')).lower()

    kind = actual_output.get('response_kind')
    passed = kind in accepted_kinds
    if not passed:
        remarks.append(f"expected insight_response/redirect/needs_context, got {kind}")

    if any(phrase in text for phrase in evasions):
        remarks.append('response does not cite a specific failure reason')

    return passed, remarks
@@ -0,0 +1,9 @@
1
# Scenario fixture: an operator-style question about current runtime state.
SCENARIO_NAME = "operational_question"
SCENARIO_DESCRIPTION = (
    "Answers an operator-style runtime question without drifting into ideation."
)

# Input payload for this scenario.
INPUT_PAYLOAD = dict(
    current_user_message="Is the story worker currently active? What is it doing right now?",
    idea_id="proj_75f63d30_insight_op_question_001",
    project_id="proj_75f63d30",
    repo_root="/Users/devflow/repos/Spicy-Server",
)

# Expected response shape.
EXPECTED_BEHAVIOR = dict(response_kind="redirect", response_message_nonempty=True)
@@ -0,0 +1,8 @@
1
# Declarative criterion for this scenario.
# NOTE(review): evaluate() in this module also accepts 'insight_response'.
EVAL_CRITERIA = dict(response_kind_must_equal="redirect")
2
+
3
def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Pass when the response kind is acceptable and the message is non-empty.

    ``insight_response`` is accepted alongside ``redirect`` so that a run
    whose tools failed, but where the model still answered, is not failed.

    Returns:
        ``(True, [])`` on success, otherwise ``(False, [reason])``.
    """
    kind = actual_output.get('response_kind')
    text = str(actual_output.get('response_message') or '').strip()

    if kind in ('redirect', 'insight_response') and text:
        return True, []
    return False, [f'expected redirect or insight_response with message, got {kind}']
@@ -0,0 +1,15 @@
1
# Scenario fixture: the user asks for an overview of queued and in-progress work.
SCENARIO_NAME = "queue_status"
SCENARIO_DESCRIPTION = (
    "User asks for a high-level view of what is queued, in progress, "
    "and completed across the project."
)

# Input payload for this scenario.
INPUT_PAYLOAD = dict(
    current_user_message="What stories are queued and what is currently in progress?",
    idea_id="proj_75f63d30_insight_queuestatus",
    project_id="proj_75f63d30",
    repo_root="/Users/devflow/repos/Spicy-Server",
)

# Properties a good response is expected to exhibit.
EXPECTED_BEHAVIOR = dict(
    response_kind="insight_response",
    references_queue_counts=True,
    uses_devflow_tools=True,
    no_speculation=True,
    direct_answer=True,
)
@@ -0,0 +1,23 @@
1
# Declarative summary of the criteria for this scenario.
# NOTE(review): evaluate() in this module does not read this mapping, and it
# also accepts 'redirect' as a response kind.
EVAL_CRITERIA = dict(
    response_kind_must_be="insight_response",
    references_queue_counts=True,
    uses_devflow_tools=True,
    no_speculation=True,
    direct_answer=True,
)
8
+
9
+
10
def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Score a response for the queue-status scenario.

    The scenario fails only when ``response_kind`` is neither
    ``'insight_response'`` nor ``'redirect'``. A message with no queue or
    story vocabulary adds an advisory note without failing.

    Returns:
        ``(passed, remarks)``
    """
    remarks = []
    text = str(actual_output.get('response_message', '')).lower()

    kind = actual_output.get('response_kind')
    passed = kind in ('insight_response', 'redirect')
    if not passed:
        remarks.append(f"expected insight_response or redirect, got {kind}")

    # Substring vocabulary suggesting the answer reports real queue state.
    vocabulary = ('queue', 'story', 'queued', 'in_progress', 'worker', 'running')
    hits = [term for term in vocabulary if term in text]
    if not hits:
        remarks.append('does not reference queue/story state')

    return passed, remarks
@@ -0,0 +1,14 @@
1
# Scenario fixture: the user asks what the source documentation says about a subsystem.
SCENARIO_NAME = "source_doc_explanation"
SCENARIO_DESCRIPTION = (
    "User asks what the source documentation says about a specific system or subsystem."
)

# Input payload for this scenario.
INPUT_PAYLOAD = dict(
    current_user_message="What does the source documentation say about the client portal notification system?",
    idea_id="proj_75f63d30_insight_srcdoc_001",
    project_id="proj_75f63d30",
    repo_root="/Users/devflow/repos/Spicy-Server",
)

# Properties a good response is expected to exhibit.
EXPECTED_BEHAVIOR = dict(
    response_kind="insight_response",
    references_source_docs=True,
    no_speculation=True,
    uses_devflow_tools=True,
)
@@ -0,0 +1,21 @@
1
# Declarative summary of the criteria for this scenario.
# NOTE(review): evaluate() in this module does not read this mapping, and it
# also accepts 'redirect' as a response kind.
EVAL_CRITERIA = dict(
    response_kind_must_be="insight_response",
    references_source_docs=True,
    no_speculation=True,
    uses_devflow_tools=True,
)
7
+
8
+
9
def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Score a response for the source-doc-explanation scenario.

    The scenario fails only when ``response_kind`` is neither
    ``'insight_response'`` nor ``'redirect'``. A message with none of the
    topic words below adds an advisory note without failing.

    Returns:
        ``(passed, remarks)``
    """
    remarks = []
    text = str(actual_output.get('response_message', '')).lower()

    kind = actual_output.get('response_kind')
    passed = kind in ('insight_response', 'redirect')
    if not passed:
        remarks.append(f"expected insight_response or redirect, got {kind}")

    # Substring topic words tied to the question asked in this scenario.
    topic_words = ('source', 'doc', 'portal', 'notification', 'system')
    on_topic = False
    for word in topic_words:
        if word in text:
            on_topic = True
            break
    if not on_topic:
        remarks.append('does not reference source docs')

    return passed, remarks
@@ -0,0 +1,15 @@
1
# Scenario fixture: the user asks what the workers are currently doing.
SCENARIO_NAME = "worker_state_check"
SCENARIO_DESCRIPTION = "User asks what is currently running on the worker side."

# Input payload for this scenario.
INPUT_PAYLOAD = dict(
    current_user_message="What are the workers doing right now?",
    idea_id="proj_75f63d30_insight_worker_001",
    project_id="proj_75f63d30",
    repo_root="/Users/devflow/repos/Spicy-Server",
)

# Properties a good response is expected to exhibit.
EXPECTED_BEHAVIOR = dict(
    response_kind="insight_response",
    references_active_workers=True,
    uses_devflow_tools=True,
    no_speculation=True,
    direct_answer=True,
)
@@ -0,0 +1,22 @@
1
# Declarative summary of the criteria for this scenario.
# NOTE(review): evaluate() in this module does not read this mapping.
EVAL_CRITERIA = dict(
    response_kind_must_be="insight_response",
    references_active_workers=True,
    uses_devflow_tools=True,
    no_speculation=True,
    direct_answer=True,
)
8
+
9
+
10
def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Score a response for the worker-state-check scenario.

    The scenario fails only when ``response_kind`` is not
    ``'insight_response'``. A message with no worker-state vocabulary adds
    an advisory note without failing.

    Returns:
        ``(passed, remarks)``
    """
    remarks = []
    text = str(actual_output.get('response_message', '')).lower()

    kind = actual_output.get('response_kind')
    passed = kind == 'insight_response'
    if not passed:
        remarks.append(f"expected insight_response, got {kind}")

    # Substring vocabulary suggesting the answer describes worker activity.
    vocabulary = ('worker', 'running', 'idle', 'active', 'process', 'pid')
    if all(term not in text for term in vocabulary):
        remarks.append('does not reference worker state')

    return passed, remarks
@@ -0,0 +1,126 @@
1
+ {
2
+ "tools": [
3
+ {
4
+ "name": "devflow_read_project_config",
5
+ "description": "Read the DevFlow project configuration for a given project_id. Returns project metadata: name, id, repo location (local path and upstream URL), start commands, preview URL, and production URL. Args: project_id (string, required)",
6
+ "parameters": {
7
+ "type": "object",
8
+ "properties": {
9
+ "project_id": {
10
+ "type": "string",
11
+ "description": "The DevFlow project ID (e.g. proj_75f63d30)"
12
+ }
13
+ },
14
+ "required": ["project_id"]
15
+ }
16
+ },
17
+ {
18
+ "name": "devflow_read_queue_summary",
19
+ "description": "Read a summary of all DevFlow queues for a project. Returns queue counts by status for: idea, story, implementation, integration, recovery, and error queues. Args: project_id (string, required)",
20
+ "parameters": {
21
+ "type": "object",
22
+ "properties": {
23
+ "project_id": {
24
+ "type": "string",
25
+ "description": "The DevFlow project ID"
26
+ }
27
+ },
28
+ "required": ["project_id"]
29
+ }
30
+ },
31
+ {
32
+ "name": "devflow_read_worker_state",
33
+ "description": "Read the current DevFlow worker state for a project. Returns active workers and what they are working on, plus a summary of current or last active run logs. If the last run was a recovery, includes recovery and failed run logs. Args: project_id (string, required)",
34
+ "parameters": {
35
+ "type": "object",
36
+ "properties": {
37
+ "project_id": {
38
+ "type": "string",
39
+ "description": "The DevFlow project ID"
40
+ }
41
+ },
42
+ "required": ["project_id"]
43
+ }
44
+ },
45
+ {
46
+ "name": "emit_start_working",
47
+ "description": "Start real-time working feedback for the user. Registers a single row on start and updates it every 2 seconds with rotating verbs until emit_stop_working is called. Multiple sessions can be active simultaneously. Args: activity (string, optional — initial verb; auto-selected if omitted), session_id (string, required — use the session_id from context.session_id exactly as provided)",
48
+ "parameters": {
49
+ "type": "object",
50
+ "properties": {
51
+ "activity": {
52
+ "type": "string",
53
+ "description": "Initial activity verb (e.g. running, reviewing, shaping). Auto-selected from 50 verbs if omitted. Available verbs: running, reviewing, shaping, crafting, exploring, analyzing, mapping, building, checking, preparing, processing, designing, configuring, organizing, sequencing, implementing, validating, verifying, tracing, scoping, estimating, integrating, testing, deploying, monitoring, optimizing, documenting, routing, forwarding, translating, encoding, decoding, partitioning, distributing, collecting, filtering, transforming, loading, saving, retrieving, computing, calculating, scheduling, coordinating, synthesizing, extracting, compiling, resolving, confirming, assembling, orchestrating"
54
+ },
55
+ "session_id": {
56
+ "type": "string",
57
+ "description": "Session ID for the feedback stream — use the value from context.session_id"
58
+ }
59
+ },
60
+ "required": ["session_id"]
61
+ }
62
+ },
63
+ {
64
+ "name": "emit_stop_working",
65
+ "description": "Stop the real-time working feedback for a session. Updates the existing start_working row with the stop signal and clears the rotating verb timer. Args: activity (string, optional — the completed activity name), session_id (string, required — use the session_id from context.session_id exactly as provided)",
66
+ "parameters": {
67
+ "type": "object",
68
+ "properties": {
69
+ "activity": {
70
+ "type": "string",
71
+ "description": "The activity that completed (for display in the stop message)"
72
+ },
73
+ "session_id": {
74
+ "type": "string",
75
+ "description": "Session ID for the feedback stream to stop — use the value from context.session_id"
76
+ }
77
+ },
78
+ "required": ["session_id"]
79
+ }
80
+ },
81
+ {
82
+ "name": "emit_response",
83
+ "description": "Emit a one-shot feedback message to the agent_devin_messages table. Use for mid-turn progress updates, info messages, or status changes that are not part of the start/stop rotation. Args: message (string, required), emit_type (string, required — one of: start_working, stop_working, progress, info, conclude_node), session_id (string, required — use the session_id from context.session_id exactly as provided)",
84
+ "parameters": {
85
+ "type": "object",
86
+ "properties": {
87
+ "message": {
88
+ "type": "string",
89
+ "description": "The feedback message to display to the user"
90
+ },
91
+ "emit_type": {
92
+ "type": "string",
93
+ "description": "Type of emit: start_working, stop_working, progress, info, conclude_node"
94
+ },
95
+ "session_id": {
96
+ "type": "string",
97
+ "description": "Session ID — use the value from context.session_id"
98
+ }
99
+ },
100
+ "required": ["message", "emit_type", "session_id"]
101
+ }
102
+ },
103
+ {
104
+ "name": "filemaker_expert",
105
+ "description": "FileMaker database expert — read-only advisor. Reads pre-existing DDR analysis artifacts and answers questions about database structure, layouts, scripts, entities, user flows, and feature clusters. Does NOT run ddr-docs — only reads existing analysis output. Args: question (string, required), repo_root (string, required), database_name (string, optional)",
106
+ "parameters": {
107
+ "type": "object",
108
+ "properties": {
109
+ "question": {
110
+ "type": "string",
111
+ "description": "The specific question or task about the FileMaker database (e.g. 'what layouts exist in Clean Sweep?', 'summarize the user flow for creating a new invoice', 'what scripts are associated with the inventory layout?')"
112
+ },
113
+ "repo_root": {
114
+ "type": "string",
115
+ "description": "The project repository root path (e.g. /Users/devflow/repos/cleaner)"
116
+ },
117
+ "database_name": {
118
+ "type": "string",
119
+ "description": "Optional specific database to query (e.g. 'Clean Sweep_fmp12')"
120
+ }
121
+ },
122
+ "required": ["question", "repo_root"]
123
+ }
124
+ }
125
+ ]
126
+ }
File without changes
@@ -0,0 +1,27 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from devflow_engine.vendor.datalumina_genai.core.nodes.base import Node
6
+ from devflow_engine.vendor.datalumina_genai.core.task import TaskContext
7
+ from devflow_engine.idea.sufficiency import load_idea_source
8
+ from devin.nodes.shared.router import llm_route
9
+ from devin.nodes.shared.helpers import dfs_node_running, load_node_prompt_lines, pipeline_root, resolve_project_id, store_run, write_json
10
+
11
class DevinIntakeNode(Node):
    """Intake node: loads the incoming text, routes it, and records the result."""

    async def process(self, task_context: TaskContext) -> TaskContext:
        """Run intake for the event carried by *task_context*.

        Steps, in order: open a node attempt in the run store, resolve the
        project id, report the node as running, load the idea text, ask
        ``llm_route`` for a route arm, write the payload to
        ``devin_intake.json`` under the pipeline root, register that file as
        an artifact, mark the attempt ``succeeded`` and merge the payload
        into ``task_context.metadata``.

        Returns:
            The same ``task_context``, with its metadata updated.
        """
        evt = task_context.event
        root = Path(evt.repo_root)

        store, run_id = store_run()
        attempt_id = store.create_node_attempt(
            run_id=run_id,
            node_id='devin_intake',
            node_name='DevinIntake',
            attempt=1,
        )

        project_id = resolve_project_id(root, idea_id=evt.idea_id)
        dfs_node_running(
            project_id=project_id,
            run_id=run_id,
            node_id='devin_intake',
            summary='Running Devin intake',
            idea_id=evt.idea_id,
        )

        # The event may carry inline text, a source file path, or both.
        raw_text, input_meta = load_idea_source(
            text=evt.raw_text,
            source_path=Path(evt.source_path) if evt.source_path else None,
        )
        route_arm, route_payload = llm_route(raw_text=raw_text, repo_root=root, project_id=project_id)

        payload = {
            'project_id': project_id,
            'raw_text': raw_text,
            'input_meta': input_meta,
            'route': route_payload,
            'route_arm': route_arm,
            'prompt_lines': load_node_prompt_lines(__file__),
        }

        artifact_path = pipeline_root(root, idea_id=evt.idea_id, pipeline_key=evt.pipeline_key) / 'devin_intake.json'
        write_json(artifact_path, payload)

        store.add_artifact(
            run_id=run_id,
            node_exec_id=attempt_id,
            kind='devin_intake',
            uri=str(artifact_path),
            metadata={'route_arm': route_arm, 'project_id': project_id},
        )
        store.mark_node_finished(node_exec_id=attempt_id, status='succeeded', output=payload)

        task_context.metadata.update(payload)
        return task_context
@@ -0,0 +1,47 @@
1
+ from __future__ import annotations
2
+
3
+ import importlib.util
4
+ import json
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from ..shared.helpers import classify_route, load_node_prompt_text
9
+ from ..shared.models import ScenarioResult
10
+
11
# Directory named 'scenarios' that sits next to this module.
SCENARIO_DIR = Path(__file__).parent / 'scenarios'
12
+
13
+
14
def _load_module(path: Path):
    """Execute the Python file at *path* as a standalone module and return it.

    The module is named after the file stem. This function does not add it
    to ``sys.modules``.

    Args:
        path: Location of the source file to load.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be built for *path*.
    """
    spec = importlib.util.spec_from_file_location(path.stem, path)
    # Validate before use: module_from_spec(None) would fail with an opaque
    # AttributeError, and an assert would be stripped under ``python -O``.
    if spec is None or spec.loader is None:
        raise ImportError(f'cannot load module from {path}')
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
20
+
21
+
22
def _collect_scenarios() -> list[tuple[Any, Any]]:
    """Load every scenario module in ``SCENARIO_DIR`` with its evals module.

    Files ending in ``_evals.py`` are not scenarios themselves; each remaining
    ``<name>.py`` is paired with the sibling ``<name>_evals.py``.

    Returns:
        ``(scenario_module, eval_module)`` tuples in sorted file order.
    """
    scenario_files = [
        candidate
        for candidate in sorted(SCENARIO_DIR.glob('*.py'))
        if not candidate.name.endswith('_evals.py')
    ]
    return [
        (
            _load_module(scenario_file),
            _load_module(scenario_file.with_name(f'{scenario_file.stem}_evals.py')),
        )
        for scenario_file in scenario_files
    ]
30
+
31
+
32
def run_scenario(payload: dict[str, Any]) -> dict[str, Any]:
    """Classify the payload's ``raw_text`` and report the routing outcome.

    Returns:
        A dict with ``route_arm``, the classifier's ``reason`` and
        ``prompt_loaded``, which is True when this node's prompt text is
        non-empty.
    """
    text = str(payload.get('raw_text') or '')
    route_arm, route_payload = classify_route(text)
    reason = route_payload.get('reason')
    prompt_loaded = bool(load_node_prompt_text(__file__))
    return {'route_arm': route_arm, 'reason': reason, 'prompt_loaded': prompt_loaded}
35
+
36
+
37
def run_all() -> list[ScenarioResult]:
    """Run every collected scenario and return one ``ScenarioResult`` per scenario."""

    def _score(scenario, evals) -> ScenarioResult:
        # Pass a copy so a run cannot mutate the scenario's INPUT_PAYLOAD.
        output = run_scenario(dict(scenario.INPUT_PAYLOAD))
        verdict, remarks = evals.evaluate(output)
        return ScenarioResult(
            scenario_name=scenario.SCENARIO_NAME,
            passed=verdict,
            actual_output=output,
            expected_behavior=dict(scenario.EXPECTED_BEHAVIOR),
            notes=remarks,
        )

    return [_score(scenario, evals) for scenario, evals in _collect_scenarios()]
44
+
45
+
46
if __name__ == '__main__':
    # Script entry point: print all scenario results as sorted, indented JSON.
    serialised = [result.model_dump() for result in run_all()]
    print(json.dumps(serialised, indent=2, sort_keys=True))