devflow-engine 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. devflow_engine/__init__.py +3 -0
  2. devflow_engine/agentic_prompts.py +100 -0
  3. devflow_engine/agentic_runtime.py +398 -0
  4. devflow_engine/api_key_flow_harness.py +539 -0
  5. devflow_engine/api_keys.py +357 -0
  6. devflow_engine/bootstrap/__init__.py +2 -0
  7. devflow_engine/bootstrap/provision_from_template.py +84 -0
  8. devflow_engine/cli/__init__.py +0 -0
  9. devflow_engine/cli/app.py +7270 -0
  10. devflow_engine/core/__init__.py +0 -0
  11. devflow_engine/core/config.py +86 -0
  12. devflow_engine/core/logging.py +29 -0
  13. devflow_engine/core/paths.py +45 -0
  14. devflow_engine/core/toml_kv.py +33 -0
  15. devflow_engine/devflow_event_worker.py +1292 -0
  16. devflow_engine/devflow_state.py +201 -0
  17. devflow_engine/devin2/__init__.py +9 -0
  18. devflow_engine/devin2/agent_definition.py +120 -0
  19. devflow_engine/devin2/pi_runner.py +204 -0
  20. devflow_engine/devin_orchestration.py +69 -0
  21. devflow_engine/docs/prompts/anti-patterns.md +42 -0
  22. devflow_engine/docs/prompts/devin-agent-prompt.md +55 -0
  23. devflow_engine/docs/prompts/devin2-agent-prompt.md +81 -0
  24. devflow_engine/docs/prompts/examples/devin-vapi-clone-reference-exchange.json +85 -0
  25. devflow_engine/doctor/__init__.py +2 -0
  26. devflow_engine/doctor/triage.py +140 -0
  27. devflow_engine/error/__init__.py +0 -0
  28. devflow_engine/error/remediation.py +21 -0
  29. devflow_engine/errors/error_solver_dag.py +522 -0
  30. devflow_engine/errors/runtime_observability.py +67 -0
  31. devflow_engine/idea/__init__.py +4 -0
  32. devflow_engine/idea/actors.py +481 -0
  33. devflow_engine/idea/agentic.py +465 -0
  34. devflow_engine/idea/analyze.py +93 -0
  35. devflow_engine/idea/devin_chat_dag.py +1 -0
  36. devflow_engine/idea/diff.py +99 -0
  37. devflow_engine/idea/drafts.py +446 -0
  38. devflow_engine/idea/idea_creation_dag.py +643 -0
  39. devflow_engine/idea/ideation_enrichment.py +355 -0
  40. devflow_engine/idea/ideation_enrichment_worker.py +19 -0
  41. devflow_engine/idea/paths.py +28 -0
  42. devflow_engine/idea/promote.py +53 -0
  43. devflow_engine/idea/redaction.py +27 -0
  44. devflow_engine/idea/repo_tools.py +1277 -0
  45. devflow_engine/idea/response_mode.py +30 -0
  46. devflow_engine/idea/story_pipeline.py +1585 -0
  47. devflow_engine/idea/sufficiency.py +376 -0
  48. devflow_engine/idea/traditional_stories.py +1257 -0
  49. devflow_engine/implementation/__init__.py +0 -0
  50. devflow_engine/implementation/alembic_preflight.py +700 -0
  51. devflow_engine/implementation/dag.py +8450 -0
  52. devflow_engine/implementation/green_gate.py +93 -0
  53. devflow_engine/implementation/prompts.py +108 -0
  54. devflow_engine/implementation/test_runtime.py +623 -0
  55. devflow_engine/integration/__init__.py +19 -0
  56. devflow_engine/integration/agentic.py +66 -0
  57. devflow_engine/integration/dag.py +3539 -0
  58. devflow_engine/integration/prompts.py +114 -0
  59. devflow_engine/integration/supabase_schema.sql +31 -0
  60. devflow_engine/integration/supabase_sync.py +177 -0
  61. devflow_engine/llm/__init__.py +1 -0
  62. devflow_engine/llm/cli_one_shot.py +84 -0
  63. devflow_engine/llm/cli_stream.py +371 -0
  64. devflow_engine/llm/execution_context.py +26 -0
  65. devflow_engine/llm/invoke.py +1322 -0
  66. devflow_engine/llm/provider_api.py +304 -0
  67. devflow_engine/llm/repo_knowledge.py +588 -0
  68. devflow_engine/llm_primitives.py +315 -0
  69. devflow_engine/orchestration.py +62 -0
  70. devflow_engine/planning/__init__.py +0 -0
  71. devflow_engine/planning/analyze_repo.py +92 -0
  72. devflow_engine/planning/render_drafts.py +133 -0
  73. devflow_engine/playground/__init__.py +0 -0
  74. devflow_engine/playground/hooks.py +26 -0
  75. devflow_engine/playwright_workflow/__init__.py +5 -0
  76. devflow_engine/playwright_workflow/dag.py +1317 -0
  77. devflow_engine/process/__init__.py +5 -0
  78. devflow_engine/process/dag.py +59 -0
  79. devflow_engine/project_registration/__init__.py +3 -0
  80. devflow_engine/project_registration/dag.py +1581 -0
  81. devflow_engine/project_registry.py +109 -0
  82. devflow_engine/prompts/devin/generic/prompt.md +6 -0
  83. devflow_engine/prompts/devin/ideation/prompt.md +263 -0
  84. devflow_engine/prompts/devin/ideation/scenarios.md +5 -0
  85. devflow_engine/prompts/devin/ideation_loop/prompt.md +6 -0
  86. devflow_engine/prompts/devin/insight/prompt.md +11 -0
  87. devflow_engine/prompts/devin/insight/scenarios.md +5 -0
  88. devflow_engine/prompts/devin/intake/prompt.md +15 -0
  89. devflow_engine/prompts/devin/iterate/prompt.md +12 -0
  90. devflow_engine/prompts/devin/shared/eval_doctrine.md +9 -0
  91. devflow_engine/prompts/devin/shared/principles.md +246 -0
  92. devflow_engine/prompts/devin_eval/assessment/prompt.md +18 -0
  93. devflow_engine/prompts/idea/api_ideation_agent/prompt.md +8 -0
  94. devflow_engine/prompts/idea/api_insight_agent/prompt.md +8 -0
  95. devflow_engine/prompts/idea/response_doctrine/prompt.md +18 -0
  96. devflow_engine/prompts/implementation/dependency_assessment/prompt.md +12 -0
  97. devflow_engine/prompts/implementation/green/green/prompt.md +11 -0
  98. devflow_engine/prompts/implementation/green/node_config/prompt.md +3 -0
  99. devflow_engine/prompts/implementation/green_review/outcome_review/prompt.md +5 -0
  100. devflow_engine/prompts/implementation/green_review/prior_run_review/prompt.md +5 -0
  101. devflow_engine/prompts/implementation/red/prompt.md +27 -0
  102. devflow_engine/prompts/implementation/redreview/prompt.md +23 -0
  103. devflow_engine/prompts/implementation/redreview_repair/prompt.md +16 -0
  104. devflow_engine/prompts/implementation/setupdoc/prompt.md +10 -0
  105. devflow_engine/prompts/implementation/story_planning/prompt.md +13 -0
  106. devflow_engine/prompts/implementation/test_design/prompt.md +27 -0
  107. devflow_engine/prompts/integration/README.md +185 -0
  108. devflow_engine/prompts/integration/green/example.md +67 -0
  109. devflow_engine/prompts/integration/green/green/prompt.md +10 -0
  110. devflow_engine/prompts/integration/green/node_config/prompt.md +42 -0
  111. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/green/prompt.md +15 -0
  112. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/node_config/prompt.md +42 -0
  113. devflow_engine/prompts/integration/green_enrich/example.md +79 -0
  114. devflow_engine/prompts/integration/green_enrich/green_enrich/prompt.md +9 -0
  115. devflow_engine/prompts/integration/green_enrich/node_config/prompt.md +41 -0
  116. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/green_enrich/prompt.md +14 -0
  117. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  118. devflow_engine/prompts/integration/red/code_repair/prompt.md +12 -0
  119. devflow_engine/prompts/integration/red/example.md +152 -0
  120. devflow_engine/prompts/integration/red/node_config/prompt.md +86 -0
  121. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  122. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/node_config/prompt.md +84 -0
  123. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red/prompt.md +16 -0
  124. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red_repair/prompt.md +15 -0
  125. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  126. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/node_config/prompt.md +84 -0
  127. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/red_repair/prompt.md +11 -0
  128. devflow_engine/prompts/integration/red/red/prompt.md +11 -0
  129. devflow_engine/prompts/integration/red/red_repair/prompt.md +12 -0
  130. devflow_engine/prompts/integration/red_review/example.md +71 -0
  131. devflow_engine/prompts/integration/red_review/node_config/prompt.md +41 -0
  132. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  133. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/red_review/prompt.md +15 -0
  134. devflow_engine/prompts/integration/red_review/red_review/prompt.md +9 -0
  135. devflow_engine/prompts/integration/resolve/example.md +111 -0
  136. devflow_engine/prompts/integration/resolve/node_config/prompt.md +64 -0
  137. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/node_config/prompt.md +64 -0
  138. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_implicated_users/prompt.md +15 -0
  139. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_side_effects/prompt.md +15 -0
  140. devflow_engine/prompts/integration/resolve/resolve_implicated_users/prompt.md +10 -0
  141. devflow_engine/prompts/integration/resolve/resolve_side_effects/prompt.md +10 -0
  142. devflow_engine/prompts/integration/validate/build_idea_acceptance_coverage/prompt.md +12 -0
  143. devflow_engine/prompts/integration/validate/code_repair/prompt.md +13 -0
  144. devflow_engine/prompts/integration/validate/example.md +143 -0
  145. devflow_engine/prompts/integration/validate/node_config/prompt.md +87 -0
  146. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  147. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/node_config/prompt.md +67 -0
  148. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_enrich_gate/prompt.md +17 -0
  149. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_repair/prompt.md +16 -0
  150. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  151. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/node_config/prompt.md +67 -0
  152. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/validate_repair/prompt.md +9 -0
  153. devflow_engine/prompts/integration/validate/validate_enrich_gate/prompt.md +10 -0
  154. devflow_engine/prompts/integration/validate/validate_repair/prompt.md +20 -0
  155. devflow_engine/prompts/integration/write_workflows/example.md +100 -0
  156. devflow_engine/prompts/integration/write_workflows/node_config/prompt.md +44 -0
  157. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/node_config/prompt.md +44 -0
  158. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/write_workflows/prompt.md +17 -0
  159. devflow_engine/prompts/integration/write_workflows/write_workflows/prompt.md +11 -0
  160. devflow_engine/prompts/iterate/README.md +7 -0
  161. devflow_engine/prompts/iterate/coder/prompt.md +11 -0
  162. devflow_engine/prompts/iterate/framer/prompt.md +11 -0
  163. devflow_engine/prompts/iterate/iterator/prompt.md +13 -0
  164. devflow_engine/prompts/iterate/observer/prompt.md +11 -0
  165. devflow_engine/prompts/recovery/diagnosis/prompt.md +7 -0
  166. devflow_engine/prompts/recovery/execution/prompt.md +8 -0
  167. devflow_engine/prompts/recovery/execution_verification/prompt.md +7 -0
  168. devflow_engine/prompts/recovery/failure_investigation/prompt.md +10 -0
  169. devflow_engine/prompts/recovery/preflight_health_repo_repair/prompt.md +8 -0
  170. devflow_engine/prompts/recovery/remediation_execution/prompt.md +11 -0
  171. devflow_engine/prompts/recovery/root_cause_investigation/prompt.md +12 -0
  172. devflow_engine/prompts/scope_idea/doctrine/prompt.md +7 -0
  173. devflow_engine/prompts/source_doc_eval/document/prompt.md +6 -0
  174. devflow_engine/prompts/source_doc_eval/targeted_mutation/prompt.md +9 -0
  175. devflow_engine/prompts/source_doc_mutation/domain_entities/prompt.md +6 -0
  176. devflow_engine/prompts/source_doc_mutation/product_brief/prompt.md +6 -0
  177. devflow_engine/prompts/source_doc_mutation/project_doc_coherence/prompt.md +7 -0
  178. devflow_engine/prompts/source_doc_mutation/project_doc_render/prompt.md +9 -0
  179. devflow_engine/prompts/source_doc_mutation/source_doc_coherence/prompt.md +5 -0
  180. devflow_engine/prompts/source_doc_mutation/source_doc_enrichment_coherence/prompt.md +6 -0
  181. devflow_engine/prompts/source_doc_mutation/user_workflows/prompt.md +6 -0
  182. devflow_engine/prompts/source_scope/doctrine/prompt.md +10 -0
  183. devflow_engine/prompts/ui_grounding/doctrine/prompt.md +7 -0
  184. devflow_engine/recovery/__init__.py +3 -0
  185. devflow_engine/recovery/dag.py +2609 -0
  186. devflow_engine/recovery/models.py +220 -0
  187. devflow_engine/refactor.py +93 -0
  188. devflow_engine/registry/__init__.py +1 -0
  189. devflow_engine/registry/cards.py +238 -0
  190. devflow_engine/registry/domain_normalize.py +60 -0
  191. devflow_engine/registry/effects.py +65 -0
  192. devflow_engine/registry/enforce_report.py +150 -0
  193. devflow_engine/registry/module_cards_classify.py +164 -0
  194. devflow_engine/registry/module_cards_draft.py +184 -0
  195. devflow_engine/registry/module_cards_gate.py +59 -0
  196. devflow_engine/registry/packages.py +347 -0
  197. devflow_engine/registry/pathways.py +323 -0
  198. devflow_engine/review/__init__.py +11 -0
  199. devflow_engine/review/dag.py +588 -0
  200. devflow_engine/review/review_story.py +67 -0
  201. devflow_engine/scope_idea/__init__.py +3 -0
  202. devflow_engine/scope_idea/agentic.py +39 -0
  203. devflow_engine/scope_idea/dag.py +1069 -0
  204. devflow_engine/scope_idea/models.py +175 -0
  205. devflow_engine/skills/builtins/devflow/queue_failure_investigation/SKILL.md +112 -0
  206. devflow_engine/skills/builtins/devflow/queue_idea_to_story/SKILL.md +120 -0
  207. devflow_engine/skills/builtins/devflow/queue_integration/SKILL.md +105 -0
  208. devflow_engine/skills/builtins/devflow/queue_recovery/SKILL.md +108 -0
  209. devflow_engine/skills/builtins/devflow/queue_runtime_core/SKILL.md +155 -0
  210. devflow_engine/skills/builtins/devflow/queue_story_implementation/SKILL.md +122 -0
  211. devflow_engine/skills/builtins/devin/idea_to_story_handoff/SKILL.md +120 -0
  212. devflow_engine/skills/builtins/devin/ideation/SKILL.md +168 -0
  213. devflow_engine/skills/builtins/devin/ideation/state-and-phrasing-reference.md +18 -0
  214. devflow_engine/skills/builtins/devin/insight/SKILL.md +22 -0
  215. devflow_engine/skills/registry.example.yaml +42 -0
  216. devflow_engine/source_doc_assumptions.py +291 -0
  217. devflow_engine/source_doc_mutation_dag.py +1606 -0
  218. devflow_engine/source_doc_mutation_eval.py +417 -0
  219. devflow_engine/source_doc_mutation_worker.py +25 -0
  220. devflow_engine/source_docs_schema.py +207 -0
  221. devflow_engine/source_docs_updater.py +309 -0
  222. devflow_engine/source_scope/__init__.py +15 -0
  223. devflow_engine/source_scope/agentic.py +45 -0
  224. devflow_engine/source_scope/dag.py +1626 -0
  225. devflow_engine/source_scope/models.py +177 -0
  226. devflow_engine/stores/__init__.py +0 -0
  227. devflow_engine/stores/execution_store.py +3534 -0
  228. devflow_engine/story/__init__.py +0 -0
  229. devflow_engine/story/contracts.py +160 -0
  230. devflow_engine/story/discovery.py +47 -0
  231. devflow_engine/story/evidence.py +118 -0
  232. devflow_engine/story/hashing.py +27 -0
  233. devflow_engine/story/implemented_queue_purge.py +148 -0
  234. devflow_engine/story/indexer.py +105 -0
  235. devflow_engine/story/io.py +20 -0
  236. devflow_engine/story/markdown_contracts.py +298 -0
  237. devflow_engine/story/reconciliation.py +408 -0
  238. devflow_engine/story/validate_stories.py +149 -0
  239. devflow_engine/story/validate_tests_story.py +512 -0
  240. devflow_engine/story/validation.py +133 -0
  241. devflow_engine/ui_grounding/__init__.py +11 -0
  242. devflow_engine/ui_grounding/agentic.py +31 -0
  243. devflow_engine/ui_grounding/dag.py +874 -0
  244. devflow_engine/ui_grounding/models.py +224 -0
  245. devflow_engine/ui_grounding/pencil_bridge.py +247 -0
  246. devflow_engine/vendor/__init__.py +0 -0
  247. devflow_engine/vendor/datalumina_genai/__init__.py +11 -0
  248. devflow_engine/vendor/datalumina_genai/core/__init__.py +0 -0
  249. devflow_engine/vendor/datalumina_genai/core/exceptions.py +9 -0
  250. devflow_engine/vendor/datalumina_genai/core/nodes/__init__.py +0 -0
  251. devflow_engine/vendor/datalumina_genai/core/nodes/agent.py +48 -0
  252. devflow_engine/vendor/datalumina_genai/core/nodes/agent_streaming_node.py +26 -0
  253. devflow_engine/vendor/datalumina_genai/core/nodes/base.py +89 -0
  254. devflow_engine/vendor/datalumina_genai/core/nodes/concurrent.py +30 -0
  255. devflow_engine/vendor/datalumina_genai/core/nodes/router.py +69 -0
  256. devflow_engine/vendor/datalumina_genai/core/schema.py +72 -0
  257. devflow_engine/vendor/datalumina_genai/core/task.py +52 -0
  258. devflow_engine/vendor/datalumina_genai/core/validate.py +139 -0
  259. devflow_engine/vendor/datalumina_genai/core/workflow.py +200 -0
  260. devflow_engine/worker.py +1086 -0
  261. devflow_engine/worker_guard.py +233 -0
  262. devflow_engine-1.0.0.dist-info/METADATA +235 -0
  263. devflow_engine-1.0.0.dist-info/RECORD +393 -0
  264. devflow_engine-1.0.0.dist-info/WHEEL +4 -0
  265. devflow_engine-1.0.0.dist-info/entry_points.txt +3 -0
  266. devin/__init__.py +6 -0
  267. devin/dag.py +58 -0
  268. devin/dag_two_arm.py +138 -0
  269. devin/devin_chat_scenario_catalog.json +588 -0
  270. devin/devin_eval.py +677 -0
  271. devin/nodes/__init__.py +0 -0
  272. devin/nodes/ideation/__init__.py +0 -0
  273. devin/nodes/ideation/node.py +195 -0
  274. devin/nodes/ideation/playground.py +267 -0
  275. devin/nodes/ideation/prompt.md +65 -0
  276. devin/nodes/ideation/scenarios/continue_refinement.py +13 -0
  277. devin/nodes/ideation/scenarios/continue_refinement_evals.py +18 -0
  278. devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py +17 -0
  279. devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py +16 -0
  280. devin/nodes/ideation/scenarios/large_idea_split.py +4 -0
  281. devin/nodes/ideation/scenarios/large_idea_split_evals.py +17 -0
  282. devin/nodes/ideation/scenarios/source_documentation_added.py +4 -0
  283. devin/nodes/ideation/scenarios/source_documentation_added_evals.py +16 -0
  284. devin/nodes/ideation/scenarios/user_says_create_it.py +30 -0
  285. devin/nodes/ideation/scenarios/user_says_create_it_evals.py +23 -0
  286. devin/nodes/ideation/scenarios/vague_idea.py +16 -0
  287. devin/nodes/ideation/scenarios/vague_idea_evals.py +47 -0
  288. devin/nodes/ideation/tools.json +312 -0
  289. devin/nodes/insight/__init__.py +0 -0
  290. devin/nodes/insight/node.py +49 -0
  291. devin/nodes/insight/playground.py +154 -0
  292. devin/nodes/insight/prompt.md +61 -0
  293. devin/nodes/insight/scenarios/architecture_pattern_query.py +15 -0
  294. devin/nodes/insight/scenarios/architecture_pattern_query_evals.py +25 -0
  295. devin/nodes/insight/scenarios/codebase_exploration.py +15 -0
  296. devin/nodes/insight/scenarios/codebase_exploration_evals.py +23 -0
  297. devin/nodes/insight/scenarios/devin_ideation_routing.py +19 -0
  298. devin/nodes/insight/scenarios/devin_ideation_routing_evals.py +39 -0
  299. devin/nodes/insight/scenarios/devin_insight_routing.py +20 -0
  300. devin/nodes/insight/scenarios/devin_insight_routing_evals.py +40 -0
  301. devin/nodes/insight/scenarios/operational_debugging.py +15 -0
  302. devin/nodes/insight/scenarios/operational_debugging_evals.py +23 -0
  303. devin/nodes/insight/scenarios/operational_question.py +9 -0
  304. devin/nodes/insight/scenarios/operational_question_evals.py +8 -0
  305. devin/nodes/insight/scenarios/queue_status.py +15 -0
  306. devin/nodes/insight/scenarios/queue_status_evals.py +23 -0
  307. devin/nodes/insight/scenarios/source_doc_explanation.py +14 -0
  308. devin/nodes/insight/scenarios/source_doc_explanation_evals.py +21 -0
  309. devin/nodes/insight/scenarios/worker_state_check.py +15 -0
  310. devin/nodes/insight/scenarios/worker_state_check_evals.py +22 -0
  311. devin/nodes/insight/tools.json +126 -0
  312. devin/nodes/intake/__init__.py +0 -0
  313. devin/nodes/intake/node.py +27 -0
  314. devin/nodes/intake/playground.py +47 -0
  315. devin/nodes/intake/prompt.md +12 -0
  316. devin/nodes/intake/scenarios/ideation_routing.py +4 -0
  317. devin/nodes/intake/scenarios/ideation_routing_evals.py +5 -0
  318. devin/nodes/intake/scenarios/insight_routing.py +4 -0
  319. devin/nodes/intake/scenarios/insight_routing_evals.py +5 -0
  320. devin/nodes/iterate/README.md +44 -0
  321. devin/nodes/iterate/__init__.py +1 -0
  322. devin/nodes/iterate/_archived_design_stages/01-objectives-requirements.md +112 -0
  323. devin/nodes/iterate/_archived_design_stages/02-evals.md +131 -0
  324. devin/nodes/iterate/_archived_design_stages/03-tools-and-boundaries.md +110 -0
  325. devin/nodes/iterate/_archived_design_stages/04-harness-and-playground.md +32 -0
  326. devin/nodes/iterate/_archived_design_stages/05-prompt-deferred.md +11 -0
  327. devin/nodes/iterate/_archived_design_stages/coder_agent_design/01-objectives-requirements.md +20 -0
  328. devin/nodes/iterate/_archived_design_stages/coder_agent_design/02-evals.md +8 -0
  329. devin/nodes/iterate/_archived_design_stages/coder_agent_design/03-tools-and-boundaries.md +14 -0
  330. devin/nodes/iterate/_archived_design_stages/coder_agent_design/04-harness-and-playground.md +12 -0
  331. devin/nodes/iterate/_archived_design_stages/framer_agent_design/01-objectives-requirements.md +20 -0
  332. devin/nodes/iterate/_archived_design_stages/framer_agent_design/02-evals.md +8 -0
  333. devin/nodes/iterate/_archived_design_stages/framer_agent_design/03-tools-and-boundaries.md +13 -0
  334. devin/nodes/iterate/_archived_design_stages/framer_agent_design/04-harness-and-playground.md +12 -0
  335. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md +25 -0
  336. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/02-evals.md +9 -0
  337. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/03-tools-and-boundaries.md +14 -0
  338. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md +12 -0
  339. devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md +20 -0
  340. devin/nodes/iterate/_archived_design_stages/observer_agent_design/02-evals.md +8 -0
  341. devin/nodes/iterate/_archived_design_stages/observer_agent_design/03-tools-and-boundaries.md +14 -0
  342. devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md +13 -0
  343. devin/nodes/iterate/agent-roles.md +89 -0
  344. devin/nodes/iterate/agents/README.md +10 -0
  345. devin/nodes/iterate/artifacts.md +504 -0
  346. devin/nodes/iterate/contract.md +100 -0
  347. devin/nodes/iterate/eval-plan.md +74 -0
  348. devin/nodes/iterate/node.py +100 -0
  349. devin/nodes/iterate/pipeline/README.md +13 -0
  350. devin/nodes/iterate/playground-contract.md +76 -0
  351. devin/nodes/iterate/prompt.md +11 -0
  352. devin/nodes/iterate/scenarios/README.md +38 -0
  353. devin/nodes/iterate/scenarios/artifact-and-loop-scenarios.md +101 -0
  354. devin/nodes/iterate/scenarios/coder_artifact_alignment.py +32 -0
  355. devin/nodes/iterate/scenarios/coder_artifact_alignment_evals.py +45 -0
  356. devin/nodes/iterate/scenarios/coder_bounded_fix.py +27 -0
  357. devin/nodes/iterate/scenarios/coder_bounded_fix_evals.py +45 -0
  358. devin/nodes/iterate/scenarios/devin_iterate_routing.py +21 -0
  359. devin/nodes/iterate/scenarios/devin_iterate_routing_evals.py +36 -0
  360. devin/nodes/iterate/scenarios/framer_scope_boundary.py +25 -0
  361. devin/nodes/iterate/scenarios/framer_scope_boundary_evals.py +57 -0
  362. devin/nodes/iterate/scenarios/framer_task_framing.py +25 -0
  363. devin/nodes/iterate/scenarios/framer_task_framing_evals.py +58 -0
  364. devin/nodes/iterate/scenarios/iterate_error_fix.py +21 -0
  365. devin/nodes/iterate/scenarios/iterate_error_fix_evals.py +39 -0
  366. devin/nodes/iterate/scenarios/iterate_quick_change.py +21 -0
  367. devin/nodes/iterate/scenarios/iterate_quick_change_evals.py +35 -0
  368. devin/nodes/iterate/scenarios/iterate_to_idea_promotion.py +23 -0
  369. devin/nodes/iterate/scenarios/iterate_to_idea_promotion_evals.py +53 -0
  370. devin/nodes/iterate/scenarios/iterate_to_insight_reroute.py +23 -0
  371. devin/nodes/iterate/scenarios/iterate_to_insight_reroute_evals.py +53 -0
  372. devin/nodes/iterate/scenarios/observer_evidence_seam.py +28 -0
  373. devin/nodes/iterate/scenarios/observer_evidence_seam_evals.py +55 -0
  374. devin/nodes/iterate/scenarios/observer_repro_creation.py +28 -0
  375. devin/nodes/iterate/scenarios/observer_repro_creation_evals.py +45 -0
  376. devin/nodes/iterate/scenarios/routing-matrix.md +45 -0
  377. devin/nodes/shared/__init__.py +0 -0
  378. devin/nodes/shared/filemaker_expert.md +80 -0
  379. devin/nodes/shared/filemaker_expert.py +354 -0
  380. devin/nodes/shared/filemaker_expert_eval/runner.py +176 -0
  381. devin/nodes/shared/filemaker_expert_eval/scenarios.json +65 -0
  382. devin/nodes/shared/goldilocks_advisor_eval/runner.py +214 -0
  383. devin/nodes/shared/goldilocks_advisor_eval/scenarios.json +58 -0
  384. devin/nodes/shared/helpers.py +156 -0
  385. devin/nodes/shared/idea_compliance_advisor_eval/runner.py +252 -0
  386. devin/nodes/shared/idea_compliance_advisor_eval/scenarios.json +75 -0
  387. devin/nodes/shared/models.py +44 -0
  388. devin/nodes/shared/post.py +40 -0
  389. devin/nodes/shared/router.py +107 -0
  390. devin/nodes/shared/tools.py +191 -0
  391. devin/shared/devin-chat-rubric.md +237 -0
  392. devin/shared/devin-chat-scenario-suite.md +90 -0
  393. devin/shared/eval_doctrine.md +9 -0
@@ -0,0 +1,512 @@
1
+ #!/usr/bin/env python3
2
+ """Story-scoped deterministic validation for contract tests.
3
+
4
+ This is intentionally a *scoped* variant of scripts/validate_tests.py.
5
+
6
+ Constraints:
7
+ - Offline + deterministic
8
+ - Static analysis only (no pytest execution required)
9
+ - Does NOT modify or depend on the behavior of validate_tests.py
10
+
11
+ Exit codes:
12
+ 0 = OK
13
+ 2 = Validation failed (issues found)
14
+
15
+ What it validates (scoped to one story_id):
16
+ - Tests for this story must include coherent marker triple:
17
+ - @pytest.mark.story_id("...")
18
+ - @pytest.mark.story_uuid("...")
19
+ - @pytest.mark.plane("...")
20
+ - The story_id must exist in canonical story docs.
21
+ - story_uuid markers must match the canonical story uuid.
22
+ - Plane coverage must include each required plane declared in the story contract.
23
+ - Anti-patterns (best-effort heuristics) within tests for this story.
24
+
25
+ Note: It does NOT attempt repo-wide coverage; that remains the job of validate_tests.py.
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import argparse
31
+ import ast
32
+ import json
33
+ import re
34
+ from dataclasses import asdict, dataclass
35
+ from pathlib import Path
36
+ from typing import Iterable
37
+
38
+ from devflow_engine.story.markdown_contracts import parse_story_contracts_from_markdown
39
+
40
+
41
# --- Marker extraction -------------------------------------------------------
# pytest-style markers, with or without the leading "@". Group 1 is the raw
# argument text up to the first ")" (still quoted, or a bare constant name).
MARK_STORY_ID_RE = re.compile(r"(?:@)?pytest\.mark\.story_id\(([^\)]+)\)")
MARK_STORY_UUID_RE = re.compile(r"(?:@)?pytest\.mark\.story_uuid\(([^\)]+)\)")
MARK_PLANE_RE = re.compile(r"(?:@)?pytest\.mark\.plane\(([^\)]+)\)")
# Comment-style tags of the form "@story_id: VALUE". Group 1 is the run of
# non-whitespace characters that follows the colon.
COMMENT_STORY_ID_RE = re.compile(r"@story_id:\s*([^\s]+)")
COMMENT_STORY_UUID_RE = re.compile(r"@story_uuid:\s*([^\s]+)")
COMMENT_PLANE_RE = re.compile(r"@plane:\s*([^\s]+)")
# A whole line of the form NAME = "value": group 1 is the upper-snake-case
# name, group 2 the non-empty value. The opening and closing quote characters
# are not required to be the same kind.
PY_CONST_RE = re.compile(r'^([A-Z_][A-Z0-9_]*)\s*=\s*["\']([^"\']+)["\']\s*$')
# "def test_xxx(" anchored at the start of the string, so an indented
# definition only matches if the caller strips the line first.
# NOTE(review): the call site is outside this view — confirm how lines are fed in.
DEF_TEST_RE = re.compile(r"^def\s+(test_[a-zA-Z0-9_]+)\s*\(")
# JavaScript/TypeScript style test declarations: "it(" or "test(".
JS_TEST_RE = re.compile(r"\b(it|test)\s*\(")

# --- Anti-pattern heuristics -------------------------------------------------
# An "assert" whose expression reaches a mock bookkeeping attribute.
MOCK_ASSERT_RE = re.compile(
    r"assert\s+.*\.(called|call_count|assert_called|assert_called_once|assert_called_with|mock_calls)\b"
)
# Either import form of unittest.mock.
MOCK_IMPORT_RE = re.compile(r"from\s+unittest\.mock\s+import\s+|import\s+unittest\.mock")
# Any call spelled "patch(" at a word boundary.
PATCH_RE = re.compile(r"\bpatch\(")
# File extensions that qualify a path as a candidate test file.
TEST_FILE_SUFFIXES = {".py", ".js", ".ts", ".jsx", ".tsx", ".mjs", ".cjs"}
# (label, pattern) pairs; all patterns are case-insensitive. The label is the
# text interpolated into the issue message when the pattern matches a line.
FORBIDDEN_COMPLETE_DROP_RESET_PATTERNS: tuple[tuple[str, re.Pattern[str]], ...] = (
    ("DROP SCHEMA", re.compile(r"\bDROP\s+SCHEMA\b", re.IGNORECASE)),
    ("DROP DATABASE", re.compile(r"\bDROP\s+DATABASE\b", re.IGNORECASE)),
    ("DROP OWNED BY", re.compile(r"\bDROP\s+OWNED\s+BY\b", re.IGNORECASE)),
    # NOTE(review): despite the label, this matches any "DROP TABLE" or
    # "DROP TABLES" statement, including a single-table drop — confirm intended.
    ("DROP ALL TABLES", re.compile(r"\bDROP\s+TABLES?\b", re.IGNORECASE)),
    (
        "metadata.drop_all",
        # Optional "<name>." prefix, e.g. "Base.metadata.drop_all(".
        re.compile(r"\b(?:[A-Za-z_][A-Za-z0-9_]*\.)?metadata\.drop_all\s*\(", re.IGNORECASE),
    ),
    ("db.drop_all", re.compile(r"\bdb\.drop_all\s*\(", re.IGNORECASE)),
)
# Issue codes grouped under "rewrite required".
# NOTE(review): the consumer of this set is outside this view — presumably these
# codes force a test rewrite rather than a patch; confirm against the caller.
REWRITE_REQUIRED_CODES = {
    "forbidden_complete_drop_reset_pattern",
    "invalid_python_test_file",
}
72
+
73
+
74
@dataclass(frozen=True)
class Issue:
    """One validation finding; instances are immutable (frozen dataclass)."""

    # Identifier for the kind of finding, e.g. "forbidden_complete_drop_reset_pattern".
    code: str
    # Explanation text for the finding.
    message: str
    # Path the finding refers to, stored as a string.
    path: str
    # Line number of the finding when known; the forbidden-pattern scanner
    # reports 1-based numbers.
    line: int | None = None
    # Optional source excerpt; the forbidden-pattern scanner stores the stripped
    # offending line cut to 200 characters.
    context: str | None = None
81
+
82
+
83
def _iter_story_files(user_stories_dir: Path) -> Iterable[Path]:
    """Yield the story markdown files under ``user_stories_dir`` in sorted order.

    The directory is searched recursively for ``*.md`` entries. Entries whose
    own name starts with ``.`` are skipped, and so are entries that are not
    regular files (for example a directory that happens to be named
    ``notes.md``), so every yielded path can be read as text.

    Args:
        user_stories_dir: Root directory to search.

    Yields:
        Matching file paths, in the order produced by ``sorted()`` over the
        full paths.
    """
    for candidate in sorted(user_stories_dir.rglob("*.md")):
        # Only the entry's own name is tested; files inside hidden directories still match.
        if candidate.name.startswith("."):
            continue
        # "*.md" also matches directories; yielding one would make read_text() fail downstream.
        if not candidate.is_file():
            continue
        yield candidate
88
+
89
+
90
def _iter_story_json_files(repo_root: Path) -> Iterable[Path]:
    """Return the compiled story JSON files stored under ``repo_root``.

    Looks in ``<repo_root>/.devflow/ideas`` for paths matching
    ``*/devflow_story_sets/*/story_*.json``.

    Args:
        repo_root: Repository root that may contain a ``.devflow/ideas`` tree.

    Returns:
        The matching paths as a sorted list, or an empty list when the ideas
        directory does not exist.
    """
    ideas_dir = repo_root.joinpath(".devflow", "ideas")
    if ideas_dir.exists():
        matches = list(ideas_dir.glob("*/devflow_story_sets/*/story_*.json"))
        matches.sort()
        return matches
    return []
95
+
96
+
97
def _load_story_from_json(repo_root: Path, story_id: str) -> dict[str, object] | None:
    """Return the compiled-JSON record for ``story_id``, or ``None`` if absent.

    Every file returned by ``_iter_story_json_files`` is read in order. Files
    that cannot be read or parsed, and files whose top-level JSON value is not
    an object, are skipped. The first file whose ``story_id`` equals the
    requested one wins.

    Args:
        repo_root: Repository root passed on to ``_iter_story_json_files``.
        story_id: Story identifier to look for (compared as a string).

    Returns:
        A dict with the keys ``story_id``, ``story_uuid``, ``required_planes``
        (list of strings), ``path`` (the JSON file, as a string) and
        ``source_kind`` (always ``"compiled_story_json"``), or ``None`` when no
        file matches.
    """
    for story_path in _iter_story_json_files(repo_root):
        try:
            payload = json.loads(story_path.read_text("utf-8"))
        except Exception:
            # Deliberately best-effort: one unreadable or malformed file must not abort the scan.
            continue
        # Valid JSON need not be an object; a list or scalar has no .get() and
        # previously raised AttributeError here instead of being skipped.
        if not isinstance(payload, dict):
            continue
        if str(payload.get("story_id") or "") != story_id:
            continue
        return {
            "story_id": str(payload.get("story_id") or ""),
            "story_uuid": str(payload.get("story_uuid") or ""),
            "required_planes": [str(x) for x in (payload.get("required_planes") or [])],
            "path": str(story_path),
            "source_kind": "compiled_story_json",
        }
    return None
113
+
114
+
115
def _load_story_from_markdown(user_stories_dir: Path, story_id: str) -> dict[str, object] | None:
    """Return the markdown-declared record for ``story_id``, or ``None`` if absent.

    Each file from ``_iter_story_files`` is read as UTF-8 and handed to
    ``parse_story_contracts_from_markdown``; the ``contract`` of each parsed
    block is compared against ``story_id`` and the first match is returned.

    Args:
        user_stories_dir: Directory tree holding the story markdown files.
        story_id: Story identifier to look for.

    Returns:
        A dict with the keys ``story_id``, ``story_uuid``, ``required_planes``
        (copied into a new list), ``path`` (the markdown file, as a string) and
        ``source_kind`` (always ``"canonical_markdown"``), or ``None`` when no
        contract matches.
    """
    for story_path in _iter_story_files(user_stories_dir):
        parsed_blocks = parse_story_contracts_from_markdown(
            story_path.read_text("utf-8"),
            source_path=str(story_path),
        )
        for parsed in parsed_blocks:
            contract = parsed.contract
            if contract.story_id != story_id:
                continue
            record: dict[str, object] = {
                "story_id": contract.story_id,
                "story_uuid": contract.story_uuid,
                "required_planes": list(contract.required_planes or []),
                "path": str(story_path),
                "source_kind": "canonical_markdown",
            }
            return record
    return None
130
+
131
+
132
def _load_story(repo_root: Path, user_stories_dir: Path, story_id: str) -> dict[str, object] | None:
    """Look up ``story_id``, preferring compiled JSON over markdown.

    Args:
        repo_root: Repository root searched for compiled story JSON.
        user_stories_dir: Directory searched for story markdown as a fallback.
        story_id: Story identifier to look for.

    Returns:
        The record from ``_load_story_from_json`` when it is not ``None``;
        otherwise whatever ``_load_story_from_markdown`` returns (possibly
        ``None``).
    """
    from_json = _load_story_from_json(repo_root, story_id)
    # An explicit None test: any non-None record from the JSON lookup wins.
    if from_json is None:
        return _load_story_from_markdown(user_stories_dir, story_id)
    return from_json
137
+
138
+
139
def _resolve_marker_value(raw: str, constants: dict[str, str]) -> str | None:
    """Turn the raw argument text of a marker into its string value.

    Args:
        raw: Argument text as captured from the marker, e.g. ``'"S-1"'`` or
            ``'STORY_ID'``. Surrounding whitespace is ignored.
        constants: Known ``NAME -> value`` string constants used to resolve a
            bare name.

    Returns:
        The text between the quotes when ``raw`` is wrapped in one matching
        pair of single or double quotes; otherwise the value ``constants``
        holds for the stripped token, or ``None`` when it holds none.
    """
    token = raw.strip()
    # Require the SAME quote character at both ends: text such as `"abc'` or
    # `'a' + "b"` is not a single string literal and must not be unwrapped.
    if len(token) >= 2 and token[0] in ('"', "'") and token[-1] == token[0]:
        return token[1:-1]
    return constants.get(token)
144
+
145
+
146
def _iter_candidate_test_files(root: Path) -> Iterable[Path]:
    """Collect the candidate test files denoted by ``root``.

    A candidate is a regular file whose name does not start with ``.`` and
    whose suffix is listed in ``TEST_FILE_SUFFIXES``.

    Args:
        root: Either a single file or a directory to search recursively.

    Returns:
        ``[root]`` when ``root`` is itself a candidate file; the sorted
        candidates below ``root`` when it is a directory; an empty list in
        every other case (non-candidate file, missing path, non-directory).
    """
    if root.is_file():
        single_ok = not root.name.startswith(".") and root.suffix in TEST_FILE_SUFFIXES
        return [root] if single_ok else []
    if not (root.exists() and root.is_dir()):
        return []
    # Same test order as for a single file: hidden name, then file type, then suffix.
    return [
        entry
        for entry in sorted(root.rglob("*"))
        if not entry.name.startswith(".") and entry.is_file() and entry.suffix in TEST_FILE_SUFFIXES
    ]
159
+
160
+
161
def _resolve_test_files(*, repo_root: Path, tests_dir: Path, test_paths: list[str] | None) -> list[Path]:
    """Work out which test files should be scanned.

    Args:
        repo_root: Base directory for relative entries in ``test_paths``.
        tests_dir: Directory scanned when ``test_paths`` is empty or ``None``.
        test_paths: Optional explicit files/directories to scan.

    Returns:
        Candidate test files. With explicit paths, each file is resolved and
        duplicates are dropped while keeping first-seen order.
    """
    if not test_paths:
        return list(_iter_candidate_test_files(tests_dir))

    def _scan_roots() -> Iterable[Path]:
        # Relative entries are anchored at the repository root.
        for raw_path in test_paths:
            root = Path(raw_path)
            yield (root if root.is_absolute() else repo_root / root).resolve()

    # dict keys keep insertion order, giving an order-preserving de-duplication.
    unique = dict.fromkeys(
        found.resolve()
        for root in _scan_roots()
        for found in _iter_candidate_test_files(root)
    )
    return list(unique)
178
+
179
+
180
+ def _find_forbidden_complete_drop_reset_issue(*, path: str, text: str) -> Issue | None:
181
+ for line_number, line in enumerate(text.splitlines(), start=1):
182
+ stripped = line.strip()
183
+ if stripped.startswith(("#", "//")):
184
+ continue
185
+ for label, pattern in FORBIDDEN_COMPLETE_DROP_RESET_PATTERNS:
186
+ if not pattern.search(line):
187
+ continue
188
+ return Issue(
189
+ code="forbidden_complete_drop_reset_pattern",
190
+ message=(
191
+ "Forbidden complete-drop reset pattern detected in story-scoped Red tests "
192
+ f"({label}). Rewrite required: replace destructive reset logic with story-scoped setup/cleanup."
193
+ ),
194
+ path=path,
195
+ line=line_number,
196
+ context=line.strip()[:200],
197
+ )
198
+ return None
199
+
200
+
201
+ def _validate_python_test_file(*, path: str, text: str) -> Issue | None:
202
+ try:
203
+ module = ast.parse(text, filename=path)
204
+ compile(module, path, "exec")
205
+ except SyntaxError as exc:
206
+ return Issue(
207
+ code="invalid_python_test_file",
208
+ message=(
209
+ "Python story-scoped test file is not parseable/compilable. "
210
+ f"Rewrite required before Red can proceed: {exc.msg}."
211
+ ),
212
+ path=path,
213
+ line=exc.lineno,
214
+ context=(exc.text or "").strip()[:200] or None,
215
+ )
216
+ except ValueError as exc:
217
+ return Issue(
218
+ code="invalid_python_test_file",
219
+ message=(
220
+ "Python story-scoped test file failed deterministic compile validation. "
221
+ f"Rewrite required before Red can proceed: {exc}."
222
+ ),
223
+ path=path,
224
+ context=str(exc)[:200] or None,
225
+ )
226
+ return None
227
+
228
+
229
def _scan_tests_for_story(
    test_files: Iterable[Path],
    story_id: str,
    *,
    explicitly_scoped_paths: bool = False,
) -> tuple[list[Issue], set[str], set[str]]:
    """Scan test files for marker coverage of a single story.

    Each file is read line by line. Story-id, story-uuid and plane markers
    (decorator form or comment form) accumulate until a line matching
    ``DEF_TEST_RE`` or ``JS_TEST_RE`` is reached; at that point the accumulated
    state is "flushed": if it belongs to ``story_id`` its uuid and planes are
    recorded and missing markers are reported.

    Args:
        test_files: Files to scan; they are processed in sorted order.
        story_id: The story whose tests are being validated.
        explicitly_scoped_paths: When true, every ``.py`` file is
            compile-checked even if it does not mention ``story_id``.

    Returns:
        ``(issues, planes_seen, uuids_seen)`` where the two sets hold the plane
        and uuid values collected from tests marked with ``story_id``.
    """
    issues: list[Issue] = []
    planes_seen: set[str] = set()
    uuids_seen: set[str] = set()

    for p in sorted(test_files):
        rel = str(p)  # used as the ``path`` of every issue raised for this file
        text = p.read_text("utf-8")
        lines = text.splitlines()
        constants: dict[str, str] = {}
        story_scoped_candidate = explicitly_scoped_paths or story_id in text
        if p.suffix == ".py" and story_scoped_candidate:
            invalid_python_issue = _validate_python_test_file(path=rel, text=text)
            if invalid_python_issue is not None:
                issues.append(invalid_python_issue)
                # An uncompilable file is not scanned any further.
                continue
        if story_id in text:
            forbidden_reset_issue = _find_forbidden_complete_drop_reset_issue(path=rel, text=text)
            if forbidden_reset_issue is not None:
                issues.append(forbidden_reset_issue)
        # First pass: collect constant assignments so markers that reference a
        # name instead of a literal can be resolved below.
        for line in lines:
            m_const = PY_CONST_RE.match(line.strip())
            if m_const:
                constants[m_const.group(1)] = m_const.group(2)

        # Marker state accumulated since the previous test boundary.
        current_story_id: str | None = None
        current_story_uuid: str | None = None
        current_planes: set[str] = set()
        current_has_target_story = False

        def flush(boundary_line: int) -> None:
            # Close the marker group ending at ``boundary_line``: record results
            # for the target story, then reset the accumulated state.
            nonlocal current_story_id, current_story_uuid, current_planes, current_has_target_story
            if not current_has_target_story:
                # Markers belong to another story (or there are none): discard.
                current_story_id = None
                current_story_uuid = None
                current_planes = set()
                current_has_target_story = False
                return

            # NOTE(review): current_has_target_story is only ever set from
            # ``current_story_id == story_id``, so current_story_id is not None
            # whenever execution gets here; this branch appears unreachable.
            if current_story_id is None:
                issues.append(
                    Issue(
                        code="missing_story_id_marker",
                        message=f'Test for story {story_id!r} is missing @pytest.mark.story_id("...") marker.',
                        path=rel,
                        line=boundary_line,
                    )
                )
            if current_story_uuid is None:
                issues.append(
                    Issue(
                        code="missing_story_uuid_marker",
                        message=f'Test for story {story_id!r} is missing @pytest.mark.story_uuid("...") marker.',
                        path=rel,
                        line=boundary_line,
                    )
                )
            if not current_planes:
                issues.append(
                    Issue(
                        code="missing_plane_marker",
                        message=f'Test for story {story_id!r} is missing @pytest.mark.plane("...") marker.',
                        path=rel,
                        line=boundary_line,
                    )
                )

            if current_story_uuid is not None:
                uuids_seen.add(current_story_uuid)
            planes_seen.update(current_planes)

            current_story_id = None
            current_story_uuid = None
            current_planes = set()
            current_has_target_story = False

        # The mock-assertion check is file-wide: it runs for any file that
        # mentions the story id and imports or patches mocks.
        if story_id in text and (MOCK_IMPORT_RE.search(text) or PATCH_RE.search(text)):
            for i, line in enumerate(lines, start=1):
                if MOCK_ASSERT_RE.search(line):
                    issues.append(
                        Issue(
                            code="testing_anti_pattern_mock_assertion",
                            message=(
                                "Possible anti-pattern: asserting on mock call behavior "
                                "(tests should verify real behavior, not that mocks were called)."
                            ),
                            path=rel,
                            line=i,
                            context=line.strip()[:200],
                        )
                    )

        # Second pass: track markers and flush at every test definition.
        for i, line in enumerate(lines, start=1):
            m_id = MARK_STORY_ID_RE.search(line)
            if m_id:
                resolved = _resolve_marker_value(m_id.group(1), constants)
                if resolved is not None:
                    current_story_id = resolved
                    current_has_target_story = current_story_id == story_id
            else:
                # Comment-form marker is only considered when the line has no
                # decorator-form marker.
                c_id = COMMENT_STORY_ID_RE.search(line)
                if c_id:
                    current_story_id = c_id.group(1)
                    current_has_target_story = current_story_id == story_id

            m_uuid = MARK_STORY_UUID_RE.search(line)
            if m_uuid:
                resolved = _resolve_marker_value(m_uuid.group(1), constants)
                if resolved is not None:
                    current_story_uuid = resolved
            else:
                c_uuid = COMMENT_STORY_UUID_RE.search(line)
                if c_uuid:
                    current_story_uuid = c_uuid.group(1)

            # A single line may carry several plane markers.
            for m_plane in MARK_PLANE_RE.finditer(line):
                resolved = _resolve_marker_value(m_plane.group(1), constants)
                if resolved is not None:
                    current_planes.add(resolved)
            if not MARK_PLANE_RE.search(line):
                c_plane = COMMENT_PLANE_RE.search(line)
                if c_plane:
                    current_planes.add(c_plane.group(1))

            stripped = line.strip()
            if DEF_TEST_RE.match(stripped) or JS_TEST_RE.search(stripped):
                flush(i)

        # Flush markers that trail the last test definition in the file.
        flush(len(lines) if lines else 1)

    return issues, planes_seen, uuids_seen
365
+
366
+
367
def validate(
    *,
    repo_root: Path,
    story_id: str,
    test_paths: list[str] | None = None,
    plane_scope: list[str] | None = None,
) -> tuple[int, list[Issue]]:
    """Validate the contract tests that belong to one story.

    Args:
        repo_root: Repository root; story docs and the default ``tests``
            directory are located relative to it.
        story_id: Id of the story to validate.
        test_paths: Explicit files/directories to scan instead of
            ``repo_root/tests``.
        plane_scope: When given, the orphan_plane_coverage check only
            considers required planes that also appear in this list. Intended
            for validating a single bundle of a multi-plane story so that
            sibling bundles' planes do not trigger spurious repairs.

    Returns:
        ``(exit_code, issues)`` with ``exit_code`` 0 when ``issues`` is empty
        and 2 otherwise (including the unknown-story case).
    """
    user_stories_dir = repo_root / "ai_docs" / "context" / "v2" / "project_docs" / "user_stories"
    tests_dir = repo_root / "tests"

    story = _load_story(repo_root, user_stories_dir, story_id)
    if story is None:
        unknown = Issue(
            code="unknown_story_id",
            message=f"story_id {story_id!r} does not exist in canonical story docs.",
            path=str(user_stories_dir),
        )
        return 2, [unknown]

    files_to_scan = _resolve_test_files(repo_root=repo_root, tests_dir=tests_dir, test_paths=test_paths)
    scan_issues, planes_seen, uuids_seen = _scan_tests_for_story(
        files_to_scan,
        story_id,
        explicitly_scoped_paths=bool(test_paths),
    )
    issues: list[Issue] = list(scan_issues)

    # NOTE(review): this only fires when the expected uuid is absent from every
    # matching test; a wrong uuid next to a correct one is not reported.
    # Confirm that is intended.
    expected_uuid = str(story.get("story_uuid") or "")
    uuid_missing = bool(expected_uuid) and bool(uuids_seen) and expected_uuid not in uuids_seen
    if uuid_missing:
        issues.append(
            Issue(
                code="story_uuid_mismatch",
                message=(
                    f"Tests for story_id {story_id!r} declare story_uuid(s) {sorted(uuids_seen)!r} "
                    f"but story doc uuid is {expected_uuid!r}."
                ),
                path=str(repo_root if test_paths else tests_dir),
            )
        )

    # Per-bundle validation (plane_scope given) checks only the planes owned by
    # this bundle, so bundles still running in parallel do not cause spurious
    # orphan_plane_coverage failures. Full cross-plane coverage is verified at
    # the story-level reconciliation step after all bundles complete.
    story_required = [str(plane) for plane in (story.get("required_planes") or [])]
    if plane_scope is None:
        required = story_required
    else:
        required = [plane for plane in story_required if plane in plane_scope]

    missing_planes = [plane for plane in required if plane not in planes_seen]
    if missing_planes:
        issues.append(
            Issue(
                code="orphan_plane_coverage",
                message=(
                    f"Orphan plane coverage: story_id {story_id!r} requires plane(s) {missing_planes!r} "
                    f"but tests only cover {sorted(planes_seen)!r}."
                ),
                path=str(story.get("path") or user_stories_dir),
            )
        )

    return (2 if issues else 0), issues
444
+
445
+
446
+ def main() -> int:
447
+ ap = argparse.ArgumentParser(description="Validate contract tests scoped to a single story_id")
448
+ ap.add_argument("story_id", help="Story id (e.g. DF2-IMPL-620)")
449
+ ap.add_argument(
450
+ "--repo-root",
451
+ default=".",
452
+ help="Path to devflow_engine repo root (default: inferred)",
453
+ )
454
+ ap.add_argument(
455
+ "--json",
456
+ dest="json_path",
457
+ default=None,
458
+ help="Write JSON report to path",
459
+ )
460
+ ap.add_argument(
461
+ "--test-path",
462
+ dest="test_paths",
463
+ action="append",
464
+ default=None,
465
+ help="Explicit test file/directory to scan (repeatable). Defaults to repo_root/tests when omitted.",
466
+ )
467
+ ap.add_argument(
468
+ "--plane",
469
+ dest="plane_scope",
470
+ action="append",
471
+ default=None,
472
+ help=(
473
+ "Restrict orphan_plane_coverage check to these planes only (repeatable). "
474
+ "Use when validating a single bundle in a multi-plane story to avoid "
475
+ "spurious failures while sibling bundles are still running."
476
+ ),
477
+ )
478
+ args = ap.parse_args()
479
+
480
+ repo_root = Path(args.repo_root).resolve()
481
+ code, issues = validate(
482
+ repo_root=repo_root,
483
+ story_id=str(args.story_id),
484
+ test_paths=args.test_paths,
485
+ plane_scope=args.plane_scope,
486
+ )
487
+
488
+ if args.json_path:
489
+ rewrite_required = any(i.code in REWRITE_REQUIRED_CODES for i in issues)
490
+ out = {
491
+ "story_id": str(args.story_id),
492
+ "issue_count": len(issues),
493
+ "issues": [asdict(i) for i in issues],
494
+ "errors": [asdict(i) for i in issues],
495
+ "outcome": "failed" if issues else "ok",
496
+ "rewrite_required": rewrite_required,
497
+ }
498
+ Path(args.json_path).write_text(json.dumps(out, indent=2, sort_keys=True) + "\n", encoding="utf-8")
499
+
500
+ if issues:
501
+ print(f"STORY TEST VALIDATION FAILED: {len(issues)} issue(s)")
502
+ for i in issues:
503
+ loc = f"{i.path}:{i.line}" if i.line else i.path
504
+ print(f"- {i.code}: {loc}: {i.message}")
505
+ return 2
506
+
507
+ print("ok")
508
+ return 0
509
+
510
+
511
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,133 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+
6
+ from .discovery import get_story_source_paths
7
+ from .markdown_contracts import MANDATORY_CONTRACT_FORMATION_MODE_LINE, parse_story_contracts_from_markdown
8
+ from .markdown_contracts import is_uuid4
9
+
10
+
11
@dataclass(frozen=True)
class StoryValidationDiagnostic:
    """A single immutable finding reported by story-source validation."""

    level: str  # "error" | "warning"
    code: str  # diagnostic code, e.g. "NO_STORY_BLOCKS" or "MISSING_SECTION:<name>"
    message: str  # human-readable description of the finding
    path: str  # where the finding applies: a file path or "<source_path>:<start_line>[:<field>]"
17
+
18
+
19
@dataclass(frozen=True)
class StoryValidationResult:
    """Outcome of validating story sources."""

    ok: bool  # True when no diagnostic has level "error"
    # All diagnostics, warnings included. NOTE: the dataclass is frozen but the
    # list itself is still mutable, and being a list makes instances unhashable.
    diagnostics: list[StoryValidationDiagnostic]
23
+
24
+
25
# Header keys that must appear as "<key>:" at the start of a line in each story block.
_REQUIRED_HEADER_KEYS: list[str] = ["story_uuid", "story_id", "title", "required_planes"]
# Section names that must appear as a "## <name>" heading line in each story block.
_REQUIRED_SECTIONS: list[str] = ["Intent", "Scope", "Acceptance criteria", "Contract test specs"]
27
+
28
+
29
+ def _has_header_key(raw_text: str, key: str) -> bool:
30
+ # Search line-start key with optional whitespace.
31
+ prefix = f"{key}:"
32
+ for ln in raw_text.splitlines():
33
+ if ln.strip().startswith(prefix):
34
+ return True
35
+ return False
36
+
37
+
38
+ def _has_section(raw_text: str, section_name: str) -> bool:
39
+ needle = f"## {section_name}"
40
+ return any(ln.strip() == needle for ln in raw_text.splitlines())
41
+
42
+
43
def validate_story_sources(repo_root: Path) -> StoryValidationResult:
    """Validate every story source document found under ``repo_root``.

    For each story block the following are checked, in this order:

    - all required header keys are present in the raw block text (error)
    - ``story_uuid``, when set, is a uuid-v4 string (error)
    - the mandatory contract formation mode line is present (error)
    - all required markdown sections exist (error)
    - ``plane_oracles:`` is present (warning only; the field is recommended)

    A source file with no story blocks at all yields a single
    ``NO_STORY_BLOCKS`` error.

    Returns:
        A ``StoryValidationResult`` whose ``ok`` is True when no diagnostic has
        level ``"error"``.
    """
    diagnostics: list[StoryValidationDiagnostic] = []

    def report(level: str, code: str, message: str, path: str) -> None:
        # Single place where diagnostics are constructed and collected.
        diagnostics.append(
            StoryValidationDiagnostic(level=level, code=code, message=message, path=path)
        )

    for source in get_story_source_paths(Path(repo_root)):
        markdown = source.read_text(encoding="utf-8")
        story_blocks = parse_story_contracts_from_markdown(markdown, source_path=str(source))
        if not story_blocks:
            report("error", "NO_STORY_BLOCKS", "No story_uuid blocks found in markdown", str(source))
            continue

        for block in story_blocks:
            contract = block.contract
            raw = contract.raw_text
            base_path = f"{contract.source_path}:{block.start_line}"

            # Header keys are looked up in the source text, not the parse output.
            for key in _REQUIRED_HEADER_KEYS:
                if not _has_header_key(raw, key):
                    report(
                        "error",
                        f"MISSING_HEADER_KEY:{key}",
                        f"Missing required header key: {key}",
                        base_path,
                    )

            if contract.story_uuid and not is_uuid4(contract.story_uuid):
                report(
                    "error",
                    "INVALID_UUID",
                    "story_uuid must be a uuid-v4 string",
                    f"{base_path}:story_uuid",
                )

            if MANDATORY_CONTRACT_FORMATION_MODE_LINE not in raw:
                report(
                    "error",
                    "MISSING_CONTRACT_FORMATION_MODE_LINE",
                    "Missing mandatory contract formation mode line",
                    base_path,
                )

            for section in _REQUIRED_SECTIONS:
                if not _has_section(raw, section):
                    report(
                        "error",
                        f"MISSING_SECTION:{section}",
                        f"Missing required section: {section}",
                        base_path,
                    )

            # plane_oracles is recommended, so its absence is only a warning.
            if "plane_oracles:" not in raw:
                report(
                    "warning",
                    "MISSING_RECOMMENDED_FIELD:plane_oracles",
                    "Recommended field plane_oracles is missing",
                    f"{base_path}:plane_oracles",
                )

    has_errors = any(diag.level == "error" for diag in diagnostics)
    return StoryValidationResult(ok=not has_errors, diagnostics=diagnostics)
@@ -0,0 +1,11 @@
1
+ from .dag import DAG_ID, UIGroundingDagResult, run_ui_grounding_dag
2
+ from .pencil_bridge import PencilPreflightArtifact, PencilPreflightResult, run_pencil_preflight
3
+
4
# Names re-exported by this package; all of them come from .dag and .pencil_bridge.
__all__ = [
    "DAG_ID",
    "UIGroundingDagResult",
    "PencilPreflightArtifact",
    "PencilPreflightResult",
    "run_pencil_preflight",
    "run_ui_grounding_dag",
]
@@ -0,0 +1,31 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from pydantic import BaseModel
8
+
9
+ from ..agentic_prompts import load_agentic_prompt_lines
10
+ from ..agentic_runtime import AgentRunEnvelope, run_agent_step
11
+
12
+
13
# Loaded once at import time; prepended to the guidance of every UI-grounding agent step.
UI_GROUNDING_DOCTRINE = load_agentic_prompt_lines("ui_grounding_doctrine")
14
+
15
+
16
def run_ui_grounding_agent_step(
    *,
    repo_root: Path,
    stage_name: str,
    output_model: type[BaseModel],
    context_payload: dict[str, Any],
    guidance: list[str],
    timeout_seconds: int | None = None,
) -> tuple[BaseModel, AgentRunEnvelope]:
    """Run one agent step with the UI-grounding doctrine applied.

    Delegates to ``run_agent_step`` after prefixing the stage name with
    ``ui_grounding_`` and placing ``UI_GROUNDING_DOCTRINE`` ahead of the
    caller's ``guidance``. All other arguments are forwarded unchanged.
    """
    namespaced_stage = f"ui_grounding_{stage_name}"
    full_guidance = UI_GROUNDING_DOCTRINE + guidance
    return run_agent_step(
        repo_root=repo_root,
        stage_name=namespaced_stage,
        output_model=output_model,
        context_payload=context_payload,
        guidance=full_guidance,
        timeout_seconds=timeout_seconds,
    )
25
+
26
+
27
def persist_agent_run(*, pipeline_root: Path, node_id: str, envelope: AgentRunEnvelope) -> Path:
    """Write an agent run envelope to ``<pipeline_root>/agent_runs/<node_id>.json``.

    The parent directory is created when missing. The envelope is serialised
    from ``envelope.model_dump()`` as indented, key-sorted JSON followed by a
    newline.

    Returns:
        The path of the file that was written.
    """
    target = pipeline_root / "agent_runs" / f"{node_id}.json"
    target.parent.mkdir(parents=True, exist_ok=True)
    # NOTE(review): assumes model_dump() yields only JSON-serialisable values; confirm.
    document = json.dumps(envelope.model_dump(), indent=2, sort_keys=True)
    target.write_text(document + "\n", encoding="utf-8")
    return target