devflow-engine 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393)
  1. devflow_engine/__init__.py +3 -0
  2. devflow_engine/agentic_prompts.py +100 -0
  3. devflow_engine/agentic_runtime.py +398 -0
  4. devflow_engine/api_key_flow_harness.py +539 -0
  5. devflow_engine/api_keys.py +357 -0
  6. devflow_engine/bootstrap/__init__.py +2 -0
  7. devflow_engine/bootstrap/provision_from_template.py +84 -0
  8. devflow_engine/cli/__init__.py +0 -0
  9. devflow_engine/cli/app.py +7270 -0
  10. devflow_engine/core/__init__.py +0 -0
  11. devflow_engine/core/config.py +86 -0
  12. devflow_engine/core/logging.py +29 -0
  13. devflow_engine/core/paths.py +45 -0
  14. devflow_engine/core/toml_kv.py +33 -0
  15. devflow_engine/devflow_event_worker.py +1292 -0
  16. devflow_engine/devflow_state.py +201 -0
  17. devflow_engine/devin2/__init__.py +9 -0
  18. devflow_engine/devin2/agent_definition.py +120 -0
  19. devflow_engine/devin2/pi_runner.py +204 -0
  20. devflow_engine/devin_orchestration.py +69 -0
  21. devflow_engine/docs/prompts/anti-patterns.md +42 -0
  22. devflow_engine/docs/prompts/devin-agent-prompt.md +55 -0
  23. devflow_engine/docs/prompts/devin2-agent-prompt.md +81 -0
  24. devflow_engine/docs/prompts/examples/devin-vapi-clone-reference-exchange.json +85 -0
  25. devflow_engine/doctor/__init__.py +2 -0
  26. devflow_engine/doctor/triage.py +140 -0
  27. devflow_engine/error/__init__.py +0 -0
  28. devflow_engine/error/remediation.py +21 -0
  29. devflow_engine/errors/error_solver_dag.py +522 -0
  30. devflow_engine/errors/runtime_observability.py +67 -0
  31. devflow_engine/idea/__init__.py +4 -0
  32. devflow_engine/idea/actors.py +481 -0
  33. devflow_engine/idea/agentic.py +465 -0
  34. devflow_engine/idea/analyze.py +93 -0
  35. devflow_engine/idea/devin_chat_dag.py +1 -0
  36. devflow_engine/idea/diff.py +99 -0
  37. devflow_engine/idea/drafts.py +446 -0
  38. devflow_engine/idea/idea_creation_dag.py +643 -0
  39. devflow_engine/idea/ideation_enrichment.py +355 -0
  40. devflow_engine/idea/ideation_enrichment_worker.py +19 -0
  41. devflow_engine/idea/paths.py +28 -0
  42. devflow_engine/idea/promote.py +53 -0
  43. devflow_engine/idea/redaction.py +27 -0
  44. devflow_engine/idea/repo_tools.py +1277 -0
  45. devflow_engine/idea/response_mode.py +30 -0
  46. devflow_engine/idea/story_pipeline.py +1585 -0
  47. devflow_engine/idea/sufficiency.py +376 -0
  48. devflow_engine/idea/traditional_stories.py +1257 -0
  49. devflow_engine/implementation/__init__.py +0 -0
  50. devflow_engine/implementation/alembic_preflight.py +700 -0
  51. devflow_engine/implementation/dag.py +8450 -0
  52. devflow_engine/implementation/green_gate.py +93 -0
  53. devflow_engine/implementation/prompts.py +108 -0
  54. devflow_engine/implementation/test_runtime.py +623 -0
  55. devflow_engine/integration/__init__.py +19 -0
  56. devflow_engine/integration/agentic.py +66 -0
  57. devflow_engine/integration/dag.py +3539 -0
  58. devflow_engine/integration/prompts.py +114 -0
  59. devflow_engine/integration/supabase_schema.sql +31 -0
  60. devflow_engine/integration/supabase_sync.py +177 -0
  61. devflow_engine/llm/__init__.py +1 -0
  62. devflow_engine/llm/cli_one_shot.py +84 -0
  63. devflow_engine/llm/cli_stream.py +371 -0
  64. devflow_engine/llm/execution_context.py +26 -0
  65. devflow_engine/llm/invoke.py +1322 -0
  66. devflow_engine/llm/provider_api.py +304 -0
  67. devflow_engine/llm/repo_knowledge.py +588 -0
  68. devflow_engine/llm_primitives.py +315 -0
  69. devflow_engine/orchestration.py +62 -0
  70. devflow_engine/planning/__init__.py +0 -0
  71. devflow_engine/planning/analyze_repo.py +92 -0
  72. devflow_engine/planning/render_drafts.py +133 -0
  73. devflow_engine/playground/__init__.py +0 -0
  74. devflow_engine/playground/hooks.py +26 -0
  75. devflow_engine/playwright_workflow/__init__.py +5 -0
  76. devflow_engine/playwright_workflow/dag.py +1317 -0
  77. devflow_engine/process/__init__.py +5 -0
  78. devflow_engine/process/dag.py +59 -0
  79. devflow_engine/project_registration/__init__.py +3 -0
  80. devflow_engine/project_registration/dag.py +1581 -0
  81. devflow_engine/project_registry.py +109 -0
  82. devflow_engine/prompts/devin/generic/prompt.md +6 -0
  83. devflow_engine/prompts/devin/ideation/prompt.md +263 -0
  84. devflow_engine/prompts/devin/ideation/scenarios.md +5 -0
  85. devflow_engine/prompts/devin/ideation_loop/prompt.md +6 -0
  86. devflow_engine/prompts/devin/insight/prompt.md +11 -0
  87. devflow_engine/prompts/devin/insight/scenarios.md +5 -0
  88. devflow_engine/prompts/devin/intake/prompt.md +15 -0
  89. devflow_engine/prompts/devin/iterate/prompt.md +12 -0
  90. devflow_engine/prompts/devin/shared/eval_doctrine.md +9 -0
  91. devflow_engine/prompts/devin/shared/principles.md +246 -0
  92. devflow_engine/prompts/devin_eval/assessment/prompt.md +18 -0
  93. devflow_engine/prompts/idea/api_ideation_agent/prompt.md +8 -0
  94. devflow_engine/prompts/idea/api_insight_agent/prompt.md +8 -0
  95. devflow_engine/prompts/idea/response_doctrine/prompt.md +18 -0
  96. devflow_engine/prompts/implementation/dependency_assessment/prompt.md +12 -0
  97. devflow_engine/prompts/implementation/green/green/prompt.md +11 -0
  98. devflow_engine/prompts/implementation/green/node_config/prompt.md +3 -0
  99. devflow_engine/prompts/implementation/green_review/outcome_review/prompt.md +5 -0
  100. devflow_engine/prompts/implementation/green_review/prior_run_review/prompt.md +5 -0
  101. devflow_engine/prompts/implementation/red/prompt.md +27 -0
  102. devflow_engine/prompts/implementation/redreview/prompt.md +23 -0
  103. devflow_engine/prompts/implementation/redreview_repair/prompt.md +16 -0
  104. devflow_engine/prompts/implementation/setupdoc/prompt.md +10 -0
  105. devflow_engine/prompts/implementation/story_planning/prompt.md +13 -0
  106. devflow_engine/prompts/implementation/test_design/prompt.md +27 -0
  107. devflow_engine/prompts/integration/README.md +185 -0
  108. devflow_engine/prompts/integration/green/example.md +67 -0
  109. devflow_engine/prompts/integration/green/green/prompt.md +10 -0
  110. devflow_engine/prompts/integration/green/node_config/prompt.md +42 -0
  111. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/green/prompt.md +15 -0
  112. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/node_config/prompt.md +42 -0
  113. devflow_engine/prompts/integration/green_enrich/example.md +79 -0
  114. devflow_engine/prompts/integration/green_enrich/green_enrich/prompt.md +9 -0
  115. devflow_engine/prompts/integration/green_enrich/node_config/prompt.md +41 -0
  116. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/green_enrich/prompt.md +14 -0
  117. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  118. devflow_engine/prompts/integration/red/code_repair/prompt.md +12 -0
  119. devflow_engine/prompts/integration/red/example.md +152 -0
  120. devflow_engine/prompts/integration/red/node_config/prompt.md +86 -0
  121. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  122. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/node_config/prompt.md +84 -0
  123. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red/prompt.md +16 -0
  124. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red_repair/prompt.md +15 -0
  125. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  126. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/node_config/prompt.md +84 -0
  127. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/red_repair/prompt.md +11 -0
  128. devflow_engine/prompts/integration/red/red/prompt.md +11 -0
  129. devflow_engine/prompts/integration/red/red_repair/prompt.md +12 -0
  130. devflow_engine/prompts/integration/red_review/example.md +71 -0
  131. devflow_engine/prompts/integration/red_review/node_config/prompt.md +41 -0
  132. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  133. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/red_review/prompt.md +15 -0
  134. devflow_engine/prompts/integration/red_review/red_review/prompt.md +9 -0
  135. devflow_engine/prompts/integration/resolve/example.md +111 -0
  136. devflow_engine/prompts/integration/resolve/node_config/prompt.md +64 -0
  137. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/node_config/prompt.md +64 -0
  138. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_implicated_users/prompt.md +15 -0
  139. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_side_effects/prompt.md +15 -0
  140. devflow_engine/prompts/integration/resolve/resolve_implicated_users/prompt.md +10 -0
  141. devflow_engine/prompts/integration/resolve/resolve_side_effects/prompt.md +10 -0
  142. devflow_engine/prompts/integration/validate/build_idea_acceptance_coverage/prompt.md +12 -0
  143. devflow_engine/prompts/integration/validate/code_repair/prompt.md +13 -0
  144. devflow_engine/prompts/integration/validate/example.md +143 -0
  145. devflow_engine/prompts/integration/validate/node_config/prompt.md +87 -0
  146. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  147. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/node_config/prompt.md +67 -0
  148. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_enrich_gate/prompt.md +17 -0
  149. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_repair/prompt.md +16 -0
  150. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  151. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/node_config/prompt.md +67 -0
  152. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/validate_repair/prompt.md +9 -0
  153. devflow_engine/prompts/integration/validate/validate_enrich_gate/prompt.md +10 -0
  154. devflow_engine/prompts/integration/validate/validate_repair/prompt.md +20 -0
  155. devflow_engine/prompts/integration/write_workflows/example.md +100 -0
  156. devflow_engine/prompts/integration/write_workflows/node_config/prompt.md +44 -0
  157. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/node_config/prompt.md +44 -0
  158. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/write_workflows/prompt.md +17 -0
  159. devflow_engine/prompts/integration/write_workflows/write_workflows/prompt.md +11 -0
  160. devflow_engine/prompts/iterate/README.md +7 -0
  161. devflow_engine/prompts/iterate/coder/prompt.md +11 -0
  162. devflow_engine/prompts/iterate/framer/prompt.md +11 -0
  163. devflow_engine/prompts/iterate/iterator/prompt.md +13 -0
  164. devflow_engine/prompts/iterate/observer/prompt.md +11 -0
  165. devflow_engine/prompts/recovery/diagnosis/prompt.md +7 -0
  166. devflow_engine/prompts/recovery/execution/prompt.md +8 -0
  167. devflow_engine/prompts/recovery/execution_verification/prompt.md +7 -0
  168. devflow_engine/prompts/recovery/failure_investigation/prompt.md +10 -0
  169. devflow_engine/prompts/recovery/preflight_health_repo_repair/prompt.md +8 -0
  170. devflow_engine/prompts/recovery/remediation_execution/prompt.md +11 -0
  171. devflow_engine/prompts/recovery/root_cause_investigation/prompt.md +12 -0
  172. devflow_engine/prompts/scope_idea/doctrine/prompt.md +7 -0
  173. devflow_engine/prompts/source_doc_eval/document/prompt.md +6 -0
  174. devflow_engine/prompts/source_doc_eval/targeted_mutation/prompt.md +9 -0
  175. devflow_engine/prompts/source_doc_mutation/domain_entities/prompt.md +6 -0
  176. devflow_engine/prompts/source_doc_mutation/product_brief/prompt.md +6 -0
  177. devflow_engine/prompts/source_doc_mutation/project_doc_coherence/prompt.md +7 -0
  178. devflow_engine/prompts/source_doc_mutation/project_doc_render/prompt.md +9 -0
  179. devflow_engine/prompts/source_doc_mutation/source_doc_coherence/prompt.md +5 -0
  180. devflow_engine/prompts/source_doc_mutation/source_doc_enrichment_coherence/prompt.md +6 -0
  181. devflow_engine/prompts/source_doc_mutation/user_workflows/prompt.md +6 -0
  182. devflow_engine/prompts/source_scope/doctrine/prompt.md +10 -0
  183. devflow_engine/prompts/ui_grounding/doctrine/prompt.md +7 -0
  184. devflow_engine/recovery/__init__.py +3 -0
  185. devflow_engine/recovery/dag.py +2609 -0
  186. devflow_engine/recovery/models.py +220 -0
  187. devflow_engine/refactor.py +93 -0
  188. devflow_engine/registry/__init__.py +1 -0
  189. devflow_engine/registry/cards.py +238 -0
  190. devflow_engine/registry/domain_normalize.py +60 -0
  191. devflow_engine/registry/effects.py +65 -0
  192. devflow_engine/registry/enforce_report.py +150 -0
  193. devflow_engine/registry/module_cards_classify.py +164 -0
  194. devflow_engine/registry/module_cards_draft.py +184 -0
  195. devflow_engine/registry/module_cards_gate.py +59 -0
  196. devflow_engine/registry/packages.py +347 -0
  197. devflow_engine/registry/pathways.py +323 -0
  198. devflow_engine/review/__init__.py +11 -0
  199. devflow_engine/review/dag.py +588 -0
  200. devflow_engine/review/review_story.py +67 -0
  201. devflow_engine/scope_idea/__init__.py +3 -0
  202. devflow_engine/scope_idea/agentic.py +39 -0
  203. devflow_engine/scope_idea/dag.py +1069 -0
  204. devflow_engine/scope_idea/models.py +175 -0
  205. devflow_engine/skills/builtins/devflow/queue_failure_investigation/SKILL.md +112 -0
  206. devflow_engine/skills/builtins/devflow/queue_idea_to_story/SKILL.md +120 -0
  207. devflow_engine/skills/builtins/devflow/queue_integration/SKILL.md +105 -0
  208. devflow_engine/skills/builtins/devflow/queue_recovery/SKILL.md +108 -0
  209. devflow_engine/skills/builtins/devflow/queue_runtime_core/SKILL.md +155 -0
  210. devflow_engine/skills/builtins/devflow/queue_story_implementation/SKILL.md +122 -0
  211. devflow_engine/skills/builtins/devin/idea_to_story_handoff/SKILL.md +120 -0
  212. devflow_engine/skills/builtins/devin/ideation/SKILL.md +168 -0
  213. devflow_engine/skills/builtins/devin/ideation/state-and-phrasing-reference.md +18 -0
  214. devflow_engine/skills/builtins/devin/insight/SKILL.md +22 -0
  215. devflow_engine/skills/registry.example.yaml +42 -0
  216. devflow_engine/source_doc_assumptions.py +291 -0
  217. devflow_engine/source_doc_mutation_dag.py +1606 -0
  218. devflow_engine/source_doc_mutation_eval.py +417 -0
  219. devflow_engine/source_doc_mutation_worker.py +25 -0
  220. devflow_engine/source_docs_schema.py +207 -0
  221. devflow_engine/source_docs_updater.py +309 -0
  222. devflow_engine/source_scope/__init__.py +15 -0
  223. devflow_engine/source_scope/agentic.py +45 -0
  224. devflow_engine/source_scope/dag.py +1626 -0
  225. devflow_engine/source_scope/models.py +177 -0
  226. devflow_engine/stores/__init__.py +0 -0
  227. devflow_engine/stores/execution_store.py +3534 -0
  228. devflow_engine/story/__init__.py +0 -0
  229. devflow_engine/story/contracts.py +160 -0
  230. devflow_engine/story/discovery.py +47 -0
  231. devflow_engine/story/evidence.py +118 -0
  232. devflow_engine/story/hashing.py +27 -0
  233. devflow_engine/story/implemented_queue_purge.py +148 -0
  234. devflow_engine/story/indexer.py +105 -0
  235. devflow_engine/story/io.py +20 -0
  236. devflow_engine/story/markdown_contracts.py +298 -0
  237. devflow_engine/story/reconciliation.py +408 -0
  238. devflow_engine/story/validate_stories.py +149 -0
  239. devflow_engine/story/validate_tests_story.py +512 -0
  240. devflow_engine/story/validation.py +133 -0
  241. devflow_engine/ui_grounding/__init__.py +11 -0
  242. devflow_engine/ui_grounding/agentic.py +31 -0
  243. devflow_engine/ui_grounding/dag.py +874 -0
  244. devflow_engine/ui_grounding/models.py +224 -0
  245. devflow_engine/ui_grounding/pencil_bridge.py +247 -0
  246. devflow_engine/vendor/__init__.py +0 -0
  247. devflow_engine/vendor/datalumina_genai/__init__.py +11 -0
  248. devflow_engine/vendor/datalumina_genai/core/__init__.py +0 -0
  249. devflow_engine/vendor/datalumina_genai/core/exceptions.py +9 -0
  250. devflow_engine/vendor/datalumina_genai/core/nodes/__init__.py +0 -0
  251. devflow_engine/vendor/datalumina_genai/core/nodes/agent.py +48 -0
  252. devflow_engine/vendor/datalumina_genai/core/nodes/agent_streaming_node.py +26 -0
  253. devflow_engine/vendor/datalumina_genai/core/nodes/base.py +89 -0
  254. devflow_engine/vendor/datalumina_genai/core/nodes/concurrent.py +30 -0
  255. devflow_engine/vendor/datalumina_genai/core/nodes/router.py +69 -0
  256. devflow_engine/vendor/datalumina_genai/core/schema.py +72 -0
  257. devflow_engine/vendor/datalumina_genai/core/task.py +52 -0
  258. devflow_engine/vendor/datalumina_genai/core/validate.py +139 -0
  259. devflow_engine/vendor/datalumina_genai/core/workflow.py +200 -0
  260. devflow_engine/worker.py +1086 -0
  261. devflow_engine/worker_guard.py +233 -0
  262. devflow_engine-1.0.0.dist-info/METADATA +235 -0
  263. devflow_engine-1.0.0.dist-info/RECORD +393 -0
  264. devflow_engine-1.0.0.dist-info/WHEEL +4 -0
  265. devflow_engine-1.0.0.dist-info/entry_points.txt +3 -0
  266. devin/__init__.py +6 -0
  267. devin/dag.py +58 -0
  268. devin/dag_two_arm.py +138 -0
  269. devin/devin_chat_scenario_catalog.json +588 -0
  270. devin/devin_eval.py +677 -0
  271. devin/nodes/__init__.py +0 -0
  272. devin/nodes/ideation/__init__.py +0 -0
  273. devin/nodes/ideation/node.py +195 -0
  274. devin/nodes/ideation/playground.py +267 -0
  275. devin/nodes/ideation/prompt.md +65 -0
  276. devin/nodes/ideation/scenarios/continue_refinement.py +13 -0
  277. devin/nodes/ideation/scenarios/continue_refinement_evals.py +18 -0
  278. devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py +17 -0
  279. devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py +16 -0
  280. devin/nodes/ideation/scenarios/large_idea_split.py +4 -0
  281. devin/nodes/ideation/scenarios/large_idea_split_evals.py +17 -0
  282. devin/nodes/ideation/scenarios/source_documentation_added.py +4 -0
  283. devin/nodes/ideation/scenarios/source_documentation_added_evals.py +16 -0
  284. devin/nodes/ideation/scenarios/user_says_create_it.py +30 -0
  285. devin/nodes/ideation/scenarios/user_says_create_it_evals.py +23 -0
  286. devin/nodes/ideation/scenarios/vague_idea.py +16 -0
  287. devin/nodes/ideation/scenarios/vague_idea_evals.py +47 -0
  288. devin/nodes/ideation/tools.json +312 -0
  289. devin/nodes/insight/__init__.py +0 -0
  290. devin/nodes/insight/node.py +49 -0
  291. devin/nodes/insight/playground.py +154 -0
  292. devin/nodes/insight/prompt.md +61 -0
  293. devin/nodes/insight/scenarios/architecture_pattern_query.py +15 -0
  294. devin/nodes/insight/scenarios/architecture_pattern_query_evals.py +25 -0
  295. devin/nodes/insight/scenarios/codebase_exploration.py +15 -0
  296. devin/nodes/insight/scenarios/codebase_exploration_evals.py +23 -0
  297. devin/nodes/insight/scenarios/devin_ideation_routing.py +19 -0
  298. devin/nodes/insight/scenarios/devin_ideation_routing_evals.py +39 -0
  299. devin/nodes/insight/scenarios/devin_insight_routing.py +20 -0
  300. devin/nodes/insight/scenarios/devin_insight_routing_evals.py +40 -0
  301. devin/nodes/insight/scenarios/operational_debugging.py +15 -0
  302. devin/nodes/insight/scenarios/operational_debugging_evals.py +23 -0
  303. devin/nodes/insight/scenarios/operational_question.py +9 -0
  304. devin/nodes/insight/scenarios/operational_question_evals.py +8 -0
  305. devin/nodes/insight/scenarios/queue_status.py +15 -0
  306. devin/nodes/insight/scenarios/queue_status_evals.py +23 -0
  307. devin/nodes/insight/scenarios/source_doc_explanation.py +14 -0
  308. devin/nodes/insight/scenarios/source_doc_explanation_evals.py +21 -0
  309. devin/nodes/insight/scenarios/worker_state_check.py +15 -0
  310. devin/nodes/insight/scenarios/worker_state_check_evals.py +22 -0
  311. devin/nodes/insight/tools.json +126 -0
  312. devin/nodes/intake/__init__.py +0 -0
  313. devin/nodes/intake/node.py +27 -0
  314. devin/nodes/intake/playground.py +47 -0
  315. devin/nodes/intake/prompt.md +12 -0
  316. devin/nodes/intake/scenarios/ideation_routing.py +4 -0
  317. devin/nodes/intake/scenarios/ideation_routing_evals.py +5 -0
  318. devin/nodes/intake/scenarios/insight_routing.py +4 -0
  319. devin/nodes/intake/scenarios/insight_routing_evals.py +5 -0
  320. devin/nodes/iterate/README.md +44 -0
  321. devin/nodes/iterate/__init__.py +1 -0
  322. devin/nodes/iterate/_archived_design_stages/01-objectives-requirements.md +112 -0
  323. devin/nodes/iterate/_archived_design_stages/02-evals.md +131 -0
  324. devin/nodes/iterate/_archived_design_stages/03-tools-and-boundaries.md +110 -0
  325. devin/nodes/iterate/_archived_design_stages/04-harness-and-playground.md +32 -0
  326. devin/nodes/iterate/_archived_design_stages/05-prompt-deferred.md +11 -0
  327. devin/nodes/iterate/_archived_design_stages/coder_agent_design/01-objectives-requirements.md +20 -0
  328. devin/nodes/iterate/_archived_design_stages/coder_agent_design/02-evals.md +8 -0
  329. devin/nodes/iterate/_archived_design_stages/coder_agent_design/03-tools-and-boundaries.md +14 -0
  330. devin/nodes/iterate/_archived_design_stages/coder_agent_design/04-harness-and-playground.md +12 -0
  331. devin/nodes/iterate/_archived_design_stages/framer_agent_design/01-objectives-requirements.md +20 -0
  332. devin/nodes/iterate/_archived_design_stages/framer_agent_design/02-evals.md +8 -0
  333. devin/nodes/iterate/_archived_design_stages/framer_agent_design/03-tools-and-boundaries.md +13 -0
  334. devin/nodes/iterate/_archived_design_stages/framer_agent_design/04-harness-and-playground.md +12 -0
  335. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md +25 -0
  336. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/02-evals.md +9 -0
  337. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/03-tools-and-boundaries.md +14 -0
  338. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md +12 -0
  339. devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md +20 -0
  340. devin/nodes/iterate/_archived_design_stages/observer_agent_design/02-evals.md +8 -0
  341. devin/nodes/iterate/_archived_design_stages/observer_agent_design/03-tools-and-boundaries.md +14 -0
  342. devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md +13 -0
  343. devin/nodes/iterate/agent-roles.md +89 -0
  344. devin/nodes/iterate/agents/README.md +10 -0
  345. devin/nodes/iterate/artifacts.md +504 -0
  346. devin/nodes/iterate/contract.md +100 -0
  347. devin/nodes/iterate/eval-plan.md +74 -0
  348. devin/nodes/iterate/node.py +100 -0
  349. devin/nodes/iterate/pipeline/README.md +13 -0
  350. devin/nodes/iterate/playground-contract.md +76 -0
  351. devin/nodes/iterate/prompt.md +11 -0
  352. devin/nodes/iterate/scenarios/README.md +38 -0
  353. devin/nodes/iterate/scenarios/artifact-and-loop-scenarios.md +101 -0
  354. devin/nodes/iterate/scenarios/coder_artifact_alignment.py +32 -0
  355. devin/nodes/iterate/scenarios/coder_artifact_alignment_evals.py +45 -0
  356. devin/nodes/iterate/scenarios/coder_bounded_fix.py +27 -0
  357. devin/nodes/iterate/scenarios/coder_bounded_fix_evals.py +45 -0
  358. devin/nodes/iterate/scenarios/devin_iterate_routing.py +21 -0
  359. devin/nodes/iterate/scenarios/devin_iterate_routing_evals.py +36 -0
  360. devin/nodes/iterate/scenarios/framer_scope_boundary.py +25 -0
  361. devin/nodes/iterate/scenarios/framer_scope_boundary_evals.py +57 -0
  362. devin/nodes/iterate/scenarios/framer_task_framing.py +25 -0
  363. devin/nodes/iterate/scenarios/framer_task_framing_evals.py +58 -0
  364. devin/nodes/iterate/scenarios/iterate_error_fix.py +21 -0
  365. devin/nodes/iterate/scenarios/iterate_error_fix_evals.py +39 -0
  366. devin/nodes/iterate/scenarios/iterate_quick_change.py +21 -0
  367. devin/nodes/iterate/scenarios/iterate_quick_change_evals.py +35 -0
  368. devin/nodes/iterate/scenarios/iterate_to_idea_promotion.py +23 -0
  369. devin/nodes/iterate/scenarios/iterate_to_idea_promotion_evals.py +53 -0
  370. devin/nodes/iterate/scenarios/iterate_to_insight_reroute.py +23 -0
  371. devin/nodes/iterate/scenarios/iterate_to_insight_reroute_evals.py +53 -0
  372. devin/nodes/iterate/scenarios/observer_evidence_seam.py +28 -0
  373. devin/nodes/iterate/scenarios/observer_evidence_seam_evals.py +55 -0
  374. devin/nodes/iterate/scenarios/observer_repro_creation.py +28 -0
  375. devin/nodes/iterate/scenarios/observer_repro_creation_evals.py +45 -0
  376. devin/nodes/iterate/scenarios/routing-matrix.md +45 -0
  377. devin/nodes/shared/__init__.py +0 -0
  378. devin/nodes/shared/filemaker_expert.md +80 -0
  379. devin/nodes/shared/filemaker_expert.py +354 -0
  380. devin/nodes/shared/filemaker_expert_eval/runner.py +176 -0
  381. devin/nodes/shared/filemaker_expert_eval/scenarios.json +65 -0
  382. devin/nodes/shared/goldilocks_advisor_eval/runner.py +214 -0
  383. devin/nodes/shared/goldilocks_advisor_eval/scenarios.json +58 -0
  384. devin/nodes/shared/helpers.py +156 -0
  385. devin/nodes/shared/idea_compliance_advisor_eval/runner.py +252 -0
  386. devin/nodes/shared/idea_compliance_advisor_eval/scenarios.json +75 -0
  387. devin/nodes/shared/models.py +44 -0
  388. devin/nodes/shared/post.py +40 -0
  389. devin/nodes/shared/router.py +107 -0
  390. devin/nodes/shared/tools.py +191 -0
  391. devin/shared/devin-chat-rubric.md +237 -0
  392. devin/shared/devin-chat-scenario-suite.md +90 -0
  393. devin/shared/eval_doctrine.md +9 -0
@@ -0,0 +1,12 @@
1
+ # Iterator harness and playground
2
+
3
+ The harness should be able to inspect:
4
+ - readiness decisions
5
+ - respawn decisions
6
+ - terminal verdict selection
7
+ - consistency between verdict, artifacts, and observed verification state
8
+
9
+ Key fixtures:
10
+ - near miss then repair success
11
+ - missing truth blocks coding
12
+ - unauthorized scope growth forces promotion
@@ -0,0 +1,20 @@
1
+ # Observer, objectives and requirements
2
+
3
+ ## Objective
4
+
5
+ Convert the framed task into observable truth that can safely govern coding and completion judgment.
6
+
7
+ ## Requirements
8
+
9
+ - inspect logs, traces, state, or repro surfaces relevant to the framed task
10
+ - confirm an error when evidence exists
11
+ - report `not_confirmed` or `inconclusive` honestly when no such evidence exists
12
+ - define a bounded failing seam for targeted improvements when direct repro is not the right frame
13
+ - document the expected green condition
14
+ - recommend whether the task is ready for Coder
15
+
16
+ ## Derived non-goals
17
+
18
+ - do not implement fixes
19
+ - do not invent evidence
20
+ - do not claim completion
@@ -0,0 +1,8 @@
1
+ # Observer evals
2
+
3
+ - confirms a reported failure when logs or repro evidence support it
4
+ - reports `not_confirmed` honestly when the issue cannot be reproduced
5
+ - creates a bounded red seam for a targeted improvement
6
+ - provides repro steps another role can execute
7
+ - records the expected green condition clearly
8
+ - returns `needs_more_context` when truth is genuinely insufficient
@@ -0,0 +1,14 @@
1
+ # Observer tools and boundaries
2
+
3
+ ## Needed capabilities
4
+
5
+ - inspect logs and traces
6
+ - run bounded repro checks
7
+ - inspect relevant repo or runtime surfaces needed to define a failing seam
8
+ - write the observation artifact
9
+
10
+ ## Boundary rules
11
+
12
+ - should not silently patch code while observing
13
+ - should not blur evidence with assumptions
14
+ - should not escalate weak signals into false certainty
@@ -0,0 +1,13 @@
1
+ # Observer harness and playground
2
+
3
+ The harness should inspect:
4
+ - evidence summaries
5
+ - repro steps
6
+ - repeatability status
7
+ - green-condition definition
8
+ - readiness verdict
9
+
10
+ Key fixtures:
11
+ - error confirmed by logs
12
+ - user-reported error not confirmed
13
+ - targeted improvement with a red seam
@@ -0,0 +1,89 @@
1
+ # Iterate agent roles and boundaries
2
+
3
+ ## System shape
4
+
5
+ The iterate lane has four named agents, but not four equal peers.
6
+
7
+ - `Iterator` is the accountable lane owner
8
+ - `Framer` and `Observer` are specialist advisors
9
+ - `Coder` is a supervised implementation worker
10
+
11
+ That role split should stay visible in every stage of the design pipeline.
12
+
13
+ ## Iterator
14
+
15
+ Primary owner of the task.
16
+
17
+ Owns:
18
+ - iterate-lane accountability
19
+ - scope discipline
20
+ - advisor coordination
21
+ - readiness judgment before coding
22
+ - spawning and supervising Coder
23
+ - final validation and completion judgment
24
+ - promotion or blockage decisions
25
+
26
+ Must not:
27
+ - act as the primary code writer
28
+ - redefine Framer or Observer truth casually
29
+ - claim success without verification evidence
30
+
31
+ ## Framer
32
+
33
+ Turns raw user text and relevant history into a bounded task artifact.
34
+
35
+ Owns:
36
+ - task typing
37
+ - locating the likely surface
38
+ - separating facts from assumptions
39
+ - writing explicit success criteria
40
+ - identifying blocking versus nonblocking unknowns
41
+ - recommending whether to stay in iterate, investigate first, or promote to idea
42
+
43
+ Must not:
44
+ - do observation work that belongs to Observer
45
+ - implement code
46
+ - broaden scope beyond the ask
47
+
48
+ ## Observer
49
+
50
+ Converts the framed task into observable truth.
51
+
52
+ Owns:
53
+ - log inspection
54
+ - minimal repro attempts for errors
55
+ - bounded failing seams for improvements
56
+ - repeatability judgment
57
+ - explicit green-condition definition
58
+ - recommendation on coding readiness
59
+
60
+ Must not:
61
+ - silently fix the issue
62
+ - invent evidence
63
+ - claim completion
64
+
65
+ ## Coder
66
+
67
+ Supervised implementation worker.
68
+
69
+ Owns:
70
+ - implementing the scoped delta
71
+ - using task and observation artifacts as the contract
72
+ - running the narrowest valid verification seam
73
+ - reporting what changed, what passed, what failed, and what remains blocked
74
+
75
+ Must not:
76
+ - rewrite the task contract
77
+ - broaden scope because cleanup seems appealing
78
+ - self-certify completion without Iterator validation
79
+
80
+ ## Ownership rule
81
+
82
+ Iterator owns truth and completion judgment.
83
+ Coder owns implementation attempts.
84
+ Framer and Observer are specialized advisors whose artifacts constrain the coding loop.
85
+
86
+ ## Where the detailed design now lives
87
+
88
+ - cross-agent stage docs: `pipeline/`
89
+ - per-agent stage docs: `agents/`
@@ -0,0 +1,10 @@
1
+ # Per-agent design docs
2
+
3
+ Each agent has its own folder with the same ordered design stages:
4
+
5
+ 1. objectives and requirements
6
+ 2. evals
7
+ 3. tools and boundaries
8
+ 4. harness and playground
9
+
10
+ This keeps review aligned with Marcus's pipeline while still making each role inspectable on its own.
@@ -0,0 +1,504 @@
1
+ # Iterate artifact contracts
2
+
3
+ ## Why artifacts matter in this lane
4
+
5
+ The iterate lane is not a loose conversation between four peers. It is an advisor-primary loop with one accountable owner.
6
+
7
+ Artifacts are what keep that structure real:
8
+ - `Framer` converts the ask into a bounded task contract
9
+ - `Observer` converts the task into observable truth
10
+ - `Coder` records supervised implementation attempts
11
+ - `Iterator` decides readiness, respawns, blockage, promotion, and final disposition
12
+
13
+ If those judgments are not expressed in shared artifacts, the lane will drift into hidden state and role collapse.
14
+
15
+ ## Pipeline position
16
+
17
+ These artifact contracts support Marcus's pipeline order:
18
+ 1. objectives and requirements define what truth the lane needs
19
+ 2. evals define how weak or dishonest artifacts would fail
20
+ 3. tools and boundaries define who can create and modify which artifact
21
+ 4. harness and playground later test the artifact flow
22
+
23
+ This file therefore describes the shared contract surface that the four-agent model depends on.
24
+
25
+ ## Canonical task primitive
26
+
27
+ `IterateTask` is smaller than a story. It is a targeted change packet for work that still fits iterate.
28
+
29
+ Suggested base shape:
30
+
31
+ ```yaml
32
+ IterateTask:
33
+ task_id: string
34
+ project_id: string
35
+ source_message: string
36
+ turn_history: []
37
+ task_type: error_fix | quick_change | targeted_improvement
38
+ where:
39
+ surface: string | null
40
+ route_hint: string | null
41
+ component_hint: string | null
42
+ file_hint: string | null
43
+ function_hint: string | null
44
+ current_behavior: string
45
+ desired_behavior: string
46
+ success_criteria: string[]
47
+ constraints: string[]
48
+ assumptions: string[]
49
+ blocking_unknowns: string[]
50
+ nonblocking_unknowns: string[]
51
+ promotion_recommendation: stay_iterate | investigate_first | promote_to_idea
52
+ expected_user_outcome: string | null
53
+ what_happened: string | null
54
+ log_hint: string | null
55
+ repro_hint: string | null
56
+ task_details:
57
+ error_fix?:
58
+ suspected_failure_mode: string | null
59
+ user_reported_error_text: string | null
60
+ quick_change?:
61
+ requested_delta_summary: string
62
+ acceptance_examples: string[]
63
+ targeted_improvement?:
64
+ target_metric_or_quality: string | null
65
+ bounded_red_seam_hint: string | null
66
+ ```
67
+
68
+ The payload above is the business body of the task. Durable artifact files should wrap that body in a small revision envelope rather than inventing a different schema per write.
69
+ ## Persisted location decision
70
+
71
+ Iterate artifacts should live under a lane-owned root:
72
+
73
+ ```text
74
+ .devflow/iterate/<task_id>/task_artifact.json
75
+ .devflow/iterate/<task_id>/observation_artifact.json
76
+ .devflow/iterate/<task_id>/iterator_run.json
77
+ .devflow/iterate/<task_id>/promotion_handoff.json
78
+ .devflow/iterate/<task_id>/attempts/<attempt_id>/verification_summary.json
79
+ .devflow/iterate/<task_id>/attempts/<attempt_id>/verifier_output.json
80
+ ```
81
+
82
+ Where `attempt_id` uses the exact format `attempt-<NNN...>`:
83
+ - prefix is always the literal `attempt-`
84
+ - suffix is a zero-padded decimal ordinal starting at `001`
85
+ - the numeric portion is at least 3 digits and may widen past 3 digits only after `999`
86
+ - ids are assigned monotonically within one `task_id` and are never reused
87
+
88
+ Examples:
89
+ - first attempt: `attempt-001`
90
+ - second attempt: `attempt-002`
91
+ - one thousandth attempt: `attempt-1000`
92
+
93
+ This is now the preferred contract, not just a placeholder suggestion.
94
+
95
+ Why this root is the right default:
96
+ - it matches the existing repo convention of lane-specific durable artifacts under `.devflow/<lane>/...`
97
+ - the iterate lane owns a task-scale execution record, not an arbitrary conversation transcript
98
+ - `task_id` can carry or reference session lineage without making artifact lookup conversation-scoped
99
+ - promotion out of iterate should preserve the iterate record as the historical source of the attempted task, rather than relocating it mid-run
100
+
101
+ If cross-lane lineage matters, artifacts should point to upstream or downstream ids in metadata. The storage root should stay stable.
102
+
103
+ ## Shared artifact chain
104
+
105
+ The four-agent lane should share this progression:
106
+ 1. `Framer` writes or amends `task_artifact`
107
+ 2. `Observer` writes `observation_artifact` against that task contract
108
+ 3. `Iterator` records a readiness decision in `iterator_run`
109
+ 4. `Coder` appends attempt records under `iterator_run`
110
+ 5. per-attempt verifier details are stored under `attempts/<attempt_id>/`
111
+ 6. `Iterator` records respawn reasons, blockage, promotion, or verified completion
112
+ 7. if the lane exits to `idea` or `insight`, `Iterator` also writes `promotion_handoff.json`
113
+
114
+ This progression is the operating contract. It keeps advisors constraining the worker and keeps Iterator accountable for the final truth claim.
115
+
116
+ ## Shared revision policy
117
+
118
+ Artifact revisioning should use explicit monotonic revision numbers, not content hashes and not opaque version strings.
119
+
120
+ Recommended envelope for durable top-level iterate artifacts:
121
+
122
+ ```yaml
123
+ artifact_envelope:
124
+ artifact_kind: task_artifact | observation_artifact | iterator_run | promotion_handoff
125
+ artifact_id: string
126
+ task_id: string
127
+ revision: integer
128
+ supersedes_revision: integer | null
129
+ updated_at: string
130
+ updated_by: Framer | Observer | Iterator | Coder
131
+ payload: {...}
132
+ ```
133
+
134
+ Policy decisions:
135
+ - `revision` starts at `1` and increments by `1` each time the same top-level artifact file is rewritten
136
+ - `supersedes_revision` is `null` on first write and otherwise points to the immediately prior revision number
137
+ - readers should treat `(artifact_kind, task_id, revision)` as the stable version identity for iterate artifacts
138
+ - `task_artifact.json`, `observation_artifact.json`, `iterator_run.json`, and `promotion_handoff.json` are revised in place using this envelope
139
+ - attempt-scoped files under `attempts/<attempt_id>/` are immutable per attempt by default and should not grow their own revision ladder unless a later design proves that necessary
140
+
141
+ Why this is the right default:
142
+ - monotonic integers are easy for agents, harnesses, and reviewers to compare
143
+ - revision numbers make readiness and promotion decisions auditable without introducing content-addressing complexity
144
+ - immutable attempt artifacts avoid accidental rewrites of verifier evidence
145
+ - the policy matches the lane's need for clear supervision history more than sophisticated storage deduplication
146
+
147
+ ## Task artifact
148
+
149
+ ### Purpose
150
+ The `task_artifact` defines the bounded ask before coding begins.
151
+
152
+ It should let any reviewer answer:
153
+ - what existing surface is being changed
154
+ - what is happening now
155
+ - what should happen instead
156
+ - how success will be judged
157
+ - what uncertainty still exists
158
+ - whether the task still belongs in iterate
159
+
160
+ ### Required base fields
161
+ Inside the `payload` body, the required task fields are:
162
+ - `task_id`
163
+ - `task_type`
164
+ - `project_id`
165
+ - `source_message`
166
+ - `where`
167
+ - `current_behavior`
168
+ - `desired_behavior`
169
+ - `success_criteria`
170
+ - `constraints`
171
+ - `assumptions`
172
+ - `blocking_unknowns`
173
+ - `nonblocking_unknowns`
174
+ - `promotion_recommendation`
175
+ - `task_details`
176
+
177
+ At the envelope level, `task_artifact.json` also requires:
178
+ - `artifact_kind=task_artifact`
179
+ - `artifact_id`
180
+ - `revision`
181
+ - `supersedes_revision`
182
+ - `updated_at`
183
+ - `updated_by=Framer`
184
+
185
+ ### Schema strategy decision
186
+ The task contract should use one shared base schema with a discriminated `task_type` plus a nested `task_details` section for type-specific requirements.
187
+
188
+ Why this is the right shape:
189
+ - all iterate tasks still need the same cross-agent spine for framing, observation, supervision, and completion
190
+ - `Framer`, `Observer`, and `Iterator` need one stable place to read core fields regardless of subtype
191
+ - the lane currently has a small, known subtype set, so a discriminator is simpler than maintaining separate top-level schemas
192
+ - subtype-specific strictness can still grow without fragmenting the shared contract
193
+
194
+ The design should therefore avoid separate `error_fix_task_artifact`, `quick_change_task_artifact`, and `targeted_improvement_task_artifact` roots unless the lane later proves the shared spine is breaking down.
195
+
196
+ ### Quality bar
197
+ A valid `task_artifact` is:
198
+ - specific enough that Coder does not need to reinterpret the request
199
+ - narrow enough to stay task-scale
200
+ - explicit about facts versus assumptions
201
+ - honest about missing information
202
+ - written so Observer can derive a real verification seam
203
+
204
+ ## Observation artifact
205
+
206
+ ### Purpose
207
+ The `observation_artifact` turns the framed task into observable truth.
208
+
209
+ It should let any reviewer answer:
210
+ - what evidence exists
211
+ - whether the failure or gap was confirmed
212
+ - how repeatable the issue is
213
+ - what exact condition must turn green for Iterator to approve completion
214
+
215
+ ### Required fields
216
+ Inside the `payload` body, the required observation fields are:
217
+ - `task_id`
218
+ - `mode`
219
+ - `evidence_summary`
220
+ - `log_sources`
221
+ - `log_evidence`
222
+ - `repro_steps`
223
+ - `repro_artifacts`
224
+ - `red_test_paths`
225
+ - `repeatability_status`
226
+ - `current_failure`
227
+ - `expected_green_condition`
228
+ - `confidence`
229
+ - `observer_verdict`
230
+
231
+ At the envelope level, `observation_artifact.json` also requires:
232
+ - `artifact_kind=observation_artifact`
233
+ - `artifact_id`
234
+ - `revision`
235
+ - `supersedes_revision`
236
+ - `updated_at`
237
+ - `updated_by=Observer`
238
+
239
+ ### Quality bar
240
+ A valid `observation_artifact` is:
241
+ - evidence-based rather than purely interpretive
242
+ - explicit about whether repro is confirmed, not confirmed, or partially confirmed
243
+ - concrete enough that Iterator can later compare final state against the same seam
244
+ - honest when truth is insufficient for safe completion claims
245
+
246
+ ## Iterator run record
247
+
248
+ ### Purpose
249
+ `iterator_run` preserves supervision truth across attempts.
250
+
251
+ It should let any reviewer answer:
252
+ - when Iterator judged the task ready
253
+ - what each coder attempt changed
254
+ - why respawns happened
255
+ - whether the terminal outcome was completed, blocked, or promoted
256
+
257
+ ### Required fields
258
+ Inside the `payload` body, `iterator_run` requires:
259
+ - `task_id`
260
+ - `run_state`
261
+ - `readiness`
262
+ - `attempts`
263
+ - `latest_attempt`
264
+ - `respawn_count`
265
+ - `promotion`
266
+ - `final_verdict`
267
+
268
+ At the envelope level, `iterator_run.json` also requires:
269
+ - `artifact_kind=iterator_run`
270
+ - `artifact_id`
271
+ - `revision`
272
+ - `supersedes_revision`
273
+ - `updated_at`
274
+ - `updated_by`, usually `Iterator` and sometimes `Coder` for attempt append operations under Iterator supervision
275
+
276
+ ### Readiness state decision
277
+ Iterator readiness should be a first-class top-level structure, not just an event hidden inside attempt history.
278
+
279
+ Recommended shape:
280
+
281
+ ```yaml
282
+ iterator_run:
283
+ task_id: string
284
+ run_state: framing | observing | blocked_pre_coding | ready_for_coder | coding_in_progress | awaiting_iterator_review | needs_respawn | completed | blocked | promoted
285
+ readiness:
286
+ status: not_ready | ready_for_coder | blocked | promoted
287
+ decided_at: string
288
+ decided_by: Iterator
289
+ based_on:
290
+ task_artifact_revision: integer
291
+ observation_artifact_revision: integer
292
+ reason: string
293
+ attempts: []
294
+ latest_attempt: string | null
295
+ respawn_count: integer
296
+ promotion:
297
+ status: none | to_idea | to_insight
298
+ decided_at: string | null
299
+ decided_by: Iterator | null
300
+ reason: string | null
301
+ based_on:
302
+ task_artifact_revision: integer
303
+ observation_artifact_revision: integer | null
304
+ iterator_run_revision: integer
305
+ handoff_ref: string | null
306
+ downstream:
307
+ lane: idea | insight | null
308
+ downstream_id: string | null
309
+ downstream_artifact_ref: string | null
310
+ final_verdict: null | completed | blocked | promoted
311
+ ```
312
+
313
+ Why this should be explicit:
314
+ - readiness is an Iterator-owned gate, not merely another attempt note
315
+ - the harness needs to inspect whether coding started too early
316
+ - blocked or promoted outcomes can happen before any coder attempt exists
317
+ - top-level state makes pre-coding and post-attempt transitions auditable
318
+
319
+ Attempt history should still record when readiness changed, but the canonical current state belongs at top level.
320
+
321
+ ### Promotion linkage decision
322
+ When iterate exits to `idea` or `insight`, the durable linkage contract should be split into two parts:
323
+ 1. `iterator_run.payload.promotion`, which records the lane decision inline in the supervisory spine
324
+ 2. `promotion_handoff.json`, which stores the actual handoff payload that the downstream lane can consume
325
+
326
+ Recommended `promotion_handoff` payload shape:
327
+
328
+ ```yaml
329
+ promotion_handoff:
330
+ task_id: string
331
+ target_lane: idea | insight
332
+ decided_at: string
333
+ decided_by: Iterator
334
+ reason: string
335
+ handoff_summary: string
336
+ based_on:
337
+ task_artifact_revision: integer
338
+ observation_artifact_revision: integer | null
339
+ iterator_run_revision: integer
340
+ source_refs:
341
+ task_artifact_ref: string
342
+ observation_artifact_ref: string | null
343
+ iterator_run_ref: string
344
+ downstream:
345
+ downstream_id: string | null
346
+ downstream_artifact_ref: string | null
347
+ ```
348
+
349
+ At the envelope level, `promotion_handoff.json` should also use:
350
+ - `artifact_kind=promotion_handoff`
351
+ - `artifact_id`
352
+ - `revision`, normally `1` unless the handoff record itself is amended
353
+ - `supersedes_revision`
354
+ - `updated_at`
355
+ - `updated_by=Iterator`
356
+
357
+ Mandatory linkage fields for every promotion or reroute:
358
+ - in `iterator_run.payload.promotion`: `status`, `reason`, `based_on`, `handoff_ref`, and `downstream.lane`
359
+ - in `promotion_handoff.json`: `target_lane`, `handoff_summary`, `based_on`, and `source_refs`
360
+
361
+ Decision on downstream refs:
362
+ - `handoff_ref` to the iterate-owned `promotion_handoff.json` is mandatory
363
+ - direct downstream refs such as `downstream_id` or `downstream_artifact_ref` are optional at promotion time and may remain `null` if the downstream lane has not yet allocated durable state
364
+ - if the downstream lane does allocate an artifact or session synchronously, that ref should be filled in, but iterate should not block truthful promotion on that allocation
365
+
366
+ Why this is the right default:
367
+ - iterate can close truthfully without depending on downstream side effects
368
+ - the handoff remains durable and inspectable even if downstream work starts later
369
+ - the linkage gives `idea` and `insight` enough upstream provenance without forcing a cross-lane transaction
370
+
371
+ ### Attempt record expectations
372
+ Each attempt entry should summarize supervised coding work without swallowing raw verifier detail.
373
+
374
+ `attempt_id` should be a human-readable, sortable sequence id, not a timestamp and not an opaque UUID.
375
+
376
+ Exact policy:
377
+ - format is `attempt-<NNN...>`
378
+ - numbering starts at `attempt-001`
379
+ - numbering advances by `1` for each new coder spawn under the same `task_id`
380
+ - ids remain stable even if a later attempt is blocked, superseded, or leads to promotion
381
+ - `latest_attempt` in `iterator_run` should point to the highest assigned ordinal, not the most recently successful attempt
382
+
383
+ Why this is the right default:
384
+ - iterate runs are linear supervisory loops, so ordinal identity matches the mental model better than random ids
385
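+ - lexical sort and chronological sort stay aligned through `attempt-999`; once ids widen past three digits (for example `attempt-1000`), consumers must order by the numeric ordinal rather than by plain string comparison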
386
+ - reviewers can discuss respawns unambiguously as "attempt-002 failed verification" without decoding timestamps
387
+ - harness fixtures can assert attempt order deterministically
388
+
389
+ Suggested attempt shape:
390
+
391
+ ```yaml
392
+ attempt:
393
+ attempt_id: attempt-001
394
+ spawned_at: string
395
+ coder_summary: string
396
+ changed_surfaces: string[]
397
+ verification_summary_ref: string
398
+ verifier_output_ref: string | null
399
+ iterator_review:
400
+ disposition: success | respawn | blocked
401
+ reason: string
402
+ ```
403
+
404
+ ### Verifier output placement decision
405
+ `iterator_run.json` should contain durable summaries and file references, while verbose verifier evidence should live in attempt-scoped files.
406
+
407
+ Normalization decision:
408
+ - all verifier types must emit the same top-level JSON envelope in both `verification_summary.json` and `verifier_output.json`
409
+ - verifier-specific structure is allowed only inside a dedicated nested payload field (`native_payload` in `verifier_output.json`)
410
+ - consumers should be able to inspect verdict, seam alignment, and artifact refs without knowing which verifier produced the record
411
+
412
+ Why this split is the right default:
413
+ - `iterator_run` is the audit spine and should stay readable across multiple retries
414
+ - raw verifier evidence can be large and verifier-specific
415
+ - a shared envelope gives Iterator, harnesses, and downstream reviewers one stable parsing contract
416
+ - nested verifier-specific payloads preserve useful detail without forcing false uniformity across test runners, screenshots, logs, or manual checks
417
+
418
+ The run record should therefore keep:
419
+ - concise attempt-level verification summaries
420
+ - stable refs to attempt-scoped verifier files
421
+ - Iterator's disposition against that evidence
422
+
423
+ It should not try to inline full test logs, stack traces, or tool-native output blobs by default.
424
+
425
+ Recommended normalized `verification_summary.json` shape:
426
+
427
+ ```yaml
428
+ verification_summary:
429
+ artifact_kind: verification_summary
430
+ task_id: string
431
+ attempt_id: attempt-001
432
+ verifier_kind: test_run | browser_check | screenshot_diff | log_check | manual_probe | mixed
433
+ generated_at: string
434
+ produced_by: Coder
435
+ overall_result: pass | fail | inconclusive | not_run
436
+ green_condition_alignment:
437
+ status: satisfied | not_satisfied | unknown
438
+ against: string
439
+ notes: string
440
+ checks:
441
+ - check_id: string
442
+ label: string
443
+ result: pass | fail | inconclusive | not_run
444
+ summary: string
445
+ artifact_refs: string[]
446
+ summary: string
447
+ blocker_notes: string[]
448
+ output_ref: string | null
449
+ ```
450
+
451
+ Recommended normalized `verifier_output.json` shape:
452
+
453
+ ```yaml
454
+ verifier_output:
455
+ artifact_kind: verifier_output
456
+ task_id: string
457
+ attempt_id: attempt-001
458
+ verifier_kind: test_run | browser_check | screenshot_diff | log_check | manual_probe | mixed
459
+ generated_at: string
460
+ produced_by: Coder
461
+ overall_result: pass | fail | inconclusive | not_run
462
+ green_condition_alignment:
463
+ status: satisfied | not_satisfied | unknown
464
+ against: string
465
+ notes: string
466
+ evidence_refs: string[]
467
+ native_payload:
468
+ # verifier-specific structure lives only here
469
+ ```
470
+
471
+ Normalization rules that should stay stable:
472
+ - `overall_result` uses the shared enum above for every verifier type
473
+ - `green_condition_alignment` is mandatory even when the verifier cannot decide, in which case `status=unknown`
474
+ - `checks` belongs in `verification_summary.json` because Iterator often needs a readable rollup rather than only a raw blob
475
+ - `native_payload` in `verifier_output.json` may contain tool-specific objects, arrays, text blocks, or structured traces
476
+ - additional attempt-scoped files such as screenshots, traces, or junit XML may exist, but they should be referenced from the normalized JSON rather than replacing it
477
+
478
+ ### Quality bar
479
+ A valid `iterator_run`:
480
+ - captures attempt history rather than only the final state
481
+ - records Iterator-owned judgments distinctly from Coder-authored notes
482
+ - explains terminal disposition in a way that can be audited later
483
+ - keeps the canonical state readable even when several attempts accumulate
484
+
485
+ ## Agent-to-artifact mapping
486
+
487
+ - `Framer` authors and amends `task_artifact`
488
+ - `Observer` authors `observation_artifact`
489
+ - `Coder` contributes attempt records and verification summaries under `iterator_run`, with raw verifier detail stored in attempt-scoped files
490
+ - `Iterator` owns `run_state`, `readiness`, respawn reasons, promotion linkage, and the final disposition in `iterator_run`
491
+ - `Iterator` authors `promotion_handoff.json` whenever work exits iterate for `idea` or `insight`
492
+
493
+ This mapping should remain stable even if runtime execution details change.
494
+
495
+ ## Remaining open seams
496
+
497
+ At this stage, the two previously open seams are now closed for the pre-prompt design pass:
498
+ - `attempt_id` format is fixed to monotonic ordinal ids of the form `attempt-<NNN...>`, for example `attempt-001`
499
+ - attempt-scoped verifier artifacts use a shared top-level JSON envelope, with verifier-specific structure nested under `native_payload`
500
+
501
+ Still intentionally open for a later design pass:
502
+ 1. whether `checks[].check_id` should follow a repo-wide naming convention shared with other lanes
503
+ 2. whether `verifier_kind=mixed` should stay a single artifact or fan out into multiple verifier records when one attempt runs several distinct verification modes
504
+ 3. whether any attempt-scoped artifacts beyond the normalized JSON pair should become mandatory for certain verifier kinds, such as screenshots for UI checks or junit XML for test suites