devflow-engine 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393)
  1. devflow_engine/__init__.py +3 -0
  2. devflow_engine/agentic_prompts.py +100 -0
  3. devflow_engine/agentic_runtime.py +398 -0
  4. devflow_engine/api_key_flow_harness.py +539 -0
  5. devflow_engine/api_keys.py +357 -0
  6. devflow_engine/bootstrap/__init__.py +2 -0
  7. devflow_engine/bootstrap/provision_from_template.py +84 -0
  8. devflow_engine/cli/__init__.py +0 -0
  9. devflow_engine/cli/app.py +7270 -0
  10. devflow_engine/core/__init__.py +0 -0
  11. devflow_engine/core/config.py +86 -0
  12. devflow_engine/core/logging.py +29 -0
  13. devflow_engine/core/paths.py +45 -0
  14. devflow_engine/core/toml_kv.py +33 -0
  15. devflow_engine/devflow_event_worker.py +1292 -0
  16. devflow_engine/devflow_state.py +201 -0
  17. devflow_engine/devin2/__init__.py +9 -0
  18. devflow_engine/devin2/agent_definition.py +120 -0
  19. devflow_engine/devin2/pi_runner.py +204 -0
  20. devflow_engine/devin_orchestration.py +69 -0
  21. devflow_engine/docs/prompts/anti-patterns.md +42 -0
  22. devflow_engine/docs/prompts/devin-agent-prompt.md +55 -0
  23. devflow_engine/docs/prompts/devin2-agent-prompt.md +81 -0
  24. devflow_engine/docs/prompts/examples/devin-vapi-clone-reference-exchange.json +85 -0
  25. devflow_engine/doctor/__init__.py +2 -0
  26. devflow_engine/doctor/triage.py +140 -0
  27. devflow_engine/error/__init__.py +0 -0
  28. devflow_engine/error/remediation.py +21 -0
  29. devflow_engine/errors/error_solver_dag.py +522 -0
  30. devflow_engine/errors/runtime_observability.py +67 -0
  31. devflow_engine/idea/__init__.py +4 -0
  32. devflow_engine/idea/actors.py +481 -0
  33. devflow_engine/idea/agentic.py +465 -0
  34. devflow_engine/idea/analyze.py +93 -0
  35. devflow_engine/idea/devin_chat_dag.py +1 -0
  36. devflow_engine/idea/diff.py +99 -0
  37. devflow_engine/idea/drafts.py +446 -0
  38. devflow_engine/idea/idea_creation_dag.py +643 -0
  39. devflow_engine/idea/ideation_enrichment.py +355 -0
  40. devflow_engine/idea/ideation_enrichment_worker.py +19 -0
  41. devflow_engine/idea/paths.py +28 -0
  42. devflow_engine/idea/promote.py +53 -0
  43. devflow_engine/idea/redaction.py +27 -0
  44. devflow_engine/idea/repo_tools.py +1277 -0
  45. devflow_engine/idea/response_mode.py +30 -0
  46. devflow_engine/idea/story_pipeline.py +1585 -0
  47. devflow_engine/idea/sufficiency.py +376 -0
  48. devflow_engine/idea/traditional_stories.py +1257 -0
  49. devflow_engine/implementation/__init__.py +0 -0
  50. devflow_engine/implementation/alembic_preflight.py +700 -0
  51. devflow_engine/implementation/dag.py +8450 -0
  52. devflow_engine/implementation/green_gate.py +93 -0
  53. devflow_engine/implementation/prompts.py +108 -0
  54. devflow_engine/implementation/test_runtime.py +623 -0
  55. devflow_engine/integration/__init__.py +19 -0
  56. devflow_engine/integration/agentic.py +66 -0
  57. devflow_engine/integration/dag.py +3539 -0
  58. devflow_engine/integration/prompts.py +114 -0
  59. devflow_engine/integration/supabase_schema.sql +31 -0
  60. devflow_engine/integration/supabase_sync.py +177 -0
  61. devflow_engine/llm/__init__.py +1 -0
  62. devflow_engine/llm/cli_one_shot.py +84 -0
  63. devflow_engine/llm/cli_stream.py +371 -0
  64. devflow_engine/llm/execution_context.py +26 -0
  65. devflow_engine/llm/invoke.py +1322 -0
  66. devflow_engine/llm/provider_api.py +304 -0
  67. devflow_engine/llm/repo_knowledge.py +588 -0
  68. devflow_engine/llm_primitives.py +315 -0
  69. devflow_engine/orchestration.py +62 -0
  70. devflow_engine/planning/__init__.py +0 -0
  71. devflow_engine/planning/analyze_repo.py +92 -0
  72. devflow_engine/planning/render_drafts.py +133 -0
  73. devflow_engine/playground/__init__.py +0 -0
  74. devflow_engine/playground/hooks.py +26 -0
  75. devflow_engine/playwright_workflow/__init__.py +5 -0
  76. devflow_engine/playwright_workflow/dag.py +1317 -0
  77. devflow_engine/process/__init__.py +5 -0
  78. devflow_engine/process/dag.py +59 -0
  79. devflow_engine/project_registration/__init__.py +3 -0
  80. devflow_engine/project_registration/dag.py +1581 -0
  81. devflow_engine/project_registry.py +109 -0
  82. devflow_engine/prompts/devin/generic/prompt.md +6 -0
  83. devflow_engine/prompts/devin/ideation/prompt.md +263 -0
  84. devflow_engine/prompts/devin/ideation/scenarios.md +5 -0
  85. devflow_engine/prompts/devin/ideation_loop/prompt.md +6 -0
  86. devflow_engine/prompts/devin/insight/prompt.md +11 -0
  87. devflow_engine/prompts/devin/insight/scenarios.md +5 -0
  88. devflow_engine/prompts/devin/intake/prompt.md +15 -0
  89. devflow_engine/prompts/devin/iterate/prompt.md +12 -0
  90. devflow_engine/prompts/devin/shared/eval_doctrine.md +9 -0
  91. devflow_engine/prompts/devin/shared/principles.md +246 -0
  92. devflow_engine/prompts/devin_eval/assessment/prompt.md +18 -0
  93. devflow_engine/prompts/idea/api_ideation_agent/prompt.md +8 -0
  94. devflow_engine/prompts/idea/api_insight_agent/prompt.md +8 -0
  95. devflow_engine/prompts/idea/response_doctrine/prompt.md +18 -0
  96. devflow_engine/prompts/implementation/dependency_assessment/prompt.md +12 -0
  97. devflow_engine/prompts/implementation/green/green/prompt.md +11 -0
  98. devflow_engine/prompts/implementation/green/node_config/prompt.md +3 -0
  99. devflow_engine/prompts/implementation/green_review/outcome_review/prompt.md +5 -0
  100. devflow_engine/prompts/implementation/green_review/prior_run_review/prompt.md +5 -0
  101. devflow_engine/prompts/implementation/red/prompt.md +27 -0
  102. devflow_engine/prompts/implementation/redreview/prompt.md +23 -0
  103. devflow_engine/prompts/implementation/redreview_repair/prompt.md +16 -0
  104. devflow_engine/prompts/implementation/setupdoc/prompt.md +10 -0
  105. devflow_engine/prompts/implementation/story_planning/prompt.md +13 -0
  106. devflow_engine/prompts/implementation/test_design/prompt.md +27 -0
  107. devflow_engine/prompts/integration/README.md +185 -0
  108. devflow_engine/prompts/integration/green/example.md +67 -0
  109. devflow_engine/prompts/integration/green/green/prompt.md +10 -0
  110. devflow_engine/prompts/integration/green/node_config/prompt.md +42 -0
  111. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/green/prompt.md +15 -0
  112. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/node_config/prompt.md +42 -0
  113. devflow_engine/prompts/integration/green_enrich/example.md +79 -0
  114. devflow_engine/prompts/integration/green_enrich/green_enrich/prompt.md +9 -0
  115. devflow_engine/prompts/integration/green_enrich/node_config/prompt.md +41 -0
  116. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/green_enrich/prompt.md +14 -0
  117. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  118. devflow_engine/prompts/integration/red/code_repair/prompt.md +12 -0
  119. devflow_engine/prompts/integration/red/example.md +152 -0
  120. devflow_engine/prompts/integration/red/node_config/prompt.md +86 -0
  121. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  122. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/node_config/prompt.md +84 -0
  123. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red/prompt.md +16 -0
  124. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red_repair/prompt.md +15 -0
  125. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  126. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/node_config/prompt.md +84 -0
  127. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/red_repair/prompt.md +11 -0
  128. devflow_engine/prompts/integration/red/red/prompt.md +11 -0
  129. devflow_engine/prompts/integration/red/red_repair/prompt.md +12 -0
  130. devflow_engine/prompts/integration/red_review/example.md +71 -0
  131. devflow_engine/prompts/integration/red_review/node_config/prompt.md +41 -0
  132. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  133. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/red_review/prompt.md +15 -0
  134. devflow_engine/prompts/integration/red_review/red_review/prompt.md +9 -0
  135. devflow_engine/prompts/integration/resolve/example.md +111 -0
  136. devflow_engine/prompts/integration/resolve/node_config/prompt.md +64 -0
  137. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/node_config/prompt.md +64 -0
  138. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_implicated_users/prompt.md +15 -0
  139. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_side_effects/prompt.md +15 -0
  140. devflow_engine/prompts/integration/resolve/resolve_implicated_users/prompt.md +10 -0
  141. devflow_engine/prompts/integration/resolve/resolve_side_effects/prompt.md +10 -0
  142. devflow_engine/prompts/integration/validate/build_idea_acceptance_coverage/prompt.md +12 -0
  143. devflow_engine/prompts/integration/validate/code_repair/prompt.md +13 -0
  144. devflow_engine/prompts/integration/validate/example.md +143 -0
  145. devflow_engine/prompts/integration/validate/node_config/prompt.md +87 -0
  146. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  147. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/node_config/prompt.md +67 -0
  148. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_enrich_gate/prompt.md +17 -0
  149. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_repair/prompt.md +16 -0
  150. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  151. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/node_config/prompt.md +67 -0
  152. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/validate_repair/prompt.md +9 -0
  153. devflow_engine/prompts/integration/validate/validate_enrich_gate/prompt.md +10 -0
  154. devflow_engine/prompts/integration/validate/validate_repair/prompt.md +20 -0
  155. devflow_engine/prompts/integration/write_workflows/example.md +100 -0
  156. devflow_engine/prompts/integration/write_workflows/node_config/prompt.md +44 -0
  157. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/node_config/prompt.md +44 -0
  158. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/write_workflows/prompt.md +17 -0
  159. devflow_engine/prompts/integration/write_workflows/write_workflows/prompt.md +11 -0
  160. devflow_engine/prompts/iterate/README.md +7 -0
  161. devflow_engine/prompts/iterate/coder/prompt.md +11 -0
  162. devflow_engine/prompts/iterate/framer/prompt.md +11 -0
  163. devflow_engine/prompts/iterate/iterator/prompt.md +13 -0
  164. devflow_engine/prompts/iterate/observer/prompt.md +11 -0
  165. devflow_engine/prompts/recovery/diagnosis/prompt.md +7 -0
  166. devflow_engine/prompts/recovery/execution/prompt.md +8 -0
  167. devflow_engine/prompts/recovery/execution_verification/prompt.md +7 -0
  168. devflow_engine/prompts/recovery/failure_investigation/prompt.md +10 -0
  169. devflow_engine/prompts/recovery/preflight_health_repo_repair/prompt.md +8 -0
  170. devflow_engine/prompts/recovery/remediation_execution/prompt.md +11 -0
  171. devflow_engine/prompts/recovery/root_cause_investigation/prompt.md +12 -0
  172. devflow_engine/prompts/scope_idea/doctrine/prompt.md +7 -0
  173. devflow_engine/prompts/source_doc_eval/document/prompt.md +6 -0
  174. devflow_engine/prompts/source_doc_eval/targeted_mutation/prompt.md +9 -0
  175. devflow_engine/prompts/source_doc_mutation/domain_entities/prompt.md +6 -0
  176. devflow_engine/prompts/source_doc_mutation/product_brief/prompt.md +6 -0
  177. devflow_engine/prompts/source_doc_mutation/project_doc_coherence/prompt.md +7 -0
  178. devflow_engine/prompts/source_doc_mutation/project_doc_render/prompt.md +9 -0
  179. devflow_engine/prompts/source_doc_mutation/source_doc_coherence/prompt.md +5 -0
  180. devflow_engine/prompts/source_doc_mutation/source_doc_enrichment_coherence/prompt.md +6 -0
  181. devflow_engine/prompts/source_doc_mutation/user_workflows/prompt.md +6 -0
  182. devflow_engine/prompts/source_scope/doctrine/prompt.md +10 -0
  183. devflow_engine/prompts/ui_grounding/doctrine/prompt.md +7 -0
  184. devflow_engine/recovery/__init__.py +3 -0
  185. devflow_engine/recovery/dag.py +2609 -0
  186. devflow_engine/recovery/models.py +220 -0
  187. devflow_engine/refactor.py +93 -0
  188. devflow_engine/registry/__init__.py +1 -0
  189. devflow_engine/registry/cards.py +238 -0
  190. devflow_engine/registry/domain_normalize.py +60 -0
  191. devflow_engine/registry/effects.py +65 -0
  192. devflow_engine/registry/enforce_report.py +150 -0
  193. devflow_engine/registry/module_cards_classify.py +164 -0
  194. devflow_engine/registry/module_cards_draft.py +184 -0
  195. devflow_engine/registry/module_cards_gate.py +59 -0
  196. devflow_engine/registry/packages.py +347 -0
  197. devflow_engine/registry/pathways.py +323 -0
  198. devflow_engine/review/__init__.py +11 -0
  199. devflow_engine/review/dag.py +588 -0
  200. devflow_engine/review/review_story.py +67 -0
  201. devflow_engine/scope_idea/__init__.py +3 -0
  202. devflow_engine/scope_idea/agentic.py +39 -0
  203. devflow_engine/scope_idea/dag.py +1069 -0
  204. devflow_engine/scope_idea/models.py +175 -0
  205. devflow_engine/skills/builtins/devflow/queue_failure_investigation/SKILL.md +112 -0
  206. devflow_engine/skills/builtins/devflow/queue_idea_to_story/SKILL.md +120 -0
  207. devflow_engine/skills/builtins/devflow/queue_integration/SKILL.md +105 -0
  208. devflow_engine/skills/builtins/devflow/queue_recovery/SKILL.md +108 -0
  209. devflow_engine/skills/builtins/devflow/queue_runtime_core/SKILL.md +155 -0
  210. devflow_engine/skills/builtins/devflow/queue_story_implementation/SKILL.md +122 -0
  211. devflow_engine/skills/builtins/devin/idea_to_story_handoff/SKILL.md +120 -0
  212. devflow_engine/skills/builtins/devin/ideation/SKILL.md +168 -0
  213. devflow_engine/skills/builtins/devin/ideation/state-and-phrasing-reference.md +18 -0
  214. devflow_engine/skills/builtins/devin/insight/SKILL.md +22 -0
  215. devflow_engine/skills/registry.example.yaml +42 -0
  216. devflow_engine/source_doc_assumptions.py +291 -0
  217. devflow_engine/source_doc_mutation_dag.py +1606 -0
  218. devflow_engine/source_doc_mutation_eval.py +417 -0
  219. devflow_engine/source_doc_mutation_worker.py +25 -0
  220. devflow_engine/source_docs_schema.py +207 -0
  221. devflow_engine/source_docs_updater.py +309 -0
  222. devflow_engine/source_scope/__init__.py +15 -0
  223. devflow_engine/source_scope/agentic.py +45 -0
  224. devflow_engine/source_scope/dag.py +1626 -0
  225. devflow_engine/source_scope/models.py +177 -0
  226. devflow_engine/stores/__init__.py +0 -0
  227. devflow_engine/stores/execution_store.py +3534 -0
  228. devflow_engine/story/__init__.py +0 -0
  229. devflow_engine/story/contracts.py +160 -0
  230. devflow_engine/story/discovery.py +47 -0
  231. devflow_engine/story/evidence.py +118 -0
  232. devflow_engine/story/hashing.py +27 -0
  233. devflow_engine/story/implemented_queue_purge.py +148 -0
  234. devflow_engine/story/indexer.py +105 -0
  235. devflow_engine/story/io.py +20 -0
  236. devflow_engine/story/markdown_contracts.py +298 -0
  237. devflow_engine/story/reconciliation.py +408 -0
  238. devflow_engine/story/validate_stories.py +149 -0
  239. devflow_engine/story/validate_tests_story.py +512 -0
  240. devflow_engine/story/validation.py +133 -0
  241. devflow_engine/ui_grounding/__init__.py +11 -0
  242. devflow_engine/ui_grounding/agentic.py +31 -0
  243. devflow_engine/ui_grounding/dag.py +874 -0
  244. devflow_engine/ui_grounding/models.py +224 -0
  245. devflow_engine/ui_grounding/pencil_bridge.py +247 -0
  246. devflow_engine/vendor/__init__.py +0 -0
  247. devflow_engine/vendor/datalumina_genai/__init__.py +11 -0
  248. devflow_engine/vendor/datalumina_genai/core/__init__.py +0 -0
  249. devflow_engine/vendor/datalumina_genai/core/exceptions.py +9 -0
  250. devflow_engine/vendor/datalumina_genai/core/nodes/__init__.py +0 -0
  251. devflow_engine/vendor/datalumina_genai/core/nodes/agent.py +48 -0
  252. devflow_engine/vendor/datalumina_genai/core/nodes/agent_streaming_node.py +26 -0
  253. devflow_engine/vendor/datalumina_genai/core/nodes/base.py +89 -0
  254. devflow_engine/vendor/datalumina_genai/core/nodes/concurrent.py +30 -0
  255. devflow_engine/vendor/datalumina_genai/core/nodes/router.py +69 -0
  256. devflow_engine/vendor/datalumina_genai/core/schema.py +72 -0
  257. devflow_engine/vendor/datalumina_genai/core/task.py +52 -0
  258. devflow_engine/vendor/datalumina_genai/core/validate.py +139 -0
  259. devflow_engine/vendor/datalumina_genai/core/workflow.py +200 -0
  260. devflow_engine/worker.py +1086 -0
  261. devflow_engine/worker_guard.py +233 -0
  262. devflow_engine-1.0.0.dist-info/METADATA +235 -0
  263. devflow_engine-1.0.0.dist-info/RECORD +393 -0
  264. devflow_engine-1.0.0.dist-info/WHEEL +4 -0
  265. devflow_engine-1.0.0.dist-info/entry_points.txt +3 -0
  266. devin/__init__.py +6 -0
  267. devin/dag.py +58 -0
  268. devin/dag_two_arm.py +138 -0
  269. devin/devin_chat_scenario_catalog.json +588 -0
  270. devin/devin_eval.py +677 -0
  271. devin/nodes/__init__.py +0 -0
  272. devin/nodes/ideation/__init__.py +0 -0
  273. devin/nodes/ideation/node.py +195 -0
  274. devin/nodes/ideation/playground.py +267 -0
  275. devin/nodes/ideation/prompt.md +65 -0
  276. devin/nodes/ideation/scenarios/continue_refinement.py +13 -0
  277. devin/nodes/ideation/scenarios/continue_refinement_evals.py +18 -0
  278. devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py +17 -0
  279. devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py +16 -0
  280. devin/nodes/ideation/scenarios/large_idea_split.py +4 -0
  281. devin/nodes/ideation/scenarios/large_idea_split_evals.py +17 -0
  282. devin/nodes/ideation/scenarios/source_documentation_added.py +4 -0
  283. devin/nodes/ideation/scenarios/source_documentation_added_evals.py +16 -0
  284. devin/nodes/ideation/scenarios/user_says_create_it.py +30 -0
  285. devin/nodes/ideation/scenarios/user_says_create_it_evals.py +23 -0
  286. devin/nodes/ideation/scenarios/vague_idea.py +16 -0
  287. devin/nodes/ideation/scenarios/vague_idea_evals.py +47 -0
  288. devin/nodes/ideation/tools.json +312 -0
  289. devin/nodes/insight/__init__.py +0 -0
  290. devin/nodes/insight/node.py +49 -0
  291. devin/nodes/insight/playground.py +154 -0
  292. devin/nodes/insight/prompt.md +61 -0
  293. devin/nodes/insight/scenarios/architecture_pattern_query.py +15 -0
  294. devin/nodes/insight/scenarios/architecture_pattern_query_evals.py +25 -0
  295. devin/nodes/insight/scenarios/codebase_exploration.py +15 -0
  296. devin/nodes/insight/scenarios/codebase_exploration_evals.py +23 -0
  297. devin/nodes/insight/scenarios/devin_ideation_routing.py +19 -0
  298. devin/nodes/insight/scenarios/devin_ideation_routing_evals.py +39 -0
  299. devin/nodes/insight/scenarios/devin_insight_routing.py +20 -0
  300. devin/nodes/insight/scenarios/devin_insight_routing_evals.py +40 -0
  301. devin/nodes/insight/scenarios/operational_debugging.py +15 -0
  302. devin/nodes/insight/scenarios/operational_debugging_evals.py +23 -0
  303. devin/nodes/insight/scenarios/operational_question.py +9 -0
  304. devin/nodes/insight/scenarios/operational_question_evals.py +8 -0
  305. devin/nodes/insight/scenarios/queue_status.py +15 -0
  306. devin/nodes/insight/scenarios/queue_status_evals.py +23 -0
  307. devin/nodes/insight/scenarios/source_doc_explanation.py +14 -0
  308. devin/nodes/insight/scenarios/source_doc_explanation_evals.py +21 -0
  309. devin/nodes/insight/scenarios/worker_state_check.py +15 -0
  310. devin/nodes/insight/scenarios/worker_state_check_evals.py +22 -0
  311. devin/nodes/insight/tools.json +126 -0
  312. devin/nodes/intake/__init__.py +0 -0
  313. devin/nodes/intake/node.py +27 -0
  314. devin/nodes/intake/playground.py +47 -0
  315. devin/nodes/intake/prompt.md +12 -0
  316. devin/nodes/intake/scenarios/ideation_routing.py +4 -0
  317. devin/nodes/intake/scenarios/ideation_routing_evals.py +5 -0
  318. devin/nodes/intake/scenarios/insight_routing.py +4 -0
  319. devin/nodes/intake/scenarios/insight_routing_evals.py +5 -0
  320. devin/nodes/iterate/README.md +44 -0
  321. devin/nodes/iterate/__init__.py +1 -0
  322. devin/nodes/iterate/_archived_design_stages/01-objectives-requirements.md +112 -0
  323. devin/nodes/iterate/_archived_design_stages/02-evals.md +131 -0
  324. devin/nodes/iterate/_archived_design_stages/03-tools-and-boundaries.md +110 -0
  325. devin/nodes/iterate/_archived_design_stages/04-harness-and-playground.md +32 -0
  326. devin/nodes/iterate/_archived_design_stages/05-prompt-deferred.md +11 -0
  327. devin/nodes/iterate/_archived_design_stages/coder_agent_design/01-objectives-requirements.md +20 -0
  328. devin/nodes/iterate/_archived_design_stages/coder_agent_design/02-evals.md +8 -0
  329. devin/nodes/iterate/_archived_design_stages/coder_agent_design/03-tools-and-boundaries.md +14 -0
  330. devin/nodes/iterate/_archived_design_stages/coder_agent_design/04-harness-and-playground.md +12 -0
  331. devin/nodes/iterate/_archived_design_stages/framer_agent_design/01-objectives-requirements.md +20 -0
  332. devin/nodes/iterate/_archived_design_stages/framer_agent_design/02-evals.md +8 -0
  333. devin/nodes/iterate/_archived_design_stages/framer_agent_design/03-tools-and-boundaries.md +13 -0
  334. devin/nodes/iterate/_archived_design_stages/framer_agent_design/04-harness-and-playground.md +12 -0
  335. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md +25 -0
  336. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/02-evals.md +9 -0
  337. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/03-tools-and-boundaries.md +14 -0
  338. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md +12 -0
  339. devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md +20 -0
  340. devin/nodes/iterate/_archived_design_stages/observer_agent_design/02-evals.md +8 -0
  341. devin/nodes/iterate/_archived_design_stages/observer_agent_design/03-tools-and-boundaries.md +14 -0
  342. devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md +13 -0
  343. devin/nodes/iterate/agent-roles.md +89 -0
  344. devin/nodes/iterate/agents/README.md +10 -0
  345. devin/nodes/iterate/artifacts.md +504 -0
  346. devin/nodes/iterate/contract.md +100 -0
  347. devin/nodes/iterate/eval-plan.md +74 -0
  348. devin/nodes/iterate/node.py +100 -0
  349. devin/nodes/iterate/pipeline/README.md +13 -0
  350. devin/nodes/iterate/playground-contract.md +76 -0
  351. devin/nodes/iterate/prompt.md +11 -0
  352. devin/nodes/iterate/scenarios/README.md +38 -0
  353. devin/nodes/iterate/scenarios/artifact-and-loop-scenarios.md +101 -0
  354. devin/nodes/iterate/scenarios/coder_artifact_alignment.py +32 -0
  355. devin/nodes/iterate/scenarios/coder_artifact_alignment_evals.py +45 -0
  356. devin/nodes/iterate/scenarios/coder_bounded_fix.py +27 -0
  357. devin/nodes/iterate/scenarios/coder_bounded_fix_evals.py +45 -0
  358. devin/nodes/iterate/scenarios/devin_iterate_routing.py +21 -0
  359. devin/nodes/iterate/scenarios/devin_iterate_routing_evals.py +36 -0
  360. devin/nodes/iterate/scenarios/framer_scope_boundary.py +25 -0
  361. devin/nodes/iterate/scenarios/framer_scope_boundary_evals.py +57 -0
  362. devin/nodes/iterate/scenarios/framer_task_framing.py +25 -0
  363. devin/nodes/iterate/scenarios/framer_task_framing_evals.py +58 -0
  364. devin/nodes/iterate/scenarios/iterate_error_fix.py +21 -0
  365. devin/nodes/iterate/scenarios/iterate_error_fix_evals.py +39 -0
  366. devin/nodes/iterate/scenarios/iterate_quick_change.py +21 -0
  367. devin/nodes/iterate/scenarios/iterate_quick_change_evals.py +35 -0
  368. devin/nodes/iterate/scenarios/iterate_to_idea_promotion.py +23 -0
  369. devin/nodes/iterate/scenarios/iterate_to_idea_promotion_evals.py +53 -0
  370. devin/nodes/iterate/scenarios/iterate_to_insight_reroute.py +23 -0
  371. devin/nodes/iterate/scenarios/iterate_to_insight_reroute_evals.py +53 -0
  372. devin/nodes/iterate/scenarios/observer_evidence_seam.py +28 -0
  373. devin/nodes/iterate/scenarios/observer_evidence_seam_evals.py +55 -0
  374. devin/nodes/iterate/scenarios/observer_repro_creation.py +28 -0
  375. devin/nodes/iterate/scenarios/observer_repro_creation_evals.py +45 -0
  376. devin/nodes/iterate/scenarios/routing-matrix.md +45 -0
  377. devin/nodes/shared/__init__.py +0 -0
  378. devin/nodes/shared/filemaker_expert.md +80 -0
  379. devin/nodes/shared/filemaker_expert.py +354 -0
  380. devin/nodes/shared/filemaker_expert_eval/runner.py +176 -0
  381. devin/nodes/shared/filemaker_expert_eval/scenarios.json +65 -0
  382. devin/nodes/shared/goldilocks_advisor_eval/runner.py +214 -0
  383. devin/nodes/shared/goldilocks_advisor_eval/scenarios.json +58 -0
  384. devin/nodes/shared/helpers.py +156 -0
  385. devin/nodes/shared/idea_compliance_advisor_eval/runner.py +252 -0
  386. devin/nodes/shared/idea_compliance_advisor_eval/scenarios.json +75 -0
  387. devin/nodes/shared/models.py +44 -0
  388. devin/nodes/shared/post.py +40 -0
  389. devin/nodes/shared/router.py +107 -0
  390. devin/nodes/shared/tools.py +191 -0
  391. devin/shared/devin-chat-rubric.md +237 -0
  392. devin/shared/devin-chat-scenario-suite.md +90 -0
  393. devin/shared/eval_doctrine.md +9 -0
@@ -0,0 +1,12 @@
1
+ # Devin Intake Routing Doctrine
2
+
3
+ Determine which Devin arm should handle the current turn.
4
+
5
+ - `ideation`: software/product shaping, feature requests, workflow design, implementation planning, readiness clarification, or “build/change/add/fix this in the project” requests.
6
+ - `insight`: project-specific questions about code, repo state, queue status, worker state, architecture, behavior, operations, or “what is happening / how does this work?” questions.
7
+
8
+ Routing rules:
9
+ - Prefer `insight` for repo/operations/status questions.
10
+ - Prefer `ideation` for forward-looking product/build requests.
11
+ - If the user asks for planning/refinement of a software change, keep it in `ideation`.
12
+ - If the user asks for explanation/investigation of the current system, keep it in `insight`.
@@ -0,0 +1,4 @@
1
+ SCENARIO_NAME = 'ideation_routing'
2
+ SCENARIO_DESCRIPTION = 'Routes a forward-looking software feature request into the ideation arm.'
3
+ INPUT_PAYLOAD = {'raw_text': 'Build a client portal that lets staff triage support requests and track approvals.'}
4
+ EXPECTED_BEHAVIOR = {'route_arm': 'ideation', 'reason_contains': 'software'}
@@ -0,0 +1,5 @@
1
+ EVAL_CRITERIA = {'route_arm_must_equal': 'ideation'}
2
+
3
+ def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
4
+ ok = actual_output.get('route_arm') == 'ideation'
5
+ return ok, ([] if ok else ['expected ideation route'])
@@ -0,0 +1,4 @@
1
+ SCENARIO_NAME = 'insight_routing'
2
+ SCENARIO_DESCRIPTION = 'Routes a repo/operations question into the insight arm.'
3
+ INPUT_PAYLOAD = {'raw_text': 'What is the source-doc queue status for this project right now?'}
4
+ EXPECTED_BEHAVIOR = {'route_arm': 'insight', 'reason_contains': 'operational'}
@@ -0,0 +1,5 @@
1
+ EVAL_CRITERIA = {'route_arm_must_equal': 'insight'}
2
+
3
+ def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
4
+ ok = actual_output.get('route_arm') == 'insight'
5
+ return ok, ([] if ok else ['expected insight route'])
@@ -0,0 +1,44 @@
1
+ # Iterate node design scaffold
2
+
3
+ This directory is the design home for the proposed Devin `iterate` arm.
4
+
5
+ It now reflects Marcus's preferred agent-development pipeline explicitly:
6
+
7
+ 1. objectives and requirements
8
+ 2. evals
9
+ 3. tools and boundaries
10
+ 4. harness and playground
11
+ 5. prompt deferred until the first four stages are accepted
12
+
13
+ ## Directory map
14
+
15
+ - `contract.md`: top-level iterate contract, routing boundary, and orchestration shape
16
+ - `artifacts.md`: shared artifact contracts across the iterate lane
17
+ - `agent-roles.md`: high-level accountability split for Iterator, Framer, Observer, and Coder
18
+ - `pipeline/`: cross-agent design docs ordered by development stage
19
+ - `agents/`: per-agent design docs, each ordered by development stage
20
+ - `eval-plan.md`: consolidated eval inventory spanning routing, framing, observation, supervision, and completion truth
21
+ - `playground-contract.md`: lane-level harness and fixture contract
22
+ - `scenarios/`: planning docs for future scenario fixtures and eval implementations
23
+
24
+ ## Design rules for this directory
25
+
26
+ - Design only, no implementation code
27
+ - No real prompt content yet
28
+ - Objectives lead, non-goals derive from objectives
29
+ - Evals come before tool affordances
30
+ - Tools are documented as bounded capabilities, not as permission theater
31
+ - Harness design tests contracts and truthfulness, not just style
32
+
33
+ ## Current recommended orchestration
34
+
35
+ - pattern: advisor-primary
36
+ - primary owner: `Iterator`
37
+ - advisors: `Framer`, `Observer`
38
+ - supervised worker: `Coder`
39
+
40
+ The intent is one accountable owner with specialist artifacts constraining the coding loop, not a flat peer swarm.
41
+
42
+ ## Prompt status
43
+
44
+ Prompt authoring is intentionally deferred. See `_archived_design_stages/05-prompt-deferred.md` for the acceptance rule.
@@ -0,0 +1 @@
1
+ """Iterate node package."""
@@ -0,0 +1,112 @@
1
+ # Iterate objectives and requirements
2
+
3
+ ## Why this stage exists
4
+
5
+ This is stage 1 in Marcus's iterate design order:
6
+ 1. objectives and requirements
7
+ 2. evals
8
+ 3. tools and boundaries
9
+ 4. harness and playground
10
+ 5. prompt content only after the first four stages are stable
11
+
12
+ If this file is vague, every later stage will drift. The point here is to lock the lane objective, routing boundary, role ownership, and completion requirements before anyone talks about tools or prompt wording.
13
+
14
+ ## Lane objective
15
+
16
+ `iterate` owns a bounded change request on an existing surface and carries it from messy ask to one of three truthful outcomes:
17
+ - verified completion
18
+ - honest blocked verdict
19
+ - explicit promotion out of iterate
20
+
21
+ This lane is for task-scale execution, not for read-only diagnosis and not for broad product planning.
22
+
23
+ ## What must be true for a request to belong in iterate
24
+
25
+ A request belongs in `iterate` only when all of these are plausibly true:
26
+ - the user wants a change, not just an explanation
27
+ - the target is an existing surface, behavior, or failure
28
+ - the work can be framed as a bounded task artifact
29
+ - success can be checked by a scoped observation seam or equivalent proof
30
+ - the change does not require new planning truth at story or feature scale
31
+
32
+ If any of those fail, routing should change instead of forcing the work through iterate.
33
+
34
+ ## Routing requirements
35
+
36
+ ### Route to `iterate`
37
+ Route to `iterate` when the ask is a targeted fix, quick change, or narrow improvement against an existing route, page, component, workflow step, API behavior, or failure mode.
38
+
39
+ ### Route to `insight`
40
+ Route to `insight` when the user wants investigation, explanation, diagnosis, or read-only analysis without asking for implementation.
41
+
42
+ ### Route to `idea`
43
+ Route to `idea` when framing reveals that the work is no longer task-scale, needs new product or workflow planning, or cannot be truthfully described as a bounded delta on an existing surface.
44
+
45
+ ### Re-route during execution
46
+ Initial routing is not permanent. If Framer or Observer discovers that the task is actually read-only or broader-planning work, Iterator must re-route honestly instead of preserving the original lane choice for convenience.
47
+
48
+ ## Truth requirements
49
+
50
+ ### Task truth before coding
51
+ A coding attempt requires a bounded `task_artifact` with:
52
+ - current behavior
53
+ - desired behavior
54
+ - explicit success criteria
55
+ - known scope boundary
56
+ - blocking unknowns called out separately from assumptions
57
+
58
+ ### Observation truth before completion
59
+ A completion claim requires an `observation_artifact` that establishes one of these:
60
+ - a confirmed repro that later stops reproducing, or
61
+ - a bounded red-to-green verification seam for the requested improvement
62
+
63
+ ### Honest uncertainty
64
+ If evidence is missing, repro cannot be confirmed, or the green condition is weak, the lane must say that plainly. Missing truth is a blocker signal, not a reason to improvise confidence.
65
+
66
+ ## Supervision requirements
67
+
68
+ ### Iterator is the accountable owner
69
+ `Iterator` owns lane judgment, readiness to code, respawn decisions, routing changes, and final disposition.
70
+
71
+ ### Framer and Observer constrain the loop
72
+ `Framer` and `Observer` are not optional flavor agents. Their artifacts are the contract that bounds what `Coder` is allowed to do and what `Iterator` is allowed to approve.
73
+
74
+ ### Coder stays subordinate to artifacts
75
+ `Coder` implements the scoped delta and reports evidence. `Coder` does not redefine scope, success criteria, readiness, or completion truth.
76
+
77
+ ## Scope requirements
78
+
79
+ The iterate lane must:
80
+ - stay at task scale
81
+ - name the affected surface as concretely as possible
82
+ - expose scope growth immediately
83
+ - prefer blocked or promoted verdicts over fake completion
84
+ - resist unrelated cleanup, opportunistic refactors, or stealth feature work
85
+
86
+ ## Required shared artifacts
87
+
88
+ The lane contract depends on four shared artifacts:
89
+ - `task_artifact`, authored by Framer and approved for use by Iterator
90
+ - `observation_artifact`, authored by Observer and used by Iterator as the truth seam
91
+ - `iterator_run`, owned by Iterator with coder attempts recorded inside it
92
+ - `promotion_handoff`, authored only by Iterator, and only when work exits iterate for `idea` or `insight`
93
+
94
+ These artifacts are the cross-agent operating contract. If an important judgment is not grounded in one of them, the design is underspecified.
95
+
96
+ All durable top-level iterate artifacts should use monotonic integer revisions so readiness and promotion decisions can point to concrete artifact revisions rather than vague "latest" state.
97
+
98
+ ## Role ownership summary
99
+
100
+ - `Iterator` owns route fit, readiness, supervision, respawn logic, and final verdict
101
+ - `Framer` owns bounded task construction and promotion recommendations discovered during framing
102
+ - `Observer` owns evidence, repro, green-condition definition, and coding-readiness recommendation
103
+ - `Coder` owns implementation attempts and narrow verification execution under supervision
104
+
105
+ ## Minimum acceptance bar for moving to stage 2
106
+
107
+ This stage is ready for eval design only when reviewers can answer all of these clearly:
108
+ - what counts as iterate versus insight versus idea
109
+ - what artifact truth must exist before coding starts
110
+ - what completion evidence Iterator is allowed to trust
111
+ - what each of the four agents owns and must not absorb
112
+ - what conditions force blockage or promotion instead of completion
@@ -0,0 +1,131 @@
1
+ # Iterate evals by pipeline stage
2
+
3
+ ## Purpose
4
+
5
+ This is stage 2 in Marcus's iterate design order. Its job is to prove that the stage-1 contract is testable before anyone specifies tools, harnesses, or prompts.
6
+
7
+ A good eval here answers: "what concrete failure would tell us this iterate design is lying, drifting, or collapsing ownership boundaries?"
8
+
9
+ ## Eval design rules
10
+
11
+ Every eval in this file should check contract truth, not style.
12
+
13
+ That means evals should prefer:
14
+ - routing correctness over eloquent justification
15
+ - artifact quality over generic helpfulness
16
+ - ownership discipline over agent enthusiasm
17
+ - truthful blocked or promoted outcomes over forced completion
18
+
19
+ ## Eval buckets
20
+
21
+ ### 1. Route selection evals
22
+ These test whether intake and re-routing keep iterate limited to bounded change work.
23
+
24
+ Must pass examples:
25
+ - a concrete existing-surface bug fix routes to `iterate`
26
+ - a small behavior tweak routes to `iterate`
27
+ - a narrow UI improvement on an existing page routes to `iterate`
28
+
29
+ Must reject examples:
30
+ - investigation-only asks route to `insight`
31
+ - explanation-only asks route to `insight`
32
+ - broad feature or workflow requests route to `idea`
33
+ - initially small asks that expand during framing are promoted out of `iterate`
34
+
35
+ Failure signals:
36
+ - `iterate` absorbs read-only diagnostic work
37
+ - `iterate` absorbs broad planning work
38
+ - lane choice is treated as irreversible after better truth appears
39
+
40
+ ### 2. Task artifact integrity evals
41
+ These test Framer's ability to convert a messy ask into a bounded contract.
42
+
43
+ Required checks:
44
+ - the artifact identifies current behavior and desired behavior distinctly
45
+ - success criteria are observable and not aspirational
46
+ - scope is narrow enough for task-scale execution
47
+ - assumptions are separated from facts
48
+ - blocking unknowns are separated from nonblocking unknowns
49
+ - a promotion recommendation appears when framing reveals non-iterate scope
50
+ - task artifact revisions increase monotonically when framing is amended
51
+
52
+ Failure signals:
53
+ - task artifact reads like a vague restatement of the user message
54
+ - success criteria cannot be verified later by Observer or Iterator
55
+ - scope is so broad that Coder would need to reinterpret the task
56
+
57
+ ### 3. Observation artifact integrity evals
58
+ These test whether Observer creates a truthful seam for implementation and validation.
59
+
60
+ Required checks:
61
+ - evidence is grounded in logs, repro steps, failing seam output, or equivalent observable proof
62
+ - the artifact can report `not_confirmed` honestly when repro fails
63
+ - repeatability status is explicit
64
+ - expected green condition is concrete enough for Iterator to validate later
65
+ - missing evidence triggers a context request or blocked recommendation, not invented certainty
66
+ - observation artifact revisions increase monotonically when new truth is learned
67
+
68
+ Failure signals:
69
+ - repro is implied but not documented
70
+ - evidence summary has no traceable source
71
+ - green condition is too vague to distinguish success from partial progress
72
+
73
+ ### 4. Supervision integrity evals
74
+ These test whether Iterator preserves the advisor-primary model.
75
+
76
+ Required checks:
77
+ - Iterator refuses to start coding before task and observation artifacts are sufficient
78
+ - Iterator uses Framer and Observer outputs as constraints, not as optional suggestions
79
+ - Iterator respawns Coder only with repair-specific context tied to the artifacts
80
+ - Iterator blocks or promotes when truth or scope no longer fits iterate
81
+ - promotion or reroute writes an Iterator-owned handoff artifact with references back to the exact task and observation revisions used for the decision
82
+
83
+ Failure signals:
84
+ - Iterator bypasses missing artifact truth because coding "might help"
85
+ - Iterator lets Coder redefine the task, evidence, or completion bar
86
+ - Iterator returns completion after scope drift
87
+
88
+ ### 5. Coder discipline evals
89
+ These test whether Coder behaves like a supervised worker instead of a peer decider.
90
+
91
+ Required checks:
92
+ - implementation stays within the bounded task and named surface
93
+ - verification stays narrow and relevant to the observation seam
94
+ - attempt reports say what changed, what passed, what failed, and what remains blocked
95
+ - second attempts respond to the repair context rather than conceptually restarting from scratch
96
+
97
+ Failure signals:
98
+ - unrelated refactors or cleanup appear without authorization
99
+ - Coder claims success without matching the artifact green condition
100
+ - Coder silently broadens the solution to compensate for poor framing
101
+
102
+ ### 6. Completion integrity evals
103
+ These test the final truth gate.
104
+
105
+ Required checks:
106
+ - no completion without a green seam or equivalent scoped proof
107
+ - no completion when requested user-visible behavior is still missing
108
+ - no completion when the observed failure still reproduces
109
+ - no completion after unauthorized scope growth
110
+ - blocked verdicts are allowed and scored as correct when truth is insufficient
111
+ - promotion verdicts are allowed and scored as correct when the task has become planning work
112
+
113
+ Failure signals:
114
+ - the system rewards optimistic claims over truthful disposition
115
+ - completion can happen without evidence that corresponds to the original ask
116
+
117
+ ## Cross-agent eval expectations
118
+
119
+ The four-agent model should be explicitly visible in eval coverage:
120
+ - `Framer` evals prove task-bounding quality
121
+ - `Observer` evals prove evidence and green-condition quality
122
+ - `Coder` evals prove implementation discipline
123
+ - `Iterator` evals prove ownership of readiness, supervision, and final judgment
124
+
125
+ If a behavior matters but cannot be assigned to one of those owners, the contract is still blurry.
126
+
127
+ ## Review rule
128
+
129
+ A stage-1 requirement is not real until there is a plausible eval that could fail it.
130
+
131
+ If reviewers cannot describe how a bad route, bad artifact, bad supervision choice, or fake completion would be caught, the iterate design is not ready to advance to stage 3.
@@ -0,0 +1,110 @@
1
+ # Iterate tools and boundaries
2
+
3
+ ## Purpose
4
+
5
+ This is stage 3 in Marcus's iterate design order. By the time this file is written, the lane objective and eval expectations should already be stable.
6
+
7
+ The point here is not to list every possible runtime permission. The point is to define which capability classes each role needs in order to satisfy the stage-1 and stage-2 contract, and which capability classes would let that role collapse into another role.
8
+
9
+ ## Tooling principle
10
+
11
+ Describe capabilities in responsibility terms, not vendor or provider terms.
12
+
13
+ Good examples:
14
+ - repo inspection
15
+ - log and trace inspection
16
+ - bounded test execution
17
+ - artifact read and write
18
+
19
+ Bad examples:
20
+ - naming a provider-specific tool without explaining which contract requirement it serves
21
+ - granting broad execution rights to compensate for unclear ownership
22
+
23
+ ## Shared lane capability classes
24
+
25
+ The iterate lane as a whole needs access to these capability classes:
26
+ - conversation and history reading
27
+ - repository and project surface inspection
28
+ - logs, traces, screenshots, and repro surfaces when relevant
29
+ - artifact read and write surfaces
30
+ - narrow verification seam execution
31
+ - supervised implementation execution
32
+
33
+ These are lane-level needs. They are not automatically granted to every role equally.
34
+
35
+ ## Role capability boundaries
36
+
37
+ ### Iterator
38
+ Needs:
39
+ - read access to conversation context and shared artifacts
40
+ - enough inspection capability to judge route fit, readiness, and final evidence
41
+ - authority to spawn or supervise Coder
42
+ - write access to iterator-owned run records, monotonic revision updates, promotion handoff records, and final disposition
43
+
44
+ Must not become:
45
+ - the default implementation worker
46
+ - the substitute observer who handwaves missing evidence
47
+ - the substitute framer who rewrites the task mid-loop to make an attempt look successful
48
+
49
+ ### Framer
50
+ Needs:
51
+ - conversation and history reading
52
+ - project and surface inspection sufficient to localize the ask
53
+ - artifact write access for `task_artifact`
54
+
55
+ Must not have responsibility for:
56
+ - running broad repro or verification work that belongs to Observer
57
+ - implementation execution
58
+ - final completion judgment
59
+
60
+ ### Observer
61
+ Needs:
62
+ - evidence-source inspection such as logs, traces, screenshots, and repo context
63
+ - bounded repro execution and narrow red-seam creation
64
+ - artifact write access for `observation_artifact`
65
+
66
+ Must not have responsibility for:
67
+ - silently editing code to make repro disappear
68
+ - redefining task scope
69
+ - returning final completion on behalf of Iterator
70
+
71
+ ### Coder
72
+ Needs:
73
+ - repository read and write access within the supervised work area
74
+ - implementation execution capability
75
+ - narrow verification execution aligned to the task and observation seam
76
+ - ability to append implementation and verification notes to the run record
77
+
78
+ Must not have responsibility for:
79
+ - changing route classification
80
+ - redefining success criteria
81
+ - deciding that missing evidence is good enough
82
+ - writing the final verdict as if implementation were the same thing as truth
83
+
84
+ ## Boundary rules that should survive runtime implementation
85
+
86
+ - missing evidence should trigger context requests, blockage, or promotion, not bluffing
87
+ - missing task clarity should trigger framing repair, not coder improvisation
88
+ - scope growth should trigger promotion review, not quiet expansion of the task
89
+ - read-only user intent should route to `insight`, even if implementation would be easy
90
+ - artifact revisions should advance explicitly when durable top-level truth changes, rather than relying on implicit last-write-wins semantics
91
+ - only Iterator should stamp promotion linkage or write `promotion_handoff.json`
92
+ - provider convenience must not override role boundaries
93
+
94
+ ## Practical anti-collapse checks
95
+
96
+ A tools design is probably wrong if any of these become normal:
97
+ - Iterator frequently edits code directly because it is faster
98
+ - Framer runs deep repros because the task artifact was thin
99
+ - Observer fixes small issues while inspecting evidence
100
+ - Coder rewrites task meaning in order to declare success
101
+
102
+ Those are not efficiency wins. They are signs that the lane contract is dissolving.
103
+
104
+ ## Minimum acceptance bar for moving to stage 4
105
+
106
+ This stage is ready for harness and playground design only when reviewers can answer all of these clearly:
107
+ - which capability classes are required by the lane overall
108
+ - which of those capabilities belong to each role
109
+ - which boundary violations would invalidate the advisor-primary model
110
+ - how the tools model preserves Iterator as the single accountable owner
@@ -0,0 +1,32 @@
1
+ # Iterate harness and playground design
2
+
3
+ ## Purpose
4
+
5
+ The harness should exercise the lane as an orchestration contract, not just as prompt prose.
6
+
7
+ ## Required harness abilities
8
+
9
+ - inject route context and prior turns
10
+ - inspect authored artifacts at each stage
11
+ - inspect iterate artifacts under `.devflow/iterate/<task_id>/`
12
+ - simulate coder retries and near misses
13
+ - compare terminal claims against observation truth and success criteria
14
+ - verify that readiness state transitions occur at the correct points before and after each coder attempt
15
+ - verify monotonic revision bumps on top-level artifacts when framing, observation, or supervision state changes
16
+ - inspect `promotion_handoff.json` when work exits to `idea` or `insight`
17
+ - read attempt-scoped verifier artifacts without requiring them to be inlined into `iterator_run.json`
18
+ - assert deterministic attempt ordering via ordinal ids like `attempt-001`
19
+ - parse a shared verifier-artifact envelope before descending into verifier-specific payloads
20
+
21
+ ## Minimum fixture families
22
+
23
+ - reproducible error fix
24
+ - targeted improvement with a bounded failing seam
25
+ - ambiguous request needing framing discipline
26
+ - non-confirmed issue requiring honest blockage
27
+ - broader request that must promote to `idea`
28
+ - read-only request that must route to `insight`
29
+
30
+ ## Success condition
31
+
32
+ The harness is good when it can catch false-positive completions, scope drift, missing-artifact shortcuts, incorrect iterator state transitions, missing promotion linkage, broken artifact revision discipline, non-monotonic attempt ids, and verifier outputs that skip the shared normalization envelope.
@@ -0,0 +1,11 @@
1
+ # Prompt deferred
2
+
3
+ Prompt authoring for the iterate lane is intentionally deferred.
4
+
5
+ Prompt work should start only after:
6
+ 1. objectives and requirements are accepted
7
+ 2. evals are accepted
8
+ 3. tools and boundaries are accepted
9
+ 4. harness and playground expectations are accepted
10
+
11
+ Until then, prompt placeholders are acceptable, but real prompt content is out of scope for this directory pass.
@@ -0,0 +1,20 @@
1
+ # Coder, objectives and requirements
2
+
3
+ ## Objective
4
+
5
+ Implement the scoped delta described by the task and observation artifacts, then report the attempt honestly.
6
+
7
+ ## Requirements
8
+
9
+ - stay inside the scoped task unless Iterator explicitly broadens it
10
+ - treat framing and observation artifacts as the governing contract
11
+ - implement the smallest change that can satisfy the green condition
12
+ - run the narrowest valid verification seam
13
+ - report what changed, what passed, what failed, and what remains blocked
14
+ - support repair-specific retries when Iterator respawns with tighter context
15
+
16
+ ## Derived non-goals
17
+
18
+ - do not redefine the task contract
19
+ - do not broaden scope for opportunistic cleanup
20
+ - do not self-certify final completion
@@ -0,0 +1,8 @@
1
+ # Coder evals
2
+
3
+ - fixes a reproducible error without unrelated drift
4
+ - satisfies a targeted improvement seam with a minimal change
5
+ - reports partial progress honestly after a failed first pass
6
+ - stays within the scoped files or surfaces when the task is narrow
7
+ - provides actionable blocker detail when safe completion is impossible
8
+ - improves on a second attempt when respawned with repair context
@@ -0,0 +1,14 @@
1
+ # Coder tools and boundaries
2
+
3
+ ## Needed capabilities
4
+
5
+ - read task and observation artifacts
6
+ - inspect and modify scoped project files
7
+ - run narrow verification commands or checks
8
+ - write attempt summaries into the iterator run record
9
+
10
+ ## Boundary rules
11
+
12
+ - should not redefine task scope or user intent
13
+ - should not convert an observation gap into a fake success claim
14
+ - should not claim final victory without Iterator validation
@@ -0,0 +1,12 @@
1
+ # Coder harness and playground
2
+
3
+ The harness should inspect:
4
+ - whether the implementation stayed in scope
5
+ - whether the chosen verification seam was narrow and relevant
6
+ - whether the attempt report is honest about passes, failures, and blockers
7
+ - whether repair retries materially improve alignment
8
+
9
+ Key fixtures:
10
+ - reproducible fix succeeds in one pass
11
+ - first pass fails, second pass succeeds
12
+ - safe completion is impossible and the blocker is reported clearly
@@ -0,0 +1,20 @@
1
+ # Framer, objectives and requirements
2
+
3
+ ## Objective
4
+
5
+ Turn the user's messy request and relevant context into a bounded iterate task artifact that another role can safely act on.
6
+
7
+ ## Requirements
8
+
9
+ - classify the request as error fix, quick change, or targeted improvement
10
+ - extract the most likely surface, route, file, component, or function hints when available
11
+ - distinguish current behavior from desired behavior
12
+ - write observable success criteria
13
+ - separate facts, assumptions, blocking unknowns, and nonblocking unknowns
14
+ - recommend one of: stay in `iterate`, investigate first, or promote to `idea`
15
+
16
+ ## Derived non-goals
17
+
18
+ - do not perform observation work
19
+ - do not code
20
+ - do not broaden the task to make it sound more important
@@ -0,0 +1,8 @@
1
+ # Framer evals
2
+
3
+ - turns a messy fix request into a bounded task artifact
4
+ - preserves partial location hints instead of discarding them
5
+ - keeps a tiny request small rather than inflating it
6
+ - marks blocking unknowns honestly when the ask is underspecified
7
+ - distinguishes current behavior and desired behavior clearly
8
+ - recommends promotion when the request is no longer task-scale
@@ -0,0 +1,13 @@
1
+ # Framer tools and boundaries
2
+
3
+ ## Needed capabilities
4
+
5
+ - read conversation context and project hints
6
+ - inspect lightweight repo clues when necessary to localize the surface
7
+ - write the task artifact
8
+
9
+ ## Boundary rules
10
+
11
+ - should not collect evidence that belongs in observation truth
12
+ - should not run implementation changes
13
+ - should not hide uncertainty behind overconfident framing prose
@@ -0,0 +1,12 @@
1
+ # Framer harness and playground
2
+
3
+ The harness should inspect whether Framer outputs:
4
+ - a coherent task type
5
+ - a clear current versus desired behavior split
6
+ - observable success criteria
7
+ - explicit unknowns and promotion guidance
8
+
9
+ Key fixtures:
10
+ - messy error report with partial location
11
+ - tiny copy or behavior tweak
12
+ - underspecified but repairable request
@@ -0,0 +1,25 @@
1
+ # Iterator, objectives and requirements
2
+
3
+ ## Objective
4
+
5
+ Own the iterate lane end to end and return one truthful outcome:
6
+ - completed
7
+ - blocked
8
+ - needs more context
9
+ - promote to idea
10
+ - route to insight
11
+
12
+ ## Requirements
13
+
14
+ - synthesize Framer and Observer artifacts without collapsing their roles
15
+ - decide coding readiness before spawning Coder
16
+ - keep scope aligned to the original task contract
17
+ - issue repair-specific respawns when the first coding pass is close but incomplete
18
+ - refuse premature completion
19
+ - author the final lane verdict and rationale
20
+
21
+ ## Derived non-goals
22
+
23
+ - do not serve as the primary coding worker
24
+ - do not hand-wave missing observation truth
25
+ - do not bury promotions or blockers inside optimistic language
@@ -0,0 +1,9 @@
1
+ # Iterator evals
2
+
3
+ - declines to spawn Coder when task artifact is vague
4
+ - declines to spawn Coder when observation artifact is inconclusive for a required repro
5
+ - respawns Coder with a precise repair reason after a near miss
6
+ - refuses completion when the green condition is still red
7
+ - refuses completion when success criteria are only partially satisfied
8
+ - promotes to `idea` when scope expansion becomes necessary
9
+ - returns a blocked verdict when safe progress depends on missing evidence
@@ -0,0 +1,14 @@
1
+ # Iterator tools and boundaries
2
+
3
+ ## Needed capabilities
4
+
5
+ - read framing, observation, and attempt artifacts
6
+ - supervise worker attempts
7
+ - run or request final narrow verification checks
8
+ - write final disposition and respawn rationale
9
+
10
+ ## Boundary rules
11
+
12
+ - may coordinate all agents, but should not absorb coding as the default path
13
+ - may inspect verification evidence, but should not invent it
14
+ - may revise task handling, but should not casually overwrite Framer or Observer truth without explanation