devflow-engine 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. devflow_engine/__init__.py +3 -0
  2. devflow_engine/agentic_prompts.py +100 -0
  3. devflow_engine/agentic_runtime.py +398 -0
  4. devflow_engine/api_key_flow_harness.py +539 -0
  5. devflow_engine/api_keys.py +357 -0
  6. devflow_engine/bootstrap/__init__.py +2 -0
  7. devflow_engine/bootstrap/provision_from_template.py +84 -0
  8. devflow_engine/cli/__init__.py +0 -0
  9. devflow_engine/cli/app.py +7270 -0
  10. devflow_engine/core/__init__.py +0 -0
  11. devflow_engine/core/config.py +86 -0
  12. devflow_engine/core/logging.py +29 -0
  13. devflow_engine/core/paths.py +45 -0
  14. devflow_engine/core/toml_kv.py +33 -0
  15. devflow_engine/devflow_event_worker.py +1292 -0
  16. devflow_engine/devflow_state.py +201 -0
  17. devflow_engine/devin2/__init__.py +9 -0
  18. devflow_engine/devin2/agent_definition.py +120 -0
  19. devflow_engine/devin2/pi_runner.py +204 -0
  20. devflow_engine/devin_orchestration.py +69 -0
  21. devflow_engine/docs/prompts/anti-patterns.md +42 -0
  22. devflow_engine/docs/prompts/devin-agent-prompt.md +55 -0
  23. devflow_engine/docs/prompts/devin2-agent-prompt.md +81 -0
  24. devflow_engine/docs/prompts/examples/devin-vapi-clone-reference-exchange.json +85 -0
  25. devflow_engine/doctor/__init__.py +2 -0
  26. devflow_engine/doctor/triage.py +140 -0
  27. devflow_engine/error/__init__.py +0 -0
  28. devflow_engine/error/remediation.py +21 -0
  29. devflow_engine/errors/error_solver_dag.py +522 -0
  30. devflow_engine/errors/runtime_observability.py +67 -0
  31. devflow_engine/idea/__init__.py +4 -0
  32. devflow_engine/idea/actors.py +481 -0
  33. devflow_engine/idea/agentic.py +465 -0
  34. devflow_engine/idea/analyze.py +93 -0
  35. devflow_engine/idea/devin_chat_dag.py +1 -0
  36. devflow_engine/idea/diff.py +99 -0
  37. devflow_engine/idea/drafts.py +446 -0
  38. devflow_engine/idea/idea_creation_dag.py +643 -0
  39. devflow_engine/idea/ideation_enrichment.py +355 -0
  40. devflow_engine/idea/ideation_enrichment_worker.py +19 -0
  41. devflow_engine/idea/paths.py +28 -0
  42. devflow_engine/idea/promote.py +53 -0
  43. devflow_engine/idea/redaction.py +27 -0
  44. devflow_engine/idea/repo_tools.py +1277 -0
  45. devflow_engine/idea/response_mode.py +30 -0
  46. devflow_engine/idea/story_pipeline.py +1585 -0
  47. devflow_engine/idea/sufficiency.py +376 -0
  48. devflow_engine/idea/traditional_stories.py +1257 -0
  49. devflow_engine/implementation/__init__.py +0 -0
  50. devflow_engine/implementation/alembic_preflight.py +700 -0
  51. devflow_engine/implementation/dag.py +8450 -0
  52. devflow_engine/implementation/green_gate.py +93 -0
  53. devflow_engine/implementation/prompts.py +108 -0
  54. devflow_engine/implementation/test_runtime.py +623 -0
  55. devflow_engine/integration/__init__.py +19 -0
  56. devflow_engine/integration/agentic.py +66 -0
  57. devflow_engine/integration/dag.py +3539 -0
  58. devflow_engine/integration/prompts.py +114 -0
  59. devflow_engine/integration/supabase_schema.sql +31 -0
  60. devflow_engine/integration/supabase_sync.py +177 -0
  61. devflow_engine/llm/__init__.py +1 -0
  62. devflow_engine/llm/cli_one_shot.py +84 -0
  63. devflow_engine/llm/cli_stream.py +371 -0
  64. devflow_engine/llm/execution_context.py +26 -0
  65. devflow_engine/llm/invoke.py +1322 -0
  66. devflow_engine/llm/provider_api.py +304 -0
  67. devflow_engine/llm/repo_knowledge.py +588 -0
  68. devflow_engine/llm_primitives.py +315 -0
  69. devflow_engine/orchestration.py +62 -0
  70. devflow_engine/planning/__init__.py +0 -0
  71. devflow_engine/planning/analyze_repo.py +92 -0
  72. devflow_engine/planning/render_drafts.py +133 -0
  73. devflow_engine/playground/__init__.py +0 -0
  74. devflow_engine/playground/hooks.py +26 -0
  75. devflow_engine/playwright_workflow/__init__.py +5 -0
  76. devflow_engine/playwright_workflow/dag.py +1317 -0
  77. devflow_engine/process/__init__.py +5 -0
  78. devflow_engine/process/dag.py +59 -0
  79. devflow_engine/project_registration/__init__.py +3 -0
  80. devflow_engine/project_registration/dag.py +1581 -0
  81. devflow_engine/project_registry.py +109 -0
  82. devflow_engine/prompts/devin/generic/prompt.md +6 -0
  83. devflow_engine/prompts/devin/ideation/prompt.md +263 -0
  84. devflow_engine/prompts/devin/ideation/scenarios.md +5 -0
  85. devflow_engine/prompts/devin/ideation_loop/prompt.md +6 -0
  86. devflow_engine/prompts/devin/insight/prompt.md +11 -0
  87. devflow_engine/prompts/devin/insight/scenarios.md +5 -0
  88. devflow_engine/prompts/devin/intake/prompt.md +15 -0
  89. devflow_engine/prompts/devin/iterate/prompt.md +12 -0
  90. devflow_engine/prompts/devin/shared/eval_doctrine.md +9 -0
  91. devflow_engine/prompts/devin/shared/principles.md +246 -0
  92. devflow_engine/prompts/devin_eval/assessment/prompt.md +18 -0
  93. devflow_engine/prompts/idea/api_ideation_agent/prompt.md +8 -0
  94. devflow_engine/prompts/idea/api_insight_agent/prompt.md +8 -0
  95. devflow_engine/prompts/idea/response_doctrine/prompt.md +18 -0
  96. devflow_engine/prompts/implementation/dependency_assessment/prompt.md +12 -0
  97. devflow_engine/prompts/implementation/green/green/prompt.md +11 -0
  98. devflow_engine/prompts/implementation/green/node_config/prompt.md +3 -0
  99. devflow_engine/prompts/implementation/green_review/outcome_review/prompt.md +5 -0
  100. devflow_engine/prompts/implementation/green_review/prior_run_review/prompt.md +5 -0
  101. devflow_engine/prompts/implementation/red/prompt.md +27 -0
  102. devflow_engine/prompts/implementation/redreview/prompt.md +23 -0
  103. devflow_engine/prompts/implementation/redreview_repair/prompt.md +16 -0
  104. devflow_engine/prompts/implementation/setupdoc/prompt.md +10 -0
  105. devflow_engine/prompts/implementation/story_planning/prompt.md +13 -0
  106. devflow_engine/prompts/implementation/test_design/prompt.md +27 -0
  107. devflow_engine/prompts/integration/README.md +185 -0
  108. devflow_engine/prompts/integration/green/example.md +67 -0
  109. devflow_engine/prompts/integration/green/green/prompt.md +10 -0
  110. devflow_engine/prompts/integration/green/node_config/prompt.md +42 -0
  111. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/green/prompt.md +15 -0
  112. devflow_engine/prompts/integration/green/past_prompts/20260417T212300/node_config/prompt.md +42 -0
  113. devflow_engine/prompts/integration/green_enrich/example.md +79 -0
  114. devflow_engine/prompts/integration/green_enrich/green_enrich/prompt.md +9 -0
  115. devflow_engine/prompts/integration/green_enrich/node_config/prompt.md +41 -0
  116. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/green_enrich/prompt.md +14 -0
  117. devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  118. devflow_engine/prompts/integration/red/code_repair/prompt.md +12 -0
  119. devflow_engine/prompts/integration/red/example.md +152 -0
  120. devflow_engine/prompts/integration/red/node_config/prompt.md +86 -0
  121. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  122. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/node_config/prompt.md +84 -0
  123. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red/prompt.md +16 -0
  124. devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red_repair/prompt.md +15 -0
  125. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  126. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/node_config/prompt.md +84 -0
  127. devflow_engine/prompts/integration/red/past_prompts/20260417T215032/red_repair/prompt.md +11 -0
  128. devflow_engine/prompts/integration/red/red/prompt.md +11 -0
  129. devflow_engine/prompts/integration/red/red_repair/prompt.md +12 -0
  130. devflow_engine/prompts/integration/red_review/example.md +71 -0
  131. devflow_engine/prompts/integration/red_review/node_config/prompt.md +41 -0
  132. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/node_config/prompt.md +41 -0
  133. devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/red_review/prompt.md +15 -0
  134. devflow_engine/prompts/integration/red_review/red_review/prompt.md +9 -0
  135. devflow_engine/prompts/integration/resolve/example.md +111 -0
  136. devflow_engine/prompts/integration/resolve/node_config/prompt.md +64 -0
  137. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/node_config/prompt.md +64 -0
  138. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_implicated_users/prompt.md +15 -0
  139. devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_side_effects/prompt.md +15 -0
  140. devflow_engine/prompts/integration/resolve/resolve_implicated_users/prompt.md +10 -0
  141. devflow_engine/prompts/integration/resolve/resolve_side_effects/prompt.md +10 -0
  142. devflow_engine/prompts/integration/validate/build_idea_acceptance_coverage/prompt.md +12 -0
  143. devflow_engine/prompts/integration/validate/code_repair/prompt.md +13 -0
  144. devflow_engine/prompts/integration/validate/example.md +143 -0
  145. devflow_engine/prompts/integration/validate/node_config/prompt.md +87 -0
  146. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
  147. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/node_config/prompt.md +67 -0
  148. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_enrich_gate/prompt.md +17 -0
  149. devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_repair/prompt.md +16 -0
  150. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
  151. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/node_config/prompt.md +67 -0
  152. devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/validate_repair/prompt.md +9 -0
  153. devflow_engine/prompts/integration/validate/validate_enrich_gate/prompt.md +10 -0
  154. devflow_engine/prompts/integration/validate/validate_repair/prompt.md +20 -0
  155. devflow_engine/prompts/integration/write_workflows/example.md +100 -0
  156. devflow_engine/prompts/integration/write_workflows/node_config/prompt.md +44 -0
  157. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/node_config/prompt.md +44 -0
  158. devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/write_workflows/prompt.md +17 -0
  159. devflow_engine/prompts/integration/write_workflows/write_workflows/prompt.md +11 -0
  160. devflow_engine/prompts/iterate/README.md +7 -0
  161. devflow_engine/prompts/iterate/coder/prompt.md +11 -0
  162. devflow_engine/prompts/iterate/framer/prompt.md +11 -0
  163. devflow_engine/prompts/iterate/iterator/prompt.md +13 -0
  164. devflow_engine/prompts/iterate/observer/prompt.md +11 -0
  165. devflow_engine/prompts/recovery/diagnosis/prompt.md +7 -0
  166. devflow_engine/prompts/recovery/execution/prompt.md +8 -0
  167. devflow_engine/prompts/recovery/execution_verification/prompt.md +7 -0
  168. devflow_engine/prompts/recovery/failure_investigation/prompt.md +10 -0
  169. devflow_engine/prompts/recovery/preflight_health_repo_repair/prompt.md +8 -0
  170. devflow_engine/prompts/recovery/remediation_execution/prompt.md +11 -0
  171. devflow_engine/prompts/recovery/root_cause_investigation/prompt.md +12 -0
  172. devflow_engine/prompts/scope_idea/doctrine/prompt.md +7 -0
  173. devflow_engine/prompts/source_doc_eval/document/prompt.md +6 -0
  174. devflow_engine/prompts/source_doc_eval/targeted_mutation/prompt.md +9 -0
  175. devflow_engine/prompts/source_doc_mutation/domain_entities/prompt.md +6 -0
  176. devflow_engine/prompts/source_doc_mutation/product_brief/prompt.md +6 -0
  177. devflow_engine/prompts/source_doc_mutation/project_doc_coherence/prompt.md +7 -0
  178. devflow_engine/prompts/source_doc_mutation/project_doc_render/prompt.md +9 -0
  179. devflow_engine/prompts/source_doc_mutation/source_doc_coherence/prompt.md +5 -0
  180. devflow_engine/prompts/source_doc_mutation/source_doc_enrichment_coherence/prompt.md +6 -0
  181. devflow_engine/prompts/source_doc_mutation/user_workflows/prompt.md +6 -0
  182. devflow_engine/prompts/source_scope/doctrine/prompt.md +10 -0
  183. devflow_engine/prompts/ui_grounding/doctrine/prompt.md +7 -0
  184. devflow_engine/recovery/__init__.py +3 -0
  185. devflow_engine/recovery/dag.py +2609 -0
  186. devflow_engine/recovery/models.py +220 -0
  187. devflow_engine/refactor.py +93 -0
  188. devflow_engine/registry/__init__.py +1 -0
  189. devflow_engine/registry/cards.py +238 -0
  190. devflow_engine/registry/domain_normalize.py +60 -0
  191. devflow_engine/registry/effects.py +65 -0
  192. devflow_engine/registry/enforce_report.py +150 -0
  193. devflow_engine/registry/module_cards_classify.py +164 -0
  194. devflow_engine/registry/module_cards_draft.py +184 -0
  195. devflow_engine/registry/module_cards_gate.py +59 -0
  196. devflow_engine/registry/packages.py +347 -0
  197. devflow_engine/registry/pathways.py +323 -0
  198. devflow_engine/review/__init__.py +11 -0
  199. devflow_engine/review/dag.py +588 -0
  200. devflow_engine/review/review_story.py +67 -0
  201. devflow_engine/scope_idea/__init__.py +3 -0
  202. devflow_engine/scope_idea/agentic.py +39 -0
  203. devflow_engine/scope_idea/dag.py +1069 -0
  204. devflow_engine/scope_idea/models.py +175 -0
  205. devflow_engine/skills/builtins/devflow/queue_failure_investigation/SKILL.md +112 -0
  206. devflow_engine/skills/builtins/devflow/queue_idea_to_story/SKILL.md +120 -0
  207. devflow_engine/skills/builtins/devflow/queue_integration/SKILL.md +105 -0
  208. devflow_engine/skills/builtins/devflow/queue_recovery/SKILL.md +108 -0
  209. devflow_engine/skills/builtins/devflow/queue_runtime_core/SKILL.md +155 -0
  210. devflow_engine/skills/builtins/devflow/queue_story_implementation/SKILL.md +122 -0
  211. devflow_engine/skills/builtins/devin/idea_to_story_handoff/SKILL.md +120 -0
  212. devflow_engine/skills/builtins/devin/ideation/SKILL.md +168 -0
  213. devflow_engine/skills/builtins/devin/ideation/state-and-phrasing-reference.md +18 -0
  214. devflow_engine/skills/builtins/devin/insight/SKILL.md +22 -0
  215. devflow_engine/skills/registry.example.yaml +42 -0
  216. devflow_engine/source_doc_assumptions.py +291 -0
  217. devflow_engine/source_doc_mutation_dag.py +1606 -0
  218. devflow_engine/source_doc_mutation_eval.py +417 -0
  219. devflow_engine/source_doc_mutation_worker.py +25 -0
  220. devflow_engine/source_docs_schema.py +207 -0
  221. devflow_engine/source_docs_updater.py +309 -0
  222. devflow_engine/source_scope/__init__.py +15 -0
  223. devflow_engine/source_scope/agentic.py +45 -0
  224. devflow_engine/source_scope/dag.py +1626 -0
  225. devflow_engine/source_scope/models.py +177 -0
  226. devflow_engine/stores/__init__.py +0 -0
  227. devflow_engine/stores/execution_store.py +3534 -0
  228. devflow_engine/story/__init__.py +0 -0
  229. devflow_engine/story/contracts.py +160 -0
  230. devflow_engine/story/discovery.py +47 -0
  231. devflow_engine/story/evidence.py +118 -0
  232. devflow_engine/story/hashing.py +27 -0
  233. devflow_engine/story/implemented_queue_purge.py +148 -0
  234. devflow_engine/story/indexer.py +105 -0
  235. devflow_engine/story/io.py +20 -0
  236. devflow_engine/story/markdown_contracts.py +298 -0
  237. devflow_engine/story/reconciliation.py +408 -0
  238. devflow_engine/story/validate_stories.py +149 -0
  239. devflow_engine/story/validate_tests_story.py +512 -0
  240. devflow_engine/story/validation.py +133 -0
  241. devflow_engine/ui_grounding/__init__.py +11 -0
  242. devflow_engine/ui_grounding/agentic.py +31 -0
  243. devflow_engine/ui_grounding/dag.py +874 -0
  244. devflow_engine/ui_grounding/models.py +224 -0
  245. devflow_engine/ui_grounding/pencil_bridge.py +247 -0
  246. devflow_engine/vendor/__init__.py +0 -0
  247. devflow_engine/vendor/datalumina_genai/__init__.py +11 -0
  248. devflow_engine/vendor/datalumina_genai/core/__init__.py +0 -0
  249. devflow_engine/vendor/datalumina_genai/core/exceptions.py +9 -0
  250. devflow_engine/vendor/datalumina_genai/core/nodes/__init__.py +0 -0
  251. devflow_engine/vendor/datalumina_genai/core/nodes/agent.py +48 -0
  252. devflow_engine/vendor/datalumina_genai/core/nodes/agent_streaming_node.py +26 -0
  253. devflow_engine/vendor/datalumina_genai/core/nodes/base.py +89 -0
  254. devflow_engine/vendor/datalumina_genai/core/nodes/concurrent.py +30 -0
  255. devflow_engine/vendor/datalumina_genai/core/nodes/router.py +69 -0
  256. devflow_engine/vendor/datalumina_genai/core/schema.py +72 -0
  257. devflow_engine/vendor/datalumina_genai/core/task.py +52 -0
  258. devflow_engine/vendor/datalumina_genai/core/validate.py +139 -0
  259. devflow_engine/vendor/datalumina_genai/core/workflow.py +200 -0
  260. devflow_engine/worker.py +1086 -0
  261. devflow_engine/worker_guard.py +233 -0
  262. devflow_engine-1.0.0.dist-info/METADATA +235 -0
  263. devflow_engine-1.0.0.dist-info/RECORD +393 -0
  264. devflow_engine-1.0.0.dist-info/WHEEL +4 -0
  265. devflow_engine-1.0.0.dist-info/entry_points.txt +3 -0
  266. devin/__init__.py +6 -0
  267. devin/dag.py +58 -0
  268. devin/dag_two_arm.py +138 -0
  269. devin/devin_chat_scenario_catalog.json +588 -0
  270. devin/devin_eval.py +677 -0
  271. devin/nodes/__init__.py +0 -0
  272. devin/nodes/ideation/__init__.py +0 -0
  273. devin/nodes/ideation/node.py +195 -0
  274. devin/nodes/ideation/playground.py +267 -0
  275. devin/nodes/ideation/prompt.md +65 -0
  276. devin/nodes/ideation/scenarios/continue_refinement.py +13 -0
  277. devin/nodes/ideation/scenarios/continue_refinement_evals.py +18 -0
  278. devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py +17 -0
  279. devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py +16 -0
  280. devin/nodes/ideation/scenarios/large_idea_split.py +4 -0
  281. devin/nodes/ideation/scenarios/large_idea_split_evals.py +17 -0
  282. devin/nodes/ideation/scenarios/source_documentation_added.py +4 -0
  283. devin/nodes/ideation/scenarios/source_documentation_added_evals.py +16 -0
  284. devin/nodes/ideation/scenarios/user_says_create_it.py +30 -0
  285. devin/nodes/ideation/scenarios/user_says_create_it_evals.py +23 -0
  286. devin/nodes/ideation/scenarios/vague_idea.py +16 -0
  287. devin/nodes/ideation/scenarios/vague_idea_evals.py +47 -0
  288. devin/nodes/ideation/tools.json +312 -0
  289. devin/nodes/insight/__init__.py +0 -0
  290. devin/nodes/insight/node.py +49 -0
  291. devin/nodes/insight/playground.py +154 -0
  292. devin/nodes/insight/prompt.md +61 -0
  293. devin/nodes/insight/scenarios/architecture_pattern_query.py +15 -0
  294. devin/nodes/insight/scenarios/architecture_pattern_query_evals.py +25 -0
  295. devin/nodes/insight/scenarios/codebase_exploration.py +15 -0
  296. devin/nodes/insight/scenarios/codebase_exploration_evals.py +23 -0
  297. devin/nodes/insight/scenarios/devin_ideation_routing.py +19 -0
  298. devin/nodes/insight/scenarios/devin_ideation_routing_evals.py +39 -0
  299. devin/nodes/insight/scenarios/devin_insight_routing.py +20 -0
  300. devin/nodes/insight/scenarios/devin_insight_routing_evals.py +40 -0
  301. devin/nodes/insight/scenarios/operational_debugging.py +15 -0
  302. devin/nodes/insight/scenarios/operational_debugging_evals.py +23 -0
  303. devin/nodes/insight/scenarios/operational_question.py +9 -0
  304. devin/nodes/insight/scenarios/operational_question_evals.py +8 -0
  305. devin/nodes/insight/scenarios/queue_status.py +15 -0
  306. devin/nodes/insight/scenarios/queue_status_evals.py +23 -0
  307. devin/nodes/insight/scenarios/source_doc_explanation.py +14 -0
  308. devin/nodes/insight/scenarios/source_doc_explanation_evals.py +21 -0
  309. devin/nodes/insight/scenarios/worker_state_check.py +15 -0
  310. devin/nodes/insight/scenarios/worker_state_check_evals.py +22 -0
  311. devin/nodes/insight/tools.json +126 -0
  312. devin/nodes/intake/__init__.py +0 -0
  313. devin/nodes/intake/node.py +27 -0
  314. devin/nodes/intake/playground.py +47 -0
  315. devin/nodes/intake/prompt.md +12 -0
  316. devin/nodes/intake/scenarios/ideation_routing.py +4 -0
  317. devin/nodes/intake/scenarios/ideation_routing_evals.py +5 -0
  318. devin/nodes/intake/scenarios/insight_routing.py +4 -0
  319. devin/nodes/intake/scenarios/insight_routing_evals.py +5 -0
  320. devin/nodes/iterate/README.md +44 -0
  321. devin/nodes/iterate/__init__.py +1 -0
  322. devin/nodes/iterate/_archived_design_stages/01-objectives-requirements.md +112 -0
  323. devin/nodes/iterate/_archived_design_stages/02-evals.md +131 -0
  324. devin/nodes/iterate/_archived_design_stages/03-tools-and-boundaries.md +110 -0
  325. devin/nodes/iterate/_archived_design_stages/04-harness-and-playground.md +32 -0
  326. devin/nodes/iterate/_archived_design_stages/05-prompt-deferred.md +11 -0
  327. devin/nodes/iterate/_archived_design_stages/coder_agent_design/01-objectives-requirements.md +20 -0
  328. devin/nodes/iterate/_archived_design_stages/coder_agent_design/02-evals.md +8 -0
  329. devin/nodes/iterate/_archived_design_stages/coder_agent_design/03-tools-and-boundaries.md +14 -0
  330. devin/nodes/iterate/_archived_design_stages/coder_agent_design/04-harness-and-playground.md +12 -0
  331. devin/nodes/iterate/_archived_design_stages/framer_agent_design/01-objectives-requirements.md +20 -0
  332. devin/nodes/iterate/_archived_design_stages/framer_agent_design/02-evals.md +8 -0
  333. devin/nodes/iterate/_archived_design_stages/framer_agent_design/03-tools-and-boundaries.md +13 -0
  334. devin/nodes/iterate/_archived_design_stages/framer_agent_design/04-harness-and-playground.md +12 -0
  335. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md +25 -0
  336. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/02-evals.md +9 -0
  337. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/03-tools-and-boundaries.md +14 -0
  338. devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md +12 -0
  339. devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md +20 -0
  340. devin/nodes/iterate/_archived_design_stages/observer_agent_design/02-evals.md +8 -0
  341. devin/nodes/iterate/_archived_design_stages/observer_agent_design/03-tools-and-boundaries.md +14 -0
  342. devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md +13 -0
  343. devin/nodes/iterate/agent-roles.md +89 -0
  344. devin/nodes/iterate/agents/README.md +10 -0
  345. devin/nodes/iterate/artifacts.md +504 -0
  346. devin/nodes/iterate/contract.md +100 -0
  347. devin/nodes/iterate/eval-plan.md +74 -0
  348. devin/nodes/iterate/node.py +100 -0
  349. devin/nodes/iterate/pipeline/README.md +13 -0
  350. devin/nodes/iterate/playground-contract.md +76 -0
  351. devin/nodes/iterate/prompt.md +11 -0
  352. devin/nodes/iterate/scenarios/README.md +38 -0
  353. devin/nodes/iterate/scenarios/artifact-and-loop-scenarios.md +101 -0
  354. devin/nodes/iterate/scenarios/coder_artifact_alignment.py +32 -0
  355. devin/nodes/iterate/scenarios/coder_artifact_alignment_evals.py +45 -0
  356. devin/nodes/iterate/scenarios/coder_bounded_fix.py +27 -0
  357. devin/nodes/iterate/scenarios/coder_bounded_fix_evals.py +45 -0
  358. devin/nodes/iterate/scenarios/devin_iterate_routing.py +21 -0
  359. devin/nodes/iterate/scenarios/devin_iterate_routing_evals.py +36 -0
  360. devin/nodes/iterate/scenarios/framer_scope_boundary.py +25 -0
  361. devin/nodes/iterate/scenarios/framer_scope_boundary_evals.py +57 -0
  362. devin/nodes/iterate/scenarios/framer_task_framing.py +25 -0
  363. devin/nodes/iterate/scenarios/framer_task_framing_evals.py +58 -0
  364. devin/nodes/iterate/scenarios/iterate_error_fix.py +21 -0
  365. devin/nodes/iterate/scenarios/iterate_error_fix_evals.py +39 -0
  366. devin/nodes/iterate/scenarios/iterate_quick_change.py +21 -0
  367. devin/nodes/iterate/scenarios/iterate_quick_change_evals.py +35 -0
  368. devin/nodes/iterate/scenarios/iterate_to_idea_promotion.py +23 -0
  369. devin/nodes/iterate/scenarios/iterate_to_idea_promotion_evals.py +53 -0
  370. devin/nodes/iterate/scenarios/iterate_to_insight_reroute.py +23 -0
  371. devin/nodes/iterate/scenarios/iterate_to_insight_reroute_evals.py +53 -0
  372. devin/nodes/iterate/scenarios/observer_evidence_seam.py +28 -0
  373. devin/nodes/iterate/scenarios/observer_evidence_seam_evals.py +55 -0
  374. devin/nodes/iterate/scenarios/observer_repro_creation.py +28 -0
  375. devin/nodes/iterate/scenarios/observer_repro_creation_evals.py +45 -0
  376. devin/nodes/iterate/scenarios/routing-matrix.md +45 -0
  377. devin/nodes/shared/__init__.py +0 -0
  378. devin/nodes/shared/filemaker_expert.md +80 -0
  379. devin/nodes/shared/filemaker_expert.py +354 -0
  380. devin/nodes/shared/filemaker_expert_eval/runner.py +176 -0
  381. devin/nodes/shared/filemaker_expert_eval/scenarios.json +65 -0
  382. devin/nodes/shared/goldilocks_advisor_eval/runner.py +214 -0
  383. devin/nodes/shared/goldilocks_advisor_eval/scenarios.json +58 -0
  384. devin/nodes/shared/helpers.py +156 -0
  385. devin/nodes/shared/idea_compliance_advisor_eval/runner.py +252 -0
  386. devin/nodes/shared/idea_compliance_advisor_eval/scenarios.json +75 -0
  387. devin/nodes/shared/models.py +44 -0
  388. devin/nodes/shared/post.py +40 -0
  389. devin/nodes/shared/router.py +107 -0
  390. devin/nodes/shared/tools.py +191 -0
  391. devin/shared/devin-chat-rubric.md +237 -0
  392. devin/shared/devin-chat-scenario-suite.md +90 -0
  393. devin/shared/eval_doctrine.md +9 -0
@@ -0,0 +1,45 @@
1
+ # Iterate routing scenario matrix
2
+
3
+ ## Purpose
4
+
5
+ This file makes the lane boundary concrete. It should stay aligned with:
6
+ - `pipeline/01-objectives-requirements.md` for route-fit rules
7
+ - `pipeline/02-evals.md` for what routing mistakes must be catchable
8
+ - the Iterator / Framer / Observer / Coder role model, where Iterator owns final lane judgment and may re-route when better truth appears
9
+
10
+ ## Routing matrix
11
+
12
+ | Scenario | Initial read | Why | Expected route |
13
+ | --- | --- | --- | --- |
14
+ | concrete_error_fix | User reports a specific failure on an existing surface and wants it fixed | bounded implementation ask with observable failure | `iterate` |
15
+ | quick_behavior_change | User wants a small change to current behavior on an existing page, route, or component | targeted delta, not broad planning | `iterate` |
16
+ | targeted_ui_improvement | User wants a narrow UX or UI improvement on an existing surface | bounded improvement with scoped success criteria | `iterate` |
17
+ | explain_why_error_happens | User wants diagnosis or explanation only | read-only intent, no implementation requested | `insight` |
18
+ | investigate_before_any_change | User asks for analysis first and does not yet want a fix | read-only investigation | `insight` |
19
+ | broad_feature_request | Request introduces a new workflow, broader feature area, or story-scale planning need | no longer task-scale | `idea` |
20
+ | task_grows_after_framing | Initial ask sounds small, but Framer discovers broader planning or architecture work is required | promote when better truth appears | promote to `idea` |
21
+
22
+ ## Notes on who owns the decision
23
+
24
+ - intake may make the first route guess
25
+ - `Framer` may surface evidence that the ask is broader or less concrete than it looked
26
+ - `Observer` may surface evidence that the user actually wanted diagnosis only or that the task lacks observable truth
27
+ - `Iterator` owns the final judgment on whether the work stays in iterate, reroutes to insight, or promotes to idea
28
+ - when rerouting or promoting after iterate has started, Iterator should leave an iterate-owned `promotion_handoff.json` rather than making the lane transition visible only in response text
29
+
30
+ The lane should optimize for truthful routing, not for preserving the initial choice.
31
+
32
+ ## Additional scenario guidance
33
+
34
+ ### Stay in iterate
35
+ Stay in `iterate` when the work can still be described as a bounded delta on an existing surface, even if implementation is nontrivial.
36
+
37
+ ### Promote to idea
38
+ Promote to `idea` when success now depends on choosing among broader product, workflow, or architecture options rather than delivering the originally bounded delta.
39
+
40
+ ### Route to insight
41
+ Route to `insight` when the user wants explanation, diagnosis, or investigation and would reasonably be surprised if the system started changing code.
42
+
43
+ ## Review note
44
+
45
+ If a reviewer cannot tell why a scenario belongs in `iterate`, `insight`, or `idea`, the upstream objectives are still too fuzzy.
File without changes
@@ -0,0 +1,80 @@
1
+ # FileMaker Expert — advisor for Devin's insight agent
2
+ # Pi-Pi pattern: Devin (primary) calls query_filemaker_expert → spawns PI subprocess
3
+ # with this prompt → expert reads DDR artifacts → returns text response
4
+ # → Devin synthesizes into answer
5
+
6
+ ---
7
+ name: filemaker_expert
8
+ description: >
9
+ FileMaker database expert — read-only advisor. Reads DDR analysis artifacts
10
+ and answers questions about database structure, layouts, scripts, entities,
11
+ user flows, and feature clusters. Does NOT run ddr-docs — only reads.
12
+ tools: read,grep,find,ls,cat
13
+ ---
14
+ You are a **FileMaker Database Expert** — a narrow, read-only advisor agent.
15
+
16
+ Your job: receive a question about a FileMaker database, read the relevant DDR analysis artifacts, and return a thorough, grounded response.
17
+
18
+ ## Your domain
19
+
20
+ DDR analysis artifacts live under:
21
+ `{repo_root}/ai_docs/context/source_docs/ddr/{database_name}/`
22
+
23
+ **Detailed JSON (use first for anything requiring precision):**
24
+ - `03_scripts_detailed.json` — complete script steps with full calculations (use INSTEAD of the summary for script logic)
25
+ - `01_schema_detailed.json` — full field definitions with calculations, auto-enter, validation
26
+ - `05_layouts_detailed.json` — complete layout object inventory
27
+ - `02_relationships_detailed.json` — full relationship graph with all join paths
28
+ - `04_custom_functions_detailed.json` — custom functions with full formula text
29
+
30
+ **Summary markdown (overviews only):**
31
+ - `01_schema_summary.md` — table count and field type overview
32
+ - `05_layouts_summary.md` — layout counts and object counts
33
+ - `03_scripts_summary.md` — script/folder list (truncates all calculations at ~50 chars — do NOT rely on it for step details)
34
+ - `02_relationships_summary.md` — relationship graph overview
35
+
36
+ ## Source priority
37
+
38
+ **JSON is the source for details. Markdown summaries are for overviews only.**
39
+
40
+ - For anything involving calculations, SQL queries, script step logic, field-level detail, or specific values: read from the detailed JSON files
41
+ - Markdown summaries are useful for navigation and overview — they tell you what exists and the general structure
42
+ - Detailed JSON files (`*_detailed.json`) have the complete untruncated content
43
+
44
+ **Key detailed files:**
45
+ - `03_scripts_detailed.json` — every script step with FULL calculation text, untruncated (vs. summary's 50-char cutoff)
46
+ - `01_schema_detailed.json` — complete field definitions, calculations, auto-enter logic
47
+ - `05_layouts_detailed.json` — full layout object inventory
48
+ - `02_relationships_detailed.json` — complete relationship graph
49
+
50
+ **For script analysis:** always start from `03_scripts_detailed.json`. The summary (`03_scripts_summary.md`) truncates every calculation to ~50 chars — `JSONGetElement(Get(ScriptResult);"qbID")` becomes `JSONGetElement(Get(ScriptResult);"q` — making it unreadable. The detailed JSON has the complete text.
51
+
52
+ JSON in the `analysis/` subdirectory of the database's DDR directory:
53
+ - `analysis/feature_clusters.json` — grouped capabilities with descriptions
54
+ - `analysis/user_flows_entry_points.json` — entry point patterns
55
+ - `analysis/cruft_summary.json` — code health (uncalled scripts, unreachable layouts)
56
+
57
+ Context files in `{repo_root}/ai_docs/context/source_docs/`:
58
+ - `domain_entities.json`, `user_workflows.json`, `product_brief.json`
59
+
60
+ ## How to work
61
+
62
+ 1. Determine which database(s) the question is about
63
+ 2. Identify the DDR output directory for that database
64
+ 3. Use `read` / `cat` to read the relevant artifact files
65
+ 4. Synthesize a response with specific names, counts, and structural facts
66
+ 5. Return the response as plain text (Markdown headers are fine, per "Answer style" below) — the primary agent will incorporate it
67
+
68
+ ## Answer style
69
+
70
+ - Be concrete: "Clean Sweep has 455 layouts across 115 tables, including Account Page (654 objects), My Page/Cleaner dashboards, and Inspection Form III (225 objects)"
71
+ - Use headers: ## Database Overview, ## Layouts, ## Scripts, ## Code Health
72
+ - If the artifacts don't cover something, say "not in DDR analysis" and describe what's available
73
+ - Do not speculate — if it's not in the files, say so
74
+
75
+ ## Constraints
76
+
77
+ - Do NOT run ddr-docs or any analysis pipeline
78
+ - Do NOT write or modify any files
79
+ - Do not answer questions outside the FileMaker database domain
80
+ - Do not use search tools unless needed to locate a specific artifact path
@@ -0,0 +1,354 @@
1
+ """FileMaker Expert advisor — read-only DDR analysis reader.
2
+
3
+ Pi-Pi pattern: Devin (primary agent) calls FileMaker Expert as an advisor when it
4
+ needs to answer FileMaker database questions. The expert reads pre-existing DDR
5
+ analysis artifacts and returns a grounded, synthesized response.
6
+
7
+ The expert does NOT re-run ddr-docs. Analysis is done once on import by the
8
+ devflow_event_worker. The expert just reads what's already there.
9
+
10
+ Location convention:
11
+ - Source XML: ai_docs/context/source_docs/<name>.xml
12
+ - DDR analysis: ai_docs/context/source_docs/ddr/<name>/
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ from dataclasses import dataclass
19
+ from pathlib import Path
20
+
21
+ from devin.nodes.shared.tools import ToolResult
22
+
23
+
24
@dataclass
class DDRArtifacts:
    """Paths to all available DDR analysis artifacts for one database.

    Every accessor returns the artifact's path when the file exists on disk
    and ``None`` otherwise, so callers can use the result as a presence flag.
    """

    name: str  # database name; callers use the DDR output directory name
    ddr_root: Path  # DDR output directory for this database

    @property
    def analysis_dir(self) -> Path:
        """Directory holding the JSON analysis artifacts."""
        return self.ddr_root / "analysis"

    @staticmethod
    def _existing(path: Path) -> Path | None:
        """Return ``path`` if it exists, else ``None``."""
        return path if path.exists() else None

    def _context_file(self, filename: str) -> Path | None:
        """Locate a project-level context file shared by all databases.

        The directory next to ``ddr_root`` is checked first (the original
        lookup location).  With the documented layout
        ``source_docs/ddr/<name>/`` that directory is ``source_docs/ddr/``,
        whereas the context files are documented to live in
        ``source_docs/`` itself, so that directory is checked as a fallback.
        """
        for directory in (self.ddr_root.parent, self.ddr_root.parent.parent):
            candidate = directory / filename
            if candidate.exists():
                return candidate
        return None

    def schema_summary_md(self) -> Path | None:
        return self._existing(self.ddr_root / "01_schema_summary.md")

    def layouts_summary_md(self) -> Path | None:
        return self._existing(self.ddr_root / "05_layouts_summary.md")

    def scripts_summary_md(self) -> Path | None:
        return self._existing(self.ddr_root / "03_scripts_summary.md")

    def relationships_summary_md(self) -> Path | None:
        return self._existing(self.ddr_root / "02_relationships_summary.md")

    def feature_clusters_json(self) -> Path | None:
        return self._existing(self.analysis_dir / "feature_clusters.json")

    def entry_points_json(self) -> Path | None:
        return self._existing(self.analysis_dir / "user_flows_entry_points.json")

    def cruft_summary_json(self) -> Path | None:
        return self._existing(self.analysis_dir / "cruft_summary.json")

    def domain_entities_json(self) -> Path | None:
        return self._context_file("domain_entities.json")

    def user_workflows_json(self) -> Path | None:
        return self._context_file("user_workflows.json")

    def product_brief_json(self) -> Path | None:
        return self._context_file("product_brief.json")
73
+
74
+
75
def _read_text(path: Path | None, max_lines: int = 80) -> str:
    """Return the first ``max_lines`` lines of ``path`` joined with newlines.

    Yields the placeholder ``"(not available)"`` when no path is given or the
    file does not exist, and ``"(read error)"`` when reading fails.  Bytes
    that are not valid UTF-8 are replaced rather than raising.
    """
    if path and path.exists():
        try:
            content = path.read_text(encoding="utf-8", errors="replace")
            head = content.splitlines()[:max_lines]
            return "\n".join(head)
        except Exception:
            return "(read error)"
    return "(not available)"
83
+
84
+
85
def _read_json(path: Path | None) -> dict:
    """Load a JSON object from ``path``, returning ``{}`` on any problem.

    An empty dict is returned when ``path`` is ``None`` or missing, when the
    file cannot be read or decoded, when it is not valid JSON, and when the
    top-level JSON value is not an object.  The last case matters because
    callers immediately call ``.get`` on the result.
    """
    if not path or not path.exists():
        return {}
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}
    return data if isinstance(data, dict) else {}
92
+
93
+
94
def _summarize(md_path: Path | None, prefix: str) -> str:
    """Extract the value that follows ``prefix`` in a summary markdown file.

    The file is scanned line by line for the first line that contains
    ``prefix`` (case-insensitive) and has a non-empty value after its last
    colon.  Surrounding markdown emphasis (``*``, ``_``, backticks) is
    stripped so ``**Total Tables:** 115`` yields ``"115"``.

    Returns ``"unknown"`` when the file is missing or unreadable, or when no
    matching line carries a value.
    """
    if not md_path or not md_path.exists():
        return "unknown"
    try:
        text = md_path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return "unknown"

    needle = prefix.lower()
    for line in text.splitlines():
        if needle not in line.lower():
            continue
        _, colon, value = line.rpartition(":")
        if not colon:
            continue
        value = value.strip().strip("*_`").strip()
        if value:
            return value
    return "unknown"
107
+
108
+
109
def _load_json_object(path: Path | None) -> dict:
    """Load ``path`` via ``_read_json`` and guarantee the result is a dict."""
    data = _read_json(path)
    return data if isinstance(data, dict) else {}


def _dict_items(value: object, limit: int) -> list[dict]:
    """Return the dict entries among the first ``limit`` items of a list."""
    if not isinstance(value, list):
        return []
    return [item for item in value[:limit] if isinstance(item, dict)]


def _positive_count(entry: object) -> int | float | None:
    """Return ``entry["count"]`` when it is a number above zero, else ``None``."""
    if not isinstance(entry, dict):
        return None
    count = entry.get("count", 0)
    if isinstance(count, (int, float)) and count > 0:
        return count
    return None


def _synthesize_report(
    artifacts: DDRArtifacts,
    question: str,
    include_summaries: bool = True,
    include_workflows: bool = True,
) -> str:
    """Read all DDR artifacts and produce a synthesized expert report.

    The report is markdown text made of, in order: a database overview,
    optional summary excerpts, feature clusters, entry points, code health,
    optional domain entities / user workflows / product brief, and finally
    the question being answered.  Sections whose artifact file is absent are
    omitted.  Artifact JSON with an unexpected shape is skipped instead of
    raising.

    Args:
        artifacts: Artifact locator for one database.
        question: The caller's question, echoed at the end of the report.
        include_summaries: Include the schema/layouts/scripts/relationships
            excerpts.
        include_workflows: Include domain entities, user workflows and the
            product brief.

    Returns:
        The report as a single newline-joined string.
    """
    schema_md = artifacts.schema_summary_md()
    layouts_md = artifacts.layouts_summary_md()
    scripts_md = artifacts.scripts_summary_md()
    rels_md = artifacts.relationships_summary_md()

    sections: list[str] = [
        f"# FileMaker Expert Report: {artifacts.name}",
        "",
        "## Database Overview",
        f"- Tables: {_summarize(schema_md, 'Total Tables')}",
        f"- Layouts: {_summarize(layouts_md, 'Total Layouts')}",
        f"- Scripts: {_summarize(scripts_md, 'Total Scripts')}",
        "",
    ]

    # Summary excerpts: (heading, file, number of lines to quote)
    if include_summaries:
        excerpts = (
            ("## Schema (excerpt)", schema_md, 40),
            ("## Layouts (excerpt)", layouts_md, 50),
            ("## Scripts (excerpt)", scripts_md, 40),
            ("## Relationships (excerpt)", rels_md, 30),
        )
        for heading, path, limit in excerpts:
            if path:
                sections += [heading, _read_text(path, max_lines=limit), ""]

    # Feature clusters
    fc_json = artifacts.feature_clusters_json()
    if fc_json:
        cluster_list = _load_json_object(fc_json).get("clusters", [])
        if not isinstance(cluster_list, list):
            cluster_list = []
        sections.append(f"## Feature Clusters ({len(cluster_list)} total)")
        for cluster in _dict_items(cluster_list, 15):
            name = cluster.get("name", "unknown")
            # ``or ""`` guards against an explicit null description.
            desc = str(cluster.get("description") or "")[:100]
            sections.append(f"- **{name}**: {desc}")
        sections.append("")

    # Entry points
    ep_json = artifacts.entry_points_json()
    if ep_json:
        eps = _load_json_object(ep_json)
        summary = eps.get("summary", {})
        total = summary.get("total_entry_points", 0) if isinstance(summary, dict) else 0
        sections.append(f"## Entry Points (total: {total})")
        entry_pts = eps.get("entry_points", {})
        if isinstance(entry_pts, dict):
            for key, val in entry_pts.items():
                count = _positive_count(val)
                if count is not None:
                    sections.append(f"- {key}: {count}")
        sections.append("")

    # Cruft / code health: the section only appears when something was found
    cruft_json = artifacts.cruft_summary_json()
    if cruft_json:
        cruft = _load_json_object(cruft_json)
        problems: list[str] = []
        for key in ("uncalled_scripts", "unreachable_layouts", "unreferenced_fields", "uncalled_functions"):
            count = _positive_count(cruft.get(key, {}))
            if count is not None:
                problems.append(f"- {key}: {count}")
        if problems:
            sections.append("## Code Health (cruft detected)")
            sections.extend(problems)
            sections.append("")

    if include_workflows:
        # Domain entities
        de_json = artifacts.domain_entities_json()
        if de_json:
            de = _load_json_object(de_json)
            entities = de.get("entities", de.get("domain_entities", []))
            if isinstance(entities, list) and entities:
                sections.append(f"## Domain Entities ({len(entities)} defined)")
                for entity in _dict_items(entities, 20):
                    name = entity.get("name", "unknown")
                    etype = entity.get("type", "")
                    fields = entity.get("fields", [])
                    field_count = len(fields) if hasattr(fields, "__len__") else 0
                    sections.append(f"- **{name}** ({etype}, {field_count} fields)")
                sections.append("")

        # User workflows
        uw_json = artifacts.user_workflows_json()
        if uw_json:
            uw = _load_json_object(uw_json)
            workflows = uw.get("workflows", uw.get("user_workflows", []))
            if isinstance(workflows, list) and workflows:
                sections.append(f"## User Workflows ({len(workflows)} defined)")
                for workflow in _dict_items(workflows, 15):
                    name = workflow.get("name", "unknown")
                    steps = workflow.get("steps", workflow.get("stages", []))
                    step_count = len(steps) if hasattr(steps, "__len__") else 0
                    sections.append(f"- **{name}**: {step_count} steps/stages")
                sections.append("")

        # Product brief: only worth quoting when it is more than a stub
        pb_json = artifacts.product_brief_json()
        if pb_json:
            pb = _load_json_object(pb_json)
            brief_text = pb.get("brief", pb.get("description", pb.get("product_brief", "")))
            if isinstance(brief_text, str):
                brief = brief_text
            else:
                brief = str(brief_text) if brief_text else ""
            if len(brief) > 20:
                sections += ["## Product Brief", brief[:500], ""]

    # Question answered
    sections.append(f"---\n**Question:** {question}")

    return "\n".join(sections)
243
+
244
+
245
def filemaker_expert(
    *,
    question: str,
    repo_root: Path,
    include_summaries: bool = True,
    include_workflows: bool = True,
    database_name: str | None = None,
) -> ToolResult:
    """FileMaker Expert advisor — read-only DDR analysis reader.

    Devin (primary agent) calls this to get a grounded FileMaker database report.
    The expert reads pre-existing DDR analysis artifacts from:
        ai_docs/context/source_docs/ddr/<database_name>/

    It does NOT re-run ddr-docs. Analysis was done on import by devflow_event_worker.

    Only sub-directories of the DDR directory are treated as databases; stray
    files in it (for example ``.DS_Store``) are ignored.

    Args:
        question: The question about the FileMaker database
        repo_root: Project repo root (e.g. /Users/devflow/repos/cleaner)
        include_summaries: Include schema/layouts/scripts/relationships excerpts (default: true)
        include_workflows: Include domain entities, user workflows, product brief (default: true)
        database_name: Specific database to focus on (derived from XML filename, e.g. "Clean Sweep_fmp12").
            If not provided, reports on all databases found.

    Returns:
        A ``ToolResult``.  On success ``output`` holds the question, per-database
        metadata, the combined report text, and the database count and names.
        On any failure ``ok`` is False and ``error`` describes the problem; no
        exception propagates to the caller.
    """
    try:
        repo_root = Path(repo_root)  # tolerate a plain string path
        source_docs = repo_root / "ai_docs" / "context" / "source_docs"
        ddr_base = source_docs / "ddr"

        if not ddr_base.exists():
            return ToolResult(
                ok=False,
                tool_name="FileMakerExpert",
                output={},
                error=f"DDR analysis directory not found: {ddr_base}. "
                "DDR analysis runs automatically on FileMaker file import — "
                "if no analysis exists yet, the files were imported but analysis may have failed.",
            )

        # Find all DDR outputs: one directory per analysed database
        all_ddr_dirs = sorted(entry for entry in ddr_base.iterdir() if entry.is_dir())
        if not all_ddr_dirs:
            return ToolResult(
                ok=False,
                tool_name="FileMakerExpert",
                output={},
                error=f"No DDR analysis results in {ddr_base}",
            )

        # Filter to specific database if named
        ddr_dirs = all_ddr_dirs
        if database_name:
            ddr_dirs = [d for d in all_ddr_dirs if d.name == database_name]
            if not ddr_dirs:
                available = [d.name for d in all_ddr_dirs]
                return ToolResult(
                    ok=False,
                    tool_name="FileMakerExpert",
                    output={},
                    error=f"Database '{database_name}' not found. Available: {available}",
                )

        databases: dict[str, dict] = {}
        reports: list[str] = []

        for ddr_dir in ddr_dirs:
            artifacts = DDRArtifacts(name=ddr_dir.name, ddr_root=ddr_dir)

            schema_md = artifacts.schema_summary_md()
            layouts_md = artifacts.layouts_summary_md()
            scripts_md = artifacts.scripts_summary_md()

            reports.append(
                _synthesize_report(
                    artifacts,
                    question=question,
                    include_summaries=include_summaries,
                    include_workflows=include_workflows,
                )
            )

            databases[ddr_dir.name] = {
                "table_count": _summarize(schema_md, "Total Tables"),
                "layout_count": _summarize(layouts_md, "Total Layouts"),
                "script_count": _summarize(scripts_md, "Total Scripts"),
                "ddr_root": str(ddr_dir.relative_to(repo_root)),
                "has_schema": bool(schema_md),
                "has_layouts": bool(layouts_md),
                "has_scripts": bool(scripts_md),
                "has_cruft": bool(artifacts.cruft_summary_json()),
                "has_feature_clusters": bool(artifacts.feature_clusters_json()),
            }

        combined = "\n\n---\n\n".join(reports) if reports else "No analysis results found."

        return ToolResult(
            ok=True,
            tool_name="FileMakerExpert",
            output={
                "question": question,
                "databases": databases,
                "expert_report": combined,
                "database_count": len(databases),
                "database_names": list(databases.keys()),
            },
        )

    except Exception as e:
        # Tool boundary: report the failure as a result instead of raising.
        return ToolResult(ok=False, tool_name="FileMakerExpert", output={}, error=str(e))
@@ -0,0 +1,176 @@
1
+ """Eval harness for FileMaker expert advisor.
2
+
3
+ Runs the filemaker_expert through the PI subprocess (matching devflow-tools.ts
4
+ queryFilemakerExpert pattern) against scenario fixtures and scores the output.
5
+
6
+ Usage:
7
+ python3 -m devin.nodes.shared.filemaker_expert_eval.runner
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import re
14
+ import subprocess
15
+ import time
16
+ from dataclasses import dataclass
17
+ from pathlib import Path
18
+
19
# _PROJECT_ROOT points to the devflow_engine repo root
# NOTE(review): parents[5] hard-codes this file's depth below the repo root —
# confirm it still resolves correctly if the module is moved.
_PROJECT_ROOT = Path(__file__).resolve().parents[5]  # devflow_engine/
# Expert definition file, resolved relative to _PROJECT_ROOT.
_AGENT_FILE = ".pi/agents/filemaker-expert/filemaker-expert.md"

# Default test project
_DEFAULT_REPO_ROOT = "/Users/devflow/repos/cleaner"
_DEFAULT_DATABASE = "Clean Sweep_fmp12"
# Model identifier passed to the `pi` CLI via --model.
_DEFAULT_MODEL = "minimax/MiniMax-M2.7"
27
+
28
+
29
@dataclass
class ScenarioResult:
    """Outcome of scoring one eval scenario against the expert's output."""

    scenario_id: str  # the scenario's "id" value
    passed: bool  # True only when score equals max_score
    score: int  # sum of the weights that were awarded
    max_score: int  # sum of every weight in the scenario's "scoring" table
    output: str  # first 500 characters of the expert output
    details: dict[str, int]  # scoring key -> weight awarded (0 when missed)
    elapsed: float = 0.0  # wall-clock seconds; set by run_all_scenarios after scoring
38
+
39
+
40
def load_expert_prompt() -> str:
    """Extract system prompt from expert definition file.

    The definition file is read as UTF-8 so the result does not depend on the
    platform's locale encoding.  When the file starts with a ``---`` delimited
    front-matter block, everything after it is returned stripped of
    surrounding whitespace; otherwise the whole file content is returned
    unchanged.

    Raises:
        OSError: If the definition file cannot be read.
    """
    content = (_PROJECT_ROOT / _AGENT_FILE).read_text(encoding="utf-8")
    match = re.match(r"^---\n[\s\S]*?\n---\n([\s\S]*)$", content, re.M)
    return match.group(1).strip() if match else content
45
+
46
+
47
def run_filemaker_expert(
    question: str,
    repo_root: str = _DEFAULT_REPO_ROOT,
    database: str = _DEFAULT_DATABASE,
    model: str = _DEFAULT_MODEL,
    timeout: int = 90,
) -> tuple[str, float]:
    """Run the filemaker expert via PI subprocess and return (output, elapsed).

    The expert's system prompt is extended with a context block naming the
    repo root, database and question, and the question is also passed as the
    user message.  The subprocess runs with ``_PROJECT_ROOT`` as its working
    directory; its stderr is captured and discarded.

    Args:
        question: Question to put to the expert.
        repo_root: Project repo root mentioned in the context block.
        database: Database name mentioned in the context block.
        model: Model identifier passed to ``pi --model``.
        timeout: Maximum seconds to wait for the subprocess.

    Returns:
        ``(stdout, elapsed_seconds)``.

    Raises:
        subprocess.TimeoutExpired: If the subprocess exceeds ``timeout``.  The
            child is killed and reaped before the exception propagates.
    """
    system_prompt = load_expert_prompt()

    context_block = (
        f"Context: repo_root={repo_root}, database={database}\n\n"
        f"Question: {question}"
    )

    args = [
        "pi",
        "--mode", "text",
        "--no-session",
        "--no-extensions",
        "--model", model,
        "--tools", "read,grep,find,ls",
        "--thinking", "off",
        "--append-system-prompt", system_prompt + "\n\n" + context_block,
        question,
    ]

    # subprocess.run kills and waits for the child when the timeout expires;
    # a bare Popen.communicate(timeout=...) would leave it running.
    start = time.monotonic()
    completed = subprocess.run(
        args,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        cwd=str(_PROJECT_ROOT),
        timeout=timeout,
        check=False,
    )
    elapsed = time.monotonic() - start

    return completed.stdout, elapsed
87
+
88
+
89
def score_scenario(output: str, scenario: dict) -> ScenarioResult:
    """Score an expert output against a scenario's expected keys and structure.

    A scenario supplies ``id``, ``scoring`` (criterion name -> weight) and
    ``expected_keys`` (strings to look for in ``output``).  Matching is a
    case-sensitive substring test.

    NOTE(review): scoring is all-or-nothing.  The match test does not depend
    on the criterion, so every criterion earns its full weight if any
    expected key occurs in ``output`` and zero otherwise.  An unused
    per-criterion variable in the earlier version suggests per-criterion
    matching was intended; the rule cannot be determined from this file, so
    the existing behaviour is kept.

    Returns:
        A ``ScenarioResult`` whose ``output`` is truncated to 500 characters
        and whose ``elapsed`` is left at its default.
    """
    scoring = scenario["scoring"]
    max_score = sum(scoring.values())

    # Evaluated once: the result is identical for every criterion.
    matched = bool(scoring) and any(
        expected in output for expected in scenario["expected_keys"]
    )
    details = {key: (weight if matched else 0) for key, weight in scoring.items()}
    score = sum(details.values())

    return ScenarioResult(
        scenario_id=scenario["id"],
        passed=score == max_score,
        score=score,
        max_score=max_score,
        output=output[:500],
        details=details,
    )
112
+
113
+
114
def run_all_scenarios(
    scenarios_path: str | Path | None = None,
    repo_root: str = _DEFAULT_REPO_ROOT,
    database: str = _DEFAULT_DATABASE,
) -> list[ScenarioResult]:
    """Run all scenarios and return results.

    Scenarios are loaded from ``scenarios_path`` (default: ``scenarios.json``
    next to this module), read as UTF-8.  Each scenario is run through the
    expert subprocess, scored, and reported on stdout as it completes.

    Args:
        scenarios_path: JSON file containing a list of scenario objects.
        repo_root: Project repo root passed to the expert.
        database: Database name passed to the expert.

    Returns:
        One ``ScenarioResult`` per scenario, in file order, with ``elapsed``
        filled in.

    Raises:
        OSError, ValueError: If the scenarios file cannot be read or parsed.
        subprocess.TimeoutExpired: If an expert run times out; the remaining
            scenarios are not run.
    """
    if scenarios_path is None:
        scenarios_path = Path(__file__).parent / "scenarios.json"

    scenarios = json.loads(Path(scenarios_path).read_text(encoding="utf-8"))

    results = []
    for scenario in scenarios:
        print(f"\nRunning: {scenario['id']}")
        output, elapsed = run_filemaker_expert(
            question=scenario["question"],
            repo_root=repo_root,
            database=database,
        )
        result = score_scenario(output, scenario)
        result.elapsed = elapsed
        results.append(result)

        print(f" Score: {result.score}/{result.max_score} ({elapsed:.1f}s)")
        if not result.passed:
            missing = [key for key, awarded in result.details.items() if awarded == 0]
            print(f" Missing: {missing}")

    return results
142
+
143
+
144
def print_report(results: list[ScenarioResult]) -> None:
    """Print a formatted report of scenario results.

    Prints one PASS/FAIL line per scenario, then its per-criterion scores,
    then an overall total with a percentage.  The percentage is reported as
    0% when the maximum total is zero (for example with no results) instead
    of dividing by zero.
    """
    total = sum(r.score for r in results)
    max_total = sum(r.max_score for r in results)
    percent = 100 * total / max_total if max_total else 0.0
    rule = "=" * 60

    print(f"\n{rule}")
    print("FILEMAKER EXPERT EVAL REPORT")
    print(rule)

    for r in results:
        status = "✓ PASS" if r.passed else "✗ FAIL"
        print(f"\n{status} {r.scenario_id} {r.score}/{r.max_score}")
        for key, val in r.details.items():
            icon = "✓" if val > 0 else "✗"
            print(f" {icon} {key}: {val}")

    print(f"\n{rule}")
    print(f"TOTAL: {total}/{max_total} ({percent:.0f}%)")
    print(rule)
163
+
164
+
165
if __name__ == "__main__":
    import sys

    # Optional positional overrides: <repo_root> <database>
    cli_args = sys.argv[1:]
    repo_root = cli_args[0] if cli_args else _DEFAULT_REPO_ROOT
    database = cli_args[1] if len(cli_args) > 1 else _DEFAULT_DATABASE

    for banner_line in (
        "Testing filemaker_expert against cleaner DDR artifacts",
        f"repo_root: {repo_root}",
        f"database: {database}",
    ):
        print(banner_line)

    print_report(run_all_scenarios(repo_root=repo_root, database=database))