PyPI - devflow-engine - Versions diffs - 1.0.0__py3-none-any.whl - Mend

devflow-engine 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (393) hide show

devflow_engine/__init__.py +3 -0
devflow_engine/agentic_prompts.py +100 -0
devflow_engine/agentic_runtime.py +398 -0
devflow_engine/api_key_flow_harness.py +539 -0
devflow_engine/api_keys.py +357 -0
devflow_engine/bootstrap/__init__.py +2 -0
devflow_engine/bootstrap/provision_from_template.py +84 -0
devflow_engine/cli/__init__.py +0 -0
devflow_engine/cli/app.py +7270 -0
devflow_engine/core/__init__.py +0 -0
devflow_engine/core/config.py +86 -0
devflow_engine/core/logging.py +29 -0
devflow_engine/core/paths.py +45 -0
devflow_engine/core/toml_kv.py +33 -0
devflow_engine/devflow_event_worker.py +1292 -0
devflow_engine/devflow_state.py +201 -0
devflow_engine/devin2/__init__.py +9 -0
devflow_engine/devin2/agent_definition.py +120 -0
devflow_engine/devin2/pi_runner.py +204 -0
devflow_engine/devin_orchestration.py +69 -0
devflow_engine/docs/prompts/anti-patterns.md +42 -0
devflow_engine/docs/prompts/devin-agent-prompt.md +55 -0
devflow_engine/docs/prompts/devin2-agent-prompt.md +81 -0
devflow_engine/docs/prompts/examples/devin-vapi-clone-reference-exchange.json +85 -0
devflow_engine/doctor/__init__.py +2 -0
devflow_engine/doctor/triage.py +140 -0
devflow_engine/error/__init__.py +0 -0
devflow_engine/error/remediation.py +21 -0
devflow_engine/errors/error_solver_dag.py +522 -0
devflow_engine/errors/runtime_observability.py +67 -0
devflow_engine/idea/__init__.py +4 -0
devflow_engine/idea/actors.py +481 -0
devflow_engine/idea/agentic.py +465 -0
devflow_engine/idea/analyze.py +93 -0
devflow_engine/idea/devin_chat_dag.py +1 -0
devflow_engine/idea/diff.py +99 -0
devflow_engine/idea/drafts.py +446 -0
devflow_engine/idea/idea_creation_dag.py +643 -0
devflow_engine/idea/ideation_enrichment.py +355 -0
devflow_engine/idea/ideation_enrichment_worker.py +19 -0
devflow_engine/idea/paths.py +28 -0
devflow_engine/idea/promote.py +53 -0
devflow_engine/idea/redaction.py +27 -0
devflow_engine/idea/repo_tools.py +1277 -0
devflow_engine/idea/response_mode.py +30 -0
devflow_engine/idea/story_pipeline.py +1585 -0
devflow_engine/idea/sufficiency.py +376 -0
devflow_engine/idea/traditional_stories.py +1257 -0
devflow_engine/implementation/__init__.py +0 -0
devflow_engine/implementation/alembic_preflight.py +700 -0
devflow_engine/implementation/dag.py +8450 -0
devflow_engine/implementation/green_gate.py +93 -0
devflow_engine/implementation/prompts.py +108 -0
devflow_engine/implementation/test_runtime.py +623 -0
devflow_engine/integration/__init__.py +19 -0
devflow_engine/integration/agentic.py +66 -0
devflow_engine/integration/dag.py +3539 -0
devflow_engine/integration/prompts.py +114 -0
devflow_engine/integration/supabase_schema.sql +31 -0
devflow_engine/integration/supabase_sync.py +177 -0
devflow_engine/llm/__init__.py +1 -0
devflow_engine/llm/cli_one_shot.py +84 -0
devflow_engine/llm/cli_stream.py +371 -0
devflow_engine/llm/execution_context.py +26 -0
devflow_engine/llm/invoke.py +1322 -0
devflow_engine/llm/provider_api.py +304 -0
devflow_engine/llm/repo_knowledge.py +588 -0
devflow_engine/llm_primitives.py +315 -0
devflow_engine/orchestration.py +62 -0
devflow_engine/planning/__init__.py +0 -0
devflow_engine/planning/analyze_repo.py +92 -0
devflow_engine/planning/render_drafts.py +133 -0
devflow_engine/playground/__init__.py +0 -0
devflow_engine/playground/hooks.py +26 -0
devflow_engine/playwright_workflow/__init__.py +5 -0
devflow_engine/playwright_workflow/dag.py +1317 -0
devflow_engine/process/__init__.py +5 -0
devflow_engine/process/dag.py +59 -0
devflow_engine/project_registration/__init__.py +3 -0
devflow_engine/project_registration/dag.py +1581 -0
devflow_engine/project_registry.py +109 -0
devflow_engine/prompts/devin/generic/prompt.md +6 -0
devflow_engine/prompts/devin/ideation/prompt.md +263 -0
devflow_engine/prompts/devin/ideation/scenarios.md +5 -0
devflow_engine/prompts/devin/ideation_loop/prompt.md +6 -0
devflow_engine/prompts/devin/insight/prompt.md +11 -0
devflow_engine/prompts/devin/insight/scenarios.md +5 -0
devflow_engine/prompts/devin/intake/prompt.md +15 -0
devflow_engine/prompts/devin/iterate/prompt.md +12 -0
devflow_engine/prompts/devin/shared/eval_doctrine.md +9 -0
devflow_engine/prompts/devin/shared/principles.md +246 -0
devflow_engine/prompts/devin_eval/assessment/prompt.md +18 -0
devflow_engine/prompts/idea/api_ideation_agent/prompt.md +8 -0
devflow_engine/prompts/idea/api_insight_agent/prompt.md +8 -0
devflow_engine/prompts/idea/response_doctrine/prompt.md +18 -0
devflow_engine/prompts/implementation/dependency_assessment/prompt.md +12 -0
devflow_engine/prompts/implementation/green/green/prompt.md +11 -0
devflow_engine/prompts/implementation/green/node_config/prompt.md +3 -0
devflow_engine/prompts/implementation/green_review/outcome_review/prompt.md +5 -0
devflow_engine/prompts/implementation/green_review/prior_run_review/prompt.md +5 -0
devflow_engine/prompts/implementation/red/prompt.md +27 -0
devflow_engine/prompts/implementation/redreview/prompt.md +23 -0
devflow_engine/prompts/implementation/redreview_repair/prompt.md +16 -0
devflow_engine/prompts/implementation/setupdoc/prompt.md +10 -0
devflow_engine/prompts/implementation/story_planning/prompt.md +13 -0
devflow_engine/prompts/implementation/test_design/prompt.md +27 -0
devflow_engine/prompts/integration/README.md +185 -0
devflow_engine/prompts/integration/green/example.md +67 -0
devflow_engine/prompts/integration/green/green/prompt.md +10 -0
devflow_engine/prompts/integration/green/node_config/prompt.md +42 -0
devflow_engine/prompts/integration/green/past_prompts/20260417T212300/green/prompt.md +15 -0
devflow_engine/prompts/integration/green/past_prompts/20260417T212300/node_config/prompt.md +42 -0
devflow_engine/prompts/integration/green_enrich/example.md +79 -0
devflow_engine/prompts/integration/green_enrich/green_enrich/prompt.md +9 -0
devflow_engine/prompts/integration/green_enrich/node_config/prompt.md +41 -0
devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/green_enrich/prompt.md +14 -0
devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/node_config/prompt.md +41 -0
devflow_engine/prompts/integration/red/code_repair/prompt.md +12 -0
devflow_engine/prompts/integration/red/example.md +152 -0
devflow_engine/prompts/integration/red/node_config/prompt.md +86 -0
devflow_engine/prompts/integration/red/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
devflow_engine/prompts/integration/red/past_prompts/20260417T212300/node_config/prompt.md +84 -0
devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red/prompt.md +16 -0
devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red_repair/prompt.md +15 -0
devflow_engine/prompts/integration/red/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
devflow_engine/prompts/integration/red/past_prompts/20260417T215032/node_config/prompt.md +84 -0
devflow_engine/prompts/integration/red/past_prompts/20260417T215032/red_repair/prompt.md +11 -0
devflow_engine/prompts/integration/red/red/prompt.md +11 -0
devflow_engine/prompts/integration/red/red_repair/prompt.md +12 -0
devflow_engine/prompts/integration/red_review/example.md +71 -0
devflow_engine/prompts/integration/red_review/node_config/prompt.md +41 -0
devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/node_config/prompt.md +41 -0
devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/red_review/prompt.md +15 -0
devflow_engine/prompts/integration/red_review/red_review/prompt.md +9 -0
devflow_engine/prompts/integration/resolve/example.md +111 -0
devflow_engine/prompts/integration/resolve/node_config/prompt.md +64 -0
devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/node_config/prompt.md +64 -0
devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_implicated_users/prompt.md +15 -0
devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_side_effects/prompt.md +15 -0
devflow_engine/prompts/integration/resolve/resolve_implicated_users/prompt.md +10 -0
devflow_engine/prompts/integration/resolve/resolve_side_effects/prompt.md +10 -0
devflow_engine/prompts/integration/validate/build_idea_acceptance_coverage/prompt.md +12 -0
devflow_engine/prompts/integration/validate/code_repair/prompt.md +13 -0
devflow_engine/prompts/integration/validate/example.md +143 -0
devflow_engine/prompts/integration/validate/node_config/prompt.md +87 -0
devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/node_config/prompt.md +67 -0
devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_enrich_gate/prompt.md +17 -0
devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_repair/prompt.md +16 -0
devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/node_config/prompt.md +67 -0
devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/validate_repair/prompt.md +9 -0
devflow_engine/prompts/integration/validate/validate_enrich_gate/prompt.md +10 -0
devflow_engine/prompts/integration/validate/validate_repair/prompt.md +20 -0
devflow_engine/prompts/integration/write_workflows/example.md +100 -0
devflow_engine/prompts/integration/write_workflows/node_config/prompt.md +44 -0
devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/node_config/prompt.md +44 -0
devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/write_workflows/prompt.md +17 -0
devflow_engine/prompts/integration/write_workflows/write_workflows/prompt.md +11 -0
devflow_engine/prompts/iterate/README.md +7 -0
devflow_engine/prompts/iterate/coder/prompt.md +11 -0
devflow_engine/prompts/iterate/framer/prompt.md +11 -0
devflow_engine/prompts/iterate/iterator/prompt.md +13 -0
devflow_engine/prompts/iterate/observer/prompt.md +11 -0
devflow_engine/prompts/recovery/diagnosis/prompt.md +7 -0
devflow_engine/prompts/recovery/execution/prompt.md +8 -0
devflow_engine/prompts/recovery/execution_verification/prompt.md +7 -0
devflow_engine/prompts/recovery/failure_investigation/prompt.md +10 -0
devflow_engine/prompts/recovery/preflight_health_repo_repair/prompt.md +8 -0
devflow_engine/prompts/recovery/remediation_execution/prompt.md +11 -0
devflow_engine/prompts/recovery/root_cause_investigation/prompt.md +12 -0
devflow_engine/prompts/scope_idea/doctrine/prompt.md +7 -0
devflow_engine/prompts/source_doc_eval/document/prompt.md +6 -0
devflow_engine/prompts/source_doc_eval/targeted_mutation/prompt.md +9 -0
devflow_engine/prompts/source_doc_mutation/domain_entities/prompt.md +6 -0
devflow_engine/prompts/source_doc_mutation/product_brief/prompt.md +6 -0
devflow_engine/prompts/source_doc_mutation/project_doc_coherence/prompt.md +7 -0
devflow_engine/prompts/source_doc_mutation/project_doc_render/prompt.md +9 -0
devflow_engine/prompts/source_doc_mutation/source_doc_coherence/prompt.md +5 -0
devflow_engine/prompts/source_doc_mutation/source_doc_enrichment_coherence/prompt.md +6 -0
devflow_engine/prompts/source_doc_mutation/user_workflows/prompt.md +6 -0
devflow_engine/prompts/source_scope/doctrine/prompt.md +10 -0
devflow_engine/prompts/ui_grounding/doctrine/prompt.md +7 -0
devflow_engine/recovery/__init__.py +3 -0
devflow_engine/recovery/dag.py +2609 -0
devflow_engine/recovery/models.py +220 -0
devflow_engine/refactor.py +93 -0
devflow_engine/registry/__init__.py +1 -0
devflow_engine/registry/cards.py +238 -0
devflow_engine/registry/domain_normalize.py +60 -0
devflow_engine/registry/effects.py +65 -0
devflow_engine/registry/enforce_report.py +150 -0
devflow_engine/registry/module_cards_classify.py +164 -0
devflow_engine/registry/module_cards_draft.py +184 -0
devflow_engine/registry/module_cards_gate.py +59 -0
devflow_engine/registry/packages.py +347 -0
devflow_engine/registry/pathways.py +323 -0
devflow_engine/review/__init__.py +11 -0
devflow_engine/review/dag.py +588 -0
devflow_engine/review/review_story.py +67 -0
devflow_engine/scope_idea/__init__.py +3 -0
devflow_engine/scope_idea/agentic.py +39 -0
devflow_engine/scope_idea/dag.py +1069 -0
devflow_engine/scope_idea/models.py +175 -0
devflow_engine/skills/builtins/devflow/queue_failure_investigation/SKILL.md +112 -0
devflow_engine/skills/builtins/devflow/queue_idea_to_story/SKILL.md +120 -0
devflow_engine/skills/builtins/devflow/queue_integration/SKILL.md +105 -0
devflow_engine/skills/builtins/devflow/queue_recovery/SKILL.md +108 -0
devflow_engine/skills/builtins/devflow/queue_runtime_core/SKILL.md +155 -0
devflow_engine/skills/builtins/devflow/queue_story_implementation/SKILL.md +122 -0
devflow_engine/skills/builtins/devin/idea_to_story_handoff/SKILL.md +120 -0
devflow_engine/skills/builtins/devin/ideation/SKILL.md +168 -0
devflow_engine/skills/builtins/devin/ideation/state-and-phrasing-reference.md +18 -0
devflow_engine/skills/builtins/devin/insight/SKILL.md +22 -0
devflow_engine/skills/registry.example.yaml +42 -0
devflow_engine/source_doc_assumptions.py +291 -0
devflow_engine/source_doc_mutation_dag.py +1606 -0
devflow_engine/source_doc_mutation_eval.py +417 -0
devflow_engine/source_doc_mutation_worker.py +25 -0
devflow_engine/source_docs_schema.py +207 -0
devflow_engine/source_docs_updater.py +309 -0
devflow_engine/source_scope/__init__.py +15 -0
devflow_engine/source_scope/agentic.py +45 -0
devflow_engine/source_scope/dag.py +1626 -0
devflow_engine/source_scope/models.py +177 -0
devflow_engine/stores/__init__.py +0 -0
devflow_engine/stores/execution_store.py +3534 -0
devflow_engine/story/__init__.py +0 -0
devflow_engine/story/contracts.py +160 -0
devflow_engine/story/discovery.py +47 -0
devflow_engine/story/evidence.py +118 -0
devflow_engine/story/hashing.py +27 -0
devflow_engine/story/implemented_queue_purge.py +148 -0
devflow_engine/story/indexer.py +105 -0
devflow_engine/story/io.py +20 -0
devflow_engine/story/markdown_contracts.py +298 -0
devflow_engine/story/reconciliation.py +408 -0
devflow_engine/story/validate_stories.py +149 -0
devflow_engine/story/validate_tests_story.py +512 -0
devflow_engine/story/validation.py +133 -0
devflow_engine/ui_grounding/__init__.py +11 -0
devflow_engine/ui_grounding/agentic.py +31 -0
devflow_engine/ui_grounding/dag.py +874 -0
devflow_engine/ui_grounding/models.py +224 -0
devflow_engine/ui_grounding/pencil_bridge.py +247 -0
devflow_engine/vendor/__init__.py +0 -0
devflow_engine/vendor/datalumina_genai/__init__.py +11 -0
devflow_engine/vendor/datalumina_genai/core/__init__.py +0 -0
devflow_engine/vendor/datalumina_genai/core/exceptions.py +9 -0
devflow_engine/vendor/datalumina_genai/core/nodes/__init__.py +0 -0
devflow_engine/vendor/datalumina_genai/core/nodes/agent.py +48 -0
devflow_engine/vendor/datalumina_genai/core/nodes/agent_streaming_node.py +26 -0
devflow_engine/vendor/datalumina_genai/core/nodes/base.py +89 -0
devflow_engine/vendor/datalumina_genai/core/nodes/concurrent.py +30 -0
devflow_engine/vendor/datalumina_genai/core/nodes/router.py +69 -0
devflow_engine/vendor/datalumina_genai/core/schema.py +72 -0
devflow_engine/vendor/datalumina_genai/core/task.py +52 -0
devflow_engine/vendor/datalumina_genai/core/validate.py +139 -0
devflow_engine/vendor/datalumina_genai/core/workflow.py +200 -0
devflow_engine/worker.py +1086 -0
devflow_engine/worker_guard.py +233 -0
devflow_engine-1.0.0.dist-info/METADATA +235 -0
devflow_engine-1.0.0.dist-info/RECORD +393 -0
devflow_engine-1.0.0.dist-info/WHEEL +4 -0
devflow_engine-1.0.0.dist-info/entry_points.txt +3 -0
devin/__init__.py +6 -0
devin/dag.py +58 -0
devin/dag_two_arm.py +138 -0
devin/devin_chat_scenario_catalog.json +588 -0
devin/devin_eval.py +677 -0
devin/nodes/__init__.py +0 -0
devin/nodes/ideation/__init__.py +0 -0
devin/nodes/ideation/node.py +195 -0
devin/nodes/ideation/playground.py +267 -0
devin/nodes/ideation/prompt.md +65 -0
devin/nodes/ideation/scenarios/continue_refinement.py +13 -0
devin/nodes/ideation/scenarios/continue_refinement_evals.py +18 -0
devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py +17 -0
devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py +16 -0
devin/nodes/ideation/scenarios/large_idea_split.py +4 -0
devin/nodes/ideation/scenarios/large_idea_split_evals.py +17 -0
devin/nodes/ideation/scenarios/source_documentation_added.py +4 -0
devin/nodes/ideation/scenarios/source_documentation_added_evals.py +16 -0
devin/nodes/ideation/scenarios/user_says_create_it.py +30 -0
devin/nodes/ideation/scenarios/user_says_create_it_evals.py +23 -0
devin/nodes/ideation/scenarios/vague_idea.py +16 -0
devin/nodes/ideation/scenarios/vague_idea_evals.py +47 -0
devin/nodes/ideation/tools.json +312 -0
devin/nodes/insight/__init__.py +0 -0
devin/nodes/insight/node.py +49 -0
devin/nodes/insight/playground.py +154 -0
devin/nodes/insight/prompt.md +61 -0
devin/nodes/insight/scenarios/architecture_pattern_query.py +15 -0
devin/nodes/insight/scenarios/architecture_pattern_query_evals.py +25 -0
devin/nodes/insight/scenarios/codebase_exploration.py +15 -0
devin/nodes/insight/scenarios/codebase_exploration_evals.py +23 -0
devin/nodes/insight/scenarios/devin_ideation_routing.py +19 -0
devin/nodes/insight/scenarios/devin_ideation_routing_evals.py +39 -0
devin/nodes/insight/scenarios/devin_insight_routing.py +20 -0
devin/nodes/insight/scenarios/devin_insight_routing_evals.py +40 -0
devin/nodes/insight/scenarios/operational_debugging.py +15 -0
devin/nodes/insight/scenarios/operational_debugging_evals.py +23 -0
devin/nodes/insight/scenarios/operational_question.py +9 -0
devin/nodes/insight/scenarios/operational_question_evals.py +8 -0
devin/nodes/insight/scenarios/queue_status.py +15 -0
devin/nodes/insight/scenarios/queue_status_evals.py +23 -0
devin/nodes/insight/scenarios/source_doc_explanation.py +14 -0
devin/nodes/insight/scenarios/source_doc_explanation_evals.py +21 -0
devin/nodes/insight/scenarios/worker_state_check.py +15 -0
devin/nodes/insight/scenarios/worker_state_check_evals.py +22 -0
devin/nodes/insight/tools.json +126 -0
devin/nodes/intake/__init__.py +0 -0
devin/nodes/intake/node.py +27 -0
devin/nodes/intake/playground.py +47 -0
devin/nodes/intake/prompt.md +12 -0
devin/nodes/intake/scenarios/ideation_routing.py +4 -0
devin/nodes/intake/scenarios/ideation_routing_evals.py +5 -0
devin/nodes/intake/scenarios/insight_routing.py +4 -0
devin/nodes/intake/scenarios/insight_routing_evals.py +5 -0
devin/nodes/iterate/README.md +44 -0
devin/nodes/iterate/__init__.py +1 -0
devin/nodes/iterate/_archived_design_stages/01-objectives-requirements.md +112 -0
devin/nodes/iterate/_archived_design_stages/02-evals.md +131 -0
devin/nodes/iterate/_archived_design_stages/03-tools-and-boundaries.md +110 -0
devin/nodes/iterate/_archived_design_stages/04-harness-and-playground.md +32 -0
devin/nodes/iterate/_archived_design_stages/05-prompt-deferred.md +11 -0
devin/nodes/iterate/_archived_design_stages/coder_agent_design/01-objectives-requirements.md +20 -0
devin/nodes/iterate/_archived_design_stages/coder_agent_design/02-evals.md +8 -0
devin/nodes/iterate/_archived_design_stages/coder_agent_design/03-tools-and-boundaries.md +14 -0
devin/nodes/iterate/_archived_design_stages/coder_agent_design/04-harness-and-playground.md +12 -0
devin/nodes/iterate/_archived_design_stages/framer_agent_design/01-objectives-requirements.md +20 -0
devin/nodes/iterate/_archived_design_stages/framer_agent_design/02-evals.md +8 -0
devin/nodes/iterate/_archived_design_stages/framer_agent_design/03-tools-and-boundaries.md +13 -0
devin/nodes/iterate/_archived_design_stages/framer_agent_design/04-harness-and-playground.md +12 -0
devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md +25 -0
devin/nodes/iterate/_archived_design_stages/iterator_agent_design/02-evals.md +9 -0
devin/nodes/iterate/_archived_design_stages/iterator_agent_design/03-tools-and-boundaries.md +14 -0
devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md +12 -0
devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md +20 -0
devin/nodes/iterate/_archived_design_stages/observer_agent_design/02-evals.md +8 -0
devin/nodes/iterate/_archived_design_stages/observer_agent_design/03-tools-and-boundaries.md +14 -0
devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md +13 -0
devin/nodes/iterate/agent-roles.md +89 -0
devin/nodes/iterate/agents/README.md +10 -0
devin/nodes/iterate/artifacts.md +504 -0
devin/nodes/iterate/contract.md +100 -0
devin/nodes/iterate/eval-plan.md +74 -0
devin/nodes/iterate/node.py +100 -0
devin/nodes/iterate/pipeline/README.md +13 -0
devin/nodes/iterate/playground-contract.md +76 -0
devin/nodes/iterate/prompt.md +11 -0
devin/nodes/iterate/scenarios/README.md +38 -0
devin/nodes/iterate/scenarios/artifact-and-loop-scenarios.md +101 -0
devin/nodes/iterate/scenarios/coder_artifact_alignment.py +32 -0
devin/nodes/iterate/scenarios/coder_artifact_alignment_evals.py +45 -0
devin/nodes/iterate/scenarios/coder_bounded_fix.py +27 -0
devin/nodes/iterate/scenarios/coder_bounded_fix_evals.py +45 -0
devin/nodes/iterate/scenarios/devin_iterate_routing.py +21 -0
devin/nodes/iterate/scenarios/devin_iterate_routing_evals.py +36 -0
devin/nodes/iterate/scenarios/framer_scope_boundary.py +25 -0
devin/nodes/iterate/scenarios/framer_scope_boundary_evals.py +57 -0
devin/nodes/iterate/scenarios/framer_task_framing.py +25 -0
devin/nodes/iterate/scenarios/framer_task_framing_evals.py +58 -0
devin/nodes/iterate/scenarios/iterate_error_fix.py +21 -0
devin/nodes/iterate/scenarios/iterate_error_fix_evals.py +39 -0
devin/nodes/iterate/scenarios/iterate_quick_change.py +21 -0
devin/nodes/iterate/scenarios/iterate_quick_change_evals.py +35 -0
devin/nodes/iterate/scenarios/iterate_to_idea_promotion.py +23 -0
devin/nodes/iterate/scenarios/iterate_to_idea_promotion_evals.py +53 -0
devin/nodes/iterate/scenarios/iterate_to_insight_reroute.py +23 -0
devin/nodes/iterate/scenarios/iterate_to_insight_reroute_evals.py +53 -0
devin/nodes/iterate/scenarios/observer_evidence_seam.py +28 -0
devin/nodes/iterate/scenarios/observer_evidence_seam_evals.py +55 -0
devin/nodes/iterate/scenarios/observer_repro_creation.py +28 -0
devin/nodes/iterate/scenarios/observer_repro_creation_evals.py +45 -0
devin/nodes/iterate/scenarios/routing-matrix.md +45 -0
devin/nodes/shared/__init__.py +0 -0
devin/nodes/shared/filemaker_expert.md +80 -0
devin/nodes/shared/filemaker_expert.py +354 -0
devin/nodes/shared/filemaker_expert_eval/runner.py +176 -0
devin/nodes/shared/filemaker_expert_eval/scenarios.json +65 -0
devin/nodes/shared/goldilocks_advisor_eval/runner.py +214 -0
devin/nodes/shared/goldilocks_advisor_eval/scenarios.json +58 -0
devin/nodes/shared/helpers.py +156 -0
devin/nodes/shared/idea_compliance_advisor_eval/runner.py +252 -0
devin/nodes/shared/idea_compliance_advisor_eval/scenarios.json +75 -0
devin/nodes/shared/models.py +44 -0
devin/nodes/shared/post.py +40 -0
devin/nodes/shared/router.py +107 -0
devin/nodes/shared/tools.py +191 -0
devin/shared/devin-chat-rubric.md +237 -0
devin/shared/devin-chat-scenario-suite.md +90 -0
devin/shared/eval_doctrine.md +9 -0

devin/nodes/iterate/scenarios/routing-matrix.md ADDED Viewed

@@ -0,0 +1,45 @@
+# Iterate routing scenario matrix
+## Purpose
+This file makes the lane boundary concrete. It should stay aligned with:
+- `pipeline/01-objectives-requirements.md` for route-fit rules
+- `pipeline/02-evals.md` for what routing mistakes must be catchable
+- the Iterator / Framer / Observer / Coder role model, in which Iterator owns the final lane judgment and may reroute when better truth appears
+## Routing matrix
+| Scenario | Initial read | Why | Expected route |
+| --- | --- | --- | --- |
+| concrete_error_fix | User reports a specific failure on an existing surface and wants it fixed | bounded implementation ask with observable failure | `iterate` |
+| quick_behavior_change | User wants a small change to current behavior on an existing page, route, or component | targeted delta, not broad planning | `iterate` |
+| targeted_ui_improvement | User wants a narrow UX or UI improvement on an existing surface | bounded improvement with scoped success criteria | `iterate` |
+| explain_why_error_happens | User wants diagnosis or explanation only | read-only intent, no implementation requested | `insight` |
+| investigate_before_any_change | User asks for analysis first and does not yet want a fix | read-only investigation | `insight` |
+| broad_feature_request | Request introduces a new workflow, broader feature area, or story-scale planning need | no longer task-scale | `idea` |
+| task_grows_after_framing | Initial ask sounds small, but Framer discovers broader planning or architecture work is required | promote when better truth appears | promote to `idea` |
+## Notes on who owns the decision
+- intake may make the first route guess
+- `Framer` may surface evidence that the ask is broader or less concrete than it looked
+- `Observer` may surface evidence that the user actually wanted diagnosis only or that the task lacks observable truth
+- `Iterator` owns the final judgment on whether the work stays in iterate, reroutes to insight, or promotes to idea
+- when rerouting or promoting after iterate has started, Iterator should leave an iterate-owned `promotion_handoff.json` rather than making the lane transition visible only in response text
+The lane should optimize for truthful routing, not for preserving the initial choice.
+## Additional scenario guidance
+### Stay in iterate
+Stay in `iterate` when the work can still be described as a bounded delta on an existing surface, even if implementation is nontrivial.
+### Promote to idea
+Promote to `idea` when success now depends on choosing among broader product, workflow, or architecture options rather than delivering the originally bounded delta.
+### Route to insight
+Route to `insight` when the user wants explanation, diagnosis, or investigation and would reasonably be surprised if the system started changing code.
+## Review note
+If a reviewer cannot tell why a scenario belongs in `iterate`, `insight`, or `idea`, the upstream objectives are still too fuzzy.

devin/nodes/shared/__init__.py ADDED Viewed

File without changes

devin/nodes/shared/filemaker_expert.md ADDED Viewed

@@ -0,0 +1,80 @@
+# FileMaker Expert — advisor for Devin's insight agent
+# Pi-Pi pattern: Devin (primary) calls query_filemaker_expert → spawns PI subprocess
+#                 with this prompt → expert reads DDR artifacts → returns text response
+#                 → Devin synthesizes into answer
+---
+name: filemaker_expert
+description: >
+  FileMaker database expert — read-only advisor. Reads DDR analysis artifacts
+  and answers questions about database structure, layouts, scripts, entities,
+  user flows, and feature clusters. Does NOT run ddr-docs — only reads.
+tools: read,grep,find,ls,cat
+---
+You are a **FileMaker Database Expert** — a narrow, read-only advisor agent.
+Your job: receive a question about a FileMaker database, read the relevant DDR analysis artifacts, and return a thorough, grounded response.
+## Your domain
+DDR analysis artifacts live under:
+`{repo_root}/ai_docs/context/source_docs/ddr/{database_name}/`
+**Detailed JSON (use first for anything requiring precision):**
+- `03_scripts_detailed.json` — complete script steps with full calculations (use INSTEAD of the summary for script logic)
+- `01_schema_detailed.json` — full field definitions with calculations, auto-enter, validation
+- `05_layouts_detailed.json` — complete layout object inventory
+- `02_relationships_detailed.json` — full relationship graph with all join paths
+- `04_custom_functions_detailed.json` — custom functions with full formula text
+**Summary markdown (overviews only):**
+- `01_schema_summary.md` — table count and field type overview
+- `05_layouts_summary.md` — layout counts and object counts
+- `03_scripts_summary.md` — script/folder list (truncates all calculations at ~50 chars — do NOT rely on it for step details)
+- `02_relationships_summary.md` — relationship graph overview
+## Source priority
+**JSON is the source for details. Markdown summaries are for overviews only.**
+- For anything involving calculations, SQL queries, script step logic, field-level detail, or specific values: read from the detailed JSON files
+- Markdown summaries are useful for navigation and overview — they tell you what exists and the general structure
+- Detailed JSON files (`*_detailed.json`) have the complete untruncated content
+**Key detailed files:**
+- `03_scripts_detailed.json` — every script step with FULL calculation text, untruncated (vs. summary's 50-char cutoff)
+- `01_schema_detailed.json` — complete field definitions, calculations, auto-enter logic
+- `05_layouts_detailed.json` — full layout object inventory
+- `02_relationships_detailed.json` — complete relationship graph
+**For script analysis:** always start from `03_scripts_detailed.json`. The summary (`03_scripts_summary.md`) truncates every calculation to ~50 chars — `JSONGetElement(Get(ScriptResult);"qbID")` becomes `JSONGetElement(Get(ScriptResult);"q` — making it unreadable. The detailed JSON has the complete text.
+JSON files in the `analysis/` subdirectory:
+- `analysis/feature_clusters.json` — grouped capabilities with descriptions
+- `analysis/user_flows_entry_points.json` — entry point patterns
+- `analysis/cruft_summary.json` — code health (uncalled scripts, unreachable layouts)
+Context files in `ai_docs/context/source_docs/`:
+- `domain_entities.json`, `user_workflows.json`, `product_brief.json`
+## How to work
+1. Determine which database(s) the question is about
+2. Identify the DDR output directory for that database
+3. Use `read` / `cat` to read the relevant artifact files
+4. Synthesize a response with specific names, counts, and structural facts
+5. Return plain text — the primary agent will incorporate it
+## Answer style
+- Be concrete. Example of the expected level of detail: "Clean Sweep has 455 layouts across 115 tables, including Account Page (654 objects), My Page/Cleaner dashboards, and Inspection Form III (225 objects)"
+- Use headers: ## Database Overview, ## Layouts, ## Scripts, ## Code Health
+- If the artifacts don't cover something, say "not in DDR analysis" and describe what's available
+- Do not speculate — if it's not in the files, say so
+## Constraints
+- Do NOT run ddr-docs or any analysis pipeline
+- Do NOT write or modify any files
+- Do not answer questions outside the FileMaker database domain
+- Do not use search tools (`grep`, `find`) unless needed to locate a specific artifact path

devin/nodes/shared/filemaker_expert.py ADDED Viewed

@@ -0,0 +1,354 @@
+"""FileMaker Expert advisor — read-only DDR analysis reader.
+Pi-Pi pattern: Devin (primary agent) calls FileMaker Expert as an advisor when it
+needs to answer FileMaker database questions. The expert reads pre-existing DDR
+analysis artifacts and returns a grounded, synthesized response.
+The expert does NOT re-run ddr-docs. Analysis is done once on import by the
+devflow_event_worker. The expert just reads what's already there.
+Location convention:
+  - Source XML: ai_docs/context/source_docs/<name>.xml
+  - DDR analysis:  ai_docs/context/source_docs/ddr/<name>/
+"""
+from __future__ import annotations
+import json
+from dataclasses import dataclass
+from pathlib import Path
+from devin.nodes.shared.tools import ToolResult
+@dataclass
+class DDRArtifacts:
+    """Paths to all available DDR analysis artifacts for one database."""
+    name: str
+    ddr_root: Path
+    @property
+    def analysis_dir(self) -> Path:
+        return self.ddr_root / "analysis"
+    def schema_summary_md(self) -> Path | None:
+        p = self.ddr_root / "01_schema_summary.md"
+        return p if p.exists() else None
+    def layouts_summary_md(self) -> Path | None:
+        p = self.ddr_root / "05_layouts_summary.md"
+        return p if p.exists() else None
+    def scripts_summary_md(self) -> Path | None:
+        p = self.ddr_root / "03_scripts_summary.md"
+        return p if p.exists() else None
+    def relationships_summary_md(self) -> Path | None:
+        p = self.ddr_root / "02_relationships_summary.md"
+        return p if p.exists() else None
+    def feature_clusters_json(self) -> Path | None:
+        p = self.analysis_dir / "feature_clusters.json"
+        return p if p.exists() else None
+    def entry_points_json(self) -> Path | None:
+        p = self.analysis_dir / "user_flows_entry_points.json"
+        return p if p.exists() else None
+    def cruft_summary_json(self) -> Path | None:
+        p = self.analysis_dir / "cruft_summary.json"
+        return p if p.exists() else None
+    def domain_entities_json(self) -> Path | None:
+        p = self.ddr_root.parent / "domain_entities.json"
+        return p if p.exists() else None
+    def user_workflows_json(self) -> Path | None:
+        p = self.ddr_root.parent / "user_workflows.json"
+        return p if p.exists() else None
+    def product_brief_json(self) -> Path | None:
+        p = self.ddr_root.parent / "product_brief.json"
+        return p if p.exists() else None
+def _read_text(path: Path | None, max_lines: int = 80) -> str:
+    if not path or not path.exists():
+        return "(not available)"
+    try:
+        lines = path.read_text(encoding="utf-8", errors="replace").splitlines()
+        return "\n".join(lines[:max_lines])
+    except Exception:
+        return "(read error)"
+def _read_json(path: Path | None) -> dict:
+    if not path or not path.exists():
+        return {}
+    try:
+        return json.loads(path.read_text(encoding="utf-8"))
+    except Exception:
+        return {}
+def _summarize(md_path: Path | None, prefix: str) -> str:
+    """Extract the Total X count from a summary markdown header."""
+    if not md_path or not md_path.exists():
+        return "unknown"
+    try:
+        for line in md_path.read_text(encoding="utf-8", errors="replace").splitlines():
+            if prefix.lower() in line.lower():
+                parts = line.rsplit(":", 1)
+                if len(parts) == 2:
+                    return parts[-1].strip()
+    except Exception:
+        pass
+    return "unknown"
+def _synthesize_report(
+    artifacts: DDRArtifacts,
+    question: str,
+    include_summaries: bool = True,
+    include_workflows: bool = True,
+) -> str:
+    """Read all DDR artifacts and produce a synthesized expert report."""
+    sections: list[str] = []
+    # Database overview
+    schema_md = artifacts.schema_summary_md()
+    layouts_md = artifacts.layouts_summary_md()
+    scripts_md = artifacts.scripts_summary_md()
+    rels_md = artifacts.relationships_summary_md()
+    table_count = _summarize(schema_md, "Total Tables")
+    layout_count = _summarize(layouts_md, "Total Layouts")
+    script_count = _summarize(scripts_md, "Total Scripts")
+    sections.append(f"# FileMaker Expert Report: {artifacts.name}")
+    sections.append("")
+    sections.append("## Database Overview")
+    sections.append(f"- Tables: {table_count}")
+    sections.append(f"- Layouts: {layout_count}")
+    sections.append(f"- Scripts: {script_count}")
+    sections.append("")
+    # Schema excerpt
+    if include_summaries and schema_md:
+        sections.append("## Schema (excerpt)")
+        sections.append(_read_text(schema_md, max_lines=40))
+        sections.append("")
+    # Layouts excerpt
+    if include_summaries and layouts_md:
+        sections.append("## Layouts (excerpt)")
+        sections.append(_read_text(layouts_md, max_lines=50))
+        sections.append("")
+    # Scripts excerpt
+    if include_summaries and scripts_md:
+        sections.append("## Scripts (excerpt)")
+        sections.append(_read_text(scripts_md, max_lines=40))
+        sections.append("")
+    # Relationships
+    if include_summaries and rels_md:
+        sections.append("## Relationships (excerpt)")
+        sections.append(_read_text(rels_md, max_lines=30))
+        sections.append("")
+    # Feature clusters
+    fc_json = artifacts.feature_clusters_json()
+    if fc_json:
+        clusters = _read_json(fc_json)
+        cluster_list = clusters.get("clusters", [])
+        sections.append(f"## Feature Clusters ({len(cluster_list)} total)")
+        for c in cluster_list[:15]:
+            name = c.get("name", "unknown")
+            desc = c.get("description", "")[:100]
+            sections.append(f"- **{name}**: {desc}")
+        sections.append("")
+    # Entry points
+    ep_json = artifacts.entry_points_json()
+    if ep_json:
+        eps = _read_json(ep_json)
+        summary = eps.get("summary", {})
+        total = summary.get("total_entry_points", 0)
+        sections.append(f"## Entry Points (total: {total})")
+        entry_pts = eps.get("entry_points", {})
+        for key, val in entry_pts.items():
+            count = val.get("count", 0) if isinstance(val, dict) else 0
+            if count > 0:
+                sections.append(f"- {key}: {count}")
+        sections.append("")
+    # Cruft / code health
+    cruft_json = artifacts.cruft_summary_json()
+    if cruft_json:
+        cruft = _read_json(cruft_json)
+        problems: list[str] = []
+        for key in ("uncalled_scripts", "unreachable_layouts", "unreferenced_fields", "uncalled_functions"):
+            entry = cruft.get(key, {})
+            count = entry.get("count", 0) if isinstance(entry, dict) else 0
+            if count > 0:
+                problems.append(f"- {key}: {count}")
+        if problems:
+            sections.append("## Code Health (cruft detected)")
+            sections.extend(problems)
+            sections.append("")
+    # Domain entities
+    if include_workflows:
+        de_json = artifacts.domain_entities_json()
+        if de_json:
+            de = _read_json(de_json)
+            entities = de.get("entities", de.get("domain_entities", []))
+            if entities:
+                sections.append(f"## Domain Entities ({len(entities)} defined)")
+                for e in entities[:20]:
+                    name = e.get("name", "unknown")
+                    etype = e.get("type", "")
+                    fields = e.get("fields", [])
+                    sections.append(f"- **{name}** ({etype}, {len(fields)} fields)")
+                sections.append("")
+    # User workflows
+    uw_json = artifacts.user_workflows_json()
+    if include_workflows and uw_json:
+        uw = _read_json(uw_json)
+        workflows = uw.get("workflows", uw.get("user_workflows", []))
+        if workflows:
+            sections.append(f"## User Workflows ({len(workflows)} defined)")
+            for w in workflows[:15]:
+                name = w.get("name", "unknown")
+                steps = w.get("steps", w.get("stages", []))
+                sections.append(f"- **{name}**: {len(steps)} steps/stages")
+            sections.append("")
+    # Product brief
+    pb_json = artifacts.product_brief_json()
+    if include_workflows and pb_json:
+        pb = _read_json(pb_json)
+        brief_text = pb.get("brief", pb.get("description", pb.get("product_brief", "")))
+        if brief_text and len(brief_text) > 20:
+            sections.append("## Product Brief")
+            sections.append(str(brief_text)[:500])
+            sections.append("")
+    # Question answered
+    sections.append(f"---\n**Question:** {question}")
+    return "\n".join(sections)
+def filemaker_expert(
+    *,
+    question: str,
+    repo_root: Path,
+    include_summaries: bool = True,
+    include_workflows: bool = True,
+    database_name: str | None = None,
+) -> ToolResult:
+    """FileMaker Expert advisor — read-only DDR analysis reader.
+    Devin (primary agent) calls this to get a grounded FileMaker database report.
+    The expert reads pre-existing DDR analysis artifacts from:
+      ai_docs/context/source_docs/ddr/<database_name>/
+    It does NOT re-run ddr-docs. Analysis was done on import by devflow_event_worker.
+    Args:
+        question:       The question about the FileMaker database
+        repo_root:      Project repo root (e.g. /Users/devflow/repos/cleaner)
+        include_summaries: Include schema/layouts/scripts/relationships excerpts (default: true)
+        include_workflows: Include domain entities, user workflows, product brief (default: true)
+        database_name:  Specific database to focus on (derived from XML filename, e.g. "Clean Sweep_fmp12").
+                       If not provided, reports on all databases found.
+    """
+    try:
+        source_docs = repo_root / "ai_docs" / "context" / "source_docs"
+        ddr_base = source_docs / "ddr"
+        if not ddr_base.exists():
+            return ToolResult(
+                ok=False,
+                tool_name="FileMakerExpert",
+                output={},
+                error=f"DDR analysis directory not found: {ddr_base}. "
+                      "DDR analysis runs automatically on FileMaker file import — "
+                      "if no analysis exists yet, the files were imported but analysis may have failed.",
+            )
+        # Find all DDR outputs
+        ddr_dirs = sorted(ddr_base.iterdir())
+        if not ddr_dirs:
+            return ToolResult(
+                ok=False,
+                tool_name="FileMakerExpert",
+                output={},
+                error=f"No DDR analysis results in {ddr_base}",
+            )
+        # Filter to specific database if named
+        if database_name:
+            ddr_dirs = [d for d in ddr_dirs if d.name == database_name]
+            if not ddr_dirs:
+                available = [d.name for d in sorted(ddr_base.iterdir())]
+                return ToolResult(
+                    ok=False,
+                    tool_name="FileMakerExpert",
+                    output={},
+                    error=f"Database '{database_name}' not found. Available: {available}",
+                )
+        databases: dict[str, dict] = {}
+        reports: list[str] = []
+        for ddr_dir in ddr_dirs:
+            artifacts = DDRArtifacts(name=ddr_dir.name, ddr_root=ddr_dir)
+            schema_md = artifacts.schema_summary_md()
+            layouts_md = artifacts.layouts_summary_md()
+            scripts_md = artifacts.scripts_summary_md()
+            table_count = _summarize(schema_md, "Total Tables")
+            layout_count = _summarize(layouts_md, "Total Layouts")
+            script_count = _summarize(scripts_md, "Total Scripts")
+            report = _synthesize_report(
+                artifacts,
+                question=question,
+                include_summaries=include_summaries,
+                include_workflows=include_workflows,
+            )
+            reports.append(report)
+            databases[ddr_dir.name] = {
+                "table_count": table_count,
+                "layout_count": layout_count,
+                "script_count": script_count,
+                "ddr_root": str(ddr_dir.relative_to(repo_root)),
+                "has_schema": bool(schema_md),
+                "has_layouts": bool(layouts_md),
+                "has_scripts": bool(scripts_md),
+                "has_cruft": bool(artifacts.cruft_summary_json()),
+                "has_feature_clusters": bool(artifacts.feature_clusters_json()),
+            }
+        combined = "\n\n---\n\n".join(reports) if reports else "No analysis results found."
+        return ToolResult(
+            ok=True,
+            tool_name="FileMakerExpert",
+            output={
+                "question": question,
+                "databases": databases,
+                "expert_report": combined,
+                "database_count": len(databases),
+                "database_names": list(databases.keys()),
+            },
+        )
+    except Exception as e:
+        return ToolResult(ok=False, tool_name="FileMakerExpert", output={}, error=str(e))

devin/nodes/shared/filemaker_expert_eval/runner.py ADDED Viewed

@@ -0,0 +1,176 @@
+"""Eval harness for FileMaker expert advisor.
+Runs the filemaker_expert through the PI subprocess (matching devflow-tools.ts
+queryFilemakerExpert pattern) against scenario fixtures and scores the output.
+Usage:
+    python3 -m devin.nodes.shared.filemaker_expert_eval.runner
+"""
+from __future__ import annotations
+import json
+import re
+import subprocess
+import time
+from dataclasses import dataclass
+from pathlib import Path
+# _PROJECT_ROOT points to the devflow_engine repo root
+_PROJECT_ROOT = Path(__file__).resolve().parents[5]  # devflow_engine/ — NOTE(review): this is five levels above this file's directory; confirm it lands on the repo root
+_AGENT_FILE = ".pi/agents/filemaker-expert/filemaker-expert.md"  # joined onto _PROJECT_ROOT by load_expert_prompt
+# Default test project
+_DEFAULT_REPO_ROOT = "/Users/devflow/repos/cleaner"  # absolute, machine-specific path; the CLI's first argument overrides it
+_DEFAULT_DATABASE = "Clean Sweep_fmp12"  # the CLI's second argument overrides it
+_DEFAULT_MODEL = "minimax/MiniMax-M2.7"  # default for the `--model` option passed to `pi`
+@dataclass
+class ScenarioResult:  # outcome of scoring one eval scenario
+    scenario_id: str  # the scenario's "id" value
+    passed: bool  # True only when score equals max_score
+    score: int  # sum of the weights that were awarded
+    max_score: int  # sum of every weight in the scenario's "scoring" map
+    output: str  # expert output; score_scenario stores only the first 500 characters
+    details: dict[str, int]  # scoring key -> weight awarded (0 when not awarded)
+    elapsed: float = 0.0  # subprocess duration in seconds; assigned by run_all_scenarios after scoring
+def load_expert_prompt() -> str:
+    """Extract system prompt from expert definition file."""
+    content = _PROJECT_ROOT.joinpath(_AGENT_FILE).read_text()
+    match = re.match(r"^---\n[\s\S]*?\n---\n([\s\S]*)$", content, re.M)
+    return match.group(1).strip() if match else content
+def run_filemaker_expert(
+    question: str,
+    repo_root: str = _DEFAULT_REPO_ROOT,
+    database: str = _DEFAULT_DATABASE,
+    model: str = _DEFAULT_MODEL,
+    timeout: int = 90,
+) -> tuple[str, float]:
+    """Run the filemaker expert via PI subprocess and return (output, elapsed)."""
+    system_prompt = load_expert_prompt()
+    context_block = (
+        f"Context: repo_root={repo_root}, database={database}\n\n"
+        f"Question: {question}"
+    )
+    args = [
+        "pi",
+        "--mode", "text",
+        "--no-session",
+        "--no-extensions",
+        "--model", model,
+        "--tools", "read,grep,find,ls",
+        "--thinking", "off",
+        "--append-system-prompt", system_prompt + "\n\n" + context_block,
+        question,
+    ]
+    start = time.time()
+    proc = subprocess.Popen(
+        args,
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        cwd=str(_PROJECT_ROOT),
+    )
+    stdout, _ = proc.communicate(timeout=timeout)
+    elapsed = time.time() - start
+    return stdout, elapsed
+def score_scenario(output: str, scenario: dict) -> ScenarioResult:
+    """Score an expert output against a scenario's expected keys and structure."""
+    score = 0
+    max_score = sum(scenario["scoring"].values())
+    details = {}
+    for key, weight in scenario["scoring"].items():
+        # Check for expected keys in output
+        key_lower = key.lower().replace("_", " ")
+        if any(k in output for k in scenario["expected_keys"]):
+            details[key] = weight
+            score += weight
+        else:
+            details[key] = 0
+    return ScenarioResult(
+        scenario_id=scenario["id"],
+        passed=score == max_score,
+        score=score,
+        max_score=max_score,
+        output=output[:500],
+        details=details,
+    )
+def run_all_scenarios(
+    scenarios_path: str | Path | None = None,
+    repo_root: str = _DEFAULT_REPO_ROOT,
+    database: str = _DEFAULT_DATABASE,
+) -> list[ScenarioResult]:
+    """Run all scenarios and return results."""
+    if scenarios_path is None:
+        scenarios_path = Path(__file__).parent / "scenarios.json"
+    scenarios = json.loads(Path(scenarios_path).read_text())
+    results = []
+    for scenario in scenarios:
+        print(f"\nRunning: {scenario['id']}")
+        output, elapsed = run_filemaker_expert(
+            question=scenario["question"],
+            repo_root=repo_root,
+            database=database,
+        )
+        result = score_scenario(output, scenario)
+        result.elapsed = elapsed
+        results.append(result)
+        print(f"  Score: {result.score}/{result.max_score} ({elapsed:.1f}s)")
+        if not result.passed:
+            print(f"  Missing: {[k for k, v in result.details.items() if v == 0]}")
+    return results
+def print_report(results: list[ScenarioResult]) -> None:
+    """Print a formatted report of scenario results."""
+    total = sum(r.score for r in results)
+    max_total = sum(r.max_score for r in results)
+    print(f"\n{'='*60}")
+    print(f"FILEMAKER EXPERT EVAL REPORT")
+    print(f"{'='*60}")
+    for r in results:
+        status = "✓ PASS" if r.passed else "✗ FAIL"
+        print(f"\n{status} {r.scenario_id}  {r.score}/{r.max_score}")
+        for key, val in r.details.items():
+            icon = "✓" if val > 0 else "✗"
+            print(f"    {icon} {key}: {val}")
+    print(f"\n{'='*60}")
+    print(f"TOTAL: {total}/{max_total}  ({100*total/max_total:.0f}%)")
+    print(f"{'='*60}")
+if __name__ == "__main__":
+    import sys
+    repo_root = sys.argv[1] if len(sys.argv) > 1 else _DEFAULT_REPO_ROOT
+    database = sys.argv[2] if len(sys.argv) > 2 else _DEFAULT_DATABASE
+    print(f"Testing filemaker_expert against cleaner DDR artifacts")
+    print(f"repo_root: {repo_root}")
+    print(f"database: {database}")
+    results = run_all_scenarios(repo_root=repo_root, database=database)
+    print_report(results)