PyPI - devflow-engine - Versions diffs - 1.0.0__py3-none-any.whl - Mend

devflow-engine 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (393) hide show

devflow_engine/__init__.py +3 -0
devflow_engine/agentic_prompts.py +100 -0
devflow_engine/agentic_runtime.py +398 -0
devflow_engine/api_key_flow_harness.py +539 -0
devflow_engine/api_keys.py +357 -0
devflow_engine/bootstrap/__init__.py +2 -0
devflow_engine/bootstrap/provision_from_template.py +84 -0
devflow_engine/cli/__init__.py +0 -0
devflow_engine/cli/app.py +7270 -0
devflow_engine/core/__init__.py +0 -0
devflow_engine/core/config.py +86 -0
devflow_engine/core/logging.py +29 -0
devflow_engine/core/paths.py +45 -0
devflow_engine/core/toml_kv.py +33 -0
devflow_engine/devflow_event_worker.py +1292 -0
devflow_engine/devflow_state.py +201 -0
devflow_engine/devin2/__init__.py +9 -0
devflow_engine/devin2/agent_definition.py +120 -0
devflow_engine/devin2/pi_runner.py +204 -0
devflow_engine/devin_orchestration.py +69 -0
devflow_engine/docs/prompts/anti-patterns.md +42 -0
devflow_engine/docs/prompts/devin-agent-prompt.md +55 -0
devflow_engine/docs/prompts/devin2-agent-prompt.md +81 -0
devflow_engine/docs/prompts/examples/devin-vapi-clone-reference-exchange.json +85 -0
devflow_engine/doctor/__init__.py +2 -0
devflow_engine/doctor/triage.py +140 -0
devflow_engine/error/__init__.py +0 -0
devflow_engine/error/remediation.py +21 -0
devflow_engine/errors/error_solver_dag.py +522 -0
devflow_engine/errors/runtime_observability.py +67 -0
devflow_engine/idea/__init__.py +4 -0
devflow_engine/idea/actors.py +481 -0
devflow_engine/idea/agentic.py +465 -0
devflow_engine/idea/analyze.py +93 -0
devflow_engine/idea/devin_chat_dag.py +1 -0
devflow_engine/idea/diff.py +99 -0
devflow_engine/idea/drafts.py +446 -0
devflow_engine/idea/idea_creation_dag.py +643 -0
devflow_engine/idea/ideation_enrichment.py +355 -0
devflow_engine/idea/ideation_enrichment_worker.py +19 -0
devflow_engine/idea/paths.py +28 -0
devflow_engine/idea/promote.py +53 -0
devflow_engine/idea/redaction.py +27 -0
devflow_engine/idea/repo_tools.py +1277 -0
devflow_engine/idea/response_mode.py +30 -0
devflow_engine/idea/story_pipeline.py +1585 -0
devflow_engine/idea/sufficiency.py +376 -0
devflow_engine/idea/traditional_stories.py +1257 -0
devflow_engine/implementation/__init__.py +0 -0
devflow_engine/implementation/alembic_preflight.py +700 -0
devflow_engine/implementation/dag.py +8450 -0
devflow_engine/implementation/green_gate.py +93 -0
devflow_engine/implementation/prompts.py +108 -0
devflow_engine/implementation/test_runtime.py +623 -0
devflow_engine/integration/__init__.py +19 -0
devflow_engine/integration/agentic.py +66 -0
devflow_engine/integration/dag.py +3539 -0
devflow_engine/integration/prompts.py +114 -0
devflow_engine/integration/supabase_schema.sql +31 -0
devflow_engine/integration/supabase_sync.py +177 -0
devflow_engine/llm/__init__.py +1 -0
devflow_engine/llm/cli_one_shot.py +84 -0
devflow_engine/llm/cli_stream.py +371 -0
devflow_engine/llm/execution_context.py +26 -0
devflow_engine/llm/invoke.py +1322 -0
devflow_engine/llm/provider_api.py +304 -0
devflow_engine/llm/repo_knowledge.py +588 -0
devflow_engine/llm_primitives.py +315 -0
devflow_engine/orchestration.py +62 -0
devflow_engine/planning/__init__.py +0 -0
devflow_engine/planning/analyze_repo.py +92 -0
devflow_engine/planning/render_drafts.py +133 -0
devflow_engine/playground/__init__.py +0 -0
devflow_engine/playground/hooks.py +26 -0
devflow_engine/playwright_workflow/__init__.py +5 -0
devflow_engine/playwright_workflow/dag.py +1317 -0
devflow_engine/process/__init__.py +5 -0
devflow_engine/process/dag.py +59 -0
devflow_engine/project_registration/__init__.py +3 -0
devflow_engine/project_registration/dag.py +1581 -0
devflow_engine/project_registry.py +109 -0
devflow_engine/prompts/devin/generic/prompt.md +6 -0
devflow_engine/prompts/devin/ideation/prompt.md +263 -0
devflow_engine/prompts/devin/ideation/scenarios.md +5 -0
devflow_engine/prompts/devin/ideation_loop/prompt.md +6 -0
devflow_engine/prompts/devin/insight/prompt.md +11 -0
devflow_engine/prompts/devin/insight/scenarios.md +5 -0
devflow_engine/prompts/devin/intake/prompt.md +15 -0
devflow_engine/prompts/devin/iterate/prompt.md +12 -0
devflow_engine/prompts/devin/shared/eval_doctrine.md +9 -0
devflow_engine/prompts/devin/shared/principles.md +246 -0
devflow_engine/prompts/devin_eval/assessment/prompt.md +18 -0
devflow_engine/prompts/idea/api_ideation_agent/prompt.md +8 -0
devflow_engine/prompts/idea/api_insight_agent/prompt.md +8 -0
devflow_engine/prompts/idea/response_doctrine/prompt.md +18 -0
devflow_engine/prompts/implementation/dependency_assessment/prompt.md +12 -0
devflow_engine/prompts/implementation/green/green/prompt.md +11 -0
devflow_engine/prompts/implementation/green/node_config/prompt.md +3 -0
devflow_engine/prompts/implementation/green_review/outcome_review/prompt.md +5 -0
devflow_engine/prompts/implementation/green_review/prior_run_review/prompt.md +5 -0
devflow_engine/prompts/implementation/red/prompt.md +27 -0
devflow_engine/prompts/implementation/redreview/prompt.md +23 -0
devflow_engine/prompts/implementation/redreview_repair/prompt.md +16 -0
devflow_engine/prompts/implementation/setupdoc/prompt.md +10 -0
devflow_engine/prompts/implementation/story_planning/prompt.md +13 -0
devflow_engine/prompts/implementation/test_design/prompt.md +27 -0
devflow_engine/prompts/integration/README.md +185 -0
devflow_engine/prompts/integration/green/example.md +67 -0
devflow_engine/prompts/integration/green/green/prompt.md +10 -0
devflow_engine/prompts/integration/green/node_config/prompt.md +42 -0
devflow_engine/prompts/integration/green/past_prompts/20260417T212300/green/prompt.md +15 -0
devflow_engine/prompts/integration/green/past_prompts/20260417T212300/node_config/prompt.md +42 -0
devflow_engine/prompts/integration/green_enrich/example.md +79 -0
devflow_engine/prompts/integration/green_enrich/green_enrich/prompt.md +9 -0
devflow_engine/prompts/integration/green_enrich/node_config/prompt.md +41 -0
devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/green_enrich/prompt.md +14 -0
devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/node_config/prompt.md +41 -0
devflow_engine/prompts/integration/red/code_repair/prompt.md +12 -0
devflow_engine/prompts/integration/red/example.md +152 -0
devflow_engine/prompts/integration/red/node_config/prompt.md +86 -0
devflow_engine/prompts/integration/red/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
devflow_engine/prompts/integration/red/past_prompts/20260417T212300/node_config/prompt.md +84 -0
devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red/prompt.md +16 -0
devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red_repair/prompt.md +15 -0
devflow_engine/prompts/integration/red/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
devflow_engine/prompts/integration/red/past_prompts/20260417T215032/node_config/prompt.md +84 -0
devflow_engine/prompts/integration/red/past_prompts/20260417T215032/red_repair/prompt.md +11 -0
devflow_engine/prompts/integration/red/red/prompt.md +11 -0
devflow_engine/prompts/integration/red/red_repair/prompt.md +12 -0
devflow_engine/prompts/integration/red_review/example.md +71 -0
devflow_engine/prompts/integration/red_review/node_config/prompt.md +41 -0
devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/node_config/prompt.md +41 -0
devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/red_review/prompt.md +15 -0
devflow_engine/prompts/integration/red_review/red_review/prompt.md +9 -0
devflow_engine/prompts/integration/resolve/example.md +111 -0
devflow_engine/prompts/integration/resolve/node_config/prompt.md +64 -0
devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/node_config/prompt.md +64 -0
devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_implicated_users/prompt.md +15 -0
devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_side_effects/prompt.md +15 -0
devflow_engine/prompts/integration/resolve/resolve_implicated_users/prompt.md +10 -0
devflow_engine/prompts/integration/resolve/resolve_side_effects/prompt.md +10 -0
devflow_engine/prompts/integration/validate/build_idea_acceptance_coverage/prompt.md +12 -0
devflow_engine/prompts/integration/validate/code_repair/prompt.md +13 -0
devflow_engine/prompts/integration/validate/example.md +143 -0
devflow_engine/prompts/integration/validate/node_config/prompt.md +87 -0
devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/node_config/prompt.md +67 -0
devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_enrich_gate/prompt.md +17 -0
devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_repair/prompt.md +16 -0
devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/node_config/prompt.md +67 -0
devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/validate_repair/prompt.md +9 -0
devflow_engine/prompts/integration/validate/validate_enrich_gate/prompt.md +10 -0
devflow_engine/prompts/integration/validate/validate_repair/prompt.md +20 -0
devflow_engine/prompts/integration/write_workflows/example.md +100 -0
devflow_engine/prompts/integration/write_workflows/node_config/prompt.md +44 -0
devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/node_config/prompt.md +44 -0
devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/write_workflows/prompt.md +17 -0
devflow_engine/prompts/integration/write_workflows/write_workflows/prompt.md +11 -0
devflow_engine/prompts/iterate/README.md +7 -0
devflow_engine/prompts/iterate/coder/prompt.md +11 -0
devflow_engine/prompts/iterate/framer/prompt.md +11 -0
devflow_engine/prompts/iterate/iterator/prompt.md +13 -0
devflow_engine/prompts/iterate/observer/prompt.md +11 -0
devflow_engine/prompts/recovery/diagnosis/prompt.md +7 -0
devflow_engine/prompts/recovery/execution/prompt.md +8 -0
devflow_engine/prompts/recovery/execution_verification/prompt.md +7 -0
devflow_engine/prompts/recovery/failure_investigation/prompt.md +10 -0
devflow_engine/prompts/recovery/preflight_health_repo_repair/prompt.md +8 -0
devflow_engine/prompts/recovery/remediation_execution/prompt.md +11 -0
devflow_engine/prompts/recovery/root_cause_investigation/prompt.md +12 -0
devflow_engine/prompts/scope_idea/doctrine/prompt.md +7 -0
devflow_engine/prompts/source_doc_eval/document/prompt.md +6 -0
devflow_engine/prompts/source_doc_eval/targeted_mutation/prompt.md +9 -0
devflow_engine/prompts/source_doc_mutation/domain_entities/prompt.md +6 -0
devflow_engine/prompts/source_doc_mutation/product_brief/prompt.md +6 -0
devflow_engine/prompts/source_doc_mutation/project_doc_coherence/prompt.md +7 -0
devflow_engine/prompts/source_doc_mutation/project_doc_render/prompt.md +9 -0
devflow_engine/prompts/source_doc_mutation/source_doc_coherence/prompt.md +5 -0
devflow_engine/prompts/source_doc_mutation/source_doc_enrichment_coherence/prompt.md +6 -0
devflow_engine/prompts/source_doc_mutation/user_workflows/prompt.md +6 -0
devflow_engine/prompts/source_scope/doctrine/prompt.md +10 -0
devflow_engine/prompts/ui_grounding/doctrine/prompt.md +7 -0
devflow_engine/recovery/__init__.py +3 -0
devflow_engine/recovery/dag.py +2609 -0
devflow_engine/recovery/models.py +220 -0
devflow_engine/refactor.py +93 -0
devflow_engine/registry/__init__.py +1 -0
devflow_engine/registry/cards.py +238 -0
devflow_engine/registry/domain_normalize.py +60 -0
devflow_engine/registry/effects.py +65 -0
devflow_engine/registry/enforce_report.py +150 -0
devflow_engine/registry/module_cards_classify.py +164 -0
devflow_engine/registry/module_cards_draft.py +184 -0
devflow_engine/registry/module_cards_gate.py +59 -0
devflow_engine/registry/packages.py +347 -0
devflow_engine/registry/pathways.py +323 -0
devflow_engine/review/__init__.py +11 -0
devflow_engine/review/dag.py +588 -0
devflow_engine/review/review_story.py +67 -0
devflow_engine/scope_idea/__init__.py +3 -0
devflow_engine/scope_idea/agentic.py +39 -0
devflow_engine/scope_idea/dag.py +1069 -0
devflow_engine/scope_idea/models.py +175 -0
devflow_engine/skills/builtins/devflow/queue_failure_investigation/SKILL.md +112 -0
devflow_engine/skills/builtins/devflow/queue_idea_to_story/SKILL.md +120 -0
devflow_engine/skills/builtins/devflow/queue_integration/SKILL.md +105 -0
devflow_engine/skills/builtins/devflow/queue_recovery/SKILL.md +108 -0
devflow_engine/skills/builtins/devflow/queue_runtime_core/SKILL.md +155 -0
devflow_engine/skills/builtins/devflow/queue_story_implementation/SKILL.md +122 -0
devflow_engine/skills/builtins/devin/idea_to_story_handoff/SKILL.md +120 -0
devflow_engine/skills/builtins/devin/ideation/SKILL.md +168 -0
devflow_engine/skills/builtins/devin/ideation/state-and-phrasing-reference.md +18 -0
devflow_engine/skills/builtins/devin/insight/SKILL.md +22 -0
devflow_engine/skills/registry.example.yaml +42 -0
devflow_engine/source_doc_assumptions.py +291 -0
devflow_engine/source_doc_mutation_dag.py +1606 -0
devflow_engine/source_doc_mutation_eval.py +417 -0
devflow_engine/source_doc_mutation_worker.py +25 -0
devflow_engine/source_docs_schema.py +207 -0
devflow_engine/source_docs_updater.py +309 -0
devflow_engine/source_scope/__init__.py +15 -0
devflow_engine/source_scope/agentic.py +45 -0
devflow_engine/source_scope/dag.py +1626 -0
devflow_engine/source_scope/models.py +177 -0
devflow_engine/stores/__init__.py +0 -0
devflow_engine/stores/execution_store.py +3534 -0
devflow_engine/story/__init__.py +0 -0
devflow_engine/story/contracts.py +160 -0
devflow_engine/story/discovery.py +47 -0
devflow_engine/story/evidence.py +118 -0
devflow_engine/story/hashing.py +27 -0
devflow_engine/story/implemented_queue_purge.py +148 -0
devflow_engine/story/indexer.py +105 -0
devflow_engine/story/io.py +20 -0
devflow_engine/story/markdown_contracts.py +298 -0
devflow_engine/story/reconciliation.py +408 -0
devflow_engine/story/validate_stories.py +149 -0
devflow_engine/story/validate_tests_story.py +512 -0
devflow_engine/story/validation.py +133 -0
devflow_engine/ui_grounding/__init__.py +11 -0
devflow_engine/ui_grounding/agentic.py +31 -0
devflow_engine/ui_grounding/dag.py +874 -0
devflow_engine/ui_grounding/models.py +224 -0
devflow_engine/ui_grounding/pencil_bridge.py +247 -0
devflow_engine/vendor/__init__.py +0 -0
devflow_engine/vendor/datalumina_genai/__init__.py +11 -0
devflow_engine/vendor/datalumina_genai/core/__init__.py +0 -0
devflow_engine/vendor/datalumina_genai/core/exceptions.py +9 -0
devflow_engine/vendor/datalumina_genai/core/nodes/__init__.py +0 -0
devflow_engine/vendor/datalumina_genai/core/nodes/agent.py +48 -0
devflow_engine/vendor/datalumina_genai/core/nodes/agent_streaming_node.py +26 -0
devflow_engine/vendor/datalumina_genai/core/nodes/base.py +89 -0
devflow_engine/vendor/datalumina_genai/core/nodes/concurrent.py +30 -0
devflow_engine/vendor/datalumina_genai/core/nodes/router.py +69 -0
devflow_engine/vendor/datalumina_genai/core/schema.py +72 -0
devflow_engine/vendor/datalumina_genai/core/task.py +52 -0
devflow_engine/vendor/datalumina_genai/core/validate.py +139 -0
devflow_engine/vendor/datalumina_genai/core/workflow.py +200 -0
devflow_engine/worker.py +1086 -0
devflow_engine/worker_guard.py +233 -0
devflow_engine-1.0.0.dist-info/METADATA +235 -0
devflow_engine-1.0.0.dist-info/RECORD +393 -0
devflow_engine-1.0.0.dist-info/WHEEL +4 -0
devflow_engine-1.0.0.dist-info/entry_points.txt +3 -0
devin/__init__.py +6 -0
devin/dag.py +58 -0
devin/dag_two_arm.py +138 -0
devin/devin_chat_scenario_catalog.json +588 -0
devin/devin_eval.py +677 -0
devin/nodes/__init__.py +0 -0
devin/nodes/ideation/__init__.py +0 -0
devin/nodes/ideation/node.py +195 -0
devin/nodes/ideation/playground.py +267 -0
devin/nodes/ideation/prompt.md +65 -0
devin/nodes/ideation/scenarios/continue_refinement.py +13 -0
devin/nodes/ideation/scenarios/continue_refinement_evals.py +18 -0
devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py +17 -0
devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py +16 -0
devin/nodes/ideation/scenarios/large_idea_split.py +4 -0
devin/nodes/ideation/scenarios/large_idea_split_evals.py +17 -0
devin/nodes/ideation/scenarios/source_documentation_added.py +4 -0
devin/nodes/ideation/scenarios/source_documentation_added_evals.py +16 -0
devin/nodes/ideation/scenarios/user_says_create_it.py +30 -0
devin/nodes/ideation/scenarios/user_says_create_it_evals.py +23 -0
devin/nodes/ideation/scenarios/vague_idea.py +16 -0
devin/nodes/ideation/scenarios/vague_idea_evals.py +47 -0
devin/nodes/ideation/tools.json +312 -0
devin/nodes/insight/__init__.py +0 -0
devin/nodes/insight/node.py +49 -0
devin/nodes/insight/playground.py +154 -0
devin/nodes/insight/prompt.md +61 -0
devin/nodes/insight/scenarios/architecture_pattern_query.py +15 -0
devin/nodes/insight/scenarios/architecture_pattern_query_evals.py +25 -0
devin/nodes/insight/scenarios/codebase_exploration.py +15 -0
devin/nodes/insight/scenarios/codebase_exploration_evals.py +23 -0
devin/nodes/insight/scenarios/devin_ideation_routing.py +19 -0
devin/nodes/insight/scenarios/devin_ideation_routing_evals.py +39 -0
devin/nodes/insight/scenarios/devin_insight_routing.py +20 -0
devin/nodes/insight/scenarios/devin_insight_routing_evals.py +40 -0
devin/nodes/insight/scenarios/operational_debugging.py +15 -0
devin/nodes/insight/scenarios/operational_debugging_evals.py +23 -0
devin/nodes/insight/scenarios/operational_question.py +9 -0
devin/nodes/insight/scenarios/operational_question_evals.py +8 -0
devin/nodes/insight/scenarios/queue_status.py +15 -0
devin/nodes/insight/scenarios/queue_status_evals.py +23 -0
devin/nodes/insight/scenarios/source_doc_explanation.py +14 -0
devin/nodes/insight/scenarios/source_doc_explanation_evals.py +21 -0
devin/nodes/insight/scenarios/worker_state_check.py +15 -0
devin/nodes/insight/scenarios/worker_state_check_evals.py +22 -0
devin/nodes/insight/tools.json +126 -0
devin/nodes/intake/__init__.py +0 -0
devin/nodes/intake/node.py +27 -0
devin/nodes/intake/playground.py +47 -0
devin/nodes/intake/prompt.md +12 -0
devin/nodes/intake/scenarios/ideation_routing.py +4 -0
devin/nodes/intake/scenarios/ideation_routing_evals.py +5 -0
devin/nodes/intake/scenarios/insight_routing.py +4 -0
devin/nodes/intake/scenarios/insight_routing_evals.py +5 -0
devin/nodes/iterate/README.md +44 -0
devin/nodes/iterate/__init__.py +1 -0
devin/nodes/iterate/_archived_design_stages/01-objectives-requirements.md +112 -0
devin/nodes/iterate/_archived_design_stages/02-evals.md +131 -0
devin/nodes/iterate/_archived_design_stages/03-tools-and-boundaries.md +110 -0
devin/nodes/iterate/_archived_design_stages/04-harness-and-playground.md +32 -0
devin/nodes/iterate/_archived_design_stages/05-prompt-deferred.md +11 -0
devin/nodes/iterate/_archived_design_stages/coder_agent_design/01-objectives-requirements.md +20 -0
devin/nodes/iterate/_archived_design_stages/coder_agent_design/02-evals.md +8 -0
devin/nodes/iterate/_archived_design_stages/coder_agent_design/03-tools-and-boundaries.md +14 -0
devin/nodes/iterate/_archived_design_stages/coder_agent_design/04-harness-and-playground.md +12 -0
devin/nodes/iterate/_archived_design_stages/framer_agent_design/01-objectives-requirements.md +20 -0
devin/nodes/iterate/_archived_design_stages/framer_agent_design/02-evals.md +8 -0
devin/nodes/iterate/_archived_design_stages/framer_agent_design/03-tools-and-boundaries.md +13 -0
devin/nodes/iterate/_archived_design_stages/framer_agent_design/04-harness-and-playground.md +12 -0
devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md +25 -0
devin/nodes/iterate/_archived_design_stages/iterator_agent_design/02-evals.md +9 -0
devin/nodes/iterate/_archived_design_stages/iterator_agent_design/03-tools-and-boundaries.md +14 -0
devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md +12 -0
devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md +20 -0
devin/nodes/iterate/_archived_design_stages/observer_agent_design/02-evals.md +8 -0
devin/nodes/iterate/_archived_design_stages/observer_agent_design/03-tools-and-boundaries.md +14 -0
devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md +13 -0
devin/nodes/iterate/agent-roles.md +89 -0
devin/nodes/iterate/agents/README.md +10 -0
devin/nodes/iterate/artifacts.md +504 -0
devin/nodes/iterate/contract.md +100 -0
devin/nodes/iterate/eval-plan.md +74 -0
devin/nodes/iterate/node.py +100 -0
devin/nodes/iterate/pipeline/README.md +13 -0
devin/nodes/iterate/playground-contract.md +76 -0
devin/nodes/iterate/prompt.md +11 -0
devin/nodes/iterate/scenarios/README.md +38 -0
devin/nodes/iterate/scenarios/artifact-and-loop-scenarios.md +101 -0
devin/nodes/iterate/scenarios/coder_artifact_alignment.py +32 -0
devin/nodes/iterate/scenarios/coder_artifact_alignment_evals.py +45 -0
devin/nodes/iterate/scenarios/coder_bounded_fix.py +27 -0
devin/nodes/iterate/scenarios/coder_bounded_fix_evals.py +45 -0
devin/nodes/iterate/scenarios/devin_iterate_routing.py +21 -0
devin/nodes/iterate/scenarios/devin_iterate_routing_evals.py +36 -0
devin/nodes/iterate/scenarios/framer_scope_boundary.py +25 -0
devin/nodes/iterate/scenarios/framer_scope_boundary_evals.py +57 -0
devin/nodes/iterate/scenarios/framer_task_framing.py +25 -0
devin/nodes/iterate/scenarios/framer_task_framing_evals.py +58 -0
devin/nodes/iterate/scenarios/iterate_error_fix.py +21 -0
devin/nodes/iterate/scenarios/iterate_error_fix_evals.py +39 -0
devin/nodes/iterate/scenarios/iterate_quick_change.py +21 -0
devin/nodes/iterate/scenarios/iterate_quick_change_evals.py +35 -0
devin/nodes/iterate/scenarios/iterate_to_idea_promotion.py +23 -0
devin/nodes/iterate/scenarios/iterate_to_idea_promotion_evals.py +53 -0
devin/nodes/iterate/scenarios/iterate_to_insight_reroute.py +23 -0
devin/nodes/iterate/scenarios/iterate_to_insight_reroute_evals.py +53 -0
devin/nodes/iterate/scenarios/observer_evidence_seam.py +28 -0
devin/nodes/iterate/scenarios/observer_evidence_seam_evals.py +55 -0
devin/nodes/iterate/scenarios/observer_repro_creation.py +28 -0
devin/nodes/iterate/scenarios/observer_repro_creation_evals.py +45 -0
devin/nodes/iterate/scenarios/routing-matrix.md +45 -0
devin/nodes/shared/__init__.py +0 -0
devin/nodes/shared/filemaker_expert.md +80 -0
devin/nodes/shared/filemaker_expert.py +354 -0
devin/nodes/shared/filemaker_expert_eval/runner.py +176 -0
devin/nodes/shared/filemaker_expert_eval/scenarios.json +65 -0
devin/nodes/shared/goldilocks_advisor_eval/runner.py +214 -0
devin/nodes/shared/goldilocks_advisor_eval/scenarios.json +58 -0
devin/nodes/shared/helpers.py +156 -0
devin/nodes/shared/idea_compliance_advisor_eval/runner.py +252 -0
devin/nodes/shared/idea_compliance_advisor_eval/scenarios.json +75 -0
devin/nodes/shared/models.py +44 -0
devin/nodes/shared/post.py +40 -0
devin/nodes/shared/router.py +107 -0
devin/nodes/shared/tools.py +191 -0
devin/shared/devin-chat-rubric.md +237 -0
devin/shared/devin-chat-scenario-suite.md +90 -0
devin/shared/eval_doctrine.md +9 -0

devin/nodes/ideation/node.py ADDED Viewed

@@ -0,0 +1,195 @@
+"""IdeationAgent — active planning arm of Devin chat DAG."""
+from __future__ import annotations
+from pathlib import Path
+from devflow_engine.devin2.pi_runner import run_devin2_pi_agent
+from devflow_engine.idea.sufficiency import extract_sufficient_idea
+from devflow_engine.vendor.datalumina_genai.core.nodes.base import Node
+from devflow_engine.vendor.datalumina_genai.core.task import TaskContext
+from devin.nodes.shared.helpers import (
+    dfs_node_running,
+    load_node_prompt_lines,
+    pipeline_root,
+    resolve_project_id,
+    store_run,
+    write_json,
+)
+from devin.nodes.shared.models import DevinAgentResponse
+def _prior_messages_as_text(prior_messages: list) -> str:
+    """Render prior conversation messages as newline-separated ``role: content`` lines.
+    Each message is a mapping. The speaker is read from ``'from'``, then
+    ``'role'``, then defaults to ``'unknown'``; the body is read from ``'text'``,
+    then ``'content'``, then defaults to an empty string. A list-valued body is
+    flattened by joining the ``'text'`` of its dict parts with single spaces
+    (non-dict parts are ignored).
+    Returns an empty string when there are no prior messages.
+    """
+    if not prior_messages:
+        return ''
+    lines = []
+    for msg in prior_messages:
+        # Chain with ``or`` so a key that is present but None/empty still falls
+        # through to the default instead of being rendered as "None".
+        role = msg.get('from') or msg.get('role') or 'unknown'
+        content = msg.get('text') or msg.get('content') or ''
+        if isinstance(content, list):
+            # A part may carry ``'text': None`` or a non-string value; coerce it
+            # so str.join cannot raise TypeError.
+            content = ' '.join(
+                str(part.get('text') or '') for part in content if isinstance(part, dict)
+            )
+        lines.append(f'{role}: {content}')
+    return '\n'.join(lines)
+def _build_context_for_ideation(input_payload: dict, project_id: str) -> dict:
+    """Build context_payload for the IdeationAgent from scenario input.
+    In real multi-turn usage, prior_messages carry context from earlier turns.
+    When _precreate_artifact is set, an idea artifact already exists and the
+    agent should commit it when the user says "create it" / "go ahead" etc.
+    Args:
+        input_payload: Scenario input; the keys ``_sufficiency``,
+            ``_precreate_artifact`` and ``prior_messages`` are read.
+        project_id: Accepted for interface compatibility; not read here.
+    Returns:
+        A dict with ``expected_status``, ``precreated_artifact`` and
+        ``prior_messages``.
+    """
+    sufficient_idea = input_payload.get('_sufficiency') or {}
+    # The contract is complete only when every required field is present; a
+    # merely non-empty ``_sufficiency`` dict is not enough. This mirrors the
+    # check IdeationAgentNode.process applies to the extracted idea.
+    required_fields = {'problem', 'target_users', 'user_outcomes', 'scope'}
+    has_contract = required_fields.issubset(sufficient_idea)
+    expected_status = 'ready_for_downstream' if has_contract else 'ideation_contract_response'
+    return {
+        'expected_status': expected_status,
+        'precreated_artifact': input_payload.get('_precreate_artifact', False),
+        'prior_messages': input_payload.get('prior_messages', []),
+    }
+class IdeationAgentNode(Node):
+    """DAG node that runs the Devin ideation agent for a single chat turn.
+    ``process`` records a node attempt in the execution store, decides the
+    expected response kind from the extracted idea contract (or an already
+    persisted ``idea.json``), invokes the PI agent, writes
+    ``ideation_response.json`` under the pipeline directory and publishes the
+    resulting payload on ``task_context.metadata``.
+    """
+    @staticmethod
+    def _load_persisted_idea(idea_json_path: Path) -> dict:
+        """Best-effort read of a persisted ``idea.json``; returns ``{}`` when unusable."""
+        import json
+        import logging
+        try:
+            loaded = json.loads(idea_json_path.read_text(encoding='utf-8'))
+        except (OSError, ValueError) as exc:
+            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+            # The turn still proceeds: the caller treats the file's existence as
+            # the contract, it just cannot inject the file's content.
+            logging.getLogger(__name__).warning(
+                'Could not load persisted idea from %s: %s', idea_json_path, exc
+            )
+            return {}
+        # Anything other than a JSON object cannot be used as the idea payload.
+        return loaded if isinstance(loaded, dict) else {}
+    async def process(self, task_context: TaskContext) -> TaskContext:
+        """Run the ideation agent for the current event and record its response.
+        Reads ``project_id``, ``prior_messages``, ``raw_text``, ``route`` and
+        ``insight_context`` from ``task_context.metadata`` and sets
+        ``response_guidance`` and ``agent_loop_terminal`` on it before
+        returning the same ``task_context``.
+        """
+        event = task_context.event
+        repo_root = Path(event.repo_root)
+        store, run_id = store_run()
+        # NOTE(review): if anything below raises, this attempt never reaches
+        # mark_node_finished — confirm whether the store expects a failure status.
+        node_exec_id = store.create_node_attempt(
+            run_id=run_id, node_id='ideation_agent', node_name='IdeationAgent', attempt=1
+        )
+        project_id = str(
+            task_context.metadata.get('project_id')
+            or resolve_project_id(repo_root, idea_id=event.idea_id)
+        )
+        dfs_node_running(
+            project_id=project_id,
+            run_id=run_id,
+            node_id='ideation_agent',
+            summary='Running Devin ideation agent',
+            idea_id=event.idea_id,
+        )
+        # Build sufficient_idea from current turn AND prior_messages so multi-turn context is available
+        prior_messages = task_context.metadata.get('prior_messages') or []
+        prior_text = _prior_messages_as_text(prior_messages)
+        raw_text = str(task_context.metadata.get('raw_text') or event.raw_text or '')
+        sufficient_idea = extract_sufficient_idea(raw_text, prior_text=prior_text if prior_text else None)
+        # Check if idea artifact already exists (persisted in prior turn via devflow_init_idea).
+        # If it exists, the agent can commit it regardless of text-extraction completeness —
+        # the artifact IS the contract. Load it so we can inject its content into the context.
+        idea_json_path = repo_root / '.devflow' / 'ideas' / event.idea_id / 'idea.json'
+        idea_exists = idea_json_path.exists()
+        persisted_idea: dict = self._load_persisted_idea(idea_json_path) if idea_exists else {}
+        # Determine if we have enough to commit: either the artifact already
+        # exists, or every contract field was extracted from the conversation.
+        required_fields = {'problem', 'target_users', 'user_outcomes', 'scope'}
+        has_contract = idea_exists or required_fields.issubset(sufficient_idea.keys())
+        expected_status = 'ready_for_downstream' if has_contract else 'ideation_contract_response'
+        # Build session_id for emit tools
+        session_id = f"idea:{project_id}:{event.idea_id}"
+        # Load node prompt from prompt.md (matches Insight pattern)
+        prompt_lines = load_node_prompt_lines(__file__)
+        # Turn-specific operational guidance (kept minimal in node.py; rest is in prompt.md)
+        guidance = prompt_lines + [
+            f"Return response_kind='{expected_status}'.",
+            'Treat inferred details as provisional when not explicitly provided.',
+            'Keep momentum — do not re-ask answered questions.',
+        ]
+        context_payload = {
+            'idea_id': event.idea_id,
+            'current_user_message': raw_text,
+            'route': task_context.metadata.get('route') or {},
+            'expected_status': expected_status,
+            'project_id': project_id,
+            'repo_root': str(repo_root),
+            'session_id': session_id,
+            # Additional context from scenario input (e.g., pre-fetched insight for eval)
+            'insight_context': task_context.metadata.get('insight_context') or '',
+            # When idea artifact already exists, inject its actual content so the agent
+            # operates on the real persisted idea, not garbled text-extracted fragments
+            'persisted_idea': persisted_idea if persisted_idea else None,
+            'idea_artifact_exists': idea_exists,
+            'idea_artifact_path': str(idea_json_path) if idea_exists else '',
+        }
+        result = run_devin2_pi_agent(
+            repo_root=repo_root,
+            stage_name='devin_ideation_response',
+            route_arm='ideation',
+            context_payload=context_payload,
+            operational_guidance=guidance,
+            output_model=DevinAgentResponse,
+            timeout_seconds=180,
+        )
+        model = DevinAgentResponse.model_validate(result.response_model.model_dump())
+        invocation_log_path = str(result.invocation.log_path) if result.invocation.log_path else None
+        # Re-extract sufficient_idea from the agent's response so fabricated content appears in output
+        fabricated_idea = extract_sufficient_idea(model.response_message or '', prior_text=None)
+        # Merge: use agent-fabricated fields, fall back to pre-computed input-based extraction
+        merged_sufficient = {**sufficient_idea}
+        for key in ['problem', 'target_users', 'user_outcomes', 'scope', 'assumptions']:
+            if fabricated_idea.get(key):
+                merged_sufficient[key] = fabricated_idea[key]
+        # Build response payload with idea artifact state. Existence is checked
+        # again after the agent run so the payload reflects the post-run state
+        # (presumably the agent's tools can create the artifact — confirm).
+        idea_exists = idea_json_path.exists()
+        pipeline_dir = pipeline_root(
+            repo_root, idea_id=event.idea_id, pipeline_key=event.pipeline_key
+        )
+        response_payload = {
+            'idea_id': event.idea_id,
+            'pipeline_dir': str(pipeline_dir),
+            'sufficient_idea': merged_sufficient,
+            'idea_artifact_exists': idea_exists,
+            'response_message': model.response_message,
+            'response_kind': model.response_kind,
+            'suggested_next_step': model.suggested_next_step,
+            'follow_up_questions': model.follow_up_questions,
+            'response_style_notes': model.style_notes,
+            'invocation_log_path': invocation_log_path,
+        }
+        out_path = pipeline_dir / 'ideation_response.json'
+        write_json(out_path, response_payload)
+        store.add_artifact(
+            run_id=run_id,
+            node_exec_id=node_exec_id,
+            kind='devin_ideation_response',
+            uri=str(out_path),
+            metadata={'response_kind': model.response_kind},
+        )
+        store.mark_node_finished(
+            node_exec_id=node_exec_id, status='succeeded', output=response_payload
+        )
+        task_context.metadata['response_guidance'] = response_payload
+        task_context.metadata['agent_loop_terminal'] = {
+            'status': model.response_kind,
+            **response_payload,
+        }
+        return task_context

devin/nodes/ideation/playground.py ADDED Viewed

@@ -0,0 +1,267 @@
+"""IdeationAgent playground — run scenarios against real Spicy-Server project with real PI.
+Usage:
+  python3 -m devin.nodes.ideation.playground --real-pi --project proj_75f63d30 --repo-root /Users/devflow/repos/Spicy-Server
+This runs the real IdeationAgentNode against the Spicy-Server DevFlow project,
+exercising the full PI harness with devflow-tools.ts extension enabled.
+"""
+from __future__ import annotations
+import argparse
+import asyncio
+import importlib.util
+import json
+from pathlib import Path
+from devin.nodes.ideation.node import IdeationAgentNode
+from devin.nodes.shared.models import DevinAgentResponse, DevinChatDagEvent, ScenarioResult
+from devflow_engine.vendor.datalumina_genai.core.task import TaskContext
+SCENARIO_DIR = Path(__file__).with_name('scenarios')
+def _parse_tool_calls_from_log(log_path: str | None) -> list[str]:
+    """Extract tool call names from a PI JSONL log file.
+    The log is JSONL where each line has {"line": ..., "stream": "stderr"|"stdout", "ts": ...}.
+    We scan for lines containing tool-use markers (invoke calls in PI output)
+    and return a deduplicated list of tool names called.
+    """
+    if not log_path:
+        return []
+    import json
+    tool_names: list[str] = []
+    seen = set()
+    try:
+        with open(log_path, encoding='utf-8') as fh:
+            for line in fh:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    entry = json.loads(line)
+                except Exception:
+                    continue
+                text = entry.get('line', '')
+                # PI devin tool invocations appear in stdout as structured JSON blocks
+                # or in the raw output text. Look for tool name patterns.
+                # Common patterns: "Using tool: X" or "invoke":{"name":"X"} or tool name prefixes
+                for tool in (
+                    'devflow_init_idea', 'devflow_amend_idea', 'devflow_commit_idea',
+                    'idea_compliance_check', 'goldilocks_check', 'devin_insight',
+                    'devflow_read_project_config', 'devflow_read_queue_summary',
+                    'devflow_read_story_queue', 'devflow_read_worker_state',
+                    'emit_start_working', 'emit_stop_working', 'emit_response',
+                ):
+                    if tool in text and tool not in seen:
+                        seen.add(tool)
+                        tool_names.append(tool)
+    except Exception:
+        pass
+    return tool_names
+def _load_module(path: Path):
+    spec = importlib.util.spec_from_file_location(path.stem, path)
+    module = importlib.util.module_from_spec(spec)
+    assert spec and spec.loader
+    spec.loader.exec_module(module)
+    return module
def _collect_scenarios():
    """Load every scenario module in SCENARIO_DIR together with its eval module.

    A scenario file ``foo.py`` is only picked up when a sibling
    ``foo_evals.py`` exists; files whose name ends in ``_evals.py`` are never
    treated as scenarios themselves.

    Returns:
        A list of ``(scenario_module, eval_module)`` tuples, in sorted path order.
    """
    collected = []
    candidates = (
        candidate
        for candidate in sorted(SCENARIO_DIR.glob('*.py'))
        if not candidate.name.endswith('_evals.py')
    )
    for scenario_file in candidates:
        evals_file = scenario_file.with_name(f'{scenario_file.stem}_evals.py')
        if not evals_file.exists():
            continue
        # Scenario first, then its evals: same load order as the pair itself.
        scenario_mod = _load_module(scenario_file)
        evals_mod = _load_module(evals_file)
        collected.append((scenario_mod, evals_mod))
    return collected
+def _precreate_idea_artifact(repo_root: Path, idea_id: str) -> None:
+    """Pre-create a commit-ready idea artifact with drafts/current/manifest.json."""
+    import json
+    idea_dir = repo_root / '.devflow' / 'ideas' / idea_id
+    idea_dir.mkdir(parents=True, exist_ok=True)
+    artifact = {
+        'idea_id': idea_id,
+        'title': 'Client Onboarding Workflow',
+        'problem': 'Small professional services firms lose revenue and reputation when client onboarding is slow and inconsistent.',
+        'target_users': ['Internal client-facing staff (paralegals, assistants, admin) who manage the onboarding workflow'],
+        'user_outcomes': [
+            'Clients feel welcomed promptly',
+            'Staff spend less time chasing documents',
+            'No new client starts with confusion',
+        ],
+        'scope': 'Onboarding workflow system with document collection, task tracking, client portal, and status notifications.',
+        'assumptions': ['Small professional services firms with 15-20 new clients per month'],
+        'status': 'shaped',
+    }
+    (idea_dir / 'idea.json').write_text(json.dumps(artifact, indent=2) + '\n', encoding='utf-8')
+    # Also create the drafts/current/manifest.json so devflow idea promote --draft-set current works
+    drafts_dir = idea_dir / 'drafts' / 'current'
+    drafts_dir.mkdir(parents=True, exist_ok=True)
+    manifest = {
+        'idea_id': idea_id,
+        'draft_set_id': 'current',
+        'created_at': '2026-04-22T00:00:00Z',
+        'artifact_path': str(idea_dir / 'idea.json'),
+        'status': 'shaped',
+    }
+    (drafts_dir / 'manifest.json').write_text(json.dumps(manifest, indent=2) + '\n', encoding='utf-8')
async def _run_scenario_real_pi(scenario_module, eval_module, repo_root: Path, project_id: str) -> ScenarioResult:
    """Run one scenario through the real IdeationAgentNode and score the result.

    Args:
        scenario_module: Module exposing ``SCENARIO_NAME``, ``INPUT_PAYLOAD``
            and ``EXPECTED_BEHAVIOR``.
        eval_module: Module exposing ``evaluate(actual_output)`` that returns
            ``(passed, notes)``.
        repo_root: Repository whose ``.devflow`` directory holds the idea
            artifacts and the ``execution.sqlite`` store used for the run.
        project_id: DevFlow project id recorded in the run config and in the
            node metadata; also used to derive a default idea id.

    Returns:
        A ScenarioResult. An exception raised while the node processes the
        context, or raised by the evaluator, is reported as a failed result
        instead of being propagated.
    """
    import uuid

    from devin.nodes.shared.helpers import set_runtime_store
    from devflow_engine.stores.execution_store import ExecutionStore

    input_payload = dict(scenario_module.INPUT_PAYLOAD)
    expected = dict(scenario_module.EXPECTED_BEHAVIOR)
    idea_id = input_payload.get('idea_id', f'{project_id}_scenario_{scenario_module.SCENARIO_NAME}')
    raw_text = input_payload.get('current_user_message', '')
    # Pre-create idea artifact if scenario requires it (mirrors real multi-turn init)
    if input_payload.get('_precreate_artifact'):
        _precreate_idea_artifact(repo_root, idea_id)
    event = DevinChatDagEvent(
        repo_root=str(repo_root),
        idea_id=idea_id,
        raw_text=raw_text,
        pipeline_key=f'scenario-{scenario_module.SCENARIO_NAME}',
    )
    metadata = {
        'raw_text': raw_text,
        'route': {'route_arm': 'ideation'},
        'project_id': project_id,
        'prior_messages': input_payload.get('prior_messages', []),
    }
    task_context = TaskContext(event=event, metadata=metadata)
    # Set up runtime store for this scenario
    db_path = repo_root / '.devflow' / 'execution.sqlite'
    store = ExecutionStore(db_path=db_path)
    run_id = store.create_run(
        dag_id='devin_chat_dag',
        dag_version='1.0',
        root_correlation_id=str(uuid.uuid4()),
        config={'project_id': project_id, 'scenario': scenario_module.SCENARIO_NAME},
    )
    set_runtime_store(store, run_id)
    # The store is reset in ``finally`` so that every exit path - success,
    # handled failure, node construction error or task cancellation - leaves
    # no scenario-specific store installed for the next scenario.
    try:
        node = IdeationAgentNode(task_context=None)
        try:
            result_ctx = await node.process(task_context)
            response_guidance = result_ctx.metadata.get('response_guidance', {})
            agent_terminal = result_ctx.metadata.get('agent_loop_terminal', {})
        except Exception as exc:
            return ScenarioResult(
                scenario_name=scenario_module.SCENARIO_NAME,
                passed=False,
                actual_output={'error': str(exc)},
                expected_behavior=expected,
                notes=[f'Node raised exception: {exc}'],
            )
    finally:
        set_runtime_store(None, None)
    actual_output = {
        'response_message': response_guidance.get('response_message', ''),
        # Fall back to the terminal status when the guidance has no kind.
        'response_kind': response_guidance.get('response_kind', agent_terminal.get('status', '')),
        'suggested_next_step': response_guidance.get('suggested_next_step', ''),
        'follow_up_questions': response_guidance.get('follow_up_questions', []),
        'style_notes': response_guidance.get('response_style_notes', []),
        'sufficient_idea': response_guidance.get('sufficient_idea', {}),
        'idea_artifact_exists': response_guidance.get('idea_artifact_exists', False),
        'tool_calls': _parse_tool_calls_from_log(response_guidance.get('invocation_log_path')),
    }
    try:
        passed, notes = eval_module.evaluate(actual_output)
    except Exception as exc:
        passed = False
        notes = [f'Eval raised exception: {exc}']
    return ScenarioResult(
        scenario_name=scenario_module.SCENARIO_NAME,
        passed=passed,
        actual_output=actual_output,
        expected_behavior=expected,
        notes=notes,
    )
def run_all_real_pi(repo_root: Path, project_id: str) -> tuple[list[ScenarioResult], list[str]]:
    """Run all scenarios with real PI against the given project.

    Each scenario is executed in its own event loop via ``asyncio.run`` and a
    PASS/FAIL line is printed as soon as it finishes.

    Args:
        repo_root: Repository root passed through to every scenario run.
        project_id: DevFlow project id passed through to every scenario run.

    Returns:
        A ``(results, created_ideas)`` tuple: the ScenarioResult of every
        scenario in execution order, and the idea id used by each scenario
        (one entry per scenario, for later cleanup).
    """
    results: list[ScenarioResult] = []
    created_ideas: list[str] = []
    for scenario_mod, eval_mod in _collect_scenarios():
        print(f"Running scenario: {scenario_mod.SCENARIO_NAME} ...", flush=True)
        input_payload = dict(scenario_mod.INPUT_PAYLOAD)
        # Same default as _run_scenario_real_pi uses, so the recorded id
        # matches the idea directory the scenario actually works on.
        idea_id = input_payload.get('idea_id', f'{project_id}_scenario_{scenario_mod.SCENARIO_NAME}')
        created_ideas.append(idea_id)
        result = asyncio.run(_run_scenario_real_pi(scenario_mod, eval_mod, repo_root, project_id))
        results.append(result)
        status = "PASS" if result.passed else "FAIL"
        print(f"  → {status}", flush=True)
    return results, created_ideas
def cleanup_test_artifacts(repo_root: Path, created_ideas: list[str]) -> None:
    """Delete the idea directories produced during an eval run and report them.

    For every id in *created_ideas*, the directory
    ``<repo_root>/.devflow/ideas/<id>`` is removed recursively if it exists.
    A summary line and one line per removed id are printed to stdout.

    Args:
        repo_root: Repository root containing the ``.devflow`` directory.
        created_ideas: Idea ids whose directories should be removed; ids with
            no directory on disk are skipped.
    """
    import shutil

    ideas_root = repo_root / '.devflow' / 'ideas'
    removed = []
    for idea_id in created_ideas:
        target = ideas_root / idea_id
        if not target.exists():
            continue
        shutil.rmtree(target)
        removed.append(idea_id)
    print(f"\nCleanup: removed {len(removed)} test idea artifacts")
    for removed_id in removed:
        print(f"  removed: {removed_id}")
if __name__ == '__main__':
    # Script entry point: run every scenario against the real PI harness,
    # print a human-readable summary, clean up the idea artifacts that were
    # created, then dump the full results as JSON.
    import sys

    parser = argparse.ArgumentParser(description='IdeationAgent playground — real PI against Spicy-Server')
    parser.add_argument('--real-pi', action='store_true', help='Run with real PI harness')
    parser.add_argument('--project', type=str, default='proj_75f63d30', help='DevFlow project ID')
    parser.add_argument('--repo-root', type=str, default='/Users/devflow/repos/Spicy-Server', help='Repo root path')
    args = parser.parse_args()
    if not args.real_pi:
        # Only the real-PI mode is implemented; refuse to run without the flag.
        print("Use --real-pi to run against actual PI harness", file=sys.stderr)
        # sys.exit instead of the interactive-only ``exit`` helper from ``site``.
        sys.exit(1)
    repo_root = Path(args.repo_root)
    project_id = args.project
    print(f"Running IdeationAgent eval against Spicy-Server ({project_id})")
    print(f"Repo root: {repo_root}")
    print("-" * 60)
    results, created_ideas = run_all_real_pi(repo_root, project_id)
    print("-" * 60)
    print(f"\nResults: {sum(1 for r in results if r.passed)}/{len(results)} passed\n")
    for result in results:
        status = "PASS" if result.passed else "FAIL"
        print(f"  [{status}] {result.scenario_name}")
        for note in result.notes:
            print(f"        note: {note}")
    # Always clean up test artifacts
    cleanup_test_artifacts(repo_root, created_ideas)
    print("\n" + json.dumps([item.model_dump() for item in results], indent=2, sort_keys=True))

devin/nodes/ideation/prompt.md ADDED Viewed

@@ -0,0 +1,65 @@
+# Devin Ideation Agent
+You help users define and refine development work, then commit approved ideas to the pipeline.
+## Job
+1. **Map the existing codebase first** — call `devin_insight` to investigate existing patterns before shaping any idea for a project with code. Do not guess at conventions.
+2. **Shape the idea** — extract or fabricate problem, target_users, user_outcomes, scope, and assumptions.
+3. **Commit when the user approves** — "create it", "do it", "go ahead", "ship it" means return `ready_for_downstream` immediately using the idea already shaped in the conversation. Do not re-ask.
+4. **Ask one targeted question only** when ambiguity materially changes the solution and cannot be assumed.
+## Codebase reading
+All filesystem inspection goes through `devin_insight`. Do not use read, grep, find, or cat on the codebase.
+## Fabricating from vague input (no code/source references)
+- Produce a complete idea artifact: problem, target_users, user_outcomes, scope, assumptions
+- Persist it via `Devflow_Init_Idea` — not just text
+- Trace your assumptions in the assumptions field
+- Return `ideation_contract_response` with one follow-up question validating a key assumption
+**Exception**: if the user's message mentions code files, repo paths, function names, or explicit codebase references — investigate with `devin_insight` first, then shape around what you find. Do not fabricate.
+## Multi-turn context
+When `prior_messages` is present, use it to understand what has already been agreed upon. If the user says "create it" and the idea was already shaped across prior turns, commit it immediately.
+## Committing an existing artifact
+When `persisted_idea` is provided in the context, the idea artifact already exists on disk at `idea_artifact_path`. Use it directly — do not re-ask about it. Call `Devflow_Commit_Idea` with the `idea_id` from the context and `draft_set='current'` to commit it.
+## Ambiguous "create it" with multiple in-flight ideas
+If the user says "create it" but there are multiple uncommitted ideas and it's unclear which one is meant:
+1. Call `devflow_read_project_config` or `devflow_read_queue_summary` to list active ideas in flight.
+2. Ask one clarifying question naming the ideas to let the user pick.
+Do not guess.
+## Devflow tools
+- `Devflow_Init_Idea` — persist a new idea artifact
+- `Devflow_Amend_Idea` — refine an existing idea
+- `Devflow_Commit_Idea` — promote an approved idea
+- `devflow_read_project_config` / `devflow_read_queue_summary` / `devflow_read_worker_state` — inspect state
+## Emit tools
+- `Emit_Start_Working` before a devflow call
+- `Emit_Stop_Working` after it completes
+- `Emit_Response` for mid-turn progress
+Use `session_id` from context exactly as provided.
+## Output
+Return JSON with:
+- `response_message`: compact reply
+- `response_kind`: `ideation_contract_response` | `ready_for_downstream` | `needs_clarification`
+- `suggested_next_step`: what to do next
+- `follow_up_questions`: at most one question
+- `style_notes`: optional
+`ideation_contract_response` = idea not complete; `ready_for_downstream` = user approved and idea is committed.

devin/nodes/ideation/scenarios/continue_refinement.py ADDED Viewed

@@ -0,0 +1,13 @@
# Scenario: the user changes two earlier decisions of an idea that is already
# being shaped (batch -> real-time, plus mobile access).
SCENARIO_NAME = 'continue_refinement'
SCENARIO_DESCRIPTION = 'User refines an existing idea — agent explains conversational shape change.'

# Payload fed to the playground runner; 'prior_messages' is the conversation
# so far, with the assistant turn stored as a JSON-encoded string.
INPUT_PAYLOAD = {
    'current_user_message': 'Actually, let us make it real-time instead of batch processing. Also we need mobile access.',
    'idea_id': 'proj_75f63d30_ideas_notify_001',
    'project_id': 'proj_75f63d30',
    'prior_messages': [
        {
            'role': 'user',
            'content': 'Add a notification system to the client portal.',
        },
        {
            'role': 'assistant',
            'content': '{"response_kind": "ideation_contract_response", "response_message": "Great — adding a notification system to the client portal. I need one clarifying question before shaping this: should notifications be real-time (push/live) or batch (daily/periodic digest)?", "follow_up_questions": ["Should notifications be real-time (push/live) or batch (daily/periodic digest)?"], "sufficiency_quotient": 0.4}',
        },
        {
            'role': 'user',
            'content': 'Batch is fine for now, just needs to be reliable.',
        },
    ],
}

# Behaviour the agent is expected to show for this scenario.
EXPECTED_BEHAVIOR = {
    'response_kind': 'ideation_contract_response',
    'conversationally_explains_delta': True,
    'incorporates_all_changes': True,
    'mentions_ddr_persists': True,
    'no_re_asking': True,
}

devin/nodes/ideation/scenarios/continue_refinement_evals.py ADDED Viewed

@@ -0,0 +1,18 @@
# Declared criteria for the 'continue_refinement' scenario. Only the
# "explains the delta" criterion is enforced by evaluate() below.
EVAL_CRITERIA = {
    'response_kind_must_be': 'ideation_contract_response',
    'conversationally_explains_delta': True,
    'incorporates_all_changes': True,
    'mentions_ddr_persists': True,
    'no_re_asking': True,
}


def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Check that the response talks about the changes the user asked for.

    The check is a keyword heuristic on the lower-cased ``response_message``:
    at least one of the change-related words must appear.

    Args:
        actual_output: Scenario output; only ``response_message`` is read, and
            a missing or ``None`` message is treated as empty.

    Returns:
        ``(passed, notes)``. ``passed`` is False exactly when a failure note
        was recorded, so the verdict and the notes can never disagree.
    """
    notes: list[str] = []
    msg = str(actual_output.get('response_message') or '').lower()
    # Should explain how the changes reshape the idea - check it mentions the changes
    delta_markers = ('real-time', 'mobile', 'instead', 'batch', 'overhaul')
    if not any(marker in msg for marker in delta_markers):
        notes.append('does not conversationally explain the delta changes')
    # TODO(review): 'no_re_asking' and 'mentions_ddr_persists' are declared in
    # EVAL_CRITERIA but have no check yet.
    ok = not notes
    return ok, notes

devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py ADDED Viewed

@@ -0,0 +1,17 @@
# Scenario: a fresh idea that should be shaped around conventions the target
# codebase already has.
SCENARIO_NAME = 'idea_fits_existing_patterns'
SCENARIO_DESCRIPTION = 'User provides an idea and agent fits it into existing codebase patterns.'

INPUT_PAYLOAD = {
    'current_user_message': 'Add a notification system to the client portal.',
    'idea_id': 'proj_75f63d30_ideas_notify_002',
    'project_id': 'proj_75f63d30',
    'repo_root': '/Users/devflow/repos/Spicy-Server',
    'codebase_patterns_exist': True,
    # Inline insight context so the agent doesn't need to call devin_insight (which may fail
    # in eval due to --no-tools constraints on the PI subprocess)
    'insight_context': (
        'Spicy-Server uses FastAPI + Pydantic for APIs, JWT auth, PostgreSQL via Supabase. '
        'Existing notification patterns: none yet — this would be the first real-time system. '
        'Auth pattern: JWT Bearer tokens. DB: Supabase Postgres. '
        'API conventions: routers in app/routers/, schemas in app/schemas/, '
        'service layer in app/services/. No existing WebSocket infrastructure.'
    ),
}

# Behaviour the agent is expected to show for this scenario.
EXPECTED_BEHAVIOR = {
    'response_kind': 'ideation_contract_response',
    'maps_to_existing_patterns': True,
    'references_codebase_conventions': True,
    'avoids_reinventing': True,
}

devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py ADDED Viewed

@@ -0,0 +1,16 @@
# Declared criteria for the 'idea_fits_existing_patterns' scenario.
EVAL_CRITERIA = {
    'response_kind_must_be': 'ideation_contract_response',
    'maps_to_existing_patterns': True,
    'references_codebase_conventions': True,
    'avoids_reinventing': True,
}


def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Check that the response refers to patterns already in the codebase.

    The check is a keyword heuristic on the lower-cased ``response_message``;
    a missing or ``None`` message is treated as empty.

    Returns:
        ``(True, [])`` when any pattern-related keyword is present, otherwise
        ``(False, [note])`` with a single explanatory note.
    """
    text = str(actual_output.get('response_message') or '').lower()
    # Must reference existing patterns in the codebase
    pattern_markers = ('existing', 'pattern', 'already', 'convention', 'structure', 'similar', 'current')
    if any(marker in text for marker in pattern_markers):
        return True, []
    return False, ['does not reference existing codebase patterns']

devin/nodes/ideation/scenarios/large_idea_split.py ADDED Viewed

@@ -0,0 +1,4 @@
# Scenario: one very broad request that should be broken into several
# separately tracked ideas.
SCENARIO_NAME = 'large_idea_split'
SCENARIO_DESCRIPTION = 'User gives a large idea — agent splits into manageable shapes tracked separately.'

# No 'idea_id' is given here, so the runner derives one from the project id
# and the scenario name.
INPUT_PAYLOAD = {
    'current_user_message': 'We need a complete overhaul of how we manage projects end-to-end from first contact through delivery.',
}

# Behaviour the agent is expected to show for this scenario.
EXPECTED_BEHAVIOR = {
    'response_kind': 'ideation_contract_response',
    'splits_into_manageable_shapes': True,
    'tracks_each_separately': True,
    'maintains_global_integrated_conversation': True,
    'number_of_sub_ideas': 3,
}

devin/nodes/ideation/scenarios/large_idea_split_evals.py ADDED Viewed

@@ -0,0 +1,17 @@
# Declared criteria for the 'large_idea_split' scenario. Only the
# "indicates a split" criterion is enforced by evaluate() below.
EVAL_CRITERIA = {
    'response_kind_must_be': 'ideation_contract_response',
    'splits_into_manageable_shapes': True,
    'tracks_each_separately': True,
    'maintains_global_integrated_conversation': True,
    'number_of_sub_ideas': 3,
}


def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Check that the response indicates the idea is being split up.

    The check is a keyword heuristic on the lower-cased ``response_message``:
    at least one split-related word must appear.

    Args:
        actual_output: Scenario output; only ``response_message`` is read, and
            a missing or ``None`` message is treated as empty.

    Returns:
        ``(passed, notes)``. ``passed`` is False exactly when a failure note
        was recorded, so the verdict and the notes can never disagree.
    """
    notes: list[str] = []
    # Should produce sub-idea tracking
    # Look for indication of splitting in response
    msg = str(actual_output.get('response_message') or '').lower()
    split_markers = ('split', 'break', 'part', 'phase', 'stage', 'first', 'second')
    if not any(marker in msg for marker in split_markers):
        notes.append('does not indicate splitting the idea into manageable shapes')
    ok = not notes
    return ok, notes

devin/nodes/ideation/scenarios/source_documentation_added.py ADDED Viewed

@@ -0,0 +1,4 @@
# Scenario: the user supplies source documentation (a database schema) that
# the idea should be reshaped around.
SCENARIO_NAME = 'source_documentation_added'
SCENARIO_DESCRIPTION = 'User adds source docs and agent reshapes the idea based on that documentation.'

# No 'idea_id' is given here, so the runner derives one from the project id
# and the scenario name.
INPUT_PAYLOAD = {
    'current_user_message': 'Here is the current database schema. Use it to shape the onboarding workflow idea.',
    'has_ddr_artifacts': True,
}

# Behaviour the agent is expected to show for this scenario.
EXPECTED_BEHAVIOR = {
    'response_kind': 'ideation_contract_response',
    'uses_documentation_to_reshape': True,
    'ddr_persists': True,
    'references_specific_doc_elements': True,
}

devin/nodes/ideation/scenarios/source_documentation_added_evals.py ADDED Viewed

@@ -0,0 +1,16 @@
# Declared criteria for the 'source_documentation_added' scenario. Only the
# "references doc elements" criterion is enforced by evaluate() below.
EVAL_CRITERIA = {
    'response_kind_must_be': 'ideation_contract_response',
    'uses_documentation_to_reshape': True,
    'ddr_persists': True,
    'references_specific_doc_elements': True,
}


def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Check that the response refers to the supplied schema/documentation.

    The check is a keyword heuristic on the lower-cased ``response_message``:
    at least one documentation-related word must appear.

    Args:
        actual_output: Scenario output; only ``response_message`` is read, and
            a missing or ``None`` message is treated as empty.

    Returns:
        ``(passed, notes)``. ``passed`` is False exactly when a failure note
        was recorded, so the verdict and the notes can never disagree.
    """
    notes: list[str] = []
    msg = str(actual_output.get('response_message') or '').lower()
    # Should reference the schema or documentation specifically
    # DDR should persist (mentioned in output)
    doc_markers = ('schema', 'database', 'document', 'ddr', 'structure')
    if not any(marker in msg for marker in doc_markers):
        notes.append('does not reference specific doc elements')
    ok = not notes
    return ok, notes