devflow-engine 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devflow_engine/__init__.py +3 -0
- devflow_engine/agentic_prompts.py +100 -0
- devflow_engine/agentic_runtime.py +398 -0
- devflow_engine/api_key_flow_harness.py +539 -0
- devflow_engine/api_keys.py +357 -0
- devflow_engine/bootstrap/__init__.py +2 -0
- devflow_engine/bootstrap/provision_from_template.py +84 -0
- devflow_engine/cli/__init__.py +0 -0
- devflow_engine/cli/app.py +7270 -0
- devflow_engine/core/__init__.py +0 -0
- devflow_engine/core/config.py +86 -0
- devflow_engine/core/logging.py +29 -0
- devflow_engine/core/paths.py +45 -0
- devflow_engine/core/toml_kv.py +33 -0
- devflow_engine/devflow_event_worker.py +1292 -0
- devflow_engine/devflow_state.py +201 -0
- devflow_engine/devin2/__init__.py +9 -0
- devflow_engine/devin2/agent_definition.py +120 -0
- devflow_engine/devin2/pi_runner.py +204 -0
- devflow_engine/devin_orchestration.py +69 -0
- devflow_engine/docs/prompts/anti-patterns.md +42 -0
- devflow_engine/docs/prompts/devin-agent-prompt.md +55 -0
- devflow_engine/docs/prompts/devin2-agent-prompt.md +81 -0
- devflow_engine/docs/prompts/examples/devin-vapi-clone-reference-exchange.json +85 -0
- devflow_engine/doctor/__init__.py +2 -0
- devflow_engine/doctor/triage.py +140 -0
- devflow_engine/error/__init__.py +0 -0
- devflow_engine/error/remediation.py +21 -0
- devflow_engine/errors/error_solver_dag.py +522 -0
- devflow_engine/errors/runtime_observability.py +67 -0
- devflow_engine/idea/__init__.py +4 -0
- devflow_engine/idea/actors.py +481 -0
- devflow_engine/idea/agentic.py +465 -0
- devflow_engine/idea/analyze.py +93 -0
- devflow_engine/idea/devin_chat_dag.py +1 -0
- devflow_engine/idea/diff.py +99 -0
- devflow_engine/idea/drafts.py +446 -0
- devflow_engine/idea/idea_creation_dag.py +643 -0
- devflow_engine/idea/ideation_enrichment.py +355 -0
- devflow_engine/idea/ideation_enrichment_worker.py +19 -0
- devflow_engine/idea/paths.py +28 -0
- devflow_engine/idea/promote.py +53 -0
- devflow_engine/idea/redaction.py +27 -0
- devflow_engine/idea/repo_tools.py +1277 -0
- devflow_engine/idea/response_mode.py +30 -0
- devflow_engine/idea/story_pipeline.py +1585 -0
- devflow_engine/idea/sufficiency.py +376 -0
- devflow_engine/idea/traditional_stories.py +1257 -0
- devflow_engine/implementation/__init__.py +0 -0
- devflow_engine/implementation/alembic_preflight.py +700 -0
- devflow_engine/implementation/dag.py +8450 -0
- devflow_engine/implementation/green_gate.py +93 -0
- devflow_engine/implementation/prompts.py +108 -0
- devflow_engine/implementation/test_runtime.py +623 -0
- devflow_engine/integration/__init__.py +19 -0
- devflow_engine/integration/agentic.py +66 -0
- devflow_engine/integration/dag.py +3539 -0
- devflow_engine/integration/prompts.py +114 -0
- devflow_engine/integration/supabase_schema.sql +31 -0
- devflow_engine/integration/supabase_sync.py +177 -0
- devflow_engine/llm/__init__.py +1 -0
- devflow_engine/llm/cli_one_shot.py +84 -0
- devflow_engine/llm/cli_stream.py +371 -0
- devflow_engine/llm/execution_context.py +26 -0
- devflow_engine/llm/invoke.py +1322 -0
- devflow_engine/llm/provider_api.py +304 -0
- devflow_engine/llm/repo_knowledge.py +588 -0
- devflow_engine/llm_primitives.py +315 -0
- devflow_engine/orchestration.py +62 -0
- devflow_engine/planning/__init__.py +0 -0
- devflow_engine/planning/analyze_repo.py +92 -0
- devflow_engine/planning/render_drafts.py +133 -0
- devflow_engine/playground/__init__.py +0 -0
- devflow_engine/playground/hooks.py +26 -0
- devflow_engine/playwright_workflow/__init__.py +5 -0
- devflow_engine/playwright_workflow/dag.py +1317 -0
- devflow_engine/process/__init__.py +5 -0
- devflow_engine/process/dag.py +59 -0
- devflow_engine/project_registration/__init__.py +3 -0
- devflow_engine/project_registration/dag.py +1581 -0
- devflow_engine/project_registry.py +109 -0
- devflow_engine/prompts/devin/generic/prompt.md +6 -0
- devflow_engine/prompts/devin/ideation/prompt.md +263 -0
- devflow_engine/prompts/devin/ideation/scenarios.md +5 -0
- devflow_engine/prompts/devin/ideation_loop/prompt.md +6 -0
- devflow_engine/prompts/devin/insight/prompt.md +11 -0
- devflow_engine/prompts/devin/insight/scenarios.md +5 -0
- devflow_engine/prompts/devin/intake/prompt.md +15 -0
- devflow_engine/prompts/devin/iterate/prompt.md +12 -0
- devflow_engine/prompts/devin/shared/eval_doctrine.md +9 -0
- devflow_engine/prompts/devin/shared/principles.md +246 -0
- devflow_engine/prompts/devin_eval/assessment/prompt.md +18 -0
- devflow_engine/prompts/idea/api_ideation_agent/prompt.md +8 -0
- devflow_engine/prompts/idea/api_insight_agent/prompt.md +8 -0
- devflow_engine/prompts/idea/response_doctrine/prompt.md +18 -0
- devflow_engine/prompts/implementation/dependency_assessment/prompt.md +12 -0
- devflow_engine/prompts/implementation/green/green/prompt.md +11 -0
- devflow_engine/prompts/implementation/green/node_config/prompt.md +3 -0
- devflow_engine/prompts/implementation/green_review/outcome_review/prompt.md +5 -0
- devflow_engine/prompts/implementation/green_review/prior_run_review/prompt.md +5 -0
- devflow_engine/prompts/implementation/red/prompt.md +27 -0
- devflow_engine/prompts/implementation/redreview/prompt.md +23 -0
- devflow_engine/prompts/implementation/redreview_repair/prompt.md +16 -0
- devflow_engine/prompts/implementation/setupdoc/prompt.md +10 -0
- devflow_engine/prompts/implementation/story_planning/prompt.md +13 -0
- devflow_engine/prompts/implementation/test_design/prompt.md +27 -0
- devflow_engine/prompts/integration/README.md +185 -0
- devflow_engine/prompts/integration/green/example.md +67 -0
- devflow_engine/prompts/integration/green/green/prompt.md +10 -0
- devflow_engine/prompts/integration/green/node_config/prompt.md +42 -0
- devflow_engine/prompts/integration/green/past_prompts/20260417T212300/green/prompt.md +15 -0
- devflow_engine/prompts/integration/green/past_prompts/20260417T212300/node_config/prompt.md +42 -0
- devflow_engine/prompts/integration/green_enrich/example.md +79 -0
- devflow_engine/prompts/integration/green_enrich/green_enrich/prompt.md +9 -0
- devflow_engine/prompts/integration/green_enrich/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/green_enrich/prompt.md +14 -0
- devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/red/code_repair/prompt.md +12 -0
- devflow_engine/prompts/integration/red/example.md +152 -0
- devflow_engine/prompts/integration/red/node_config/prompt.md +86 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/node_config/prompt.md +84 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red/prompt.md +16 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red_repair/prompt.md +15 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T215032/node_config/prompt.md +84 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T215032/red_repair/prompt.md +11 -0
- devflow_engine/prompts/integration/red/red/prompt.md +11 -0
- devflow_engine/prompts/integration/red/red_repair/prompt.md +12 -0
- devflow_engine/prompts/integration/red_review/example.md +71 -0
- devflow_engine/prompts/integration/red_review/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/red_review/prompt.md +15 -0
- devflow_engine/prompts/integration/red_review/red_review/prompt.md +9 -0
- devflow_engine/prompts/integration/resolve/example.md +111 -0
- devflow_engine/prompts/integration/resolve/node_config/prompt.md +64 -0
- devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/node_config/prompt.md +64 -0
- devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_implicated_users/prompt.md +15 -0
- devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_side_effects/prompt.md +15 -0
- devflow_engine/prompts/integration/resolve/resolve_implicated_users/prompt.md +10 -0
- devflow_engine/prompts/integration/resolve/resolve_side_effects/prompt.md +10 -0
- devflow_engine/prompts/integration/validate/build_idea_acceptance_coverage/prompt.md +12 -0
- devflow_engine/prompts/integration/validate/code_repair/prompt.md +13 -0
- devflow_engine/prompts/integration/validate/example.md +143 -0
- devflow_engine/prompts/integration/validate/node_config/prompt.md +87 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/node_config/prompt.md +67 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_enrich_gate/prompt.md +17 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_repair/prompt.md +16 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/node_config/prompt.md +67 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/validate_repair/prompt.md +9 -0
- devflow_engine/prompts/integration/validate/validate_enrich_gate/prompt.md +10 -0
- devflow_engine/prompts/integration/validate/validate_repair/prompt.md +20 -0
- devflow_engine/prompts/integration/write_workflows/example.md +100 -0
- devflow_engine/prompts/integration/write_workflows/node_config/prompt.md +44 -0
- devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/node_config/prompt.md +44 -0
- devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/write_workflows/prompt.md +17 -0
- devflow_engine/prompts/integration/write_workflows/write_workflows/prompt.md +11 -0
- devflow_engine/prompts/iterate/README.md +7 -0
- devflow_engine/prompts/iterate/coder/prompt.md +11 -0
- devflow_engine/prompts/iterate/framer/prompt.md +11 -0
- devflow_engine/prompts/iterate/iterator/prompt.md +13 -0
- devflow_engine/prompts/iterate/observer/prompt.md +11 -0
- devflow_engine/prompts/recovery/diagnosis/prompt.md +7 -0
- devflow_engine/prompts/recovery/execution/prompt.md +8 -0
- devflow_engine/prompts/recovery/execution_verification/prompt.md +7 -0
- devflow_engine/prompts/recovery/failure_investigation/prompt.md +10 -0
- devflow_engine/prompts/recovery/preflight_health_repo_repair/prompt.md +8 -0
- devflow_engine/prompts/recovery/remediation_execution/prompt.md +11 -0
- devflow_engine/prompts/recovery/root_cause_investigation/prompt.md +12 -0
- devflow_engine/prompts/scope_idea/doctrine/prompt.md +7 -0
- devflow_engine/prompts/source_doc_eval/document/prompt.md +6 -0
- devflow_engine/prompts/source_doc_eval/targeted_mutation/prompt.md +9 -0
- devflow_engine/prompts/source_doc_mutation/domain_entities/prompt.md +6 -0
- devflow_engine/prompts/source_doc_mutation/product_brief/prompt.md +6 -0
- devflow_engine/prompts/source_doc_mutation/project_doc_coherence/prompt.md +7 -0
- devflow_engine/prompts/source_doc_mutation/project_doc_render/prompt.md +9 -0
- devflow_engine/prompts/source_doc_mutation/source_doc_coherence/prompt.md +5 -0
- devflow_engine/prompts/source_doc_mutation/source_doc_enrichment_coherence/prompt.md +6 -0
- devflow_engine/prompts/source_doc_mutation/user_workflows/prompt.md +6 -0
- devflow_engine/prompts/source_scope/doctrine/prompt.md +10 -0
- devflow_engine/prompts/ui_grounding/doctrine/prompt.md +7 -0
- devflow_engine/recovery/__init__.py +3 -0
- devflow_engine/recovery/dag.py +2609 -0
- devflow_engine/recovery/models.py +220 -0
- devflow_engine/refactor.py +93 -0
- devflow_engine/registry/__init__.py +1 -0
- devflow_engine/registry/cards.py +238 -0
- devflow_engine/registry/domain_normalize.py +60 -0
- devflow_engine/registry/effects.py +65 -0
- devflow_engine/registry/enforce_report.py +150 -0
- devflow_engine/registry/module_cards_classify.py +164 -0
- devflow_engine/registry/module_cards_draft.py +184 -0
- devflow_engine/registry/module_cards_gate.py +59 -0
- devflow_engine/registry/packages.py +347 -0
- devflow_engine/registry/pathways.py +323 -0
- devflow_engine/review/__init__.py +11 -0
- devflow_engine/review/dag.py +588 -0
- devflow_engine/review/review_story.py +67 -0
- devflow_engine/scope_idea/__init__.py +3 -0
- devflow_engine/scope_idea/agentic.py +39 -0
- devflow_engine/scope_idea/dag.py +1069 -0
- devflow_engine/scope_idea/models.py +175 -0
- devflow_engine/skills/builtins/devflow/queue_failure_investigation/SKILL.md +112 -0
- devflow_engine/skills/builtins/devflow/queue_idea_to_story/SKILL.md +120 -0
- devflow_engine/skills/builtins/devflow/queue_integration/SKILL.md +105 -0
- devflow_engine/skills/builtins/devflow/queue_recovery/SKILL.md +108 -0
- devflow_engine/skills/builtins/devflow/queue_runtime_core/SKILL.md +155 -0
- devflow_engine/skills/builtins/devflow/queue_story_implementation/SKILL.md +122 -0
- devflow_engine/skills/builtins/devin/idea_to_story_handoff/SKILL.md +120 -0
- devflow_engine/skills/builtins/devin/ideation/SKILL.md +168 -0
- devflow_engine/skills/builtins/devin/ideation/state-and-phrasing-reference.md +18 -0
- devflow_engine/skills/builtins/devin/insight/SKILL.md +22 -0
- devflow_engine/skills/registry.example.yaml +42 -0
- devflow_engine/source_doc_assumptions.py +291 -0
- devflow_engine/source_doc_mutation_dag.py +1606 -0
- devflow_engine/source_doc_mutation_eval.py +417 -0
- devflow_engine/source_doc_mutation_worker.py +25 -0
- devflow_engine/source_docs_schema.py +207 -0
- devflow_engine/source_docs_updater.py +309 -0
- devflow_engine/source_scope/__init__.py +15 -0
- devflow_engine/source_scope/agentic.py +45 -0
- devflow_engine/source_scope/dag.py +1626 -0
- devflow_engine/source_scope/models.py +177 -0
- devflow_engine/stores/__init__.py +0 -0
- devflow_engine/stores/execution_store.py +3534 -0
- devflow_engine/story/__init__.py +0 -0
- devflow_engine/story/contracts.py +160 -0
- devflow_engine/story/discovery.py +47 -0
- devflow_engine/story/evidence.py +118 -0
- devflow_engine/story/hashing.py +27 -0
- devflow_engine/story/implemented_queue_purge.py +148 -0
- devflow_engine/story/indexer.py +105 -0
- devflow_engine/story/io.py +20 -0
- devflow_engine/story/markdown_contracts.py +298 -0
- devflow_engine/story/reconciliation.py +408 -0
- devflow_engine/story/validate_stories.py +149 -0
- devflow_engine/story/validate_tests_story.py +512 -0
- devflow_engine/story/validation.py +133 -0
- devflow_engine/ui_grounding/__init__.py +11 -0
- devflow_engine/ui_grounding/agentic.py +31 -0
- devflow_engine/ui_grounding/dag.py +874 -0
- devflow_engine/ui_grounding/models.py +224 -0
- devflow_engine/ui_grounding/pencil_bridge.py +247 -0
- devflow_engine/vendor/__init__.py +0 -0
- devflow_engine/vendor/datalumina_genai/__init__.py +11 -0
- devflow_engine/vendor/datalumina_genai/core/__init__.py +0 -0
- devflow_engine/vendor/datalumina_genai/core/exceptions.py +9 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/__init__.py +0 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/agent.py +48 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/agent_streaming_node.py +26 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/base.py +89 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/concurrent.py +30 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/router.py +69 -0
- devflow_engine/vendor/datalumina_genai/core/schema.py +72 -0
- devflow_engine/vendor/datalumina_genai/core/task.py +52 -0
- devflow_engine/vendor/datalumina_genai/core/validate.py +139 -0
- devflow_engine/vendor/datalumina_genai/core/workflow.py +200 -0
- devflow_engine/worker.py +1086 -0
- devflow_engine/worker_guard.py +233 -0
- devflow_engine-1.0.0.dist-info/METADATA +235 -0
- devflow_engine-1.0.0.dist-info/RECORD +393 -0
- devflow_engine-1.0.0.dist-info/WHEEL +4 -0
- devflow_engine-1.0.0.dist-info/entry_points.txt +3 -0
- devin/__init__.py +6 -0
- devin/dag.py +58 -0
- devin/dag_two_arm.py +138 -0
- devin/devin_chat_scenario_catalog.json +588 -0
- devin/devin_eval.py +677 -0
- devin/nodes/__init__.py +0 -0
- devin/nodes/ideation/__init__.py +0 -0
- devin/nodes/ideation/node.py +195 -0
- devin/nodes/ideation/playground.py +267 -0
- devin/nodes/ideation/prompt.md +65 -0
- devin/nodes/ideation/scenarios/continue_refinement.py +13 -0
- devin/nodes/ideation/scenarios/continue_refinement_evals.py +18 -0
- devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py +17 -0
- devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py +16 -0
- devin/nodes/ideation/scenarios/large_idea_split.py +4 -0
- devin/nodes/ideation/scenarios/large_idea_split_evals.py +17 -0
- devin/nodes/ideation/scenarios/source_documentation_added.py +4 -0
- devin/nodes/ideation/scenarios/source_documentation_added_evals.py +16 -0
- devin/nodes/ideation/scenarios/user_says_create_it.py +30 -0
- devin/nodes/ideation/scenarios/user_says_create_it_evals.py +23 -0
- devin/nodes/ideation/scenarios/vague_idea.py +16 -0
- devin/nodes/ideation/scenarios/vague_idea_evals.py +47 -0
- devin/nodes/ideation/tools.json +312 -0
- devin/nodes/insight/__init__.py +0 -0
- devin/nodes/insight/node.py +49 -0
- devin/nodes/insight/playground.py +154 -0
- devin/nodes/insight/prompt.md +61 -0
- devin/nodes/insight/scenarios/architecture_pattern_query.py +15 -0
- devin/nodes/insight/scenarios/architecture_pattern_query_evals.py +25 -0
- devin/nodes/insight/scenarios/codebase_exploration.py +15 -0
- devin/nodes/insight/scenarios/codebase_exploration_evals.py +23 -0
- devin/nodes/insight/scenarios/devin_ideation_routing.py +19 -0
- devin/nodes/insight/scenarios/devin_ideation_routing_evals.py +39 -0
- devin/nodes/insight/scenarios/devin_insight_routing.py +20 -0
- devin/nodes/insight/scenarios/devin_insight_routing_evals.py +40 -0
- devin/nodes/insight/scenarios/operational_debugging.py +15 -0
- devin/nodes/insight/scenarios/operational_debugging_evals.py +23 -0
- devin/nodes/insight/scenarios/operational_question.py +9 -0
- devin/nodes/insight/scenarios/operational_question_evals.py +8 -0
- devin/nodes/insight/scenarios/queue_status.py +15 -0
- devin/nodes/insight/scenarios/queue_status_evals.py +23 -0
- devin/nodes/insight/scenarios/source_doc_explanation.py +14 -0
- devin/nodes/insight/scenarios/source_doc_explanation_evals.py +21 -0
- devin/nodes/insight/scenarios/worker_state_check.py +15 -0
- devin/nodes/insight/scenarios/worker_state_check_evals.py +22 -0
- devin/nodes/insight/tools.json +126 -0
- devin/nodes/intake/__init__.py +0 -0
- devin/nodes/intake/node.py +27 -0
- devin/nodes/intake/playground.py +47 -0
- devin/nodes/intake/prompt.md +12 -0
- devin/nodes/intake/scenarios/ideation_routing.py +4 -0
- devin/nodes/intake/scenarios/ideation_routing_evals.py +5 -0
- devin/nodes/intake/scenarios/insight_routing.py +4 -0
- devin/nodes/intake/scenarios/insight_routing_evals.py +5 -0
- devin/nodes/iterate/README.md +44 -0
- devin/nodes/iterate/__init__.py +1 -0
- devin/nodes/iterate/_archived_design_stages/01-objectives-requirements.md +112 -0
- devin/nodes/iterate/_archived_design_stages/02-evals.md +131 -0
- devin/nodes/iterate/_archived_design_stages/03-tools-and-boundaries.md +110 -0
- devin/nodes/iterate/_archived_design_stages/04-harness-and-playground.md +32 -0
- devin/nodes/iterate/_archived_design_stages/05-prompt-deferred.md +11 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/01-objectives-requirements.md +20 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/02-evals.md +8 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/03-tools-and-boundaries.md +14 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/04-harness-and-playground.md +12 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/01-objectives-requirements.md +20 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/02-evals.md +8 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/03-tools-and-boundaries.md +13 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/04-harness-and-playground.md +12 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md +25 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/02-evals.md +9 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/03-tools-and-boundaries.md +14 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md +12 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md +20 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/02-evals.md +8 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/03-tools-and-boundaries.md +14 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md +13 -0
- devin/nodes/iterate/agent-roles.md +89 -0
- devin/nodes/iterate/agents/README.md +10 -0
- devin/nodes/iterate/artifacts.md +504 -0
- devin/nodes/iterate/contract.md +100 -0
- devin/nodes/iterate/eval-plan.md +74 -0
- devin/nodes/iterate/node.py +100 -0
- devin/nodes/iterate/pipeline/README.md +13 -0
- devin/nodes/iterate/playground-contract.md +76 -0
- devin/nodes/iterate/prompt.md +11 -0
- devin/nodes/iterate/scenarios/README.md +38 -0
- devin/nodes/iterate/scenarios/artifact-and-loop-scenarios.md +101 -0
- devin/nodes/iterate/scenarios/coder_artifact_alignment.py +32 -0
- devin/nodes/iterate/scenarios/coder_artifact_alignment_evals.py +45 -0
- devin/nodes/iterate/scenarios/coder_bounded_fix.py +27 -0
- devin/nodes/iterate/scenarios/coder_bounded_fix_evals.py +45 -0
- devin/nodes/iterate/scenarios/devin_iterate_routing.py +21 -0
- devin/nodes/iterate/scenarios/devin_iterate_routing_evals.py +36 -0
- devin/nodes/iterate/scenarios/framer_scope_boundary.py +25 -0
- devin/nodes/iterate/scenarios/framer_scope_boundary_evals.py +57 -0
- devin/nodes/iterate/scenarios/framer_task_framing.py +25 -0
- devin/nodes/iterate/scenarios/framer_task_framing_evals.py +58 -0
- devin/nodes/iterate/scenarios/iterate_error_fix.py +21 -0
- devin/nodes/iterate/scenarios/iterate_error_fix_evals.py +39 -0
- devin/nodes/iterate/scenarios/iterate_quick_change.py +21 -0
- devin/nodes/iterate/scenarios/iterate_quick_change_evals.py +35 -0
- devin/nodes/iterate/scenarios/iterate_to_idea_promotion.py +23 -0
- devin/nodes/iterate/scenarios/iterate_to_idea_promotion_evals.py +53 -0
- devin/nodes/iterate/scenarios/iterate_to_insight_reroute.py +23 -0
- devin/nodes/iterate/scenarios/iterate_to_insight_reroute_evals.py +53 -0
- devin/nodes/iterate/scenarios/observer_evidence_seam.py +28 -0
- devin/nodes/iterate/scenarios/observer_evidence_seam_evals.py +55 -0
- devin/nodes/iterate/scenarios/observer_repro_creation.py +28 -0
- devin/nodes/iterate/scenarios/observer_repro_creation_evals.py +45 -0
- devin/nodes/iterate/scenarios/routing-matrix.md +45 -0
- devin/nodes/shared/__init__.py +0 -0
- devin/nodes/shared/filemaker_expert.md +80 -0
- devin/nodes/shared/filemaker_expert.py +354 -0
- devin/nodes/shared/filemaker_expert_eval/runner.py +176 -0
- devin/nodes/shared/filemaker_expert_eval/scenarios.json +65 -0
- devin/nodes/shared/goldilocks_advisor_eval/runner.py +214 -0
- devin/nodes/shared/goldilocks_advisor_eval/scenarios.json +58 -0
- devin/nodes/shared/helpers.py +156 -0
- devin/nodes/shared/idea_compliance_advisor_eval/runner.py +252 -0
- devin/nodes/shared/idea_compliance_advisor_eval/scenarios.json +75 -0
- devin/nodes/shared/models.py +44 -0
- devin/nodes/shared/post.py +40 -0
- devin/nodes/shared/router.py +107 -0
- devin/nodes/shared/tools.py +191 -0
- devin/shared/devin-chat-rubric.md +237 -0
- devin/shared/devin-chat-scenario-suite.md +90 -0
- devin/shared/eval_doctrine.md +9 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Scenario: after several refinement turns the user tells the agent to
# "create it"; the agent is expected to commit straight away.
SCENARIO_NAME = "user_says_create_it"
SCENARIO_DESCRIPTION = (
    "User and agent have iterated on an idea for several turns. "
    'User now says "create it" — agent should commit the idea to '
    "ready_for_downstream immediately, no questions."
)

INPUT_PAYLOAD = {
    "current_user_message": "Create it. Make it happen.",
    "idea_id": "proj_75f63d30_ideas_createit_001",
    "project_id": "proj_75f63d30",
    "repo_root": "/Users/devflow/repos/Spicy-Server",
    # The conversation so far, oldest turn first. The last agent turn reports
    # a devflow_init_idea call, so the idea is already persisted before the
    # user asks for commitment (mirrors real multi-turn behavior).
    "prior_messages": [
        {"from": speaker, "text": text}
        for speaker, text in (
            (
                "user",
                "I want to improve how our team handles onboarding new clients",
            ),
            (
                "agent",
                "Let me understand your context better — what type of "
                "clients are you working with?",
            ),
            (
                "user",
                "Small professional services firms — lawyers, accountants, "
                "consultants",
            ),
            (
                "agent",
                "Got it. And roughly how many new clients per month are you "
                "onboarding?",
            ),
            (
                "user",
                "About 15-20 per month across all our advisors",
            ),
            (
                "agent",
                "Devflow_Init_Idea called — idea persisted in "
                ".devflow/ideas/proj_75f63d30_ideas_createit_001/.\n\n"
                "Problem: Small professional services firms lose revenue and "
                "reputation when client onboarding is slow and inconsistent.\n"
                "Target users: Internal client-facing staff (paralegals, "
                "assistants, admin) who manage the onboarding workflow.\n"
                "User outcomes: Clients feel welcomed promptly, staff spend "
                "less time chasing documents, no new client starts with "
                "confusion.\n"
                "Scope: Onboarding workflow system with document collection, "
                "task tracking, client portal, and status notifications.",
            ),
            (
                "user",
                "Create it. Make it happen.",
            ),
        )
    ],
    # Ask for the idea artifact to be created up front so that
    # devflow_commit_idea has something to commit. In real usage the prior
    # turn's devflow_init_idea call creates it; the eval harness mirrors that
    # by making sure the artifact exists before the "create it" turn.
    "_precreate_artifact": True,
}

# Behavior the agent is expected to show for this scenario.
EXPECTED_BEHAVIOR = {
    "response_kind": "ready_for_downstream",
    "idea_artifact_exists": True,
    "idea_artifact_contains_all_fields": True,
    "no_clarifying_questions": True,
    "devflow_commit_called": True,
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Declarative pass criteria for the "user says create it" scenario.
EVAL_CRITERIA = {
    "response_kind_must_be": "ready_for_downstream",
    "no_clarifying_questions": True,
    # A ready_for_downstream response is taken to mean the agent committed.
    "devflow_commit_called": True,
}
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Check an agent response against the "user says create it" scenario.

    Args:
        actual_output: Agent output mapping. Only ``response_kind`` and
            ``follow_up_questions`` are read (both via ``.get``).

    Returns:
        ``(ok, notes)`` where ``notes`` holds one message per failed check
        and ``ok`` is ``True`` only when there are none.
    """
    problems = []

    # The user asked for creation, so the agent must have committed the idea.
    kind = actual_output.get('response_kind')
    if kind != 'ready_for_downstream':
        problems.append(f"expected ready_for_downstream, got {kind}")

    # Any follow-up question counts as stalling instead of committing.
    questions = actual_output.get('follow_up_questions') or []
    if questions:
        problems.append(f'should not ask questions but got: {questions}')

    return not problems, problems
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Scenario: the user opens with a vague idea and no prior conversation.
SCENARIO_NAME = "vague_idea"
SCENARIO_DESCRIPTION = (
    "User has a vague idea and needs the agent to move it forward."
)

INPUT_PAYLOAD = {
    "current_user_message": (
        "I want to improve how our team handles onboarding new clients."
    ),
    "idea_id": "proj_75f63d30_ideas_vague_001",
    "project_id": "proj_75f63d30",
    "repo_root": "/Users/devflow/repos/Spicy-Server",
}

# Behavior the agent is expected to show for this scenario.
EXPECTED_BEHAVIOR = {
    "response_kind": "ideation_contract_response",
    "produces_full_idea_artifact": True,
    "assumptions_traced": True,
    "asks_one_targeted_question": True,
    "no_premature_stories": True,
    "idea_artifact_exists": True,
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Declarative pass criteria for the "vague idea" scenario.
EVAL_CRITERIA = {
    "response_kind_must_be": "ideation_contract_response",
    # Vague input: the agent is expected to fabricate a complete idea...
    "produces_full_idea_artifact": True,
    "assumptions_traced": True,
    # ...and also ask a single question to verify its assumptions.
    "asks_one_targeted_question": True,
    "no_premature_stories": True,
    "idea_artifact_exists": True,
}
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def evaluate(actual_output: dict) -> tuple[bool, list[str]]:
    """Check an agent response against the "vague idea" scenario.

    The response passes only when all of the following hold:

    * ``response_kind`` is ``'ideation_contract_response'``;
    * exactly one follow-up question is asked;
    * ``response_message`` mentions target users, intended outcomes and
      scope (case-insensitive keyword match);
    * ``response_message`` mentions its assumptions.

    Args:
        actual_output: Agent output mapping. Only ``response_kind``,
            ``follow_up_questions`` and ``response_message`` are read (all
            via ``.get``); missing or falsy values are treated as empty.

    Returns:
        ``(ok, notes)`` where ``notes`` holds one message per failed check,
        in check order, and ``ok`` is ``False`` as soon as any check fails.
    """
    ok = True
    notes = []

    # Response kind check
    response_kind = actual_output.get('response_kind')
    if response_kind != 'ideation_contract_response':
        ok = False
        notes.append(f"expected ideation_contract_response, got {response_kind}")

    # Must ask exactly one targeted question (the agent fabricates an idea
    # AND asks one question to validate its assumptions).
    fqs = actual_output.get('follow_up_questions') or []
    if len(fqs) != 1:
        ok = False
        notes.append(f'expected exactly one follow-up question, got {len(fqs)}')

    # Must produce a fabricated complete idea: each aspect is considered
    # covered when any of its keywords appears in the lower-cased message.
    msg = (actual_output.get('response_message') or '').lower()
    coverage_checks = (
        (
            ('client', 'team', 'staff', 'user', 'onboarding', 'person'),
            'fabricated idea does not mention who the target users are',
        ),
        (
            ('outcome', 'result', 'benefit', 'improve', 'faster', 'reduce', 'consistent'),
            'fabricated idea does not mention intended outcomes or benefits',
        ),
        (
            ('scope', 'cover', 'include', 'system', 'workflow', 'process', 'portal', 'track'),
            'fabricated idea does not describe the scope of what to build',
        ),
    )
    for keywords, failure_note in coverage_checks:
        if not any(word in msg for word in keywords):
            ok = False
            notes.append(failure_note)

    # Assumptions must be mentioned in response_message. Substring matching
    # means 'assumption' also covers "assumptions:" / "assumptions traced"
    # and 'assuming' also covers "i'm assuming".
    assumption_phrases = ('assumption', 'assuming', 'i assume')
    if not any(phrase in msg for phrase in assumption_phrases):
        # Previously this only added a note, so a response with no traced
        # assumptions still passed; it is a required criterion.
        ok = False
        notes.append('no assumptions traced in fabricated idea')

    return ok, notes
|
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
{
|
|
2
|
+
"tools": [
|
|
3
|
+
{
|
|
4
|
+
"name": "devflow_read_project_config",
|
|
5
|
+
"description": "Read the DevFlow project configuration for a given project_id. Returns project metadata: name, id, repo location (local path and upstream URL), start commands, preview URL, and production URL.",
|
|
6
|
+
"parameters": {
|
|
7
|
+
"type": "object",
|
|
8
|
+
"properties": {
|
|
9
|
+
"project_id": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"description": "The DevFlow project ID (e.g. proj_75f63d30)"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"required": [
|
|
15
|
+
"project_id"
|
|
16
|
+
]
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"name": "devflow_read_queue_summary",
|
|
21
|
+
"description": "Read a summary of all DevFlow queues for a project. Returns queue counts by status for: idea, story, implementation, integration, recovery, and error queues.",
|
|
22
|
+
"parameters": {
|
|
23
|
+
"type": "object",
|
|
24
|
+
"properties": {
|
|
25
|
+
"project_id": {
|
|
26
|
+
"type": "string",
|
|
27
|
+
"description": "The DevFlow project ID"
|
|
28
|
+
}
|
|
29
|
+
},
|
|
30
|
+
"required": [
|
|
31
|
+
"project_id"
|
|
32
|
+
]
|
|
33
|
+
}
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"name": "devflow_read_story_queue",
|
|
37
|
+
"description": "Read the DevFlow story queue for a project, optionally filtered by status.",
|
|
38
|
+
"parameters": {
|
|
39
|
+
"type": "object",
|
|
40
|
+
"properties": {
|
|
41
|
+
"project_id": {
|
|
42
|
+
"type": "string",
|
|
43
|
+
"description": "The DevFlow project ID"
|
|
44
|
+
},
|
|
45
|
+
"status": {
|
|
46
|
+
"type": "string",
|
|
47
|
+
"description": "Filter by status: queued, in_progress, completed, failed",
|
|
48
|
+
"enum": [
|
|
49
|
+
"queued",
|
|
50
|
+
"in_progress",
|
|
51
|
+
"completed",
|
|
52
|
+
"failed"
|
|
53
|
+
]
|
|
54
|
+
}
|
|
55
|
+
},
|
|
56
|
+
"required": [
|
|
57
|
+
"project_id"
|
|
58
|
+
]
|
|
59
|
+
}
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
"name": "devflow_read_worker_state",
|
|
63
|
+
"description": "Read the current DevFlow worker state for a project. Returns active workers and what they are working on, plus a summary of current or last active run logs. If the last run was a recovery, includes recovery and failed run logs.",
|
|
64
|
+
"parameters": {
|
|
65
|
+
"type": "object",
|
|
66
|
+
"properties": {
|
|
67
|
+
"project_id": {
|
|
68
|
+
"type": "string",
|
|
69
|
+
"description": "The DevFlow project ID"
|
|
70
|
+
}
|
|
71
|
+
},
|
|
72
|
+
"required": [
|
|
73
|
+
"project_id"
|
|
74
|
+
]
|
|
75
|
+
}
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
"name": "devflow_init_idea",
|
|
79
|
+
"description": "Initialize a new idea artifact in the DevFlow pipeline. Before calling this tool, call devin_insight to explore the codebase and understand existing patterns so the idea aligns with current architecture. The idea title should reflect the user's intended outcome, not a technical implementation. Args: idea_id (string, required), title (string, required), project_id (string, required), text (string, required - full idea text from the user's message)",
|
|
80
|
+
"parameters": {
|
|
81
|
+
"type": "object",
|
|
82
|
+
"properties": {
|
|
83
|
+
"idea_id": {
|
|
84
|
+
"type": "string",
|
|
85
|
+
"description": "Unique idea identifier (e.g. proj_xxx_idea_yyy)"
|
|
86
|
+
},
|
|
87
|
+
"title": {
|
|
88
|
+
"type": "string",
|
|
89
|
+
"description": "Human-readable title for the idea"
|
|
90
|
+
},
|
|
91
|
+
"project_id": {
|
|
92
|
+
"type": "string",
|
|
93
|
+
"description": "DevFlow project ID this idea belongs to"
|
|
94
|
+
},
|
|
95
|
+
"text": {
|
|
96
|
+
"type": "string",
|
|
97
|
+
"description": "Full idea text \u2014 the user's complete description of what they want to build or change. Include all context, constraints, and intent."
|
|
98
|
+
}
|
|
99
|
+
},
|
|
100
|
+
"required": [
|
|
101
|
+
"idea_id",
|
|
102
|
+
"title",
|
|
103
|
+
"project_id",
|
|
104
|
+
"text"
|
|
105
|
+
]
|
|
106
|
+
}
|
|
107
|
+
},
|
|
108
|
+
{
|
|
109
|
+
"name": "devflow_amend_idea",
|
|
110
|
+
"description": "Amend an existing idea artifact with refined or additional text. Updates the idea shape and sufficiency fields in place. Args: idea_id (string, required), refined_text (string, required)",
|
|
111
|
+
"parameters": {
|
|
112
|
+
"type": "object",
|
|
113
|
+
"properties": {
|
|
114
|
+
"idea_id": {
|
|
115
|
+
"type": "string",
|
|
116
|
+
"description": "The idea ID to amend"
|
|
117
|
+
},
|
|
118
|
+
"refined_text": {
|
|
119
|
+
"type": "string",
|
|
120
|
+
"description": "The user's refined or additional text for this idea"
|
|
121
|
+
}
|
|
122
|
+
},
|
|
123
|
+
"required": [
|
|
124
|
+
"idea_id",
|
|
125
|
+
"refined_text"
|
|
126
|
+
]
|
|
127
|
+
}
|
|
128
|
+
},
|
|
129
|
+
{
|
|
130
|
+
"name": "devflow_commit_idea",
|
|
131
|
+
"description": "Promote an approved idea to ready_for_downstream state. Commits the idea artifact and marks it ready for story generation.",
|
|
132
|
+
"parameters": {
|
|
133
|
+
"type": "object",
|
|
134
|
+
"properties": {
|
|
135
|
+
"idea_id": {
|
|
136
|
+
"type": "string",
|
|
137
|
+
"description": "The idea ID to commit/promote"
|
|
138
|
+
},
|
|
139
|
+
"draft_set": {
|
|
140
|
+
"type": "string",
|
|
141
|
+
"description": "Draft set name to promote from"
|
|
142
|
+
},
|
|
143
|
+
"project_id": {
|
|
144
|
+
"type": "string",
|
|
145
|
+
"description": "DevFlow project ID"
|
|
146
|
+
}
|
|
147
|
+
},
|
|
148
|
+
"required": [
|
|
149
|
+
"idea_id",
|
|
150
|
+
"draft_set",
|
|
151
|
+
"project_id"
|
|
152
|
+
]
|
|
153
|
+
}
|
|
154
|
+
},
|
|
155
|
+
{
|
|
156
|
+
"name": "emit_start_working",
|
|
157
|
+
"description": "Start real-time working feedback for the user. Registers a single row on start and updates it every 2 seconds with rotating verbs until emit_stop_working is called. Multiple sessions can be active simultaneously. Args: activity (string, optional - initial verb; auto-selected if omitted), session_id (string, required)",
|
|
158
|
+
"parameters": {
|
|
159
|
+
"type": "object",
|
|
160
|
+
"properties": {
|
|
161
|
+
"activity": {
|
|
162
|
+
"type": "string",
|
|
163
|
+
"description": "Initial activity verb (e.g. running, reviewing, shaping). Auto-selected from the available verbs if omitted. Available verbs: running, reviewing, shaping, crafting, exploring, analyzing, mapping, building, checking, preparing, processing, designing, configuring, organizing, sequencing, implementing, validating, verifying, tracing, scoping, estimating, integrating, testing, deploying, monitoring, optimizing, documenting, routing, forwarding, translating, encoding, decoding, partitioning, distributing, collecting, filtering, transforming, loading, saving, retrieving, computing, calculating, scheduling, coordinating, synthesizing, extracting, compiling, resolving, confirming, assembling, orchestrating"
|
|
164
|
+
},
|
|
165
|
+
"session_id": {
|
|
166
|
+
"type": "string",
|
|
167
|
+
"description": "Session ID for the agent_devin_messages row"
|
|
168
|
+
}
|
|
169
|
+
},
|
|
170
|
+
"required": [
|
|
171
|
+
"session_id"
|
|
172
|
+
]
|
|
173
|
+
}
|
|
174
|
+
},
|
|
175
|
+
{
|
|
176
|
+
"name": "emit_stop_working",
|
|
177
|
+
"description": "Stop the real-time working feedback for a session. Updates the existing start_working row with the stop signal and clears the rotating verb timer. Args: session_id (string, required)",
|
|
178
|
+
"parameters": {
|
|
179
|
+
"type": "object",
|
|
180
|
+
"properties": {
|
|
181
|
+
"session_id": {
|
|
182
|
+
"type": "string",
|
|
183
|
+
"description": "Session ID for the feedback stream to stop"
|
|
184
|
+
}
|
|
185
|
+
},
|
|
186
|
+
"required": [
|
|
187
|
+
"session_id"
|
|
188
|
+
]
|
|
189
|
+
}
|
|
190
|
+
},
|
|
191
|
+
{
|
|
192
|
+
"name": "emit_response",
|
|
193
|
+
"description": "Emit a generic real-time feedback message to the agent_devin_messages table. Used for mid-turn progress updates that are not start/stop.",
|
|
194
|
+
"parameters": {
|
|
195
|
+
"type": "object",
|
|
196
|
+
"properties": {
|
|
197
|
+
"message": {
|
|
198
|
+
"type": "string",
|
|
199
|
+
"description": "The feedback message to display to the user"
|
|
200
|
+
},
|
|
201
|
+
"emit_type": {
|
|
202
|
+
"type": "string",
|
|
203
|
+
"description": "Type of emit: start_working, stop_working, progress, info, conclude_node"
|
|
204
|
+
},
|
|
205
|
+
"session_id": {
|
|
206
|
+
"type": "string",
|
|
207
|
+
"description": "Session ID for the agent_devin_messages row"
|
|
208
|
+
}
|
|
209
|
+
},
|
|
210
|
+
"required": [
|
|
211
|
+
"message",
|
|
212
|
+
"emit_type",
|
|
213
|
+
"session_id"
|
|
214
|
+
]
|
|
215
|
+
}
|
|
216
|
+
},
|
|
217
|
+
{
|
|
218
|
+
"name": "devin_insight",
|
|
219
|
+
"description": "Call the Devin InsightAgent as a subagent to explore the codebase and provide grounded context. Use when you need to understand existing patterns before shaping an idea.",
|
|
220
|
+
"parameters": {
|
|
221
|
+
"type": "object",
|
|
222
|
+
"properties": {
|
|
223
|
+
"current_user_message": {
|
|
224
|
+
"type": "string",
|
|
225
|
+
"description": "The user's current message to pass to the InsightAgent for exploration"
|
|
226
|
+
},
|
|
227
|
+
"context": {
|
|
228
|
+
"type": "object",
|
|
229
|
+
"description": "Context dict containing idea_id, project_id, repo_root",
|
|
230
|
+
"properties": {
|
|
231
|
+
"idea_id": {
|
|
232
|
+
"type": "string"
|
|
233
|
+
},
|
|
234
|
+
"project_id": {
|
|
235
|
+
"type": "string"
|
|
236
|
+
},
|
|
237
|
+
"repo_root": {
|
|
238
|
+
"type": "string"
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
},
|
|
243
|
+
"required": [
|
|
244
|
+
"current_user_message",
|
|
245
|
+
"context"
|
|
246
|
+
]
|
|
247
|
+
}
|
|
248
|
+
},
|
|
249
|
+
{
|
|
250
|
+
"name": "idea_compliance_check",
|
|
251
|
+
"description": "Validate a shaped idea against existing stories in queue, committed ideas, and codebase structure. Checks for duplicate work, architectural fit, and naming collisions before the idea is committed. Returns COMPLIANT, NON_COMPLIANT, or NEEDS_REVIEW with specific issues. Args: idea_id (string, required), idea_text (string, required - the full shaped idea text), repo_root (string, required), project_id (string, required)",
|
|
252
|
+
"parameters": {
|
|
253
|
+
"type": "object",
|
|
254
|
+
"properties": {
|
|
255
|
+
"idea_id": {
|
|
256
|
+
"type": "string",
|
|
257
|
+
"description": "The idea ID being validated"
|
|
258
|
+
},
|
|
259
|
+
"idea_text": {
|
|
260
|
+
"type": "string",
|
|
261
|
+
"description": "The full shaped idea text to validate against existing work"
|
|
262
|
+
},
|
|
263
|
+
"repo_root": {
|
|
264
|
+
"type": "string",
|
|
265
|
+
"description": "The project repository root path"
|
|
266
|
+
},
|
|
267
|
+
"project_id": {
|
|
268
|
+
"type": "string",
|
|
269
|
+
"description": "The DevFlow project ID"
|
|
270
|
+
}
|
|
271
|
+
},
|
|
272
|
+
"required": [
|
|
273
|
+
"idea_id",
|
|
274
|
+
"idea_text",
|
|
275
|
+
"repo_root",
|
|
276
|
+
"project_id"
|
|
277
|
+
]
|
|
278
|
+
}
|
|
279
|
+
},
|
|
280
|
+
{
|
|
281
|
+
"name": "goldilocks_check",
|
|
282
|
+
"description": "Evaluate whether a shaped idea is well-scoped for story generation. Returns verdict: just_right (2-20 stories), split (>20 stories), or enrichment_needed (<2 stories). Also returns story estimate, cohesive area, and split suggestions if verdict is split. Args: idea_id (string, required), idea_text (string, required - the full shaped idea text), repo_root (string, required), project_id (string, required)",
|
|
283
|
+
"parameters": {
|
|
284
|
+
"type": "object",
|
|
285
|
+
"properties": {
|
|
286
|
+
"idea_id": {
|
|
287
|
+
"type": "string",
|
|
288
|
+
"description": "The idea ID being evaluated"
|
|
289
|
+
},
|
|
290
|
+
"idea_text": {
|
|
291
|
+
"type": "string",
|
|
292
|
+
"description": "The full shaped idea text to evaluate for scoping"
|
|
293
|
+
},
|
|
294
|
+
"repo_root": {
|
|
295
|
+
"type": "string",
|
|
296
|
+
"description": "The project repository root path"
|
|
297
|
+
},
|
|
298
|
+
"project_id": {
|
|
299
|
+
"type": "string",
|
|
300
|
+
"description": "The DevFlow project ID"
|
|
301
|
+
}
|
|
302
|
+
},
|
|
303
|
+
"required": [
|
|
304
|
+
"idea_id",
|
|
305
|
+
"idea_text",
|
|
306
|
+
"repo_root",
|
|
307
|
+
"project_id"
|
|
308
|
+
]
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
]
|
|
312
|
+
}
|
|
File without changes
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from devflow_engine.devin2.pi_runner import run_devin2_pi_agent
|
|
6
|
+
from devflow_engine.vendor.datalumina_genai.core.nodes.base import Node
|
|
7
|
+
from devflow_engine.vendor.datalumina_genai.core.task import TaskContext
|
|
8
|
+
from devin.nodes.shared.helpers import dfs_node_running, load_node_prompt_lines, pipeline_root, resolve_project_id, store_run, write_json
|
|
9
|
+
from devin.nodes.shared.models import DevinAgentResponse
|
|
10
|
+
|
|
11
|
+
class InsightAgentNode(Node):
    """Node that runs the Devin2 PI agent on the ``insight`` route arm.

    The agent's reply is written to ``insight_response.json`` inside the
    pipeline directory, registered as an artifact on the execution store and
    exposed to later nodes through ``task_context.metadata``.
    """

    async def process(self, task_context: TaskContext) -> TaskContext:
        """Run the insight agent for the event carried by ``task_context``.

        Args:
            task_context: Context whose ``event`` supplies ``repo_root``,
                ``idea_id``, ``raw_text`` and ``pipeline_key`` and whose
                ``metadata`` may supply ``project_id``, ``raw_text`` and
                ``route``.

        Returns:
            The same ``task_context`` with ``metadata['response_guidance']``
            and ``metadata['agent_loop_terminal']`` populated.

        Note:
            If the agent call or the response validation raises, nothing in
            this method marks the node attempt as finished.
        """
        event = task_context.event
        repo_root = Path(event.repo_root)
        idea_id = event.idea_id

        # Register this execution attempt with the shared runtime store.
        store, run_id = store_run()
        node_exec_id = store.create_node_attempt(
            run_id=run_id,
            node_id='insight_agent',
            node_name='InsightAgent',
            attempt=1,
        )

        # Metadata wins; otherwise fall back to resolving from the repo.
        project_id = str(
            task_context.metadata.get('project_id')
            or resolve_project_id(repo_root, idea_id=idea_id)
        )
        dfs_node_running(
            project_id=project_id,
            run_id=run_id,
            node_id='insight_agent',
            summary='Running Devin insight agent',
            idea_id=idea_id,
        )

        # Prompt lines associated with this file, followed by fixed guidance.
        guidance = load_node_prompt_lines(__file__) + [
            "Return response_kind='redirect'.",
            'Answer the current project-specific question directly.',
            'Keep the reply concise and grounded in provided context.',
            'Use Emit_Start_Working before running a devflow inspection call.',
            'Use Emit_Stop_Working after the call completes.',
            'Use Emit_Response for mid-turn progress or info messages.',
        ]

        # Session identifier handed to the agent for the emit tools.
        session_id = f"insight:{project_id}:{idea_id}"

        context_payload = {
            'idea_id': idea_id,
            'current_user_message': str(task_context.metadata.get('raw_text') or event.raw_text or ''),
            'route': task_context.metadata.get('route') or {},
            'expected_status': 'redirect',
            'project_id': project_id,
            'repo_root': str(repo_root),
            'session_id': session_id,
        }

        # NOTE(review): this call is not awaited; if it blocks, the event loop
        # is blocked for its duration (timeout is 90 seconds) -- confirm.
        result = run_devin2_pi_agent(
            repo_root=repo_root,
            stage_name='devin_insight_response',
            route_arm='insight',
            context_payload=context_payload,
            operational_guidance=guidance,
            output_model=DevinAgentResponse,
            timeout_seconds=90,
        )
        # Round-trip through a dict so the result is a DevinAgentResponse.
        model = DevinAgentResponse.model_validate(result.response_model.model_dump())

        response_payload = {
            'idea_id': idea_id,
            'pipeline_dir': str(pipeline_root(repo_root, idea_id=idea_id, pipeline_key=event.pipeline_key)),
            'response_message': model.response_message,
            'response_kind': model.response_kind,
            'suggested_next_step': model.suggested_next_step,
            'follow_up_questions': model.follow_up_questions,
            'response_style_notes': model.style_notes,
        }

        # Persist the reply next to the other pipeline artifacts.
        out_path = (
            pipeline_root(repo_root, idea_id=idea_id, pipeline_key=event.pipeline_key)
            / 'insight_response.json'
        )
        write_json(out_path, response_payload)

        store.add_artifact(
            run_id=run_id,
            node_exec_id=node_exec_id,
            kind='devin_insight_response',
            uri=str(out_path),
            metadata={'response_kind': model.response_kind},
        )
        store.mark_node_finished(
            node_exec_id=node_exec_id,
            status='succeeded',
            output=response_payload,
        )

        # Hand the reply to downstream consumers of the task context.
        task_context.metadata['response_guidance'] = response_payload
        task_context.metadata['agent_loop_terminal'] = {'status': model.response_kind, **response_payload}
        return task_context
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""InsightAgent playground — unit test InsightAgentNode against scenario inputs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import asyncio
|
|
7
|
+
import importlib.util
|
|
8
|
+
import json
|
|
9
|
+
import uuid
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from devin.nodes.insight.node import InsightAgentNode
|
|
13
|
+
from devin.nodes.shared.helpers import set_runtime_store
|
|
14
|
+
from devin.nodes.shared.models import DevinAgentResponse, DevinChatDagEvent, ScenarioResult
|
|
15
|
+
from devflow_engine.stores.execution_store import ExecutionStore
|
|
16
|
+
from devflow_engine.vendor.datalumina_genai.core.task import TaskContext
|
|
17
|
+
|
|
18
|
+
SCENARIO_DIR = Path(__file__).with_name('scenarios')
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _load_module(path: Path):
    """Import the Python file at ``path`` as a standalone module.

    The module is named after the file's stem and executed immediately. This
    function does not register it in ``sys.modules``.

    Args:
        path: Location of the Python source file to load.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be built for ``path``
            (for example, the file does not have a Python source suffix).
    """
    spec = importlib.util.spec_from_file_location(path.stem, path)
    # Validate before use: module_from_spec(None) would otherwise fail with an
    # opaque AttributeError, and an assert would be stripped under ``-O``.
    if spec is None or spec.loader is None:
        raise ImportError(f'cannot build an import spec for {path}')
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _collect_scenarios():
    """Load every scenario module that has a matching ``*_evals.py`` module.

    Files in ``SCENARIO_DIR`` are visited in sorted order. A file named
    ``<stem>.py`` is paired with ``<stem>_evals.py``; scenario files without
    such a companion are skipped.

    Returns:
        A list of ``(scenario_module, eval_module)`` tuples.
    """
    scenario_files = [
        candidate
        for candidate in sorted(SCENARIO_DIR.glob('*.py'))
        if not candidate.name.endswith('_evals.py')
    ]

    loaded = []
    for scenario_file in scenario_files:
        evals_file = scenario_file.with_name(f'{scenario_file.stem}_evals.py')
        if not evals_file.exists():
            # No evaluator for this scenario, so there is nothing to run.
            continue
        loaded.append((_load_module(scenario_file), _load_module(evals_file)))
    return loaded
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
async def _run_scenario(scenario_module, eval_module, repo_root: Path, project_id: str) -> ScenarioResult:
    """Run one scenario against the real InsightAgentNode and evaluate it.

    Args:
        scenario_module: Module exposing ``SCENARIO_NAME``, ``INPUT_PAYLOAD``
            and ``EXPECTED_BEHAVIOR``.
        eval_module: Module exposing ``evaluate(actual_output)``, which
            returns a ``(passed, notes)`` pair.
        repo_root: Repository root; the execution store is opened at
            ``<repo_root>/.devflow/execution.sqlite``.
        project_id: DevFlow project ID recorded on the run and passed to the
            node through the task-context metadata.

    Returns:
        A ``ScenarioResult``. An exception raised while constructing or
        running the node, or by the evaluator, is reported as a failed result
        with an explanatory note instead of propagating.
    """
    input_payload = dict(scenario_module.INPUT_PAYLOAD)
    expected = dict(scenario_module.EXPECTED_BEHAVIOR)
    scenario_name = scenario_module.SCENARIO_NAME

    idea_id = input_payload.get('idea_id', f'{project_id}_scenario_{scenario_name}')
    raw_text = input_payload.get('current_user_message', '')

    event = DevinChatDagEvent(
        repo_root=str(repo_root),
        idea_id=idea_id,
        raw_text=raw_text,
        pipeline_key=f'insight-scenario-{scenario_name}',
    )

    metadata = {
        'raw_text': raw_text,
        'route': {'route_arm': 'insight'},
        'project_id': project_id,
    }

    task_context = TaskContext(event=event, metadata=metadata)

    # Set up the runtime store for this scenario.
    db_path = repo_root / '.devflow' / 'execution.sqlite'
    store = ExecutionStore(db_path=db_path)
    run_id = store.create_run(
        dag_id='devin_chat_two_arm_dag',
        dag_version='1.0',
        root_correlation_id=str(uuid.uuid4()),
        config={'project_id': project_id, 'scenario': scenario_name},
    )
    set_runtime_store(store, run_id)

    try:
        node = InsightAgentNode(task_context=None)
        result_ctx = await node.process(task_context)
        response_guidance = result_ctx.metadata.get('response_guidance', {})
        agent_terminal = result_ctx.metadata.get('agent_loop_terminal', {})
    except Exception as exc:
        # Harness boundary: report the failure so remaining scenarios still run.
        return ScenarioResult(
            scenario_name=scenario_name,
            passed=False,
            actual_output={'error': str(exc)},
            expected_behavior=expected,
            notes=[f'Node raised exception: {exc}'],
        )
    finally:
        # Always detach the store, including on cancellation or interrupt, so
        # a later scenario never sees this run's store.
        set_runtime_store(None, None)

    actual_output = {
        'response_message': response_guidance.get('response_message', ''),
        'response_kind': response_guidance.get('response_kind', agent_terminal.get('status', '')),
        'suggested_next_step': response_guidance.get('suggested_next_step', ''),
        'follow_up_questions': response_guidance.get('follow_up_questions', []),
        'style_notes': response_guidance.get('response_style_notes', []),
    }

    try:
        passed, notes = eval_module.evaluate(actual_output)
    except Exception as exc:
        # A broken evaluator counts as a failed scenario, not a crashed run.
        passed = False
        notes = [f'Eval raised exception: {exc}']

    return ScenarioResult(
        scenario_name=scenario_name,
        passed=passed,
        actual_output=actual_output,
        expected_behavior=expected,
        notes=notes,
    )
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def run_all(repo_root: Path | None = None, project_id: str = 'proj_75f63d30') -> list[ScenarioResult]:
    """Run all insight scenarios against the InsightAgentNode.

    Args:
        repo_root: Repository root to run against. Defaults to the current
            working directory when omitted.
        project_id: DevFlow project ID passed to each scenario.

    Returns:
        One ``ScenarioResult`` per collected scenario, in collection order.
    """
    root = Path.cwd() if repo_root is None else Path(repo_root)
    # Each scenario runs in its own event loop via asyncio.run.
    return [
        asyncio.run(_run_scenario(scenario_mod, eval_mod, root, project_id))
        for scenario_mod, eval_mod in _collect_scenarios()
    ]
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
if __name__ == '__main__':
    # Command-line entry point: run every scenario, print a PASS/FAIL summary,
    # then dump the full results as JSON.
    parser = argparse.ArgumentParser(description='InsightAgent playground')
    # NOTE(review): --real-pi is accepted but never read in this script.
    parser.add_argument('--real-pi', action='store_true', help='Run with real PI harness (slower, uses API budget)')
    parser.add_argument('--project', type=str, default='proj_75f63d30', help='DevFlow project ID')
    parser.add_argument('--repo-root', type=str, default='/Users/devflow/repos/Spicy-Server', help='Repo root path')
    args = parser.parse_args()

    repo_root = Path(args.repo_root)
    project_id = args.project

    # Name the repository actually targeted rather than a fixed project name;
    # with the default --repo-root this prints the same banner as before.
    print(f"Running InsightAgent eval against {repo_root.name} ({project_id})")
    print(f"Repo root: {repo_root}")
    print("-" * 60)

    results = []
    for scenario_mod, eval_mod in _collect_scenarios():
        print(f"Running scenario: {scenario_mod.SCENARIO_NAME} ...", flush=True)
        result = asyncio.run(_run_scenario(scenario_mod, eval_mod, repo_root, project_id))
        results.append(result)
        status = "PASS" if result.passed else "FAIL"
        print(f"  → {status}", flush=True)

    print("-" * 60)
    print(f"\nResults: {sum(1 for r in results if r.passed)}/{len(results)} passed\n")

    for result in results:
        status = "PASS" if result.passed else "FAIL"
        print(f"  [{status}] {result.scenario_name}")
        for note in result.notes:
            print(f"         note: {note}")

    print("\n" + json.dumps([item.model_dump() for item in results], indent=2, sort_keys=True))
|