devflow-engine 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devflow_engine/__init__.py +3 -0
- devflow_engine/agentic_prompts.py +100 -0
- devflow_engine/agentic_runtime.py +398 -0
- devflow_engine/api_key_flow_harness.py +539 -0
- devflow_engine/api_keys.py +357 -0
- devflow_engine/bootstrap/__init__.py +2 -0
- devflow_engine/bootstrap/provision_from_template.py +84 -0
- devflow_engine/cli/__init__.py +0 -0
- devflow_engine/cli/app.py +7270 -0
- devflow_engine/core/__init__.py +0 -0
- devflow_engine/core/config.py +86 -0
- devflow_engine/core/logging.py +29 -0
- devflow_engine/core/paths.py +45 -0
- devflow_engine/core/toml_kv.py +33 -0
- devflow_engine/devflow_event_worker.py +1292 -0
- devflow_engine/devflow_state.py +201 -0
- devflow_engine/devin2/__init__.py +9 -0
- devflow_engine/devin2/agent_definition.py +120 -0
- devflow_engine/devin2/pi_runner.py +204 -0
- devflow_engine/devin_orchestration.py +69 -0
- devflow_engine/docs/prompts/anti-patterns.md +42 -0
- devflow_engine/docs/prompts/devin-agent-prompt.md +55 -0
- devflow_engine/docs/prompts/devin2-agent-prompt.md +81 -0
- devflow_engine/docs/prompts/examples/devin-vapi-clone-reference-exchange.json +85 -0
- devflow_engine/doctor/__init__.py +2 -0
- devflow_engine/doctor/triage.py +140 -0
- devflow_engine/error/__init__.py +0 -0
- devflow_engine/error/remediation.py +21 -0
- devflow_engine/errors/error_solver_dag.py +522 -0
- devflow_engine/errors/runtime_observability.py +67 -0
- devflow_engine/idea/__init__.py +4 -0
- devflow_engine/idea/actors.py +481 -0
- devflow_engine/idea/agentic.py +465 -0
- devflow_engine/idea/analyze.py +93 -0
- devflow_engine/idea/devin_chat_dag.py +1 -0
- devflow_engine/idea/diff.py +99 -0
- devflow_engine/idea/drafts.py +446 -0
- devflow_engine/idea/idea_creation_dag.py +643 -0
- devflow_engine/idea/ideation_enrichment.py +355 -0
- devflow_engine/idea/ideation_enrichment_worker.py +19 -0
- devflow_engine/idea/paths.py +28 -0
- devflow_engine/idea/promote.py +53 -0
- devflow_engine/idea/redaction.py +27 -0
- devflow_engine/idea/repo_tools.py +1277 -0
- devflow_engine/idea/response_mode.py +30 -0
- devflow_engine/idea/story_pipeline.py +1585 -0
- devflow_engine/idea/sufficiency.py +376 -0
- devflow_engine/idea/traditional_stories.py +1257 -0
- devflow_engine/implementation/__init__.py +0 -0
- devflow_engine/implementation/alembic_preflight.py +700 -0
- devflow_engine/implementation/dag.py +8450 -0
- devflow_engine/implementation/green_gate.py +93 -0
- devflow_engine/implementation/prompts.py +108 -0
- devflow_engine/implementation/test_runtime.py +623 -0
- devflow_engine/integration/__init__.py +19 -0
- devflow_engine/integration/agentic.py +66 -0
- devflow_engine/integration/dag.py +3539 -0
- devflow_engine/integration/prompts.py +114 -0
- devflow_engine/integration/supabase_schema.sql +31 -0
- devflow_engine/integration/supabase_sync.py +177 -0
- devflow_engine/llm/__init__.py +1 -0
- devflow_engine/llm/cli_one_shot.py +84 -0
- devflow_engine/llm/cli_stream.py +371 -0
- devflow_engine/llm/execution_context.py +26 -0
- devflow_engine/llm/invoke.py +1322 -0
- devflow_engine/llm/provider_api.py +304 -0
- devflow_engine/llm/repo_knowledge.py +588 -0
- devflow_engine/llm_primitives.py +315 -0
- devflow_engine/orchestration.py +62 -0
- devflow_engine/planning/__init__.py +0 -0
- devflow_engine/planning/analyze_repo.py +92 -0
- devflow_engine/planning/render_drafts.py +133 -0
- devflow_engine/playground/__init__.py +0 -0
- devflow_engine/playground/hooks.py +26 -0
- devflow_engine/playwright_workflow/__init__.py +5 -0
- devflow_engine/playwright_workflow/dag.py +1317 -0
- devflow_engine/process/__init__.py +5 -0
- devflow_engine/process/dag.py +59 -0
- devflow_engine/project_registration/__init__.py +3 -0
- devflow_engine/project_registration/dag.py +1581 -0
- devflow_engine/project_registry.py +109 -0
- devflow_engine/prompts/devin/generic/prompt.md +6 -0
- devflow_engine/prompts/devin/ideation/prompt.md +263 -0
- devflow_engine/prompts/devin/ideation/scenarios.md +5 -0
- devflow_engine/prompts/devin/ideation_loop/prompt.md +6 -0
- devflow_engine/prompts/devin/insight/prompt.md +11 -0
- devflow_engine/prompts/devin/insight/scenarios.md +5 -0
- devflow_engine/prompts/devin/intake/prompt.md +15 -0
- devflow_engine/prompts/devin/iterate/prompt.md +12 -0
- devflow_engine/prompts/devin/shared/eval_doctrine.md +9 -0
- devflow_engine/prompts/devin/shared/principles.md +246 -0
- devflow_engine/prompts/devin_eval/assessment/prompt.md +18 -0
- devflow_engine/prompts/idea/api_ideation_agent/prompt.md +8 -0
- devflow_engine/prompts/idea/api_insight_agent/prompt.md +8 -0
- devflow_engine/prompts/idea/response_doctrine/prompt.md +18 -0
- devflow_engine/prompts/implementation/dependency_assessment/prompt.md +12 -0
- devflow_engine/prompts/implementation/green/green/prompt.md +11 -0
- devflow_engine/prompts/implementation/green/node_config/prompt.md +3 -0
- devflow_engine/prompts/implementation/green_review/outcome_review/prompt.md +5 -0
- devflow_engine/prompts/implementation/green_review/prior_run_review/prompt.md +5 -0
- devflow_engine/prompts/implementation/red/prompt.md +27 -0
- devflow_engine/prompts/implementation/redreview/prompt.md +23 -0
- devflow_engine/prompts/implementation/redreview_repair/prompt.md +16 -0
- devflow_engine/prompts/implementation/setupdoc/prompt.md +10 -0
- devflow_engine/prompts/implementation/story_planning/prompt.md +13 -0
- devflow_engine/prompts/implementation/test_design/prompt.md +27 -0
- devflow_engine/prompts/integration/README.md +185 -0
- devflow_engine/prompts/integration/green/example.md +67 -0
- devflow_engine/prompts/integration/green/green/prompt.md +10 -0
- devflow_engine/prompts/integration/green/node_config/prompt.md +42 -0
- devflow_engine/prompts/integration/green/past_prompts/20260417T212300/green/prompt.md +15 -0
- devflow_engine/prompts/integration/green/past_prompts/20260417T212300/node_config/prompt.md +42 -0
- devflow_engine/prompts/integration/green_enrich/example.md +79 -0
- devflow_engine/prompts/integration/green_enrich/green_enrich/prompt.md +9 -0
- devflow_engine/prompts/integration/green_enrich/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/green_enrich/prompt.md +14 -0
- devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/red/code_repair/prompt.md +12 -0
- devflow_engine/prompts/integration/red/example.md +152 -0
- devflow_engine/prompts/integration/red/node_config/prompt.md +86 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/node_config/prompt.md +84 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red/prompt.md +16 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red_repair/prompt.md +15 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T215032/node_config/prompt.md +84 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T215032/red_repair/prompt.md +11 -0
- devflow_engine/prompts/integration/red/red/prompt.md +11 -0
- devflow_engine/prompts/integration/red/red_repair/prompt.md +12 -0
- devflow_engine/prompts/integration/red_review/example.md +71 -0
- devflow_engine/prompts/integration/red_review/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/red_review/prompt.md +15 -0
- devflow_engine/prompts/integration/red_review/red_review/prompt.md +9 -0
- devflow_engine/prompts/integration/resolve/example.md +111 -0
- devflow_engine/prompts/integration/resolve/node_config/prompt.md +64 -0
- devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/node_config/prompt.md +64 -0
- devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_implicated_users/prompt.md +15 -0
- devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_side_effects/prompt.md +15 -0
- devflow_engine/prompts/integration/resolve/resolve_implicated_users/prompt.md +10 -0
- devflow_engine/prompts/integration/resolve/resolve_side_effects/prompt.md +10 -0
- devflow_engine/prompts/integration/validate/build_idea_acceptance_coverage/prompt.md +12 -0
- devflow_engine/prompts/integration/validate/code_repair/prompt.md +13 -0
- devflow_engine/prompts/integration/validate/example.md +143 -0
- devflow_engine/prompts/integration/validate/node_config/prompt.md +87 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/node_config/prompt.md +67 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_enrich_gate/prompt.md +17 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_repair/prompt.md +16 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/node_config/prompt.md +67 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/validate_repair/prompt.md +9 -0
- devflow_engine/prompts/integration/validate/validate_enrich_gate/prompt.md +10 -0
- devflow_engine/prompts/integration/validate/validate_repair/prompt.md +20 -0
- devflow_engine/prompts/integration/write_workflows/example.md +100 -0
- devflow_engine/prompts/integration/write_workflows/node_config/prompt.md +44 -0
- devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/node_config/prompt.md +44 -0
- devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/write_workflows/prompt.md +17 -0
- devflow_engine/prompts/integration/write_workflows/write_workflows/prompt.md +11 -0
- devflow_engine/prompts/iterate/README.md +7 -0
- devflow_engine/prompts/iterate/coder/prompt.md +11 -0
- devflow_engine/prompts/iterate/framer/prompt.md +11 -0
- devflow_engine/prompts/iterate/iterator/prompt.md +13 -0
- devflow_engine/prompts/iterate/observer/prompt.md +11 -0
- devflow_engine/prompts/recovery/diagnosis/prompt.md +7 -0
- devflow_engine/prompts/recovery/execution/prompt.md +8 -0
- devflow_engine/prompts/recovery/execution_verification/prompt.md +7 -0
- devflow_engine/prompts/recovery/failure_investigation/prompt.md +10 -0
- devflow_engine/prompts/recovery/preflight_health_repo_repair/prompt.md +8 -0
- devflow_engine/prompts/recovery/remediation_execution/prompt.md +11 -0
- devflow_engine/prompts/recovery/root_cause_investigation/prompt.md +12 -0
- devflow_engine/prompts/scope_idea/doctrine/prompt.md +7 -0
- devflow_engine/prompts/source_doc_eval/document/prompt.md +6 -0
- devflow_engine/prompts/source_doc_eval/targeted_mutation/prompt.md +9 -0
- devflow_engine/prompts/source_doc_mutation/domain_entities/prompt.md +6 -0
- devflow_engine/prompts/source_doc_mutation/product_brief/prompt.md +6 -0
- devflow_engine/prompts/source_doc_mutation/project_doc_coherence/prompt.md +7 -0
- devflow_engine/prompts/source_doc_mutation/project_doc_render/prompt.md +9 -0
- devflow_engine/prompts/source_doc_mutation/source_doc_coherence/prompt.md +5 -0
- devflow_engine/prompts/source_doc_mutation/source_doc_enrichment_coherence/prompt.md +6 -0
- devflow_engine/prompts/source_doc_mutation/user_workflows/prompt.md +6 -0
- devflow_engine/prompts/source_scope/doctrine/prompt.md +10 -0
- devflow_engine/prompts/ui_grounding/doctrine/prompt.md +7 -0
- devflow_engine/recovery/__init__.py +3 -0
- devflow_engine/recovery/dag.py +2609 -0
- devflow_engine/recovery/models.py +220 -0
- devflow_engine/refactor.py +93 -0
- devflow_engine/registry/__init__.py +1 -0
- devflow_engine/registry/cards.py +238 -0
- devflow_engine/registry/domain_normalize.py +60 -0
- devflow_engine/registry/effects.py +65 -0
- devflow_engine/registry/enforce_report.py +150 -0
- devflow_engine/registry/module_cards_classify.py +164 -0
- devflow_engine/registry/module_cards_draft.py +184 -0
- devflow_engine/registry/module_cards_gate.py +59 -0
- devflow_engine/registry/packages.py +347 -0
- devflow_engine/registry/pathways.py +323 -0
- devflow_engine/review/__init__.py +11 -0
- devflow_engine/review/dag.py +588 -0
- devflow_engine/review/review_story.py +67 -0
- devflow_engine/scope_idea/__init__.py +3 -0
- devflow_engine/scope_idea/agentic.py +39 -0
- devflow_engine/scope_idea/dag.py +1069 -0
- devflow_engine/scope_idea/models.py +175 -0
- devflow_engine/skills/builtins/devflow/queue_failure_investigation/SKILL.md +112 -0
- devflow_engine/skills/builtins/devflow/queue_idea_to_story/SKILL.md +120 -0
- devflow_engine/skills/builtins/devflow/queue_integration/SKILL.md +105 -0
- devflow_engine/skills/builtins/devflow/queue_recovery/SKILL.md +108 -0
- devflow_engine/skills/builtins/devflow/queue_runtime_core/SKILL.md +155 -0
- devflow_engine/skills/builtins/devflow/queue_story_implementation/SKILL.md +122 -0
- devflow_engine/skills/builtins/devin/idea_to_story_handoff/SKILL.md +120 -0
- devflow_engine/skills/builtins/devin/ideation/SKILL.md +168 -0
- devflow_engine/skills/builtins/devin/ideation/state-and-phrasing-reference.md +18 -0
- devflow_engine/skills/builtins/devin/insight/SKILL.md +22 -0
- devflow_engine/skills/registry.example.yaml +42 -0
- devflow_engine/source_doc_assumptions.py +291 -0
- devflow_engine/source_doc_mutation_dag.py +1606 -0
- devflow_engine/source_doc_mutation_eval.py +417 -0
- devflow_engine/source_doc_mutation_worker.py +25 -0
- devflow_engine/source_docs_schema.py +207 -0
- devflow_engine/source_docs_updater.py +309 -0
- devflow_engine/source_scope/__init__.py +15 -0
- devflow_engine/source_scope/agentic.py +45 -0
- devflow_engine/source_scope/dag.py +1626 -0
- devflow_engine/source_scope/models.py +177 -0
- devflow_engine/stores/__init__.py +0 -0
- devflow_engine/stores/execution_store.py +3534 -0
- devflow_engine/story/__init__.py +0 -0
- devflow_engine/story/contracts.py +160 -0
- devflow_engine/story/discovery.py +47 -0
- devflow_engine/story/evidence.py +118 -0
- devflow_engine/story/hashing.py +27 -0
- devflow_engine/story/implemented_queue_purge.py +148 -0
- devflow_engine/story/indexer.py +105 -0
- devflow_engine/story/io.py +20 -0
- devflow_engine/story/markdown_contracts.py +298 -0
- devflow_engine/story/reconciliation.py +408 -0
- devflow_engine/story/validate_stories.py +149 -0
- devflow_engine/story/validate_tests_story.py +512 -0
- devflow_engine/story/validation.py +133 -0
- devflow_engine/ui_grounding/__init__.py +11 -0
- devflow_engine/ui_grounding/agentic.py +31 -0
- devflow_engine/ui_grounding/dag.py +874 -0
- devflow_engine/ui_grounding/models.py +224 -0
- devflow_engine/ui_grounding/pencil_bridge.py +247 -0
- devflow_engine/vendor/__init__.py +0 -0
- devflow_engine/vendor/datalumina_genai/__init__.py +11 -0
- devflow_engine/vendor/datalumina_genai/core/__init__.py +0 -0
- devflow_engine/vendor/datalumina_genai/core/exceptions.py +9 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/__init__.py +0 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/agent.py +48 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/agent_streaming_node.py +26 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/base.py +89 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/concurrent.py +30 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/router.py +69 -0
- devflow_engine/vendor/datalumina_genai/core/schema.py +72 -0
- devflow_engine/vendor/datalumina_genai/core/task.py +52 -0
- devflow_engine/vendor/datalumina_genai/core/validate.py +139 -0
- devflow_engine/vendor/datalumina_genai/core/workflow.py +200 -0
- devflow_engine/worker.py +1086 -0
- devflow_engine/worker_guard.py +233 -0
- devflow_engine-1.0.0.dist-info/METADATA +235 -0
- devflow_engine-1.0.0.dist-info/RECORD +393 -0
- devflow_engine-1.0.0.dist-info/WHEEL +4 -0
- devflow_engine-1.0.0.dist-info/entry_points.txt +3 -0
- devin/__init__.py +6 -0
- devin/dag.py +58 -0
- devin/dag_two_arm.py +138 -0
- devin/devin_chat_scenario_catalog.json +588 -0
- devin/devin_eval.py +677 -0
- devin/nodes/__init__.py +0 -0
- devin/nodes/ideation/__init__.py +0 -0
- devin/nodes/ideation/node.py +195 -0
- devin/nodes/ideation/playground.py +267 -0
- devin/nodes/ideation/prompt.md +65 -0
- devin/nodes/ideation/scenarios/continue_refinement.py +13 -0
- devin/nodes/ideation/scenarios/continue_refinement_evals.py +18 -0
- devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py +17 -0
- devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py +16 -0
- devin/nodes/ideation/scenarios/large_idea_split.py +4 -0
- devin/nodes/ideation/scenarios/large_idea_split_evals.py +17 -0
- devin/nodes/ideation/scenarios/source_documentation_added.py +4 -0
- devin/nodes/ideation/scenarios/source_documentation_added_evals.py +16 -0
- devin/nodes/ideation/scenarios/user_says_create_it.py +30 -0
- devin/nodes/ideation/scenarios/user_says_create_it_evals.py +23 -0
- devin/nodes/ideation/scenarios/vague_idea.py +16 -0
- devin/nodes/ideation/scenarios/vague_idea_evals.py +47 -0
- devin/nodes/ideation/tools.json +312 -0
- devin/nodes/insight/__init__.py +0 -0
- devin/nodes/insight/node.py +49 -0
- devin/nodes/insight/playground.py +154 -0
- devin/nodes/insight/prompt.md +61 -0
- devin/nodes/insight/scenarios/architecture_pattern_query.py +15 -0
- devin/nodes/insight/scenarios/architecture_pattern_query_evals.py +25 -0
- devin/nodes/insight/scenarios/codebase_exploration.py +15 -0
- devin/nodes/insight/scenarios/codebase_exploration_evals.py +23 -0
- devin/nodes/insight/scenarios/devin_ideation_routing.py +19 -0
- devin/nodes/insight/scenarios/devin_ideation_routing_evals.py +39 -0
- devin/nodes/insight/scenarios/devin_insight_routing.py +20 -0
- devin/nodes/insight/scenarios/devin_insight_routing_evals.py +40 -0
- devin/nodes/insight/scenarios/operational_debugging.py +15 -0
- devin/nodes/insight/scenarios/operational_debugging_evals.py +23 -0
- devin/nodes/insight/scenarios/operational_question.py +9 -0
- devin/nodes/insight/scenarios/operational_question_evals.py +8 -0
- devin/nodes/insight/scenarios/queue_status.py +15 -0
- devin/nodes/insight/scenarios/queue_status_evals.py +23 -0
- devin/nodes/insight/scenarios/source_doc_explanation.py +14 -0
- devin/nodes/insight/scenarios/source_doc_explanation_evals.py +21 -0
- devin/nodes/insight/scenarios/worker_state_check.py +15 -0
- devin/nodes/insight/scenarios/worker_state_check_evals.py +22 -0
- devin/nodes/insight/tools.json +126 -0
- devin/nodes/intake/__init__.py +0 -0
- devin/nodes/intake/node.py +27 -0
- devin/nodes/intake/playground.py +47 -0
- devin/nodes/intake/prompt.md +12 -0
- devin/nodes/intake/scenarios/ideation_routing.py +4 -0
- devin/nodes/intake/scenarios/ideation_routing_evals.py +5 -0
- devin/nodes/intake/scenarios/insight_routing.py +4 -0
- devin/nodes/intake/scenarios/insight_routing_evals.py +5 -0
- devin/nodes/iterate/README.md +44 -0
- devin/nodes/iterate/__init__.py +1 -0
- devin/nodes/iterate/_archived_design_stages/01-objectives-requirements.md +112 -0
- devin/nodes/iterate/_archived_design_stages/02-evals.md +131 -0
- devin/nodes/iterate/_archived_design_stages/03-tools-and-boundaries.md +110 -0
- devin/nodes/iterate/_archived_design_stages/04-harness-and-playground.md +32 -0
- devin/nodes/iterate/_archived_design_stages/05-prompt-deferred.md +11 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/01-objectives-requirements.md +20 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/02-evals.md +8 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/03-tools-and-boundaries.md +14 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/04-harness-and-playground.md +12 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/01-objectives-requirements.md +20 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/02-evals.md +8 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/03-tools-and-boundaries.md +13 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/04-harness-and-playground.md +12 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md +25 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/02-evals.md +9 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/03-tools-and-boundaries.md +14 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md +12 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md +20 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/02-evals.md +8 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/03-tools-and-boundaries.md +14 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md +13 -0
- devin/nodes/iterate/agent-roles.md +89 -0
- devin/nodes/iterate/agents/README.md +10 -0
- devin/nodes/iterate/artifacts.md +504 -0
- devin/nodes/iterate/contract.md +100 -0
- devin/nodes/iterate/eval-plan.md +74 -0
- devin/nodes/iterate/node.py +100 -0
- devin/nodes/iterate/pipeline/README.md +13 -0
- devin/nodes/iterate/playground-contract.md +76 -0
- devin/nodes/iterate/prompt.md +11 -0
- devin/nodes/iterate/scenarios/README.md +38 -0
- devin/nodes/iterate/scenarios/artifact-and-loop-scenarios.md +101 -0
- devin/nodes/iterate/scenarios/coder_artifact_alignment.py +32 -0
- devin/nodes/iterate/scenarios/coder_artifact_alignment_evals.py +45 -0
- devin/nodes/iterate/scenarios/coder_bounded_fix.py +27 -0
- devin/nodes/iterate/scenarios/coder_bounded_fix_evals.py +45 -0
- devin/nodes/iterate/scenarios/devin_iterate_routing.py +21 -0
- devin/nodes/iterate/scenarios/devin_iterate_routing_evals.py +36 -0
- devin/nodes/iterate/scenarios/framer_scope_boundary.py +25 -0
- devin/nodes/iterate/scenarios/framer_scope_boundary_evals.py +57 -0
- devin/nodes/iterate/scenarios/framer_task_framing.py +25 -0
- devin/nodes/iterate/scenarios/framer_task_framing_evals.py +58 -0
- devin/nodes/iterate/scenarios/iterate_error_fix.py +21 -0
- devin/nodes/iterate/scenarios/iterate_error_fix_evals.py +39 -0
- devin/nodes/iterate/scenarios/iterate_quick_change.py +21 -0
- devin/nodes/iterate/scenarios/iterate_quick_change_evals.py +35 -0
- devin/nodes/iterate/scenarios/iterate_to_idea_promotion.py +23 -0
- devin/nodes/iterate/scenarios/iterate_to_idea_promotion_evals.py +53 -0
- devin/nodes/iterate/scenarios/iterate_to_insight_reroute.py +23 -0
- devin/nodes/iterate/scenarios/iterate_to_insight_reroute_evals.py +53 -0
- devin/nodes/iterate/scenarios/observer_evidence_seam.py +28 -0
- devin/nodes/iterate/scenarios/observer_evidence_seam_evals.py +55 -0
- devin/nodes/iterate/scenarios/observer_repro_creation.py +28 -0
- devin/nodes/iterate/scenarios/observer_repro_creation_evals.py +45 -0
- devin/nodes/iterate/scenarios/routing-matrix.md +45 -0
- devin/nodes/shared/__init__.py +0 -0
- devin/nodes/shared/filemaker_expert.md +80 -0
- devin/nodes/shared/filemaker_expert.py +354 -0
- devin/nodes/shared/filemaker_expert_eval/runner.py +176 -0
- devin/nodes/shared/filemaker_expert_eval/scenarios.json +65 -0
- devin/nodes/shared/goldilocks_advisor_eval/runner.py +214 -0
- devin/nodes/shared/goldilocks_advisor_eval/scenarios.json +58 -0
- devin/nodes/shared/helpers.py +156 -0
- devin/nodes/shared/idea_compliance_advisor_eval/runner.py +252 -0
- devin/nodes/shared/idea_compliance_advisor_eval/scenarios.json +75 -0
- devin/nodes/shared/models.py +44 -0
- devin/nodes/shared/post.py +40 -0
- devin/nodes/shared/router.py +107 -0
- devin/nodes/shared/tools.py +191 -0
- devin/shared/devin-chat-rubric.md +237 -0
- devin/shared/devin-chat-scenario-suite.md +90 -0
- devin/shared/eval_doctrine.md +9 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"id": "layout_analysis",
|
|
4
|
+
"description": "Expert should identify all layouts in the 'My Page . Cleaner' section with counts and purposes",
|
|
5
|
+
"question": "What layouts does the 'My Page . Cleaner' section consist of? List all layouts in that section.",
|
|
6
|
+
"expected_keys": ["My Page . Cleaner", "My Page . Cleaner . Availability", "My Page . Cleaner . iPhone", "KanbanDetails", "geoLocation"],
|
|
7
|
+
"expected_structure": "table with layout name, ID, objects, purpose",
|
|
8
|
+
"scoring": {
|
|
9
|
+
"all_layouts_found": 2,
|
|
10
|
+
"object_counts": 1,
|
|
11
|
+
"purpose_descriptions": 1,
|
|
12
|
+
"table_format": 1
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
"id": "script_flow",
|
|
17
|
+
"description": "Expert should identify key scripts in the inspection completion workflow and explain what happens",
|
|
18
|
+
"question": "What scripts handle the inspection completion workflow? What happens when an inspector marks an area as complete?",
|
|
19
|
+
"expected_keys": ["inspections . layout . save", "inspections . launch", "loadInspectionData", "loadAreas", "complete"],
|
|
20
|
+
"expected_structure": "list of scripts with IDs and step counts",
|
|
21
|
+
"scoring": {
|
|
22
|
+
"key_scripts_identified": 2,
|
|
23
|
+
"script_ids_present": 1,
|
|
24
|
+
"step_counts_present": 1,
|
|
25
|
+
"completion_logic_described": 1
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"id": "data_model",
|
|
30
|
+
"description": "Expert should map the relationships between Inspector, Worksheet, and WorkOrders tables",
|
|
31
|
+
"question": "What are the key relationships between Inspector, Worksheet, and WorkOrders tables? Show the key foreign key fields.",
|
|
32
|
+
"expected_keys": ["_worksheetID", "_custID", "_cleanerID", "Inspector", "Worksheet", "WorkOrders", "foreign key"],
|
|
33
|
+
"expected_structure": "table with direction, field, purpose columns",
|
|
34
|
+
"scoring": {
|
|
35
|
+
"all_three_tables_mentioned": 2,
|
|
36
|
+
"foreign_key_fields_identified": 2,
|
|
37
|
+
"relationship_directions_clear": 1
|
|
38
|
+
}
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"id": "journey_mapping",
|
|
42
|
+
"description": "Expert should trace a customer journey through layouts and scripts",
|
|
43
|
+
"question": "Map the customer journey for creating and completing an inspection. What layouts, scripts, and buttons are involved from start to finish?",
|
|
44
|
+
"expected_keys": ["Account Page", "Inspection Form III", "inspections . launch", "My Page", "save"],
|
|
45
|
+
"expected_structure": "phases with entry point, scripts, layouts",
|
|
46
|
+
"scoring": {
|
|
47
|
+
"entry_point_identified": 1,
|
|
48
|
+
"multiple_phases": 2,
|
|
49
|
+
"layouts_and_scripts_per_phase": 2,
|
|
50
|
+
"completion_end_state": 1
|
|
51
|
+
}
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"id": "cruft_detection",
|
|
55
|
+
"description": "Expert should identify code health issues from cruft summary",
|
|
56
|
+
"question": "What code health issues (uncalled scripts, unreachable layouts, unused fields) does Clean Sweep have?",
|
|
57
|
+
"expected_keys": ["uncalled_scripts", "unreachable_layouts", "count", "0"],
|
|
58
|
+
"expected_structure": "per-category counts with item lists",
|
|
59
|
+
"scoring": {
|
|
60
|
+
"all_categories_covered": 2,
|
|
61
|
+
"counts_present": 1,
|
|
62
|
+
"specific_items_if_any": 1
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
]
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""Eval harness for Goldilocks Advisor.
|
|
2
|
+
|
|
3
|
+
Runs the goldilocks_check advisor through the PI subprocess (matching the
|
|
4
|
+
Pi-Pi pattern in devflow-tools.ts) against scenario fixtures and scores output.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
python3 -m devin.nodes.shared.goldilocks_advisor_eval.runner
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import re
|
|
14
|
+
import subprocess
|
|
15
|
+
import time
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
# Sixth ancestor directory of this file (``parents[0]`` is the containing
# directory); the agent file path below is resolved against it.
# NOTE(review): the depth assumes a particular checkout layout — confirm it
# still holds when the package is installed from a wheel.
_PROJECT_ROOT = Path(__file__).resolve().parents[5]
# Agent definition file, relative to ``_PROJECT_ROOT``.
_AGENT_FILE = ".pi/agents/goldilocks-advisor/goldilocks-advisor.md"
# Defaults for ``run_goldilocks_check``.
# NOTE(review): the repo root is an absolute path on one specific machine;
# callers elsewhere will presumably need to pass ``repo_root`` explicitly.
_DEFAULT_REPO_ROOT = "/Users/devflow/repos/Spicy-Server"
_DEFAULT_PROJECT = "proj_75f63d30"
_DEFAULT_MODEL = "minimax/MiniMax-M2.7"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class ScenarioResult:
    """Scored outcome of running one advisor scenario.

    Field order is part of the constructor signature and must not change.
    """

    # Identifier of the scenario that was scored.
    scenario_id: str
    # Overall pass/fail flag for the scenario.
    passed: bool
    # Points awarded across all scoring criteria.
    score: int
    # Maximum number of points the scenario could award.
    max_score: int
    # Text produced by the advisor for this scenario.
    output: str
    # Points awarded per scoring criterion, keyed by criterion name.
    details: dict[str, int]
    # Time taken by the advisor run (presumably seconds — confirm at call site).
    elapsed: float
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def load_agent_prompt() -> str:
    """Return the advisor agent prompt with any leading front matter removed.

    Reads ``_AGENT_FILE`` relative to ``_PROJECT_ROOT``. When the file opens
    with a ``---`` delimited front-matter block, only the text after the
    closing delimiter is returned, stripped of surrounding whitespace.
    Otherwise the file content is returned unchanged.

    Raises:
        OSError: If the agent file cannot be read.
    """
    # Decode explicitly as UTF-8 so the prompt text does not depend on the
    # locale's default encoding of the machine running the eval.
    content = _PROJECT_ROOT.joinpath(_AGENT_FILE).read_text(encoding="utf-8")
    # The lazy first group stops at the first closing ``---`` line; the greedy
    # capture then takes everything up to the end of the file.
    match = re.match(r"^---\n[\s\S]*?\n---\n([\s\S]*)$", content, re.M)
    return match.group(1).strip() if match else content
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def run_goldilocks_check(
    idea_text: str,
    idea_id: str,
    repo_root: str = _DEFAULT_REPO_ROOT,
    project_id: str = _DEFAULT_PROJECT,
    model: str = _DEFAULT_MODEL,
    timeout: int = 90,
) -> tuple[str, float]:
    """Run the goldilocks advisor once through the ``pi`` CLI.

    The agent prompt plus a context block (repo root, project id, idea id and
    the idea text) is appended to the system prompt, and the idea text is sent
    as the user message.

    Args:
        idea_text: The shaped idea to evaluate.
        idea_id: Identifier included in the context block.
        repo_root: Repository path included in the context block.
        project_id: Project identifier included in the context block.
        model: Model name passed to ``pi --model``.
        timeout: Seconds to wait for the subprocess before giving up.

    Returns:
        A ``(stdout, elapsed_seconds)`` tuple; ``stdout`` is stripped.

    Raises:
        subprocess.TimeoutExpired: If ``pi`` does not finish within *timeout*.
            The child process is killed before the exception propagates.
        OSError: If the ``pi`` executable cannot be started or the agent
            prompt file cannot be read.
    """
    system_prompt = load_agent_prompt()
    context_block = (
        f"Context: repo_root={repo_root}, project_id={project_id}, idea_id={idea_id}\n\n"
        f"Shaped Idea:\n{idea_text}\n\n"
        "Question: Estimate story count and evaluate scoping"
    )
    args = [
        "pi", "--mode", "text", "--no-session", "--no-extensions",
        "--model", model, "--tools", "read,grep,find,ls", "--thinking", "off",
        "--append-system-prompt", system_prompt + "\n\n" + context_block,
        f"Goldilocks scoping check for: {idea_text}",
    ]
    # Monotonic clock: the measured duration cannot be skewed by wall-clock
    # adjustments while the subprocess is running.
    start = time.monotonic()
    # subprocess.run kills and reaps the child when the timeout expires;
    # a bare Popen.communicate(timeout=...) would leave it running.
    # The exit status is deliberately not checked, so a non-zero exit still
    # returns whatever was written to stdout.
    completed = subprocess.run(
        args,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        cwd=str(Path.home()),
        timeout=timeout,
    )
    elapsed = time.monotonic() - start
    return completed.stdout.strip(), elapsed
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _extract_json(text: str) -> dict | None:
|
|
74
|
+
# Try to extract JSON from markdown code block or raw text
|
|
75
|
+
match = re.search(r"```json\s*([\s\S]+?)\s*```", text)
|
|
76
|
+
if match:
|
|
77
|
+
try:
|
|
78
|
+
return json.loads(match.group(1))
|
|
79
|
+
except json.JSONDecodeError:
|
|
80
|
+
pass
|
|
81
|
+
# Try raw JSON
|
|
82
|
+
try:
|
|
83
|
+
return json.loads(text)
|
|
84
|
+
except json.JSONDecodeError:
|
|
85
|
+
pass
|
|
86
|
+
return None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _numeric_story_estimate(parsed: dict) -> float:
    """Return ``parsed["story_estimate"]`` when it is a real number, else 0."""
    value = parsed.get("story_estimate", 0)
    # bool is an int subclass; a true/false "estimate" is not a usable count.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return 0
    return value


def score_scenario(output: str, scenario: dict) -> ScenarioResult:
    """Score one model output against a scenario's rubric.

    Each check stores the points it earned in ``details`` under the rubric key
    (0 when the check fails) and adds them to the score. Points come from
    ``scenario["scoring"]``; a check whose key is absent there is worth 0.

    Args:
        output: Raw model output; JSON is extracted with ``_extract_json``.
        scenario: Mapping with ``id``, ``expected_verdict`` and ``scoring``.

    Returns:
        A ``ScenarioResult`` with ``elapsed`` set to 0.0 and ``output``
        truncated to 500 characters. ``passed`` is true only when the score
        equals the sum of all rubric values.
    """
    scoring = scenario["scoring"]
    expected_verdict = scenario["expected_verdict"]
    max_score = sum(scoring.values())
    details = {}
    score = 0

    parsed = _extract_json(output)
    # Anything that is not a non-empty JSON object is treated as "no answer";
    # this also keeps the .get() calls below from failing on lists or scalars.
    if not isinstance(parsed, dict) or not parsed:
        parsed = None

    def award(key: str, earned: bool) -> None:
        nonlocal score
        points = scoring.get(key, 0) if earned else 0
        details[key] = points
        score += points

    # Verdict check
    verdict = parsed["verdict"] if parsed and "verdict" in parsed else None
    award("verdict_correct", verdict == expected_verdict)

    # Story estimate present
    award("story_estimate_present", parsed is not None and "story_estimate" in parsed)

    # Verdict-specific check. A non-numeric estimate counts as 0 instead of
    # raising TypeError in the range comparison.
    if parsed and expected_verdict == "just_right":
        award("story_estimate_in_range", 2 <= _numeric_story_estimate(parsed) <= 20)
    elif parsed and expected_verdict == "split":
        award("story_estimate_over_20", _numeric_story_estimate(parsed) > 20)
    elif parsed and expected_verdict == "enrichment_needed":
        award("enrichment_gaps_filled", bool(parsed.get("enrichment_gaps", [])))
    else:
        # Unparseable output or unknown verdict: recorded under this key for
        # every expected verdict, as the report has always done.
        details["story_estimate_in_range"] = 0

    # Split suggestions
    if expected_verdict == "split":
        splits = parsed.get("split_suggestions", []) if parsed else []
        award("split_suggestions_present", bool(splits))

    # Cohesive area present
    award("cohesive_area_present", parsed is not None and "cohesive_area" in parsed)

    # Reasoning present
    award("reasoning_present", parsed is not None and "reasoning" in parsed)

    return ScenarioResult(
        scenario_id=scenario["id"],
        passed=score == max_score,
        score=score,
        max_score=max_score,
        output=output[:500],
        details=details,
        elapsed=0.0,
    )
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def run_all_scenarios(scenarios_path: str | Path | None = None) -> list[ScenarioResult]:
    """Run and score every scenario in a scenarios file, printing progress.

    Args:
        scenarios_path: JSON file holding a list of scenario objects. Defaults
            to ``scenarios.json`` next to this module.

    Returns:
        One ``ScenarioResult`` per scenario, in file order, with ``elapsed``
        filled in from the subprocess run.
    """
    if scenarios_path is None:
        scenarios_path = Path(__file__).parent / "scenarios.json"
    # JSON is UTF-8 and the fixture contains non-ASCII punctuation, so do not
    # rely on the platform's default encoding.
    scenarios = json.loads(Path(scenarios_path).read_text(encoding="utf-8"))
    results = []
    for scenario in scenarios:
        print(f"\nRunning: {scenario['id']}")
        output, elapsed = run_goldilocks_check(
            idea_text=scenario["idea_text"],
            idea_id=scenario["idea_id"],
            repo_root=scenario["repo_root"],
            project_id=scenario["project_id"],
        )
        result = score_scenario(output, scenario)
        result.elapsed = elapsed
        results.append(result)
        # Parse once for display; anything that is not a JSON object shows "?".
        parsed = _extract_json(output)
        verdict = parsed.get("verdict", "?") if isinstance(parsed, dict) else "?"
        print(f" Verdict: {verdict} | Score: {result.score}/{result.max_score} ({elapsed:.1f}s)")
        if not result.passed:
            print(f" Missing: {[k for k, v in result.details.items() if v == 0]}")
    return results
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def print_report(results: list[ScenarioResult]) -> None:
    """Print a per-scenario breakdown followed by the overall total.

    Args:
        results: Scored scenarios; each needs ``passed``, ``scenario_id``,
            ``score``, ``max_score`` and a ``details`` mapping of points.
    """
    total = sum(r.score for r in results)
    max_total = sum(r.max_score for r in results)
    divider = "=" * 60
    print(f"\n{divider}")
    print("GOLDILOCKS ADVISOR EVAL REPORT")
    print(divider)
    for r in results:
        status = "✓ PASS" if r.passed else "✗ FAIL"
        print(f"\n{status} {r.scenario_id} {r.score}/{r.max_score}")
        for key, val in r.details.items():
            icon = "✓" if val > 0 else "✗"
            print(f" {icon} {key}: {val}")
    # With no scenarios (or an all-zero rubric) there is nothing to divide by.
    percent = 100 * total / max_total if max_total else 0
    print(f"\n{divider}")
    print(f"TOTAL: {total}/{max_total} ({percent:.0f}%)")
    print(divider)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
if __name__ == "__main__":
    # Script entry point: run the default scenario file and print the report.
    print_report(run_all_scenarios())
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"id": "small_idea_enrichment_needed",
|
|
4
|
+
"description": "Idea is too thin — fewer than 2 stories, so it should return enrichment_needed",
|
|
5
|
+
"idea_text": "Add a cancel button to the settings page",
|
|
6
|
+
"repo_root": "/Users/devflow/repos/Spicy-Server",
|
|
7
|
+
"project_id": "proj_75f63d30",
|
|
8
|
+
"idea_id": "proj_75f63d30_idea_eval_001",
|
|
9
|
+
"expected_verdict": "enrichment_needed",
|
|
10
|
+
"scoring": {
|
|
11
|
+
"verdict_correct": 2,
|
|
12
|
+
"story_estimate_present": 1,
|
|
13
|
+
"enrichment_gaps_filled": 1
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": "medium_idea_just_right",
|
|
18
|
+
"description": "Well-scoped medium idea in the companion chat domain — should return just_right with estimate 2-20",
|
|
19
|
+
"idea_text": "Add greeting customization — let users configure different greeting messages per time of day (morning, afternoon, evening) that the companion uses when starting a new conversation session.",
|
|
20
|
+
"repo_root": "/Users/devflow/repos/Spicy-Server",
|
|
21
|
+
"project_id": "proj_75f63d30",
|
|
22
|
+
"idea_id": "proj_75f63d30_idea_eval_002",
|
|
23
|
+
"expected_verdict": "just_right",
|
|
24
|
+
"scoring": {
|
|
25
|
+
"verdict_correct": 2,
|
|
26
|
+
"story_estimate_in_range": 1,
|
|
27
|
+
"cohesive_area_present": 1
|
|
28
|
+
}
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"id": "large_idea_split",
|
|
32
|
+
"description": "Very large multi-domain idea — should return split with split_suggestions",
|
|
33
|
+
"idea_text": "Rebuild the entire client portal with a new React frontend, PostgreSQL backend, move from REST to GraphQL, implement real-time WebSocket notifications, add multi-tenant billing with Stripe integration, SSO via SAML, and a mobile app via React Native",
|
|
34
|
+
"repo_root": "/Users/devflow/repos/Spicy-Server",
|
|
35
|
+
"project_id": "proj_75f63d30",
|
|
36
|
+
"idea_id": "proj_75f63d30_idea_eval_003",
|
|
37
|
+
"expected_verdict": "split",
|
|
38
|
+
"scoring": {
|
|
39
|
+
"verdict_correct": 2,
|
|
40
|
+
"split_suggestions_present": 2,
|
|
41
|
+
"story_estimate_over_20": 1
|
|
42
|
+
}
|
|
43
|
+
},
|
|
44
|
+
{
|
|
45
|
+
"id": "medium_idea_just_right_variation",
|
|
46
|
+
"description": "Another well-scoped idea in the companion chat domain",
|
|
47
|
+
"idea_text": "Add persona tone controls so users can switch between formal, casual, and brief response styles for the companion chat.",
|
|
48
|
+
"repo_root": "/Users/devflow/repos/Spicy-Server",
|
|
49
|
+
"project_id": "proj_75f63d30",
|
|
50
|
+
"idea_id": "proj_75f63d30_idea_eval_004",
|
|
51
|
+
"expected_verdict": "just_right",
|
|
52
|
+
"scoring": {
|
|
53
|
+
"verdict_correct": 2,
|
|
54
|
+
"story_estimate_present": 1,
|
|
55
|
+
"reasoning_present": 1
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
]
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from devflow_engine.agentic_prompts import load_agentic_prompt_lines, load_agentic_prompt_text
|
|
11
|
+
from devflow_engine.devflow_state import _postgrest_request, _resolve_supabase_rest_config, publish_devflow_state
|
|
12
|
+
from devflow_engine.project_registry import find_project_for_repo_root
|
|
13
|
+
from devflow_engine.stores.execution_store import ExecutionStore
|
|
14
|
+
from devflow_engine.idea.sufficiency import load_idea_source
|
|
15
|
+
|
|
16
|
+
# Fifth ancestor directory of this file; base for the shared
# prompts/devin/<node>/prompt.md lookup in _resolve_node_prompt_path.
_REPO_ROOT = Path(__file__).resolve().parents[4]
# Path segment used for this DAG's directory under an idea's pipelines/ folder.
DAG_ID = 'devin_chat_dag'
# Module-wide runtime context, installed by set_runtime_store and read by store_run.
_CURRENT_STORE: ExecutionStore | None = None
_CURRENT_RUN_ID: str | None = None
# Lower-case keyword sets used by classify_route; each term is matched as a
# plain substring of the lower-cased request text.
_SOFTWARE_SIGNAL_TERMS = {'app','application','api','automation','backend','bug','dashboard','feature','frontend','integration','mobile','portal','saas','software','system','tool','ui','web','website','workflow'}
_INSIGHT_SIGNAL_TERMS = {'status','queue','worker','repo','code','logs','error','why','how','where','what','investigate','explain','architecture','runtime','process','pipeline','see','verify','find','search','look','check','locate'}
_NON_SOFTWARE_SIGNAL_TERMS = {'blog post','flyer','hiring','logo','marketing','newsletter','podcast','seo','social media','video script'}
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
class DevinChatDagResult:
    """Immutable result record for a devin chat DAG run.

    NOTE(review): nothing in this part of the file constructs the record, so
    the field notes below are inferred from names and types; confirm them
    against the producer.
    """

    # presumably a process-style exit status (0 = success) -- TODO confirm
    exit_code: int
    # identifier of the run this result belongs to
    run_id: str
    # presumably the directory returned by pipeline_root() for this run -- TODO confirm
    pipeline_dir: Path
    # presumably the human-facing message for the run -- TODO confirm
    message: str
    # free-form outcome payload; schema is not visible here
    outcome: dict[str, Any]
|
|
31
|
+
|
|
32
|
+
def set_runtime_store(store: ExecutionStore | None, run_id: str | None) -> None:
    """Install the module-wide execution store and run id.

    Both values are stored as given, so passing ``None`` clears them.
    """
    global _CURRENT_STORE, _CURRENT_RUN_ID
    _CURRENT_STORE, _CURRENT_RUN_ID = store, run_id
|
|
36
|
+
|
|
37
|
+
def store_run() -> tuple[ExecutionStore, str]:
    """Return the installed ``(store, run_id)`` pair.

    Raises:
        RuntimeError: If either the store or the run id has not been set.
    """
    store, run_id = _CURRENT_STORE, _CURRENT_RUN_ID
    if store is not None and run_id is not None:
        return store, run_id
    raise RuntimeError('devin chat dag missing runtime store/run_id')
|
|
41
|
+
|
|
42
|
+
def write_json(path: Path, payload: dict[str, Any]) -> None:
    """Write ``payload`` to ``path`` as indented, key-sorted UTF-8 JSON.

    Missing parent directories are created first; the file ends with a newline.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, indent=2, sort_keys=True)
    path.write_text(f"{serialized}\n", encoding="utf-8")
|
|
45
|
+
|
|
46
|
+
def parse_list_option(planes: list[str]) -> list[str]:
    """Return the distinct, whitespace-trimmed, non-empty items in sorted order."""
    cleaned = set()
    for item in planes:
        text = str(item).strip()
        if text:
            cleaned.add(text)
    return sorted(cleaned)
|
|
48
|
+
|
|
49
|
+
def stable_id(prefix: str, payload: Any, *, size: int = 12) -> str:
    """Build a deterministic id: ``prefix`` plus a truncated SHA-256 hex digest.

    The digest is taken over the key-sorted JSON encoding of ``payload``, so
    equal payloads give equal ids regardless of dict key order.

    Args:
        prefix: Text placed in front of the digest.
        payload: JSON-serializable value to fingerprint.
        size: Number of hex characters kept from the digest.
    """
    canonical = json.dumps(payload, sort_keys=True)
    hasher = hashlib.sha256()
    hasher.update(canonical.encode('utf-8'))
    fingerprint = hasher.hexdigest()[:size]
    return f'{prefix}{fingerprint}'
|
|
52
|
+
|
|
53
|
+
def build_pipeline_key(*, repo_root: Path, idea_id: str, text: str | None, source_path: Path | None, max_stories: int, planes: list[str]) -> str:
    """Derive the ``run_``-prefixed pipeline key for one set of run inputs.

    The key is a ``stable_id`` over the idea id, story limit, normalized
    planes, repo root and the idea text loaded via ``load_idea_source``.
    """
    source_text, _unused = load_idea_source(text=text, source_path=source_path)
    fingerprint_inputs = {
        'idea_id': idea_id,
        'max_stories': max_stories,
        'planes': parse_list_option(planes),
        'repo_root': str(repo_root),
        'raw_text': source_text,
    }
    return stable_id('run_', fingerprint_inputs)
|
|
56
|
+
|
|
57
|
+
def pipeline_root(repo_root: Path, *, idea_id: str, pipeline_key: str) -> Path:
    """Return ``<repo_root>/.devflow/ideas/<idea_id>/pipelines/<DAG_ID>/<pipeline_key>``."""
    idea_dir = repo_root.joinpath('.devflow', 'ideas', idea_id)
    return idea_dir.joinpath('pipelines', DAG_ID, pipeline_key)
|
|
59
|
+
|
|
60
|
+
def resolve_project_id(repo_root: Path, *, idea_id: str) -> str:
    """Resolve the project id for an idea, with fallbacks.

    Lookup order:
      1. ``project_id`` in ``.devflow/ideas/<idea_id>/idea.json`` (best effort;
         any error while reading or decoding the file is ignored).
      2. ``project_id`` of the entry from ``find_project_for_repo_root``.
      3. The placeholder ``unregistered:<idea_id>``.
    """
    metadata_file = repo_root / '.devflow' / 'ideas' / idea_id / 'idea.json'
    if metadata_file.exists():
        try:
            metadata = json.loads(metadata_file.read_text(encoding='utf-8'))
            recorded = str(metadata.get('project_id') or '').strip()
        except Exception:
            # Deliberately best-effort: fall through to the registry lookup.
            recorded = ''
        if recorded:
            return recorded

    entry = find_project_for_repo_root(repo_root)
    registered = str(entry.get('project_id') or '').strip() if entry is not None else None
    if registered:
        return registered
    return f'unregistered:{idea_id}'
|
|
73
|
+
|
|
74
|
+
def dfs_running(*, project_id: str, run_id: str, summary: str, idea_id: str) -> None:
    """Publish a ``running`` / ``processing`` devflow state for the idea's run."""
    publish_devflow_state(
        project_id=project_id,
        run_id=run_id,
        current_state='running',
        current_status='processing',
        run_summary=summary,
        display='project',
        display_path=f'idea:{idea_id}',
    )
|
|
76
|
+
|
|
77
|
+
def dfs_node_running(*, project_id: str, run_id: str, node_id: str, summary: str, idea_id: str) -> None:
    """Publish a running state whose summary is suffixed with ``[node_id]``."""
    tagged_summary = f'{summary} [{node_id}]'
    dfs_running(project_id=project_id, run_id=run_id, summary=tagged_summary, idea_id=idea_id)
|
|
79
|
+
|
|
80
|
+
def dfs_terminal(*, project_id: str, run_id: str, summary: str, current_state: str, current_status: str, idea_id: str, error_message: str | None = None) -> None:
    """Publish a caller-supplied state/status (and optional error) for the idea's run."""
    publish_devflow_state(
        project_id=project_id,
        run_id=run_id,
        current_state=current_state,
        current_status=current_status,
        run_summary=summary,
        error_message=error_message,
        display='project',
        display_path=f'idea:{idea_id}',
    )
|
|
82
|
+
|
|
83
|
+
def _resolve_node_prompt_path(node_file: str) -> Path:
    """Locate the ``prompt.md`` that belongs to a node source file.

    A ``prompt.md`` next to the node file wins. Otherwise the path segment
    following the first ``nodes`` segment is taken as the node name and
    ``<_REPO_ROOT>/prompts/devin/<node name>/prompt.md`` is used.

    Raises:
        FileNotFoundError: If the path has no ``nodes/<name>`` segment pair, or
            neither candidate file exists.
    """
    resolved = Path(node_file).resolve()
    sibling = resolved.with_name('prompt.md')
    if sibling.exists():
        return sibling

    segments = list(resolved.parts)
    try:
        node_name = segments[segments.index('nodes') + 1]
    except (ValueError, IndexError) as exc:
        raise FileNotFoundError(f'Could not resolve prompt path for node file: {node_file}') from exc

    fallback = _REPO_ROOT / 'prompts' / 'devin' / node_name / 'prompt.md'
    if not fallback.exists():
        raise FileNotFoundError(f'No prompt.md found for node file: {node_file}')
    return fallback
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def load_node_prompt_text(node_file: str) -> str:
    """Return the result of ``load_agentic_prompt_text`` for the node's resolved prompt file."""
    prompt_path = _resolve_node_prompt_path(node_file)
    return load_agentic_prompt_text(str(prompt_path))
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def load_node_prompt_lines(node_file: str) -> list[str]:
    """Return the result of ``load_agentic_prompt_lines`` for the node's resolved prompt file."""
    prompt_path = _resolve_node_prompt_path(node_file)
    return load_agentic_prompt_lines(str(prompt_path))
|
|
106
|
+
|
|
107
|
+
def classify_route(raw_text: str) -> tuple[str, dict[str, Any]]:
    """Route a request to ``'insight'``, ``'ideation'`` or ``'neither'``.

    Rules are checked in order against the lower-cased text; the first match
    decides the route:
      1. operational / code-explanation phrases -> insight
      2. repo content verification phrases -> insight
      3. software terms present and at least as many as non-software terms -> ideation
      4. a build verb with no non-software and no insight terms -> ideation
      5. insight terms together with the word ``project`` -> insight
      6. otherwise -> neither

    NOTE(review): every term is matched as a substring, so short terms can hit
    inside longer words (for example ``ui`` inside ``build``) -- confirm this
    is intended.

    Returns:
        ``(route_arm, details)`` where ``details`` repeats the route and lists
        the matched terms, the reason and the classifier name.
    """
    lowered = raw_text.lower()

    def matched(terms) -> list[str]:
        return sorted(term for term in terms if term in lowered)

    def mentions_any(needles) -> bool:
        return any(needle in lowered for needle in needles)

    software_hits = matched(_SOFTWARE_SIGNAL_TERMS)
    insight_hits = matched(_INSIGHT_SIGNAL_TERMS)
    non_software_hits = matched(_NON_SOFTWARE_SIGNAL_TERMS)

    operational_phrases = ('queue', 'worker', 'status', 'repo', 'codebase', 'how does', 'where is', 'why is', 'what is happening')
    verification_phrases = ('can you see', 'can you find', 'can you locate', 'please verify', 'can you check', 'do you see', 'do you have the file', 'are the files in')
    build_verbs = ('build', 'create', 'add', 'implement', 'ship', 'plan')

    if mentions_any(operational_phrases):
        route_arm, reason = 'insight', 'project-specific operational or code/explanation request'
    elif mentions_any(verification_phrases):
        route_arm, reason = 'insight', 'repo content verification request'
    elif software_hits and len(software_hits) >= len(non_software_hits):
        route_arm, reason = 'ideation', 'software/product change request'
    elif mentions_any(build_verbs) and not non_software_hits and not insight_hits:
        route_arm, reason = 'ideation', 'forward-looking implementation/planning request'
    elif insight_hits and 'project' in lowered:
        route_arm, reason = 'insight', 'project-specific investigation request'
    else:
        route_arm, reason = 'neither', 'not clearly project-specific software delivery work'

    details = {
        'route_arm': route_arm,
        'software_signals': software_hits,
        'insight_signals': insight_hits,
        'non_software_signals': non_software_hits,
        'reason': reason,
        'classifier': 'deterministic_rules_v1',
    }
    return route_arm, details
|
|
125
|
+
|
|
126
|
+
def build_human_facing_message(*, response_payload: dict[str, Any]) -> str:
    """Compose the user-visible reply from a response payload.

    The reply is ``response_message`` followed, when ``suggested_next_step``
    is present, by ``"Next, you can <step>."``. The step's leading capital is
    lowered so it reads as a continuation, unless the step starts with an
    acronym. Trailing periods on the step are dropped so the sentence does not
    end in ``..``.

    Args:
        response_payload: Mapping that may hold ``response_message`` and
            ``suggested_next_step``; missing or falsy values count as empty.

    Returns:
        The message terminated by a newline; ``"I finished that step.\\n"``
        when both fields are empty.
    """
    response_message = str(response_payload.get('response_message') or '').strip()
    next_step = str(response_payload.get('suggested_next_step') or '').strip()
    # The sentence template adds its own period.
    next_step = next_step.rstrip('.').rstrip()

    next_step_sentence = ''
    if next_step:
        # "API keys ..." must not become "aPI keys ...".
        starts_with_acronym = len(next_step) > 1 and next_step[1].isupper()
        if next_step[0].isupper() and not starts_with_acronym:
            next_step = next_step[0].lower() + next_step[1:]
        next_step_sentence = f"Next, you can {next_step}."

    parts = [part for part in (response_message, next_step_sentence) if part]
    return (' '.join(parts).strip() or 'I finished that step.') + "\n"
|
|
135
|
+
|
|
136
|
+
def maybe_post_agent_message(*, row: dict[str, Any]) -> dict[str, Any]:
    """Best-effort insert of ``row`` into the ``agent_agent_messages`` table.

    Never raises for delivery problems; the returned mapping says what happened:
      * under pytest (``PYTEST_CURRENT_TEST`` set): nothing is sent and the
        row is returned with a placeholder id;
      * no Supabase REST config: row with ``status='local_only'``;
      * POST returned a non-empty list: its first element, as returned;
      * POST returned anything else: row with ``status='posted'`` and the
        returned ``id`` when available;
      * the request raised: row with ``status='local_only'``.

    In the synthesized mappings an ``id`` already present in ``row`` overrides
    the placeholder id.
    """
    if os.environ.get('PYTEST_CURRENT_TEST'):
        return {'id': 'test-agent-agent-message', **row}
    config = _resolve_supabase_rest_config()
    if config is None:
        return {'id': 'local-only-agent-agent-message', **row, 'status': 'local_only'}
    url, key = config
    table_url = f'{url}/rest/v1/agent_agent_messages'
    try:
        result = _postgrest_request(
            method='POST',
            url=table_url,
            key=key,
            body=row,
            prefer='return=representation',
        )
    except Exception:
        # Delivery is best-effort: a failed request degrades to local-only.
        return {'id': 'local-only-agent-agent-message', **row, 'status': 'local_only'}
    if isinstance(result, list) and result:
        return result[0]
    # The request succeeded. An empty list or non-dict body must not be
    # reported as local_only just because it has no .get().
    posted_id = result.get('id', 'unknown') if isinstance(result, dict) else 'unknown'
    return {'id': posted_id, **row, 'status': 'posted'}
|