devflow-engine 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devflow_engine/__init__.py +3 -0
- devflow_engine/agentic_prompts.py +100 -0
- devflow_engine/agentic_runtime.py +398 -0
- devflow_engine/api_key_flow_harness.py +539 -0
- devflow_engine/api_keys.py +357 -0
- devflow_engine/bootstrap/__init__.py +2 -0
- devflow_engine/bootstrap/provision_from_template.py +84 -0
- devflow_engine/cli/__init__.py +0 -0
- devflow_engine/cli/app.py +7270 -0
- devflow_engine/core/__init__.py +0 -0
- devflow_engine/core/config.py +86 -0
- devflow_engine/core/logging.py +29 -0
- devflow_engine/core/paths.py +45 -0
- devflow_engine/core/toml_kv.py +33 -0
- devflow_engine/devflow_event_worker.py +1292 -0
- devflow_engine/devflow_state.py +201 -0
- devflow_engine/devin2/__init__.py +9 -0
- devflow_engine/devin2/agent_definition.py +120 -0
- devflow_engine/devin2/pi_runner.py +204 -0
- devflow_engine/devin_orchestration.py +69 -0
- devflow_engine/docs/prompts/anti-patterns.md +42 -0
- devflow_engine/docs/prompts/devin-agent-prompt.md +55 -0
- devflow_engine/docs/prompts/devin2-agent-prompt.md +81 -0
- devflow_engine/docs/prompts/examples/devin-vapi-clone-reference-exchange.json +85 -0
- devflow_engine/doctor/__init__.py +2 -0
- devflow_engine/doctor/triage.py +140 -0
- devflow_engine/error/__init__.py +0 -0
- devflow_engine/error/remediation.py +21 -0
- devflow_engine/errors/error_solver_dag.py +522 -0
- devflow_engine/errors/runtime_observability.py +67 -0
- devflow_engine/idea/__init__.py +4 -0
- devflow_engine/idea/actors.py +481 -0
- devflow_engine/idea/agentic.py +465 -0
- devflow_engine/idea/analyze.py +93 -0
- devflow_engine/idea/devin_chat_dag.py +1 -0
- devflow_engine/idea/diff.py +99 -0
- devflow_engine/idea/drafts.py +446 -0
- devflow_engine/idea/idea_creation_dag.py +643 -0
- devflow_engine/idea/ideation_enrichment.py +355 -0
- devflow_engine/idea/ideation_enrichment_worker.py +19 -0
- devflow_engine/idea/paths.py +28 -0
- devflow_engine/idea/promote.py +53 -0
- devflow_engine/idea/redaction.py +27 -0
- devflow_engine/idea/repo_tools.py +1277 -0
- devflow_engine/idea/response_mode.py +30 -0
- devflow_engine/idea/story_pipeline.py +1585 -0
- devflow_engine/idea/sufficiency.py +376 -0
- devflow_engine/idea/traditional_stories.py +1257 -0
- devflow_engine/implementation/__init__.py +0 -0
- devflow_engine/implementation/alembic_preflight.py +700 -0
- devflow_engine/implementation/dag.py +8450 -0
- devflow_engine/implementation/green_gate.py +93 -0
- devflow_engine/implementation/prompts.py +108 -0
- devflow_engine/implementation/test_runtime.py +623 -0
- devflow_engine/integration/__init__.py +19 -0
- devflow_engine/integration/agentic.py +66 -0
- devflow_engine/integration/dag.py +3539 -0
- devflow_engine/integration/prompts.py +114 -0
- devflow_engine/integration/supabase_schema.sql +31 -0
- devflow_engine/integration/supabase_sync.py +177 -0
- devflow_engine/llm/__init__.py +1 -0
- devflow_engine/llm/cli_one_shot.py +84 -0
- devflow_engine/llm/cli_stream.py +371 -0
- devflow_engine/llm/execution_context.py +26 -0
- devflow_engine/llm/invoke.py +1322 -0
- devflow_engine/llm/provider_api.py +304 -0
- devflow_engine/llm/repo_knowledge.py +588 -0
- devflow_engine/llm_primitives.py +315 -0
- devflow_engine/orchestration.py +62 -0
- devflow_engine/planning/__init__.py +0 -0
- devflow_engine/planning/analyze_repo.py +92 -0
- devflow_engine/planning/render_drafts.py +133 -0
- devflow_engine/playground/__init__.py +0 -0
- devflow_engine/playground/hooks.py +26 -0
- devflow_engine/playwright_workflow/__init__.py +5 -0
- devflow_engine/playwright_workflow/dag.py +1317 -0
- devflow_engine/process/__init__.py +5 -0
- devflow_engine/process/dag.py +59 -0
- devflow_engine/project_registration/__init__.py +3 -0
- devflow_engine/project_registration/dag.py +1581 -0
- devflow_engine/project_registry.py +109 -0
- devflow_engine/prompts/devin/generic/prompt.md +6 -0
- devflow_engine/prompts/devin/ideation/prompt.md +263 -0
- devflow_engine/prompts/devin/ideation/scenarios.md +5 -0
- devflow_engine/prompts/devin/ideation_loop/prompt.md +6 -0
- devflow_engine/prompts/devin/insight/prompt.md +11 -0
- devflow_engine/prompts/devin/insight/scenarios.md +5 -0
- devflow_engine/prompts/devin/intake/prompt.md +15 -0
- devflow_engine/prompts/devin/iterate/prompt.md +12 -0
- devflow_engine/prompts/devin/shared/eval_doctrine.md +9 -0
- devflow_engine/prompts/devin/shared/principles.md +246 -0
- devflow_engine/prompts/devin_eval/assessment/prompt.md +18 -0
- devflow_engine/prompts/idea/api_ideation_agent/prompt.md +8 -0
- devflow_engine/prompts/idea/api_insight_agent/prompt.md +8 -0
- devflow_engine/prompts/idea/response_doctrine/prompt.md +18 -0
- devflow_engine/prompts/implementation/dependency_assessment/prompt.md +12 -0
- devflow_engine/prompts/implementation/green/green/prompt.md +11 -0
- devflow_engine/prompts/implementation/green/node_config/prompt.md +3 -0
- devflow_engine/prompts/implementation/green_review/outcome_review/prompt.md +5 -0
- devflow_engine/prompts/implementation/green_review/prior_run_review/prompt.md +5 -0
- devflow_engine/prompts/implementation/red/prompt.md +27 -0
- devflow_engine/prompts/implementation/redreview/prompt.md +23 -0
- devflow_engine/prompts/implementation/redreview_repair/prompt.md +16 -0
- devflow_engine/prompts/implementation/setupdoc/prompt.md +10 -0
- devflow_engine/prompts/implementation/story_planning/prompt.md +13 -0
- devflow_engine/prompts/implementation/test_design/prompt.md +27 -0
- devflow_engine/prompts/integration/README.md +185 -0
- devflow_engine/prompts/integration/green/example.md +67 -0
- devflow_engine/prompts/integration/green/green/prompt.md +10 -0
- devflow_engine/prompts/integration/green/node_config/prompt.md +42 -0
- devflow_engine/prompts/integration/green/past_prompts/20260417T212300/green/prompt.md +15 -0
- devflow_engine/prompts/integration/green/past_prompts/20260417T212300/node_config/prompt.md +42 -0
- devflow_engine/prompts/integration/green_enrich/example.md +79 -0
- devflow_engine/prompts/integration/green_enrich/green_enrich/prompt.md +9 -0
- devflow_engine/prompts/integration/green_enrich/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/green_enrich/prompt.md +14 -0
- devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/red/code_repair/prompt.md +12 -0
- devflow_engine/prompts/integration/red/example.md +152 -0
- devflow_engine/prompts/integration/red/node_config/prompt.md +86 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/node_config/prompt.md +84 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red/prompt.md +16 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red_repair/prompt.md +15 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T215032/node_config/prompt.md +84 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T215032/red_repair/prompt.md +11 -0
- devflow_engine/prompts/integration/red/red/prompt.md +11 -0
- devflow_engine/prompts/integration/red/red_repair/prompt.md +12 -0
- devflow_engine/prompts/integration/red_review/example.md +71 -0
- devflow_engine/prompts/integration/red_review/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/red_review/prompt.md +15 -0
- devflow_engine/prompts/integration/red_review/red_review/prompt.md +9 -0
- devflow_engine/prompts/integration/resolve/example.md +111 -0
- devflow_engine/prompts/integration/resolve/node_config/prompt.md +64 -0
- devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/node_config/prompt.md +64 -0
- devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_implicated_users/prompt.md +15 -0
- devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_side_effects/prompt.md +15 -0
- devflow_engine/prompts/integration/resolve/resolve_implicated_users/prompt.md +10 -0
- devflow_engine/prompts/integration/resolve/resolve_side_effects/prompt.md +10 -0
- devflow_engine/prompts/integration/validate/build_idea_acceptance_coverage/prompt.md +12 -0
- devflow_engine/prompts/integration/validate/code_repair/prompt.md +13 -0
- devflow_engine/prompts/integration/validate/example.md +143 -0
- devflow_engine/prompts/integration/validate/node_config/prompt.md +87 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/node_config/prompt.md +67 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_enrich_gate/prompt.md +17 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_repair/prompt.md +16 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/node_config/prompt.md +67 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/validate_repair/prompt.md +9 -0
- devflow_engine/prompts/integration/validate/validate_enrich_gate/prompt.md +10 -0
- devflow_engine/prompts/integration/validate/validate_repair/prompt.md +20 -0
- devflow_engine/prompts/integration/write_workflows/example.md +100 -0
- devflow_engine/prompts/integration/write_workflows/node_config/prompt.md +44 -0
- devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/node_config/prompt.md +44 -0
- devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/write_workflows/prompt.md +17 -0
- devflow_engine/prompts/integration/write_workflows/write_workflows/prompt.md +11 -0
- devflow_engine/prompts/iterate/README.md +7 -0
- devflow_engine/prompts/iterate/coder/prompt.md +11 -0
- devflow_engine/prompts/iterate/framer/prompt.md +11 -0
- devflow_engine/prompts/iterate/iterator/prompt.md +13 -0
- devflow_engine/prompts/iterate/observer/prompt.md +11 -0
- devflow_engine/prompts/recovery/diagnosis/prompt.md +7 -0
- devflow_engine/prompts/recovery/execution/prompt.md +8 -0
- devflow_engine/prompts/recovery/execution_verification/prompt.md +7 -0
- devflow_engine/prompts/recovery/failure_investigation/prompt.md +10 -0
- devflow_engine/prompts/recovery/preflight_health_repo_repair/prompt.md +8 -0
- devflow_engine/prompts/recovery/remediation_execution/prompt.md +11 -0
- devflow_engine/prompts/recovery/root_cause_investigation/prompt.md +12 -0
- devflow_engine/prompts/scope_idea/doctrine/prompt.md +7 -0
- devflow_engine/prompts/source_doc_eval/document/prompt.md +6 -0
- devflow_engine/prompts/source_doc_eval/targeted_mutation/prompt.md +9 -0
- devflow_engine/prompts/source_doc_mutation/domain_entities/prompt.md +6 -0
- devflow_engine/prompts/source_doc_mutation/product_brief/prompt.md +6 -0
- devflow_engine/prompts/source_doc_mutation/project_doc_coherence/prompt.md +7 -0
- devflow_engine/prompts/source_doc_mutation/project_doc_render/prompt.md +9 -0
- devflow_engine/prompts/source_doc_mutation/source_doc_coherence/prompt.md +5 -0
- devflow_engine/prompts/source_doc_mutation/source_doc_enrichment_coherence/prompt.md +6 -0
- devflow_engine/prompts/source_doc_mutation/user_workflows/prompt.md +6 -0
- devflow_engine/prompts/source_scope/doctrine/prompt.md +10 -0
- devflow_engine/prompts/ui_grounding/doctrine/prompt.md +7 -0
- devflow_engine/recovery/__init__.py +3 -0
- devflow_engine/recovery/dag.py +2609 -0
- devflow_engine/recovery/models.py +220 -0
- devflow_engine/refactor.py +93 -0
- devflow_engine/registry/__init__.py +1 -0
- devflow_engine/registry/cards.py +238 -0
- devflow_engine/registry/domain_normalize.py +60 -0
- devflow_engine/registry/effects.py +65 -0
- devflow_engine/registry/enforce_report.py +150 -0
- devflow_engine/registry/module_cards_classify.py +164 -0
- devflow_engine/registry/module_cards_draft.py +184 -0
- devflow_engine/registry/module_cards_gate.py +59 -0
- devflow_engine/registry/packages.py +347 -0
- devflow_engine/registry/pathways.py +323 -0
- devflow_engine/review/__init__.py +11 -0
- devflow_engine/review/dag.py +588 -0
- devflow_engine/review/review_story.py +67 -0
- devflow_engine/scope_idea/__init__.py +3 -0
- devflow_engine/scope_idea/agentic.py +39 -0
- devflow_engine/scope_idea/dag.py +1069 -0
- devflow_engine/scope_idea/models.py +175 -0
- devflow_engine/skills/builtins/devflow/queue_failure_investigation/SKILL.md +112 -0
- devflow_engine/skills/builtins/devflow/queue_idea_to_story/SKILL.md +120 -0
- devflow_engine/skills/builtins/devflow/queue_integration/SKILL.md +105 -0
- devflow_engine/skills/builtins/devflow/queue_recovery/SKILL.md +108 -0
- devflow_engine/skills/builtins/devflow/queue_runtime_core/SKILL.md +155 -0
- devflow_engine/skills/builtins/devflow/queue_story_implementation/SKILL.md +122 -0
- devflow_engine/skills/builtins/devin/idea_to_story_handoff/SKILL.md +120 -0
- devflow_engine/skills/builtins/devin/ideation/SKILL.md +168 -0
- devflow_engine/skills/builtins/devin/ideation/state-and-phrasing-reference.md +18 -0
- devflow_engine/skills/builtins/devin/insight/SKILL.md +22 -0
- devflow_engine/skills/registry.example.yaml +42 -0
- devflow_engine/source_doc_assumptions.py +291 -0
- devflow_engine/source_doc_mutation_dag.py +1606 -0
- devflow_engine/source_doc_mutation_eval.py +417 -0
- devflow_engine/source_doc_mutation_worker.py +25 -0
- devflow_engine/source_docs_schema.py +207 -0
- devflow_engine/source_docs_updater.py +309 -0
- devflow_engine/source_scope/__init__.py +15 -0
- devflow_engine/source_scope/agentic.py +45 -0
- devflow_engine/source_scope/dag.py +1626 -0
- devflow_engine/source_scope/models.py +177 -0
- devflow_engine/stores/__init__.py +0 -0
- devflow_engine/stores/execution_store.py +3534 -0
- devflow_engine/story/__init__.py +0 -0
- devflow_engine/story/contracts.py +160 -0
- devflow_engine/story/discovery.py +47 -0
- devflow_engine/story/evidence.py +118 -0
- devflow_engine/story/hashing.py +27 -0
- devflow_engine/story/implemented_queue_purge.py +148 -0
- devflow_engine/story/indexer.py +105 -0
- devflow_engine/story/io.py +20 -0
- devflow_engine/story/markdown_contracts.py +298 -0
- devflow_engine/story/reconciliation.py +408 -0
- devflow_engine/story/validate_stories.py +149 -0
- devflow_engine/story/validate_tests_story.py +512 -0
- devflow_engine/story/validation.py +133 -0
- devflow_engine/ui_grounding/__init__.py +11 -0
- devflow_engine/ui_grounding/agentic.py +31 -0
- devflow_engine/ui_grounding/dag.py +874 -0
- devflow_engine/ui_grounding/models.py +224 -0
- devflow_engine/ui_grounding/pencil_bridge.py +247 -0
- devflow_engine/vendor/__init__.py +0 -0
- devflow_engine/vendor/datalumina_genai/__init__.py +11 -0
- devflow_engine/vendor/datalumina_genai/core/__init__.py +0 -0
- devflow_engine/vendor/datalumina_genai/core/exceptions.py +9 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/__init__.py +0 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/agent.py +48 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/agent_streaming_node.py +26 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/base.py +89 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/concurrent.py +30 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/router.py +69 -0
- devflow_engine/vendor/datalumina_genai/core/schema.py +72 -0
- devflow_engine/vendor/datalumina_genai/core/task.py +52 -0
- devflow_engine/vendor/datalumina_genai/core/validate.py +139 -0
- devflow_engine/vendor/datalumina_genai/core/workflow.py +200 -0
- devflow_engine/worker.py +1086 -0
- devflow_engine/worker_guard.py +233 -0
- devflow_engine-1.0.0.dist-info/METADATA +235 -0
- devflow_engine-1.0.0.dist-info/RECORD +393 -0
- devflow_engine-1.0.0.dist-info/WHEEL +4 -0
- devflow_engine-1.0.0.dist-info/entry_points.txt +3 -0
- devin/__init__.py +6 -0
- devin/dag.py +58 -0
- devin/dag_two_arm.py +138 -0
- devin/devin_chat_scenario_catalog.json +588 -0
- devin/devin_eval.py +677 -0
- devin/nodes/__init__.py +0 -0
- devin/nodes/ideation/__init__.py +0 -0
- devin/nodes/ideation/node.py +195 -0
- devin/nodes/ideation/playground.py +267 -0
- devin/nodes/ideation/prompt.md +65 -0
- devin/nodes/ideation/scenarios/continue_refinement.py +13 -0
- devin/nodes/ideation/scenarios/continue_refinement_evals.py +18 -0
- devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py +17 -0
- devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py +16 -0
- devin/nodes/ideation/scenarios/large_idea_split.py +4 -0
- devin/nodes/ideation/scenarios/large_idea_split_evals.py +17 -0
- devin/nodes/ideation/scenarios/source_documentation_added.py +4 -0
- devin/nodes/ideation/scenarios/source_documentation_added_evals.py +16 -0
- devin/nodes/ideation/scenarios/user_says_create_it.py +30 -0
- devin/nodes/ideation/scenarios/user_says_create_it_evals.py +23 -0
- devin/nodes/ideation/scenarios/vague_idea.py +16 -0
- devin/nodes/ideation/scenarios/vague_idea_evals.py +47 -0
- devin/nodes/ideation/tools.json +312 -0
- devin/nodes/insight/__init__.py +0 -0
- devin/nodes/insight/node.py +49 -0
- devin/nodes/insight/playground.py +154 -0
- devin/nodes/insight/prompt.md +61 -0
- devin/nodes/insight/scenarios/architecture_pattern_query.py +15 -0
- devin/nodes/insight/scenarios/architecture_pattern_query_evals.py +25 -0
- devin/nodes/insight/scenarios/codebase_exploration.py +15 -0
- devin/nodes/insight/scenarios/codebase_exploration_evals.py +23 -0
- devin/nodes/insight/scenarios/devin_ideation_routing.py +19 -0
- devin/nodes/insight/scenarios/devin_ideation_routing_evals.py +39 -0
- devin/nodes/insight/scenarios/devin_insight_routing.py +20 -0
- devin/nodes/insight/scenarios/devin_insight_routing_evals.py +40 -0
- devin/nodes/insight/scenarios/operational_debugging.py +15 -0
- devin/nodes/insight/scenarios/operational_debugging_evals.py +23 -0
- devin/nodes/insight/scenarios/operational_question.py +9 -0
- devin/nodes/insight/scenarios/operational_question_evals.py +8 -0
- devin/nodes/insight/scenarios/queue_status.py +15 -0
- devin/nodes/insight/scenarios/queue_status_evals.py +23 -0
- devin/nodes/insight/scenarios/source_doc_explanation.py +14 -0
- devin/nodes/insight/scenarios/source_doc_explanation_evals.py +21 -0
- devin/nodes/insight/scenarios/worker_state_check.py +15 -0
- devin/nodes/insight/scenarios/worker_state_check_evals.py +22 -0
- devin/nodes/insight/tools.json +126 -0
- devin/nodes/intake/__init__.py +0 -0
- devin/nodes/intake/node.py +27 -0
- devin/nodes/intake/playground.py +47 -0
- devin/nodes/intake/prompt.md +12 -0
- devin/nodes/intake/scenarios/ideation_routing.py +4 -0
- devin/nodes/intake/scenarios/ideation_routing_evals.py +5 -0
- devin/nodes/intake/scenarios/insight_routing.py +4 -0
- devin/nodes/intake/scenarios/insight_routing_evals.py +5 -0
- devin/nodes/iterate/README.md +44 -0
- devin/nodes/iterate/__init__.py +1 -0
- devin/nodes/iterate/_archived_design_stages/01-objectives-requirements.md +112 -0
- devin/nodes/iterate/_archived_design_stages/02-evals.md +131 -0
- devin/nodes/iterate/_archived_design_stages/03-tools-and-boundaries.md +110 -0
- devin/nodes/iterate/_archived_design_stages/04-harness-and-playground.md +32 -0
- devin/nodes/iterate/_archived_design_stages/05-prompt-deferred.md +11 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/01-objectives-requirements.md +20 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/02-evals.md +8 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/03-tools-and-boundaries.md +14 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/04-harness-and-playground.md +12 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/01-objectives-requirements.md +20 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/02-evals.md +8 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/03-tools-and-boundaries.md +13 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/04-harness-and-playground.md +12 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md +25 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/02-evals.md +9 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/03-tools-and-boundaries.md +14 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md +12 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md +20 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/02-evals.md +8 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/03-tools-and-boundaries.md +14 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md +13 -0
- devin/nodes/iterate/agent-roles.md +89 -0
- devin/nodes/iterate/agents/README.md +10 -0
- devin/nodes/iterate/artifacts.md +504 -0
- devin/nodes/iterate/contract.md +100 -0
- devin/nodes/iterate/eval-plan.md +74 -0
- devin/nodes/iterate/node.py +100 -0
- devin/nodes/iterate/pipeline/README.md +13 -0
- devin/nodes/iterate/playground-contract.md +76 -0
- devin/nodes/iterate/prompt.md +11 -0
- devin/nodes/iterate/scenarios/README.md +38 -0
- devin/nodes/iterate/scenarios/artifact-and-loop-scenarios.md +101 -0
- devin/nodes/iterate/scenarios/coder_artifact_alignment.py +32 -0
- devin/nodes/iterate/scenarios/coder_artifact_alignment_evals.py +45 -0
- devin/nodes/iterate/scenarios/coder_bounded_fix.py +27 -0
- devin/nodes/iterate/scenarios/coder_bounded_fix_evals.py +45 -0
- devin/nodes/iterate/scenarios/devin_iterate_routing.py +21 -0
- devin/nodes/iterate/scenarios/devin_iterate_routing_evals.py +36 -0
- devin/nodes/iterate/scenarios/framer_scope_boundary.py +25 -0
- devin/nodes/iterate/scenarios/framer_scope_boundary_evals.py +57 -0
- devin/nodes/iterate/scenarios/framer_task_framing.py +25 -0
- devin/nodes/iterate/scenarios/framer_task_framing_evals.py +58 -0
- devin/nodes/iterate/scenarios/iterate_error_fix.py +21 -0
- devin/nodes/iterate/scenarios/iterate_error_fix_evals.py +39 -0
- devin/nodes/iterate/scenarios/iterate_quick_change.py +21 -0
- devin/nodes/iterate/scenarios/iterate_quick_change_evals.py +35 -0
- devin/nodes/iterate/scenarios/iterate_to_idea_promotion.py +23 -0
- devin/nodes/iterate/scenarios/iterate_to_idea_promotion_evals.py +53 -0
- devin/nodes/iterate/scenarios/iterate_to_insight_reroute.py +23 -0
- devin/nodes/iterate/scenarios/iterate_to_insight_reroute_evals.py +53 -0
- devin/nodes/iterate/scenarios/observer_evidence_seam.py +28 -0
- devin/nodes/iterate/scenarios/observer_evidence_seam_evals.py +55 -0
- devin/nodes/iterate/scenarios/observer_repro_creation.py +28 -0
- devin/nodes/iterate/scenarios/observer_repro_creation_evals.py +45 -0
- devin/nodes/iterate/scenarios/routing-matrix.md +45 -0
- devin/nodes/shared/__init__.py +0 -0
- devin/nodes/shared/filemaker_expert.md +80 -0
- devin/nodes/shared/filemaker_expert.py +354 -0
- devin/nodes/shared/filemaker_expert_eval/runner.py +176 -0
- devin/nodes/shared/filemaker_expert_eval/scenarios.json +65 -0
- devin/nodes/shared/goldilocks_advisor_eval/runner.py +214 -0
- devin/nodes/shared/goldilocks_advisor_eval/scenarios.json +58 -0
- devin/nodes/shared/helpers.py +156 -0
- devin/nodes/shared/idea_compliance_advisor_eval/runner.py +252 -0
- devin/nodes/shared/idea_compliance_advisor_eval/scenarios.json +75 -0
- devin/nodes/shared/models.py +44 -0
- devin/nodes/shared/post.py +40 -0
- devin/nodes/shared/router.py +107 -0
- devin/nodes/shared/tools.py +191 -0
- devin/shared/devin-chat-rubric.md +237 -0
- devin/shared/devin-chat-scenario-suite.md +90 -0
- devin/shared/eval_doctrine.md +9 -0
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""Eval harness for Idea Compliance Advisor.
|
|
2
|
+
|
|
3
|
+
Runs the idea_compliance_check advisor through the PI subprocess (matching the
|
|
4
|
+
Pi-Pi pattern in devflow-tools.ts) against scenario fixtures and scores output.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
python3 -m devin.nodes.shared.idea_compliance_advisor_eval.runner
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import re
|
|
14
|
+
import subprocess
|
|
15
|
+
import time
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
# Ancestor directory at index 5 of this file's resolved path; the agent
# definition below is looked up relative to it.
# NOTE(review): presumably the workspace root that contains `.pi/` — confirm
# the depth matches the layout this module is actually installed into.
_PROJECT_ROOT = Path(__file__).resolve().parents[5]
# Markdown agent definition, relative to _PROJECT_ROOT.
_AGENT_FILE = ".pi/agents/idea-compliance-advisor/idea-compliance-advisor.md"
# Defaults for the optional arguments of run_compliance_check().
# NOTE(review): the repo root is an absolute path on one specific machine.
_DEFAULT_REPO_ROOT = "/Users/devflow/repos/Spicy-Server"
_DEFAULT_PROJECT = "proj_75f63d30"
_DEFAULT_MODEL = "minimax/MiniMax-M2.7"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class ScenarioResult:
    """Scored outcome of running one scenario through the advisor."""

    # Value of the scenario's "id" entry.
    scenario_id: str
    # True when every available point was earned (score == max_score).
    passed: bool
    # Points earned, and the sum of all weights in the scenario's "scoring".
    score: int
    max_score: int
    # Advisor output; score_scenario() stores only the first 500 characters.
    output: str
    # Points recorded per scoring key (0 marks a recorded miss).
    details: dict[str, int]
    # Seconds taken by the advisor call; score_scenario() sets 0.0 and
    # run_all_scenarios() overwrites it with the measured duration.
    elapsed: float
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def load_agent_prompt() -> str:
    """Return the advisor's prompt text with leading front matter removed.

    Reads the agent definition at ``_PROJECT_ROOT / _AGENT_FILE``.  When the
    file opens with a ``---`` delimited front-matter block, only the text
    after the closing ``---`` line is returned, stripped of surrounding
    whitespace.  Otherwise the file content is returned unchanged.

    Raises:
        OSError: If the agent file cannot be read.
        UnicodeDecodeError: If the agent file is not valid UTF-8.
    """
    # Decode explicitly: the platform default encoding is not guaranteed to
    # be UTF-8, and this text is forwarded verbatim as a system prompt.
    content = _PROJECT_ROOT.joinpath(_AGENT_FILE).read_text(encoding="utf-8")
    match = re.match(r"^---\n[\s\S]*?\n---\n([\s\S]*)$", content, re.M)
    return match.group(1).strip() if match else content
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def run_compliance_check(
    idea_text: str,
    idea_id: str,
    repo_root: str = _DEFAULT_REPO_ROOT,
    project_id: str = _DEFAULT_PROJECT,
    model: str = _DEFAULT_MODEL,
    timeout: int = 180,
) -> tuple[str, float]:
    """Run the compliance advisor once through the ``pi`` CLI.

    The agent prompt from :func:`load_agent_prompt` plus a context block
    (repo root, project id, idea id and the idea text) is passed via
    ``--append-system-prompt``; the idea text is also sent as the user
    message.  The subprocess runs with the user's home directory as its
    working directory.

    Args:
        idea_text: The shaped idea to evaluate.
        idea_id: Identifier embedded in the context block.
        repo_root: Repository path embedded in the context block.
        project_id: Project identifier embedded in the context block.
        model: Value passed to ``--model``.
        timeout: Seconds to wait for the subprocess to finish.

    Returns:
        ``(stdout, elapsed)``: the stripped standard output of the process
        and the seconds spent waiting for it.  stderr is captured but
        discarded, and the exit status is not inspected.

    Raises:
        subprocess.TimeoutExpired: If the process exceeds ``timeout``; the
            child is killed and reaped before the exception propagates.
        OSError: If the ``pi`` executable cannot be started.
    """
    system_prompt = load_agent_prompt()
    context_block = (
        f"Context: repo_root={repo_root}, project_id={project_id}, idea_id={idea_id}\n\n"
        f"Shaped Idea:\n{idea_text}\n\n"
        "Question: Is this idea compliant?"
    )
    args = [
        "pi", "--mode", "text", "--no-session", "--no-extensions",
        "--model", model, "--tools", "read,grep,find,ls", "--thinking", "off",
        "--append-system-prompt", system_prompt + "\n\n" + context_block,
        f"Validate idea compliance for: {idea_text}",
    ]
    # Monotonic clock: a duration must not be skewed by wall-clock changes.
    start = time.monotonic()
    # The context manager closes the pipes and waits for the child on exit.
    with subprocess.Popen(
        args, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
        stderr=subprocess.PIPE, text=True, cwd=str(Path.home()),
    ) as proc:
        try:
            stdout, _ = proc.communicate(timeout=timeout)
        except subprocess.TimeoutExpired:
            # communicate() does not stop the child when the timeout fires;
            # kill it and drain its pipes so nothing is left running.
            proc.kill()
            proc.communicate()
            raise
    elapsed = time.monotonic() - start
    return stdout.strip(), elapsed
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _extract_json(text: str) -> dict | None:
|
|
74
|
+
match = re.search(r"```json\s*([\s\S]+?)\s*```", text)
|
|
75
|
+
if match:
|
|
76
|
+
try:
|
|
77
|
+
return json.loads(match.group(1))
|
|
78
|
+
except json.JSONDecodeError:
|
|
79
|
+
pass
|
|
80
|
+
try:
|
|
81
|
+
return json.loads(text)
|
|
82
|
+
except json.JSONDecodeError:
|
|
83
|
+
pass
|
|
84
|
+
return None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _extract_verdict(text: str) -> str | None:
    """Extract the advisor's verdict from ``text``.

    Sources are tried in order:

    1. The ``"verdict"`` entry of the JSON object found by
       :func:`_extract_json`.
    2. An upper-case token following the label ``Verdict`` (separated by
       colons, whitespace or markdown ``*``).
    3. One of the three known verdicts wrapped in ``**bold**``.

    Returns:
        The verdict, or ``None`` when none of the sources matches.
    """
    parsed = _extract_json(text)
    # Guard on dict: valid JSON that is not an object has no verdict entry.
    if isinstance(parsed, dict) and "verdict" in parsed:
        return parsed["verdict"]
    # The trailing \b rejects the capital letter of an ordinary word
    # ("Verdict: Compliant" must not yield "C").
    labelled = re.search(r"Verdict[:\s*]+([A-Z_]+)\b", text)
    if labelled:
        return labelled.group(1)
    bold = re.search(r"\*\*(COMPLIANT|NON_COMPLIANT|NEEDS_REVIEW)\*\*", text)
    if bold:
        return bold.group(1)
    return None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def score_scenario(output: str, scenario: dict) -> ScenarioResult:
    """Score one advisor ``output`` against a scenario fixture.

    Points come from ``scenario["scoring"]``, a mapping of score key to
    weight.  The maximum score is the sum of all weights.

    * ``verdict_correct`` is awarded when the extracted verdict equals
      ``scenario["expected_verdict"]``, and recorded as 0 otherwise.
    * If no JSON object can be extracted, scoring stops there.
    * Gate checks depend on the expected verdict.  For ``COMPLIANT`` all
      five gates must report ``"pass"`` and misses are recorded as 0.  For
      ``NON_COMPLIANT`` and ``NEEDS_REVIEW`` each listed gate that reports
      ``"fail"`` / ``"needs_review"`` earns its weight and misses are not
      recorded.
    * The first ``issues_*`` key present in the scoring table is awarded
      when the parsed ``issues`` value contains any truthy entry.

    Args:
        output: Raw advisor output.
        scenario: Fixture with at least ``id``, ``scoring`` and
            ``expected_verdict``.

    Returns:
        A :class:`ScenarioResult` whose ``output`` is truncated to 500
        characters and whose ``elapsed`` is ``0.0``.

    Raises:
        KeyError: If a required scenario key is missing.
    """
    scoring = scenario["scoring"]
    max_score = sum(scoring.values())
    details: dict[str, int] = {}
    score = 0

    parsed = _extract_json(output)
    if not isinstance(parsed, dict):
        # Valid JSON that is not an object cannot carry gates or issues.
        parsed = None
    expected_verdict = scenario["expected_verdict"]

    def award(score_key: str) -> None:
        """Record and add the weight configured for ``score_key``."""
        nonlocal score
        details[score_key] = scoring.get(score_key, 0)
        score += details[score_key]

    def build_result() -> ScenarioResult:
        """Package the running totals into a result."""
        return ScenarioResult(
            scenario_id=scenario["id"],
            passed=score == max_score,
            score=score,
            max_score=max_score,
            output=output[:500],
            details=details,
            elapsed=0.0,
        )

    verdict = (parsed["verdict"] if parsed and "verdict" in parsed else
               _extract_verdict(output)) or "?"

    # Verdict correct (always scored)
    if verdict == expected_verdict:
        award("verdict_correct")
    else:
        details["verdict_correct"] = 0

    if not parsed:
        return build_result()

    def gate_status(key: str) -> str:
        """Return the ``status`` of gate ``key``, or "" when unavailable."""
        val = parsed.get(key, {})
        if isinstance(val, dict):
            return val.get("status", "")
        return ""

    # (gate key, status that earns the points, score key), per verdict.
    compliant_checks = (
        ("gate_1_architecture", "pass", "gate_1_architecture_pass"),
        ("gate_2_methods", "pass", "gate_2_methods_pass"),
        ("gate_3_story_dup", "pass", "gate_3_story_dup_pass"),
        ("gate_4_idea_dup", "pass", "gate_4_idea_dup_pass"),
        ("gate_5_naming", "pass", "gate_5_naming_pass"),
    )
    non_compliant_checks = (
        ("gate_1_architecture", "fail", "gate_1_architecture_fail"),
        ("gate_2_methods", "fail", "gate_2_methods_fail_tier3"),
        ("gate_3_story_dup", "fail", "gate_3_story_dup_fail"),
        ("gate_4_idea_dup", "fail", "gate_4_idea_dup_fail"),
    )
    needs_review_checks = (
        ("gate_2_methods", "needs_review", "gate_2_needs_review_tier2"),
        ("gate_4_idea_dup", "needs_review", "gate_4_needs_review"),
        ("gate_5_naming", "needs_review", "gate_5_needs_review"),
    )

    if expected_verdict == "COMPLIANT":
        # All gates must pass; a miss is recorded explicitly as 0.
        for gate_key, wanted_status, score_key in compliant_checks:
            if gate_status(gate_key) == wanted_status:
                award(score_key)
            else:
                details[score_key] = 0
    elif expected_verdict == "NON_COMPLIANT":
        # Hard gate violations: only hits are recorded.
        for gate_key, wanted_status, score_key in non_compliant_checks:
            if gate_status(gate_key) == wanted_status:
                award(score_key)
    elif expected_verdict == "NEEDS_REVIEW":
        for gate_key, wanted_status, score_key in needs_review_checks:
            if gate_status(gate_key) == wanted_status:
                award(score_key)

    # Issues checks (any issues present scores).  `or []` keeps an explicit
    # JSON null from crashing any().
    issues = parsed.get("issues") or []
    has_issues = any(issues)
    issues_checks = (
        "issues_specific",
        "issues_describes_package_needed",
        "issues_describes_overlap",
        "issues_naming_noted",
    )
    for score_key in issues_checks:
        if score_key in scoring and has_issues:
            award(score_key)
            # Only the first configured issues key is awarded.
            break

    return build_result()
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def run_all_scenarios(scenarios_path: str | Path | None = None) -> list[ScenarioResult]:
    """Run every scenario in a JSON file through the compliance check and score it.

    Args:
        scenarios_path: Location of the scenarios JSON file (a list of scenario
            objects). When ``None``, ``scenarios.json`` next to this module is
            used.

    Returns:
        One scored result per scenario, in file order, with ``elapsed`` set to
        the duration reported by ``run_compliance_check``.
    """
    if scenarios_path is None:
        scenarios_path = Path(__file__).parent / "scenarios.json"
    # Decode explicitly as UTF-8: the scenario descriptions contain non-ASCII
    # characters, and the platform default encoding is not UTF-8 everywhere.
    scenarios = json.loads(Path(scenarios_path).read_text(encoding="utf-8"))
    results = []
    for scenario in scenarios:
        print(f"\nRunning: {scenario['id']}")
        output, elapsed = run_compliance_check(
            idea_text=scenario["idea_text"],
            idea_id=scenario["idea_id"],
            repo_root=scenario["repo_root"],
            project_id=scenario["project_id"],
        )
        result = score_scenario(output, scenario)
        # score_scenario has no timing information; attach it here.
        result.elapsed = elapsed
        results.append(result)
        verdict = _extract_verdict(output) or "?"
        print(f" Verdict: {verdict} | Score: {result.score}/{result.max_score} ({elapsed:.1f}s)")
        if not result.passed:
            # Checks recorded with a zero value are reported as missing.
            print(f" Missing: {[k for k, v in result.details.items() if v == 0]}")
    return results
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def print_report(results: list[ScenarioResult]) -> None:
    """Print a per-scenario breakdown followed by the aggregate score.

    Args:
        results: Scored scenarios. Each item must expose ``scenario_id``,
            ``passed``, ``score``, ``max_score`` and a ``details`` mapping of
            check name to awarded points.

    An empty list (or scenarios whose max scores sum to zero) is reported as
    0% rather than raising ``ZeroDivisionError``.
    """
    total = sum(r.score for r in results)
    max_total = sum(r.max_score for r in results)
    # Avoid dividing by zero when there is nothing to score.
    percent = 100 * total / max_total if max_total else 0.0
    rule = "=" * 60

    print(f"\n{rule}")
    print("IDEA COMPLIANCE ADVISOR EVAL REPORT")
    print(rule)
    for r in results:
        status = "✓ PASS" if r.passed else "✗ FAIL"
        print(f"\n{status} {r.scenario_id} {r.score}/{r.max_score}")
        for key, val in r.details.items():
            # A check that earned no points is flagged as failed.
            icon = "✓" if val > 0 else "✗"
            print(f" {icon} {key}: {val}")
    print(f"\n{rule}")
    print(f"TOTAL: {total}/{max_total} ({percent:.0f}%)")
    print(rule)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
if __name__ == "__main__":
    # Script entry point: evaluate the bundled scenarios and print the report.
    print_report(run_all_scenarios())
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"id": "compliant_memory_feature",
|
|
4
|
+
"description": "Memory summarization fits companion chat domain — uses existing pydantic-ai episodic pattern, no new package — all gates pass",
|
|
5
|
+
"idea_text": "Add a memory summarization feature that periodically condenses older conversation turns in the companion chat to save context window space while preserving key facts",
|
|
6
|
+
"idea_id": "proj_75f63d30_idea_eval_c001",
|
|
7
|
+
"repo_root": "/Users/devflow/repos/Spicy-Server",
|
|
8
|
+
"project_id": "proj_75f63d30",
|
|
9
|
+
"expected_verdict": "COMPLIANT",
|
|
10
|
+
"scoring": {
|
|
11
|
+
"verdict_correct": 2,
|
|
12
|
+
"gate_1_architecture_pass": 1,
|
|
13
|
+
"gate_2_methods_pass": 1,
|
|
14
|
+
"gate_3_story_dup_pass": 1,
|
|
15
|
+
"gate_4_idea_dup_pass": 1,
|
|
16
|
+
"gate_5_naming_pass": 1
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"id": "non_compliant_billing_portal",
|
|
21
|
+
"description": "Invoice/portal system is fundamental architecture violation for companion chat — NON_COMPLIANT on gate 1",
|
|
22
|
+
"idea_text": "Add an invoice management system with PDF generation, payment tracking, and overdue reminder emails for clients",
|
|
23
|
+
"idea_id": "proj_75f63d30_idea_eval_nc001",
|
|
24
|
+
"repo_root": "/Users/devflow/repos/Spicy-Server",
|
|
25
|
+
"project_id": "proj_75f63d30",
|
|
26
|
+
"expected_verdict": "NON_COMPLIANT",
|
|
27
|
+
"scoring": {
|
|
28
|
+
"verdict_correct": 2,
|
|
29
|
+
"gate_1_architecture_fail": 1,
|
|
30
|
+
"issues_specific": 1
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"id": "needs_review_new_pkg_sentiment",
|
|
35
|
+
"description": "Sentiment analysis is a new domain for companion chat, not replicating existing patterns — NEEDS_REVIEW Tier 2 on gate 2",
|
|
36
|
+
"idea_text": "Add a new gating context type that evaluates user sentiment before allowing companion responses above a certain emotional intensity threshold",
|
|
37
|
+
"idea_id": "proj_75f63d30_idea_eval_c002",
|
|
38
|
+
"repo_root": "/Users/devflow/repos/Spicy-Server",
|
|
39
|
+
"project_id": "proj_75f63d30",
|
|
40
|
+
"expected_verdict": "NEEDS_REVIEW",
|
|
41
|
+
"scoring": {
|
|
42
|
+
"verdict_correct": 2,
|
|
43
|
+
"gate_2_needs_review_tier2": 1,
|
|
44
|
+
"issues_describes_package_needed": 1
|
|
45
|
+
}
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"id": "non_compliant_stripe_duplicates_existing",
|
|
49
|
+
"description": "Spicy-Server already has payment infrastructure — adding Stripe replicates existing pattern — NON_COMPLIANT Tier 3",
|
|
50
|
+
"idea_text": "Add Stripe payment processing so users can upgrade their subscription tier directly from the app",
|
|
51
|
+
"idea_id": "proj_75f63d30_idea_eval_nc002",
|
|
52
|
+
"repo_root": "/Users/devflow/repos/Spicy-Server",
|
|
53
|
+
"project_id": "proj_75f63d30",
|
|
54
|
+
"expected_verdict": "NON_COMPLIANT",
|
|
55
|
+
"scoring": {
|
|
56
|
+
"verdict_correct": 2,
|
|
57
|
+
"gate_2_methods_fail_tier3": 1,
|
|
58
|
+
"issues_specific": 1
|
|
59
|
+
}
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
"id": "needs_review_naming_collision",
|
|
63
|
+
"description": "Idea has a naming collision that needs resolution — NEEDS_REVIEW on gate 5",
|
|
64
|
+
"idea_text": "Add a module called SentimentAnalyzer that processes user messages and returns emotional intensity scores",
|
|
65
|
+
"idea_id": "proj_75f63d30_idea_eval_nr002",
|
|
66
|
+
"repo_root": "/Users/devflow/repos/Spicy-Server",
|
|
67
|
+
"project_id": "proj_75f63d30",
|
|
68
|
+
"expected_verdict": "NEEDS_REVIEW",
|
|
69
|
+
"scoring": {
|
|
70
|
+
"verdict_correct": 2,
|
|
71
|
+
"gate_5_needs_review": 1,
|
|
72
|
+
"issues_naming_noted": 1
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
]
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Literal
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
# Closed set of route-arm names.
RouteArm = Literal[
    "ideation",
    "insight",
    "iterate",
    "neither",
]
|
|
8
|
+
|
|
9
|
+
class DevinChatDagEvent(BaseModel):
    """Event payload model; by its name, the input to the Devin chat DAG.

    ``repo_root``, ``idea_id`` and ``pipeline_key`` are required; every other
    field has a default.
    """

    # Required: repository location and idea identifier.
    repo_root: str
    idea_id: str

    # Optional message text and source path.
    raw_text: str | None = None
    source_path: str | None = None

    # Defaults to 0; whether 0 means "no limit" is not visible here.
    max_stories: int = 0
    # A fresh empty list per instance.
    planes: list[str] = Field(default_factory=list)

    # Required pipeline identifier.
    pipeline_key: str

    # Original session_id from the incoming message row.
    source_session_id: str | None = None
|
|
18
|
+
|
|
19
|
+
class DevinAgentResponse(BaseModel):
    """Structured agent reply: a message, its kind, and follow-up guidance."""

    # Reply text; must be non-empty.
    response_message: str = Field(min_length=1)

    # Closed set of accepted response kinds.
    response_kind: Literal[
        "redirect",
        "needs_clarification",
        "ready_for_downstream",
        "ideation_contract_response",
        "insight_response",
        "operational_alert",
        "needs_context",
        "completed",
        "blocked",
        "needs_more_context",
        "promote_to_idea",
        "route_to_insight",
    ]

    # Next-step suggestion; must be non-empty.
    suggested_next_step: str = Field(min_length=1)

    # Optional lists; each instance gets its own empty list by default.
    follow_up_questions: list[str] = Field(default_factory=list)
    style_notes: list[str] = Field(default_factory=list)
|
|
38
|
+
|
|
39
|
+
class ScenarioResult(BaseModel):
    """Result record for one scenario: pass flag, actual output, expectation."""

    # Scenario identifier and whether it passed.
    scenario_name: str
    passed: bool

    # Free-form mappings for the actual output and the expected behavior.
    actual_output: dict[str, Any]
    expected_behavior: dict[str, Any]

    # Optional notes; a fresh empty list per instance.
    notes: list[str] = Field(default_factory=list)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import uuid
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from devflow_engine.vendor.datalumina_genai.core.nodes.base import Node
|
|
7
|
+
from devflow_engine.vendor.datalumina_genai.core.task import TaskContext
|
|
8
|
+
from .helpers import build_human_facing_message, dfs_terminal, maybe_post_agent_message, pipeline_root, resolve_project_id, store_run, write_json
|
|
9
|
+
|
|
10
|
+
class DevinResponsePostNode(Node):
    """Workflow node that posts the Devin reply and then stops the workflow."""

    async def process(self, task_context: TaskContext) -> TaskContext:
        """Post the response held in ``metadata['response_guidance']``.

        Builds an agent-message row from the response payload, passes it to
        ``maybe_post_agent_message``, writes the request/response pair to
        ``devin_response_post.json`` under the pipeline root, records the
        artifact and node completion in the run store, fills the outcome keys
        in ``task_context.metadata`` and stops the workflow.

        Raises:
            RuntimeError: If ``response_guidance`` is missing or empty.
        """
        event = task_context.event
        repo_root = Path(event.repo_root)

        store, run_id = store_run()
        node_exec_id = store.create_node_attempt(
            run_id=run_id,
            node_id='devin_response_post',
            node_name='DevinResponsePost',
            attempt=1,
        )

        # Prefer a project id already present in metadata; otherwise resolve it.
        raw_project_id = str(
            task_context.metadata.get('project_id')
            or resolve_project_id(repo_root, idea_id=event.idea_id)
        )
        # Use None for project_id if it's not a valid UUID (i.e., unregistered).
        try:
            uuid.UUID(raw_project_id)
        except Exception:
            project_id = None
        else:
            project_id = raw_project_id

        response_payload = dict(task_context.metadata.get('response_guidance') or {})
        if not response_payload:
            raise RuntimeError('missing response payload for DevinResponsePostNode')

        response_kind = response_payload.get('response_kind')
        # Reuse the incoming session when known, else derive one from the idea.
        session_id = event.source_session_id or f'idea:{raw_project_id}:{event.idea_id}'

        row = {
            'session_id': session_id,
            'from_agent': 'devin',
            'to_agent': 'charlie',
            'message': str(response_payload.get('response_message') or '').strip(),
            'metadata': {
                # Metadata keeps the raw id even when it is not a UUID.
                'project_id': raw_project_id,
                'idea_id': event.idea_id,
                'pipeline_key': event.pipeline_key,
                'response_kind': response_kind,
                'suggested_next_step': response_payload.get('suggested_next_step'),
                'follow_up_questions': list(response_payload.get('follow_up_questions') or []),
            },
            'project_id': project_id,
            'message_type': 'reply',
        }
        created = maybe_post_agent_message(row=row)

        # Persist the request alongside whatever the post returned.
        post_payload = {'request': row, 'response_record': created}
        post_path = (
            pipeline_root(repo_root, idea_id=event.idea_id, pipeline_key=event.pipeline_key)
            / 'devin_response_post.json'
        )
        write_json(post_path, post_payload)

        store.add_artifact(
            run_id=run_id,
            node_exec_id=node_exec_id,
            kind='idea_devin_response_post',
            uri=str(post_path),
            metadata={'session_id': session_id, 'response_kind': response_kind},
        )
        store.mark_node_finished(
            node_exec_id=node_exec_id,
            status='succeeded',
            output={'session_id': session_id, 'response_kind': response_kind},
        )

        task_context.metadata['response_post'] = post_payload
        # Payload keys are spread last, so they win over same-named keys.
        task_context.metadata['outcome'] = {
            'status': response_kind or 'completed',
            'response_post': post_payload,
            **response_payload,
        }
        task_context.metadata['message'] = build_human_facing_message(response_payload=response_payload)
        task_context.metadata['exit_code'] = 0

        dfs_terminal(
            project_id=project_id,
            run_id=run_id,
            summary='Devin response completed',
            current_state='completed',
            current_status='completed',
            idea_id=event.idea_id,
        )
        task_context.stop_workflow()
        return task_context
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from devflow_engine.llm.invoke import LlmInvocationRequest, invoke_llm
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def llm_route(*, raw_text: str, repo_root: Path, project_id: str | None = None) -> tuple[str, dict[str, Any]]:
    """Route a user message to insight, iterate, ideation, or neither using a lightweight LLM call.

    Args:
        raw_text: The user's message; interpolated verbatim into the prompt.
        repo_root: Repository root passed through to the LLM invocation.
        project_id: Optional project identifier included in the prompt payload.

    Returns:
        ``(arm, info)`` where ``arm`` is one of ``"insight"``, ``"iterate"``,
        ``"ideation"`` or ``"neither"`` and ``info`` holds ``route_arm``,
        ``reason`` and ``classifier``. An arm outside that set is coerced to
        ``"neither"``.

    Falls back to deterministic classification if the LLM call raises, or
    returns a non-ok result or no parsed JSON. Failures are logged and never
    propagated to the caller.
    """
    import logging  # Local import keeps this block independent of the module header.

    valid_arms = ("insight", "iterate", "ideation", "neither")

    prompt = f"""You are a router for a Devin two-arm agent system. Read the user's message and decide which arm should handle it.

ARM DEFINITIONS:
- insight: questions about the current state of the codebase, repo, project, queues, workers, architecture, runtime behavior, or "can you see/find/verify" requests. Questions that need file-reading or system inspection.
- iterate: bounded implementation requests on an existing surface, like fixing a concrete bug, making a small behavior change, or tightening a specific UI on an existing page or workflow.
- ideation: forward-looking requests about building new features, adding/changing/creating software, planning implementations, shaping product requirements. Requests that shape or plan software changes.
- neither: anything that is not about software delivery or DevFlow operations (e.g., casual conversation, non-software topics).

USER MESSAGE:
{raw_text}

Respond with ONLY a JSON object:
{{"route_arm": "insight" | "iterate" | "ideation" | "neither", "reason": "one sentence explanation"}}""" # noqa: E501

    prompt_payload = {
        "raw_text": raw_text,
        "repo_root": str(repo_root),
        "project_id": project_id,
    }

    try:
        result = invoke_llm(
            LlmInvocationRequest(
                purpose="devin_route_arm",
                repo_root=repo_root,
                prompt=prompt,
                prompt_payload=prompt_payload,
                delivery_model="final_only",
                interaction_model="request_response",
                response_contract="json_only",
                timeout_seconds=15,
                strength="light",
            )
        )
        if result.ok and result.parsed_json:
            arm = str(result.parsed_json.get("route_arm") or "neither").strip().lower()
            if arm not in valid_arms:
                arm = "neither"
            reason = str(result.parsed_json.get("reason") or "")
            return arm, {
                "route_arm": arm,
                "reason": reason,
                "classifier": "llm_router_v1",
            }
    except Exception:
        # Routing is best-effort: record the failure, then use the fallback.
        logging.getLogger(__name__).warning(
            "LLM routing failed; using deterministic fallback", exc_info=True
        )

    # Fallback to deterministic
    return _deterministic_fallback(raw_text)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _deterministic_fallback(raw_text: str) -> tuple[str, dict[str, Any]]:
|
|
66
|
+
"""Fast deterministic fallback when LLM routing is unavailable."""
|
|
67
|
+
lowered = raw_text.lower()
|
|
68
|
+
insight_signals = {"status", "queue", "worker", "repo", "code", "logs", "error",
|
|
69
|
+
"why", "how", "where", "what", "investigate", "explain",
|
|
70
|
+
"architecture", "runtime", "process", "pipeline", "see",
|
|
71
|
+
"verify", "find", "search", "look", "check", "locate"}
|
|
72
|
+
iterate_signals = {"fix", "broken", "breaks", "bug", "change", "tweak", "adjust",
|
|
73
|
+
"update", "existing", "current", "button", "form", "modal",
|
|
74
|
+
"page", "route", "screen", "component", "endpoint", "workflow"}
|
|
75
|
+
ideation_signals = {"app", "application", "api", "automation", "backend", "bug",
|
|
76
|
+
"dashboard", "feature", "frontend", "integration", "mobile",
|
|
77
|
+
"portal", "saas", "software", "system", "tool", "ui",
|
|
78
|
+
"web", "website", "workflow"}
|
|
79
|
+
non_software = {"blog post", "flyer", "hiring", "logo", "marketing",
|
|
80
|
+
"newsletter", "podcast", "seo", "social media", "video script"}
|
|
81
|
+
|
|
82
|
+
insight_hits = [t for t in insight_signals if t in lowered]
|
|
83
|
+
iterate_hits = [t for t in iterate_signals if t in lowered]
|
|
84
|
+
ideation_hits = [t for t in ideation_signals if t in lowered]
|
|
85
|
+
non_hits = [t for t in non_software if t in lowered]
|
|
86
|
+
|
|
87
|
+
# Explicit phrase checks
|
|
88
|
+
verification_phrases = (
|
|
89
|
+
"can you see", "can you find", "can you locate", "please verify",
|
|
90
|
+
"can you check", "do you see", "do you have the file", "are the files in",
|
|
91
|
+
)
|
|
92
|
+
if any(phrase in lowered for phrase in verification_phrases):
|
|
93
|
+
return "insight", {"route_arm": "insight", "reason": "repo content verification request", "classifier": "deterministic_fallback"}
|
|
94
|
+
if any(t in lowered for t in ("queue", "worker", "status", "repo", "codebase",
|
|
95
|
+
"how does", "where is", "why is", "what is happening")):
|
|
96
|
+
return "insight", {"route_arm": "insight", "reason": "operational/status question", "classifier": "deterministic_fallback"}
|
|
97
|
+
if any(t in lowered for t in ("fix", "bug fix", "small change", "quick change", "tweak", "adjust", "update")) and any(t in lowered for t in ("existing", "current", "page", "route", "component", "button", "form", "modal", "screen", "endpoint", "workflow", "bug", "error", "broken")):
|
|
98
|
+
return "iterate", {"route_arm": "iterate", "reason": "bounded change request on an existing surface", "classifier": "deterministic_fallback"}
|
|
99
|
+
if iterate_hits and not insight_hits and len(iterate_hits) >= len(ideation_hits):
|
|
100
|
+
return "iterate", {"route_arm": "iterate", "reason": "targeted implementation request", "classifier": "deterministic_fallback"}
|
|
101
|
+
if ideation_hits and len(ideation_hits) >= len(non_hits):
|
|
102
|
+
return "ideation", {"route_arm": "ideation", "reason": "software/product request", "classifier": "deterministic_fallback"}
|
|
103
|
+
if any(t in lowered for t in ("build", "create", "implement", "ship", "plan")) and not non_hits and not insight_hits:
|
|
104
|
+
return "ideation", {"route_arm": "ideation", "reason": "forward-looking implementation request", "classifier": "deterministic_fallback"}
|
|
105
|
+
if insight_hits and "project" in lowered:
|
|
106
|
+
return "insight", {"route_arm": "insight", "reason": "project-specific investigation", "classifier": "deterministic_fallback"}
|
|
107
|
+
return "neither", {"route_arm": "neither", "reason": "not clearly software delivery work", "classifier": "deterministic_fallback"}
|