devflow-engine 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devflow_engine/__init__.py +3 -0
- devflow_engine/agentic_prompts.py +100 -0
- devflow_engine/agentic_runtime.py +398 -0
- devflow_engine/api_key_flow_harness.py +539 -0
- devflow_engine/api_keys.py +357 -0
- devflow_engine/bootstrap/__init__.py +2 -0
- devflow_engine/bootstrap/provision_from_template.py +84 -0
- devflow_engine/cli/__init__.py +0 -0
- devflow_engine/cli/app.py +7270 -0
- devflow_engine/core/__init__.py +0 -0
- devflow_engine/core/config.py +86 -0
- devflow_engine/core/logging.py +29 -0
- devflow_engine/core/paths.py +45 -0
- devflow_engine/core/toml_kv.py +33 -0
- devflow_engine/devflow_event_worker.py +1292 -0
- devflow_engine/devflow_state.py +201 -0
- devflow_engine/devin2/__init__.py +9 -0
- devflow_engine/devin2/agent_definition.py +120 -0
- devflow_engine/devin2/pi_runner.py +204 -0
- devflow_engine/devin_orchestration.py +69 -0
- devflow_engine/docs/prompts/anti-patterns.md +42 -0
- devflow_engine/docs/prompts/devin-agent-prompt.md +55 -0
- devflow_engine/docs/prompts/devin2-agent-prompt.md +81 -0
- devflow_engine/docs/prompts/examples/devin-vapi-clone-reference-exchange.json +85 -0
- devflow_engine/doctor/__init__.py +2 -0
- devflow_engine/doctor/triage.py +140 -0
- devflow_engine/error/__init__.py +0 -0
- devflow_engine/error/remediation.py +21 -0
- devflow_engine/errors/error_solver_dag.py +522 -0
- devflow_engine/errors/runtime_observability.py +67 -0
- devflow_engine/idea/__init__.py +4 -0
- devflow_engine/idea/actors.py +481 -0
- devflow_engine/idea/agentic.py +465 -0
- devflow_engine/idea/analyze.py +93 -0
- devflow_engine/idea/devin_chat_dag.py +1 -0
- devflow_engine/idea/diff.py +99 -0
- devflow_engine/idea/drafts.py +446 -0
- devflow_engine/idea/idea_creation_dag.py +643 -0
- devflow_engine/idea/ideation_enrichment.py +355 -0
- devflow_engine/idea/ideation_enrichment_worker.py +19 -0
- devflow_engine/idea/paths.py +28 -0
- devflow_engine/idea/promote.py +53 -0
- devflow_engine/idea/redaction.py +27 -0
- devflow_engine/idea/repo_tools.py +1277 -0
- devflow_engine/idea/response_mode.py +30 -0
- devflow_engine/idea/story_pipeline.py +1585 -0
- devflow_engine/idea/sufficiency.py +376 -0
- devflow_engine/idea/traditional_stories.py +1257 -0
- devflow_engine/implementation/__init__.py +0 -0
- devflow_engine/implementation/alembic_preflight.py +700 -0
- devflow_engine/implementation/dag.py +8450 -0
- devflow_engine/implementation/green_gate.py +93 -0
- devflow_engine/implementation/prompts.py +108 -0
- devflow_engine/implementation/test_runtime.py +623 -0
- devflow_engine/integration/__init__.py +19 -0
- devflow_engine/integration/agentic.py +66 -0
- devflow_engine/integration/dag.py +3539 -0
- devflow_engine/integration/prompts.py +114 -0
- devflow_engine/integration/supabase_schema.sql +31 -0
- devflow_engine/integration/supabase_sync.py +177 -0
- devflow_engine/llm/__init__.py +1 -0
- devflow_engine/llm/cli_one_shot.py +84 -0
- devflow_engine/llm/cli_stream.py +371 -0
- devflow_engine/llm/execution_context.py +26 -0
- devflow_engine/llm/invoke.py +1322 -0
- devflow_engine/llm/provider_api.py +304 -0
- devflow_engine/llm/repo_knowledge.py +588 -0
- devflow_engine/llm_primitives.py +315 -0
- devflow_engine/orchestration.py +62 -0
- devflow_engine/planning/__init__.py +0 -0
- devflow_engine/planning/analyze_repo.py +92 -0
- devflow_engine/planning/render_drafts.py +133 -0
- devflow_engine/playground/__init__.py +0 -0
- devflow_engine/playground/hooks.py +26 -0
- devflow_engine/playwright_workflow/__init__.py +5 -0
- devflow_engine/playwright_workflow/dag.py +1317 -0
- devflow_engine/process/__init__.py +5 -0
- devflow_engine/process/dag.py +59 -0
- devflow_engine/project_registration/__init__.py +3 -0
- devflow_engine/project_registration/dag.py +1581 -0
- devflow_engine/project_registry.py +109 -0
- devflow_engine/prompts/devin/generic/prompt.md +6 -0
- devflow_engine/prompts/devin/ideation/prompt.md +263 -0
- devflow_engine/prompts/devin/ideation/scenarios.md +5 -0
- devflow_engine/prompts/devin/ideation_loop/prompt.md +6 -0
- devflow_engine/prompts/devin/insight/prompt.md +11 -0
- devflow_engine/prompts/devin/insight/scenarios.md +5 -0
- devflow_engine/prompts/devin/intake/prompt.md +15 -0
- devflow_engine/prompts/devin/iterate/prompt.md +12 -0
- devflow_engine/prompts/devin/shared/eval_doctrine.md +9 -0
- devflow_engine/prompts/devin/shared/principles.md +246 -0
- devflow_engine/prompts/devin_eval/assessment/prompt.md +18 -0
- devflow_engine/prompts/idea/api_ideation_agent/prompt.md +8 -0
- devflow_engine/prompts/idea/api_insight_agent/prompt.md +8 -0
- devflow_engine/prompts/idea/response_doctrine/prompt.md +18 -0
- devflow_engine/prompts/implementation/dependency_assessment/prompt.md +12 -0
- devflow_engine/prompts/implementation/green/green/prompt.md +11 -0
- devflow_engine/prompts/implementation/green/node_config/prompt.md +3 -0
- devflow_engine/prompts/implementation/green_review/outcome_review/prompt.md +5 -0
- devflow_engine/prompts/implementation/green_review/prior_run_review/prompt.md +5 -0
- devflow_engine/prompts/implementation/red/prompt.md +27 -0
- devflow_engine/prompts/implementation/redreview/prompt.md +23 -0
- devflow_engine/prompts/implementation/redreview_repair/prompt.md +16 -0
- devflow_engine/prompts/implementation/setupdoc/prompt.md +10 -0
- devflow_engine/prompts/implementation/story_planning/prompt.md +13 -0
- devflow_engine/prompts/implementation/test_design/prompt.md +27 -0
- devflow_engine/prompts/integration/README.md +185 -0
- devflow_engine/prompts/integration/green/example.md +67 -0
- devflow_engine/prompts/integration/green/green/prompt.md +10 -0
- devflow_engine/prompts/integration/green/node_config/prompt.md +42 -0
- devflow_engine/prompts/integration/green/past_prompts/20260417T212300/green/prompt.md +15 -0
- devflow_engine/prompts/integration/green/past_prompts/20260417T212300/node_config/prompt.md +42 -0
- devflow_engine/prompts/integration/green_enrich/example.md +79 -0
- devflow_engine/prompts/integration/green_enrich/green_enrich/prompt.md +9 -0
- devflow_engine/prompts/integration/green_enrich/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/green_enrich/prompt.md +14 -0
- devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/red/code_repair/prompt.md +12 -0
- devflow_engine/prompts/integration/red/example.md +152 -0
- devflow_engine/prompts/integration/red/node_config/prompt.md +86 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/node_config/prompt.md +84 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red/prompt.md +16 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red_repair/prompt.md +15 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T215032/node_config/prompt.md +84 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T215032/red_repair/prompt.md +11 -0
- devflow_engine/prompts/integration/red/red/prompt.md +11 -0
- devflow_engine/prompts/integration/red/red_repair/prompt.md +12 -0
- devflow_engine/prompts/integration/red_review/example.md +71 -0
- devflow_engine/prompts/integration/red_review/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/red_review/prompt.md +15 -0
- devflow_engine/prompts/integration/red_review/red_review/prompt.md +9 -0
- devflow_engine/prompts/integration/resolve/example.md +111 -0
- devflow_engine/prompts/integration/resolve/node_config/prompt.md +64 -0
- devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/node_config/prompt.md +64 -0
- devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_implicated_users/prompt.md +15 -0
- devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_side_effects/prompt.md +15 -0
- devflow_engine/prompts/integration/resolve/resolve_implicated_users/prompt.md +10 -0
- devflow_engine/prompts/integration/resolve/resolve_side_effects/prompt.md +10 -0
- devflow_engine/prompts/integration/validate/build_idea_acceptance_coverage/prompt.md +12 -0
- devflow_engine/prompts/integration/validate/code_repair/prompt.md +13 -0
- devflow_engine/prompts/integration/validate/example.md +143 -0
- devflow_engine/prompts/integration/validate/node_config/prompt.md +87 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/node_config/prompt.md +67 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_enrich_gate/prompt.md +17 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_repair/prompt.md +16 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/node_config/prompt.md +67 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/validate_repair/prompt.md +9 -0
- devflow_engine/prompts/integration/validate/validate_enrich_gate/prompt.md +10 -0
- devflow_engine/prompts/integration/validate/validate_repair/prompt.md +20 -0
- devflow_engine/prompts/integration/write_workflows/example.md +100 -0
- devflow_engine/prompts/integration/write_workflows/node_config/prompt.md +44 -0
- devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/node_config/prompt.md +44 -0
- devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/write_workflows/prompt.md +17 -0
- devflow_engine/prompts/integration/write_workflows/write_workflows/prompt.md +11 -0
- devflow_engine/prompts/iterate/README.md +7 -0
- devflow_engine/prompts/iterate/coder/prompt.md +11 -0
- devflow_engine/prompts/iterate/framer/prompt.md +11 -0
- devflow_engine/prompts/iterate/iterator/prompt.md +13 -0
- devflow_engine/prompts/iterate/observer/prompt.md +11 -0
- devflow_engine/prompts/recovery/diagnosis/prompt.md +7 -0
- devflow_engine/prompts/recovery/execution/prompt.md +8 -0
- devflow_engine/prompts/recovery/execution_verification/prompt.md +7 -0
- devflow_engine/prompts/recovery/failure_investigation/prompt.md +10 -0
- devflow_engine/prompts/recovery/preflight_health_repo_repair/prompt.md +8 -0
- devflow_engine/prompts/recovery/remediation_execution/prompt.md +11 -0
- devflow_engine/prompts/recovery/root_cause_investigation/prompt.md +12 -0
- devflow_engine/prompts/scope_idea/doctrine/prompt.md +7 -0
- devflow_engine/prompts/source_doc_eval/document/prompt.md +6 -0
- devflow_engine/prompts/source_doc_eval/targeted_mutation/prompt.md +9 -0
- devflow_engine/prompts/source_doc_mutation/domain_entities/prompt.md +6 -0
- devflow_engine/prompts/source_doc_mutation/product_brief/prompt.md +6 -0
- devflow_engine/prompts/source_doc_mutation/project_doc_coherence/prompt.md +7 -0
- devflow_engine/prompts/source_doc_mutation/project_doc_render/prompt.md +9 -0
- devflow_engine/prompts/source_doc_mutation/source_doc_coherence/prompt.md +5 -0
- devflow_engine/prompts/source_doc_mutation/source_doc_enrichment_coherence/prompt.md +6 -0
- devflow_engine/prompts/source_doc_mutation/user_workflows/prompt.md +6 -0
- devflow_engine/prompts/source_scope/doctrine/prompt.md +10 -0
- devflow_engine/prompts/ui_grounding/doctrine/prompt.md +7 -0
- devflow_engine/recovery/__init__.py +3 -0
- devflow_engine/recovery/dag.py +2609 -0
- devflow_engine/recovery/models.py +220 -0
- devflow_engine/refactor.py +93 -0
- devflow_engine/registry/__init__.py +1 -0
- devflow_engine/registry/cards.py +238 -0
- devflow_engine/registry/domain_normalize.py +60 -0
- devflow_engine/registry/effects.py +65 -0
- devflow_engine/registry/enforce_report.py +150 -0
- devflow_engine/registry/module_cards_classify.py +164 -0
- devflow_engine/registry/module_cards_draft.py +184 -0
- devflow_engine/registry/module_cards_gate.py +59 -0
- devflow_engine/registry/packages.py +347 -0
- devflow_engine/registry/pathways.py +323 -0
- devflow_engine/review/__init__.py +11 -0
- devflow_engine/review/dag.py +588 -0
- devflow_engine/review/review_story.py +67 -0
- devflow_engine/scope_idea/__init__.py +3 -0
- devflow_engine/scope_idea/agentic.py +39 -0
- devflow_engine/scope_idea/dag.py +1069 -0
- devflow_engine/scope_idea/models.py +175 -0
- devflow_engine/skills/builtins/devflow/queue_failure_investigation/SKILL.md +112 -0
- devflow_engine/skills/builtins/devflow/queue_idea_to_story/SKILL.md +120 -0
- devflow_engine/skills/builtins/devflow/queue_integration/SKILL.md +105 -0
- devflow_engine/skills/builtins/devflow/queue_recovery/SKILL.md +108 -0
- devflow_engine/skills/builtins/devflow/queue_runtime_core/SKILL.md +155 -0
- devflow_engine/skills/builtins/devflow/queue_story_implementation/SKILL.md +122 -0
- devflow_engine/skills/builtins/devin/idea_to_story_handoff/SKILL.md +120 -0
- devflow_engine/skills/builtins/devin/ideation/SKILL.md +168 -0
- devflow_engine/skills/builtins/devin/ideation/state-and-phrasing-reference.md +18 -0
- devflow_engine/skills/builtins/devin/insight/SKILL.md +22 -0
- devflow_engine/skills/registry.example.yaml +42 -0
- devflow_engine/source_doc_assumptions.py +291 -0
- devflow_engine/source_doc_mutation_dag.py +1606 -0
- devflow_engine/source_doc_mutation_eval.py +417 -0
- devflow_engine/source_doc_mutation_worker.py +25 -0
- devflow_engine/source_docs_schema.py +207 -0
- devflow_engine/source_docs_updater.py +309 -0
- devflow_engine/source_scope/__init__.py +15 -0
- devflow_engine/source_scope/agentic.py +45 -0
- devflow_engine/source_scope/dag.py +1626 -0
- devflow_engine/source_scope/models.py +177 -0
- devflow_engine/stores/__init__.py +0 -0
- devflow_engine/stores/execution_store.py +3534 -0
- devflow_engine/story/__init__.py +0 -0
- devflow_engine/story/contracts.py +160 -0
- devflow_engine/story/discovery.py +47 -0
- devflow_engine/story/evidence.py +118 -0
- devflow_engine/story/hashing.py +27 -0
- devflow_engine/story/implemented_queue_purge.py +148 -0
- devflow_engine/story/indexer.py +105 -0
- devflow_engine/story/io.py +20 -0
- devflow_engine/story/markdown_contracts.py +298 -0
- devflow_engine/story/reconciliation.py +408 -0
- devflow_engine/story/validate_stories.py +149 -0
- devflow_engine/story/validate_tests_story.py +512 -0
- devflow_engine/story/validation.py +133 -0
- devflow_engine/ui_grounding/__init__.py +11 -0
- devflow_engine/ui_grounding/agentic.py +31 -0
- devflow_engine/ui_grounding/dag.py +874 -0
- devflow_engine/ui_grounding/models.py +224 -0
- devflow_engine/ui_grounding/pencil_bridge.py +247 -0
- devflow_engine/vendor/__init__.py +0 -0
- devflow_engine/vendor/datalumina_genai/__init__.py +11 -0
- devflow_engine/vendor/datalumina_genai/core/__init__.py +0 -0
- devflow_engine/vendor/datalumina_genai/core/exceptions.py +9 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/__init__.py +0 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/agent.py +48 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/agent_streaming_node.py +26 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/base.py +89 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/concurrent.py +30 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/router.py +69 -0
- devflow_engine/vendor/datalumina_genai/core/schema.py +72 -0
- devflow_engine/vendor/datalumina_genai/core/task.py +52 -0
- devflow_engine/vendor/datalumina_genai/core/validate.py +139 -0
- devflow_engine/vendor/datalumina_genai/core/workflow.py +200 -0
- devflow_engine/worker.py +1086 -0
- devflow_engine/worker_guard.py +233 -0
- devflow_engine-1.0.0.dist-info/METADATA +235 -0
- devflow_engine-1.0.0.dist-info/RECORD +393 -0
- devflow_engine-1.0.0.dist-info/WHEEL +4 -0
- devflow_engine-1.0.0.dist-info/entry_points.txt +3 -0
- devin/__init__.py +6 -0
- devin/dag.py +58 -0
- devin/dag_two_arm.py +138 -0
- devin/devin_chat_scenario_catalog.json +588 -0
- devin/devin_eval.py +677 -0
- devin/nodes/__init__.py +0 -0
- devin/nodes/ideation/__init__.py +0 -0
- devin/nodes/ideation/node.py +195 -0
- devin/nodes/ideation/playground.py +267 -0
- devin/nodes/ideation/prompt.md +65 -0
- devin/nodes/ideation/scenarios/continue_refinement.py +13 -0
- devin/nodes/ideation/scenarios/continue_refinement_evals.py +18 -0
- devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py +17 -0
- devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py +16 -0
- devin/nodes/ideation/scenarios/large_idea_split.py +4 -0
- devin/nodes/ideation/scenarios/large_idea_split_evals.py +17 -0
- devin/nodes/ideation/scenarios/source_documentation_added.py +4 -0
- devin/nodes/ideation/scenarios/source_documentation_added_evals.py +16 -0
- devin/nodes/ideation/scenarios/user_says_create_it.py +30 -0
- devin/nodes/ideation/scenarios/user_says_create_it_evals.py +23 -0
- devin/nodes/ideation/scenarios/vague_idea.py +16 -0
- devin/nodes/ideation/scenarios/vague_idea_evals.py +47 -0
- devin/nodes/ideation/tools.json +312 -0
- devin/nodes/insight/__init__.py +0 -0
- devin/nodes/insight/node.py +49 -0
- devin/nodes/insight/playground.py +154 -0
- devin/nodes/insight/prompt.md +61 -0
- devin/nodes/insight/scenarios/architecture_pattern_query.py +15 -0
- devin/nodes/insight/scenarios/architecture_pattern_query_evals.py +25 -0
- devin/nodes/insight/scenarios/codebase_exploration.py +15 -0
- devin/nodes/insight/scenarios/codebase_exploration_evals.py +23 -0
- devin/nodes/insight/scenarios/devin_ideation_routing.py +19 -0
- devin/nodes/insight/scenarios/devin_ideation_routing_evals.py +39 -0
- devin/nodes/insight/scenarios/devin_insight_routing.py +20 -0
- devin/nodes/insight/scenarios/devin_insight_routing_evals.py +40 -0
- devin/nodes/insight/scenarios/operational_debugging.py +15 -0
- devin/nodes/insight/scenarios/operational_debugging_evals.py +23 -0
- devin/nodes/insight/scenarios/operational_question.py +9 -0
- devin/nodes/insight/scenarios/operational_question_evals.py +8 -0
- devin/nodes/insight/scenarios/queue_status.py +15 -0
- devin/nodes/insight/scenarios/queue_status_evals.py +23 -0
- devin/nodes/insight/scenarios/source_doc_explanation.py +14 -0
- devin/nodes/insight/scenarios/source_doc_explanation_evals.py +21 -0
- devin/nodes/insight/scenarios/worker_state_check.py +15 -0
- devin/nodes/insight/scenarios/worker_state_check_evals.py +22 -0
- devin/nodes/insight/tools.json +126 -0
- devin/nodes/intake/__init__.py +0 -0
- devin/nodes/intake/node.py +27 -0
- devin/nodes/intake/playground.py +47 -0
- devin/nodes/intake/prompt.md +12 -0
- devin/nodes/intake/scenarios/ideation_routing.py +4 -0
- devin/nodes/intake/scenarios/ideation_routing_evals.py +5 -0
- devin/nodes/intake/scenarios/insight_routing.py +4 -0
- devin/nodes/intake/scenarios/insight_routing_evals.py +5 -0
- devin/nodes/iterate/README.md +44 -0
- devin/nodes/iterate/__init__.py +1 -0
- devin/nodes/iterate/_archived_design_stages/01-objectives-requirements.md +112 -0
- devin/nodes/iterate/_archived_design_stages/02-evals.md +131 -0
- devin/nodes/iterate/_archived_design_stages/03-tools-and-boundaries.md +110 -0
- devin/nodes/iterate/_archived_design_stages/04-harness-and-playground.md +32 -0
- devin/nodes/iterate/_archived_design_stages/05-prompt-deferred.md +11 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/01-objectives-requirements.md +20 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/02-evals.md +8 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/03-tools-and-boundaries.md +14 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/04-harness-and-playground.md +12 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/01-objectives-requirements.md +20 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/02-evals.md +8 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/03-tools-and-boundaries.md +13 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/04-harness-and-playground.md +12 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md +25 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/02-evals.md +9 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/03-tools-and-boundaries.md +14 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md +12 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md +20 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/02-evals.md +8 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/03-tools-and-boundaries.md +14 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md +13 -0
- devin/nodes/iterate/agent-roles.md +89 -0
- devin/nodes/iterate/agents/README.md +10 -0
- devin/nodes/iterate/artifacts.md +504 -0
- devin/nodes/iterate/contract.md +100 -0
- devin/nodes/iterate/eval-plan.md +74 -0
- devin/nodes/iterate/node.py +100 -0
- devin/nodes/iterate/pipeline/README.md +13 -0
- devin/nodes/iterate/playground-contract.md +76 -0
- devin/nodes/iterate/prompt.md +11 -0
- devin/nodes/iterate/scenarios/README.md +38 -0
- devin/nodes/iterate/scenarios/artifact-and-loop-scenarios.md +101 -0
- devin/nodes/iterate/scenarios/coder_artifact_alignment.py +32 -0
- devin/nodes/iterate/scenarios/coder_artifact_alignment_evals.py +45 -0
- devin/nodes/iterate/scenarios/coder_bounded_fix.py +27 -0
- devin/nodes/iterate/scenarios/coder_bounded_fix_evals.py +45 -0
- devin/nodes/iterate/scenarios/devin_iterate_routing.py +21 -0
- devin/nodes/iterate/scenarios/devin_iterate_routing_evals.py +36 -0
- devin/nodes/iterate/scenarios/framer_scope_boundary.py +25 -0
- devin/nodes/iterate/scenarios/framer_scope_boundary_evals.py +57 -0
- devin/nodes/iterate/scenarios/framer_task_framing.py +25 -0
- devin/nodes/iterate/scenarios/framer_task_framing_evals.py +58 -0
- devin/nodes/iterate/scenarios/iterate_error_fix.py +21 -0
- devin/nodes/iterate/scenarios/iterate_error_fix_evals.py +39 -0
- devin/nodes/iterate/scenarios/iterate_quick_change.py +21 -0
- devin/nodes/iterate/scenarios/iterate_quick_change_evals.py +35 -0
- devin/nodes/iterate/scenarios/iterate_to_idea_promotion.py +23 -0
- devin/nodes/iterate/scenarios/iterate_to_idea_promotion_evals.py +53 -0
- devin/nodes/iterate/scenarios/iterate_to_insight_reroute.py +23 -0
- devin/nodes/iterate/scenarios/iterate_to_insight_reroute_evals.py +53 -0
- devin/nodes/iterate/scenarios/observer_evidence_seam.py +28 -0
- devin/nodes/iterate/scenarios/observer_evidence_seam_evals.py +55 -0
- devin/nodes/iterate/scenarios/observer_repro_creation.py +28 -0
- devin/nodes/iterate/scenarios/observer_repro_creation_evals.py +45 -0
- devin/nodes/iterate/scenarios/routing-matrix.md +45 -0
- devin/nodes/shared/__init__.py +0 -0
- devin/nodes/shared/filemaker_expert.md +80 -0
- devin/nodes/shared/filemaker_expert.py +354 -0
- devin/nodes/shared/filemaker_expert_eval/runner.py +176 -0
- devin/nodes/shared/filemaker_expert_eval/scenarios.json +65 -0
- devin/nodes/shared/goldilocks_advisor_eval/runner.py +214 -0
- devin/nodes/shared/goldilocks_advisor_eval/scenarios.json +58 -0
- devin/nodes/shared/helpers.py +156 -0
- devin/nodes/shared/idea_compliance_advisor_eval/runner.py +252 -0
- devin/nodes/shared/idea_compliance_advisor_eval/scenarios.json +75 -0
- devin/nodes/shared/models.py +44 -0
- devin/nodes/shared/post.py +40 -0
- devin/nodes/shared/router.py +107 -0
- devin/nodes/shared/tools.py +191 -0
- devin/shared/devin-chat-rubric.md +237 -0
- devin/shared/devin-chat-scenario-suite.md +90 -0
- devin/shared/eval_doctrine.md +9 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Devin Intake Routing Doctrine
|
|
2
|
+
|
|
3
|
+
Determine which Devin arm should handle the current turn.
|
|
4
|
+
|
|
5
|
+
- `ideation`: software/product shaping, feature requests, workflow design, implementation planning, readiness clarification, or “build/change/add/fix this in the project” requests.
|
|
6
|
+
- `insight`: project-specific questions about code, repo state, queue status, worker state, architecture, behavior, operations, or “what is happening / how does this work?” questions.
|
|
7
|
+
|
|
8
|
+
Routing rules:
|
|
9
|
+
- Prefer `insight` for repo/operations/status questions.
|
|
10
|
+
- Prefer `ideation` for forward-looking product/build requests.
|
|
11
|
+
- If the user asks for planning/refinement of a software change, keep it in `ideation`.
|
|
12
|
+
- If the user asks for explanation/investigation of the current system, keep it in `insight`.
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
SCENARIO_NAME = 'ideation_routing'
|
|
2
|
+
SCENARIO_DESCRIPTION = 'Routes a forward-looking software feature request into the ideation arm.'
|
|
3
|
+
INPUT_PAYLOAD = {'raw_text': 'Build a client portal that lets staff triage support requests and track approvals.'}
|
|
4
|
+
EXPECTED_BEHAVIOR = {'route_arm': 'ideation', 'reason_contains': 'software'}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
SCENARIO_NAME = 'insight_routing'
|
|
2
|
+
SCENARIO_DESCRIPTION = 'Routes a repo/operations question into the insight arm.'
|
|
3
|
+
INPUT_PAYLOAD = {'raw_text': 'What is the source-doc queue status for this project right now?'}
|
|
4
|
+
EXPECTED_BEHAVIOR = {'route_arm': 'insight', 'reason_contains': 'operational'}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Iterate node design scaffold
|
|
2
|
+
|
|
3
|
+
This directory is the design home for the proposed Devin `iterate` arm.
|
|
4
|
+
|
|
5
|
+
It now reflects Marcus's preferred agent-development pipeline explicitly:
|
|
6
|
+
|
|
7
|
+
1. objectives and requirements
|
|
8
|
+
2. evals
|
|
9
|
+
3. tools and boundaries
|
|
10
|
+
4. harness and playground
|
|
11
|
+
5. prompt deferred until the first four stages are accepted
|
|
12
|
+
|
|
13
|
+
## Directory map
|
|
14
|
+
|
|
15
|
+
- `contract.md`, top-level iterate contract, routing boundary, and orchestration shape
|
|
16
|
+
- `artifacts.md`, shared artifact contracts across the iterate lane
|
|
17
|
+
- `agent-roles.md`, high-level accountability split for Iterator, Framer, Observer, and Coder
|
|
18
|
+
- `pipeline/`, cross-agent design docs ordered by development stage
|
|
19
|
+
- `agents/`, per-agent design docs, each ordered by development stage
|
|
20
|
+
- `eval-plan.md`, consolidated eval inventory spanning routing, framing, observation, supervision, and completion truth
|
|
21
|
+
- `playground-contract.md`, lane-level harness and fixture contract
|
|
22
|
+
- `scenarios/`, planning docs for future scenario fixtures and eval implementations
|
|
23
|
+
|
|
24
|
+
## Design rules for this directory
|
|
25
|
+
|
|
26
|
+
- Design only, no implementation code
|
|
27
|
+
- No real prompt content yet
|
|
28
|
+
- Objectives lead, non-goals derive from objectives
|
|
29
|
+
- Evals come before tool affordances
|
|
30
|
+
- Tools are documented as bounded capabilities, not as permission theater
|
|
31
|
+
- Harness design tests contracts and truthfulness, not just style
|
|
32
|
+
|
|
33
|
+
## Current recommended orchestration
|
|
34
|
+
|
|
35
|
+
- pattern: advisor-primary
|
|
36
|
+
- primary owner: `Iterator`
|
|
37
|
+
- advisors: `Framer`, `Observer`
|
|
38
|
+
- supervised worker: `Coder`
|
|
39
|
+
|
|
40
|
+
The intent is one accountable owner with specialist artifacts constraining the coding loop, not a flat peer swarm.
|
|
41
|
+
|
|
42
|
+
## Prompt status
|
|
43
|
+
|
|
44
|
+
Prompt authoring is intentionally deferred. See `_archived_design_stages/05-prompt-deferred.md` for the acceptance rule.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Iterate node package."""
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# Iterate objectives and requirements
|
|
2
|
+
|
|
3
|
+
## Why this stage exists
|
|
4
|
+
|
|
5
|
+
This is stage 1 in Marcus's iterate design order:
|
|
6
|
+
1. objectives and requirements
|
|
7
|
+
2. evals
|
|
8
|
+
3. tools and boundaries
|
|
9
|
+
4. harness and playground
|
|
10
|
+
5. prompt content only after the first four stages are stable
|
|
11
|
+
|
|
12
|
+
If this file is vague, every later stage will drift. The point here is to lock the lane objective, routing boundary, role ownership, and completion requirements before anyone talks about tools or prompt wording.
|
|
13
|
+
|
|
14
|
+
## Lane objective
|
|
15
|
+
|
|
16
|
+
`iterate` owns a bounded change request on an existing surface and carries it from messy ask to one of three truthful outcomes:
|
|
17
|
+
- verified completion
|
|
18
|
+
- honest blocked verdict
|
|
19
|
+
- explicit promotion out of iterate
|
|
20
|
+
|
|
21
|
+
This lane is for task-scale execution, not for read-only diagnosis and not for broad product planning.
|
|
22
|
+
|
|
23
|
+
## What must be true for a request to belong in iterate
|
|
24
|
+
|
|
25
|
+
A request belongs in `iterate` only when all of these are plausibly true:
|
|
26
|
+
- the user wants a change, not just an explanation
|
|
27
|
+
- the target is an existing surface, behavior, or failure
|
|
28
|
+
- the work can be framed as a bounded task artifact
|
|
29
|
+
- success can be checked by a scoped observation seam or equivalent proof
|
|
30
|
+
- the change does not require new planning truth at story or feature scale
|
|
31
|
+
|
|
32
|
+
If any of those fail, routing should change instead of forcing the work through iterate.
|
|
33
|
+
|
|
34
|
+
## Routing requirements
|
|
35
|
+
|
|
36
|
+
### Route to `iterate`
|
|
37
|
+
Route to `iterate` when the ask is a targeted fix, quick change, or narrow improvement against an existing route, page, component, workflow step, API behavior, or failure mode.
|
|
38
|
+
|
|
39
|
+
### Route to `insight`
|
|
40
|
+
Route to `insight` when the user wants investigation, explanation, diagnosis, or read-only analysis without asking for implementation.
|
|
41
|
+
|
|
42
|
+
### Route to `idea`
|
|
43
|
+
Route to `idea` when framing reveals that the work is no longer task-scale, needs new product or workflow planning, or cannot be truthfully described as a bounded delta on an existing surface.
|
|
44
|
+
|
|
45
|
+
### Re-route during execution
|
|
46
|
+
Initial routing is not permanent. If Framer or Observer discovers that the task is actually read-only or broader-planning work, Iterator must re-route honestly instead of preserving the original lane choice for convenience.
|
|
47
|
+
|
|
48
|
+
## Truth requirements
|
|
49
|
+
|
|
50
|
+
### Task truth before coding
|
|
51
|
+
A coding attempt requires a bounded `task_artifact` with:
|
|
52
|
+
- current behavior
|
|
53
|
+
- desired behavior
|
|
54
|
+
- explicit success criteria
|
|
55
|
+
- known scope boundary
|
|
56
|
+
- blocking unknowns called out separately from assumptions
|
|
57
|
+
|
|
58
|
+
### Observation truth before completion
|
|
59
|
+
A completion claim requires an `observation_artifact` that establishes one of these:
|
|
60
|
+
- a confirmed repro that later stops reproducing, or
|
|
61
|
+
- a bounded red-to-green verification seam for the requested improvement
|
|
62
|
+
|
|
63
|
+
### Honest uncertainty
|
|
64
|
+
If evidence is missing, repro cannot be confirmed, or the green condition is weak, the lane must say that plainly. Missing truth is a blocker signal, not a reason to improvise confidence.
|
|
65
|
+
|
|
66
|
+
## Supervision requirements
|
|
67
|
+
|
|
68
|
+
### Iterator is the accountable owner
|
|
69
|
+
`Iterator` owns lane judgment, readiness to code, respawn decisions, routing changes, and final disposition.
|
|
70
|
+
|
|
71
|
+
### Framer and Observer constrain the loop
|
|
72
|
+
`Framer` and `Observer` are not optional flavor agents. Their artifacts are the contract that bounds what `Coder` is allowed to do and what `Iterator` is allowed to approve.
|
|
73
|
+
|
|
74
|
+
### Coder stays subordinate to artifacts
|
|
75
|
+
`Coder` implements the scoped delta and reports evidence. `Coder` does not redefine scope, success criteria, readiness, or completion truth.
|
|
76
|
+
|
|
77
|
+
## Scope requirements
|
|
78
|
+
|
|
79
|
+
The iterate lane must:
|
|
80
|
+
- stay at task scale
|
|
81
|
+
- name the affected surface as concretely as possible
|
|
82
|
+
- expose scope growth immediately
|
|
83
|
+
- prefer blocked or promoted verdicts over fake completion
|
|
84
|
+
- resist unrelated cleanup, opportunistic refactors, or stealth feature work
|
|
85
|
+
|
|
86
|
+
## Required shared artifacts
|
|
87
|
+
|
|
88
|
+
The lane contract depends on four shared artifacts:
|
|
89
|
+
- `task_artifact`, authored by Framer and approved for use by Iterator
|
|
90
|
+
- `observation_artifact`, authored by Observer and used by Iterator as the truth seam
|
|
91
|
+
- `iterator_run`, owned by Iterator with coder attempts recorded inside it
|
|
92
|
+
- `promotion_handoff`, authored by Iterator only when work exits iterate for `idea` or `insight`
|
|
93
|
+
|
|
94
|
+
These artifacts are the cross-agent operating contract. If an important judgment is not grounded in one of them, the design is underspecified.
|
|
95
|
+
|
|
96
|
+
All durable top-level iterate artifacts should use monotonic integer revisions so readiness and promotion decisions can point to concrete artifact revisions rather than vague "latest" state.
|
|
97
|
+
|
|
98
|
+
## Role ownership summary
|
|
99
|
+
|
|
100
|
+
- `Iterator` owns route fit, readiness, supervision, respawn logic, and final verdict
|
|
101
|
+
- `Framer` owns bounded task construction and promotion recommendations discovered during framing
|
|
102
|
+
- `Observer` owns evidence, repro, green-condition definition, and coding-readiness recommendation
|
|
103
|
+
- `Coder` owns implementation attempts and narrow verification execution under supervision
|
|
104
|
+
|
|
105
|
+
## Minimum acceptance bar for moving to stage 2
|
|
106
|
+
|
|
107
|
+
This stage is ready for eval design only when reviewers can answer all of these clearly:
|
|
108
|
+
- what counts as iterate versus insight versus idea
|
|
109
|
+
- what artifact truth must exist before coding starts
|
|
110
|
+
- what completion evidence Iterator is allowed to trust
|
|
111
|
+
- what each of the four agents owns and must not absorb
|
|
112
|
+
- what conditions force blockage or promotion instead of completion
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# Iterate evals by pipeline stage
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
|
|
5
|
+
This is stage 2 in Marcus's iterate design order. Its job is to prove that the stage-1 contract is testable before anyone specifies tools, harnesses, or prompts.
|
|
6
|
+
|
|
7
|
+
A good eval here answers: "what concrete failure would tell us this iterate design is lying, drifting, or collapsing ownership boundaries?"
|
|
8
|
+
|
|
9
|
+
## Eval design rules
|
|
10
|
+
|
|
11
|
+
Every eval in this file should check contract truth, not style.
|
|
12
|
+
|
|
13
|
+
That means evals should prefer:
|
|
14
|
+
- routing correctness over eloquent justification
|
|
15
|
+
- artifact quality over generic helpfulness
|
|
16
|
+
- ownership discipline over agent enthusiasm
|
|
17
|
+
- truthful blocked or promoted outcomes over forced completion
|
|
18
|
+
|
|
19
|
+
## Eval buckets
|
|
20
|
+
|
|
21
|
+
### 1. Route selection evals
|
|
22
|
+
These test whether intake and re-routing keep iterate limited to bounded change work.
|
|
23
|
+
|
|
24
|
+
Must pass examples:
|
|
25
|
+
- a concrete existing-surface bug fix routes to `iterate`
|
|
26
|
+
- a small behavior tweak routes to `iterate`
|
|
27
|
+
- a narrow UI improvement on an existing page routes to `iterate`
|
|
28
|
+
|
|
29
|
+
Must-reject examples (requests that must not stay in `iterate`):
|
|
30
|
+
- investigation-only asks route to `insight`
|
|
31
|
+
- explanation-only asks route to `insight`
|
|
32
|
+
- broad feature or workflow requests route to `idea`
|
|
33
|
+
- initially small asks that expand during framing are promoted out of `iterate`
|
|
34
|
+
|
|
35
|
+
Failure signals:
|
|
36
|
+
- `iterate` absorbs read-only diagnostic work
|
|
37
|
+
- `iterate` absorbs broad planning work
|
|
38
|
+
- lane choice is treated as irreversible after better truth appears
|
|
39
|
+
|
|
40
|
+
### 2. Task artifact integrity evals
|
|
41
|
+
These test Framer's ability to convert a messy ask into a bounded contract.
|
|
42
|
+
|
|
43
|
+
Required checks:
|
|
44
|
+
- the artifact identifies current behavior and desired behavior distinctly
|
|
45
|
+
- success criteria are observable and not aspirational
|
|
46
|
+
- scope is narrow enough for task-scale execution
|
|
47
|
+
- assumptions are separated from facts
|
|
48
|
+
- blocking unknowns are separated from nonblocking unknowns
|
|
49
|
+
- a promotion recommendation appears when framing reveals non-iterate scope
|
|
50
|
+
- task artifact revisions increase monotonically when framing is amended
|
|
51
|
+
|
|
52
|
+
Failure signals:
|
|
53
|
+
- task artifact reads like a vague restatement of the user message
|
|
54
|
+
- success criteria cannot be verified later by Observer or Iterator
|
|
55
|
+
- scope is so broad that Coder would need to reinterpret the task
|
|
56
|
+
|
|
57
|
+
### 3. Observation artifact integrity evals
|
|
58
|
+
These test whether Observer creates a truthful seam for implementation and validation.
|
|
59
|
+
|
|
60
|
+
Required checks:
|
|
61
|
+
- evidence is grounded in logs, repro steps, failing seam output, or equivalent observable proof
|
|
62
|
+
- the artifact can report `not_confirmed` honestly when repro fails
|
|
63
|
+
- repeatability status is explicit
|
|
64
|
+
- expected green condition is concrete enough for Iterator to validate later
|
|
65
|
+
- missing evidence triggers a context request or blocked recommendation, not invented certainty
|
|
66
|
+
- observation artifact revisions increase monotonically when new truth is learned
|
|
67
|
+
|
|
68
|
+
Failure signals:
|
|
69
|
+
- repro is implied but not documented
|
|
70
|
+
- evidence summary has no traceable source
|
|
71
|
+
- green condition is too vague to distinguish success from partial progress
|
|
72
|
+
|
|
73
|
+
### 4. Supervision integrity evals
|
|
74
|
+
These test whether Iterator preserves the advisor-primary model.
|
|
75
|
+
|
|
76
|
+
Required checks:
|
|
77
|
+
- Iterator refuses to start coding before task and observation artifacts are sufficient
|
|
78
|
+
- Iterator uses Framer and Observer outputs as constraints, not as optional suggestions
|
|
79
|
+
- Iterator respawns Coder only with repair-specific context tied to the artifacts
|
|
80
|
+
- Iterator blocks or promotes when truth or scope no longer fits iterate
|
|
81
|
+
- promotion or reroute writes an iterate-owned handoff artifact with references back to the exact task and observation revisions used for the decision
|
|
82
|
+
|
|
83
|
+
Failure signals:
|
|
84
|
+
- Iterator bypasses missing artifact truth because coding "might help"
|
|
85
|
+
- Iterator lets Coder redefine the task, evidence, or completion bar
|
|
86
|
+
- Iterator returns completion after scope drift
|
|
87
|
+
|
|
88
|
+
### 5. Coder discipline evals
|
|
89
|
+
These test whether Coder behaves like a supervised worker instead of a peer decider.
|
|
90
|
+
|
|
91
|
+
Required checks:
|
|
92
|
+
- implementation stays within the bounded task and named surface
|
|
93
|
+
- verification stays narrow and relevant to the observation seam
|
|
94
|
+
- attempt reports say what changed, what passed, what failed, and what remains blocked
|
|
95
|
+
- second attempts respond to the repair context rather than conceptually restarting from scratch
|
|
96
|
+
|
|
97
|
+
Failure signals:
|
|
98
|
+
- unrelated refactors or cleanup appear without authorization
|
|
99
|
+
- Coder claims success without matching the artifact green condition
|
|
100
|
+
- Coder silently broadens the solution to compensate for poor framing
|
|
101
|
+
|
|
102
|
+
### 6. Completion integrity evals
|
|
103
|
+
These test the final truth gate.
|
|
104
|
+
|
|
105
|
+
Required checks:
|
|
106
|
+
- no completion without a green seam or equivalent scoped proof
|
|
107
|
+
- no completion when requested user-visible behavior is still missing
|
|
108
|
+
- no completion when the observed failure still reproduces
|
|
109
|
+
- no completion after unauthorized scope growth
|
|
110
|
+
- blocked verdicts are allowed and scored as correct when truth is insufficient
|
|
111
|
+
- promotion verdicts are allowed and scored as correct when the task has become planning work
|
|
112
|
+
|
|
113
|
+
Failure signals:
|
|
114
|
+
- the system rewards optimistic claims over truthful disposition
|
|
115
|
+
- completion can happen without evidence that corresponds to the original ask
|
|
116
|
+
|
|
117
|
+
## Cross-agent eval expectations
|
|
118
|
+
|
|
119
|
+
The four-agent model should be explicitly visible in eval coverage:
|
|
120
|
+
- `Framer` evals prove task-bounding quality
|
|
121
|
+
- `Observer` evals prove evidence and green-condition quality
|
|
122
|
+
- `Coder` evals prove implementation discipline
|
|
123
|
+
- `Iterator` evals prove ownership of readiness, supervision, and final judgment
|
|
124
|
+
|
|
125
|
+
If a behavior matters but cannot be assigned to one of those owners, the contract is still blurry.
|
|
126
|
+
|
|
127
|
+
## Review rule
|
|
128
|
+
|
|
129
|
+
A stage-1 requirement is not real until there is a plausible eval that could fail it.
|
|
130
|
+
|
|
131
|
+
If reviewers cannot describe how a bad route, bad artifact, bad supervision choice, or fake completion would be caught, the iterate design is not ready to advance to stage 3.
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# Iterate tools and boundaries
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
|
|
5
|
+
This is stage 3 in Marcus's iterate design order. By the time this file is written, the lane objective and eval expectations should already be stable.
|
|
6
|
+
|
|
7
|
+
The point here is not to list every possible runtime permission. The point is to define which capability classes each role needs in order to satisfy the stage-1 and stage-2 contract, and which capability classes would let that role collapse into another role.
|
|
8
|
+
|
|
9
|
+
## Tooling principle
|
|
10
|
+
|
|
11
|
+
Describe capabilities in responsibility terms, not vendor or provider terms.
|
|
12
|
+
|
|
13
|
+
Good examples:
|
|
14
|
+
- repo inspection
|
|
15
|
+
- log and trace inspection
|
|
16
|
+
- bounded test execution
|
|
17
|
+
- artifact read and write
|
|
18
|
+
|
|
19
|
+
Bad examples:
|
|
20
|
+
- naming a provider-specific tool without explaining which contract requirement it serves
|
|
21
|
+
- granting broad execution rights to compensate for unclear ownership
|
|
22
|
+
|
|
23
|
+
## Shared lane capability classes
|
|
24
|
+
|
|
25
|
+
The iterate lane as a whole needs access to these capability classes:
|
|
26
|
+
- conversation and history reading
|
|
27
|
+
- repository and project surface inspection
|
|
28
|
+
- logs, traces, screenshots, and repro surfaces when relevant
|
|
29
|
+
- artifact read and write surfaces
|
|
30
|
+
- narrow verification seam execution
|
|
31
|
+
- supervised implementation execution
|
|
32
|
+
|
|
33
|
+
These are lane-level needs. They are not automatically granted to every role equally.
|
|
34
|
+
|
|
35
|
+
## Role capability boundaries
|
|
36
|
+
|
|
37
|
+
### Iterator
|
|
38
|
+
Needs:
|
|
39
|
+
- read access to conversation context and shared artifacts
|
|
40
|
+
- enough inspection capability to judge route fit, readiness, and final evidence
|
|
41
|
+
- authority to spawn or supervise Coder
|
|
42
|
+
- write access to iterator-owned run records, monotonic revision updates, promotion handoff records, and final disposition
|
|
43
|
+
|
|
44
|
+
Must not become:
|
|
45
|
+
- the default implementation worker
|
|
46
|
+
- the substitute observer who handwaves missing evidence
|
|
47
|
+
- the substitute framer who rewrites the task mid-loop to make an attempt look successful
|
|
48
|
+
|
|
49
|
+
### Framer
|
|
50
|
+
Needs:
|
|
51
|
+
- conversation and history reading
|
|
52
|
+
- project and surface inspection sufficient to localize the ask
|
|
53
|
+
- artifact write access for `task_artifact`
|
|
54
|
+
|
|
55
|
+
Must not have responsibility for:
|
|
56
|
+
- running broad repro or verification work that belongs to Observer
|
|
57
|
+
- implementation execution
|
|
58
|
+
- final completion judgment
|
|
59
|
+
|
|
60
|
+
### Observer
|
|
61
|
+
Needs:
|
|
62
|
+
- evidence-source inspection such as logs, traces, screenshots, and repo context
|
|
63
|
+
- bounded repro execution and narrow red-seam creation
|
|
64
|
+
- artifact write access for `observation_artifact`
|
|
65
|
+
|
|
66
|
+
Must not have responsibility for:
|
|
67
|
+
- silently editing code to make repro disappear
|
|
68
|
+
- redefining task scope
|
|
69
|
+
- returning final completion on behalf of Iterator
|
|
70
|
+
|
|
71
|
+
### Coder
|
|
72
|
+
Needs:
|
|
73
|
+
- repository read and write access within the supervised work area
|
|
74
|
+
- implementation execution capability
|
|
75
|
+
- narrow verification execution aligned to the task and observation seam
|
|
76
|
+
- ability to append implementation and verification notes to the run record
|
|
77
|
+
|
|
78
|
+
Must not have responsibility for:
|
|
79
|
+
- changing route classification
|
|
80
|
+
- redefining success criteria
|
|
81
|
+
- deciding that missing evidence is good enough
|
|
82
|
+
- writing the final verdict as if implementation were the same thing as truth
|
|
83
|
+
|
|
84
|
+
## Boundary rules that should survive runtime implementation
|
|
85
|
+
|
|
86
|
+
- missing evidence should trigger context requests, blockage, or promotion, not bluffing
|
|
87
|
+
- missing task clarity should trigger framing repair, not coder improvisation
|
|
88
|
+
- scope growth should trigger promotion review, not quiet expansion of the task
|
|
89
|
+
- read-only user intent should route to `insight`, even if implementation would be easy
|
|
90
|
+
- artifact revisions should advance explicitly when durable top-level truth changes, rather than relying on implicit last-write-wins semantics
|
|
91
|
+
- only Iterator should stamp promotion linkage or write `promotion_handoff.json`
|
|
92
|
+
- provider convenience must not override role boundaries
|
|
93
|
+
|
|
94
|
+
## Practical anti-collapse checks
|
|
95
|
+
|
|
96
|
+
A tools design is probably wrong if any of these become normal:
|
|
97
|
+
- Iterator frequently edits code directly because it is faster
|
|
98
|
+
- Framer runs deep repros because the task artifact was thin
|
|
99
|
+
- Observer fixes small issues while inspecting evidence
|
|
100
|
+
- Coder rewrites task meaning in order to declare success
|
|
101
|
+
|
|
102
|
+
Those are not efficiency wins. They are signs that the lane contract is dissolving.
|
|
103
|
+
|
|
104
|
+
## Minimum acceptance bar for moving to stage 4
|
|
105
|
+
|
|
106
|
+
This stage is ready for harness and playground design only when reviewers can answer all of these clearly:
|
|
107
|
+
- which capability classes are required by the lane overall
|
|
108
|
+
- which of those capabilities belong to each role
|
|
109
|
+
- which boundary violations would invalidate the advisor-primary model
|
|
110
|
+
- how the tools model preserves Iterator as the single accountable owner
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Iterate harness and playground design
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
|
|
5
|
+
The harness should exercise the lane as an orchestration contract, not just as prompt prose.
|
|
6
|
+
|
|
7
|
+
## Required harness abilities
|
|
8
|
+
|
|
9
|
+
- inject route context and prior turns
|
|
10
|
+
- inspect authored artifacts at each stage
|
|
11
|
+
- inspect iterate artifacts under `.devflow/iterate/<task_id>/`
|
|
12
|
+
- simulate coder retries and near misses
|
|
13
|
+
- compare terminal claims against observation truth and success criteria
|
|
14
|
+
- verify that readiness state transitions happen at the right times, both before and after coder attempts
|
|
15
|
+
- verify monotonic revision bumps on top-level artifacts when framing, observation, or supervision state changes
|
|
16
|
+
- inspect `promotion_handoff.json` when work exits to `idea` or `insight`
|
|
17
|
+
- read attempt-scoped verifier artifacts without requiring them to be inlined into `iterator_run.json`
|
|
18
|
+
- assert deterministic attempt ordering via ordinal ids like `attempt-001`
|
|
19
|
+
- parse a shared verifier-artifact envelope before descending into verifier-specific payloads
|
|
20
|
+
|
|
21
|
+
## Minimum fixture families
|
|
22
|
+
|
|
23
|
+
- reproducible error fix
|
|
24
|
+
- targeted improvement with a bounded failing seam
|
|
25
|
+
- ambiguous request needing framing discipline
|
|
26
|
+
- non-confirmed issue requiring honest blockage
|
|
27
|
+
- broader request that must promote to `idea`
|
|
28
|
+
- read-only request that must route to `insight`
|
|
29
|
+
|
|
30
|
+
## Success condition
|
|
31
|
+
|
|
32
|
+
The harness is good when it can catch false-positive completions, scope drift, missing-artifact shortcuts, incorrect iterator state transitions, missing promotion linkage, broken artifact revision discipline, non-monotonic attempt ids, and verifier outputs that skip the shared normalization envelope.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# Prompt deferred
|
|
2
|
+
|
|
3
|
+
Prompt authoring for the iterate lane is intentionally deferred.
|
|
4
|
+
|
|
5
|
+
Prompt work should start only after:
|
|
6
|
+
1. objectives and requirements are accepted
|
|
7
|
+
2. evals are accepted
|
|
8
|
+
3. tools and boundaries are accepted
|
|
9
|
+
4. harness and playground expectations are accepted
|
|
10
|
+
|
|
11
|
+
Until then, prompt placeholders are acceptable, but real prompt content is out of scope for this directory pass.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Coder, objectives and requirements
|
|
2
|
+
|
|
3
|
+
## Objective
|
|
4
|
+
|
|
5
|
+
Implement the scoped delta described by the task and observation artifacts, then report the attempt honestly.
|
|
6
|
+
|
|
7
|
+
## Requirements
|
|
8
|
+
|
|
9
|
+
- stay inside the scoped task unless Iterator explicitly broadens it
|
|
10
|
+
- treat framing and observation artifacts as the governing contract
|
|
11
|
+
- implement the smallest change that can satisfy the green condition
|
|
12
|
+
- run the narrowest valid verification seam
|
|
13
|
+
- report what changed, what passed, what failed, and what remains blocked
|
|
14
|
+
- support repair-specific retries when Iterator respawns with tighter context
|
|
15
|
+
|
|
16
|
+
## Derived non-goals
|
|
17
|
+
|
|
18
|
+
- do not redefine the task contract
|
|
19
|
+
- do not broaden scope for opportunistic cleanup
|
|
20
|
+
- do not self-certify final completion
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# Coder evals
|
|
2
|
+
|
|
3
|
+
- fixes a reproducible error without unrelated drift
|
|
4
|
+
- satisfies a targeted improvement seam with a minimal change
|
|
5
|
+
- reports partial progress honestly after a failed first pass
|
|
6
|
+
- stays within the scoped files or surfaces when the task is narrow
|
|
7
|
+
- provides actionable blocker detail when safe completion is impossible
|
|
8
|
+
- improves on a second attempt when respawned with repair context
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Coder tools and boundaries
|
|
2
|
+
|
|
3
|
+
## Needed capabilities
|
|
4
|
+
|
|
5
|
+
- read task and observation artifacts
|
|
6
|
+
- inspect and modify scoped project files
|
|
7
|
+
- run narrow verification commands or checks
|
|
8
|
+
- write attempt summaries into the iterator run record
|
|
9
|
+
|
|
10
|
+
## Boundary rules
|
|
11
|
+
|
|
12
|
+
- should not redefine task scope or user intent
|
|
13
|
+
- should not convert an observation gap into a fake success claim
|
|
14
|
+
- should not claim final victory without Iterator validation
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Coder harness and playground
|
|
2
|
+
|
|
3
|
+
The harness should inspect:
|
|
4
|
+
- whether the implementation stayed in scope
|
|
5
|
+
- whether the chosen verification seam was narrow and relevant
|
|
6
|
+
- whether the attempt report is honest about passes, failures, and blockers
|
|
7
|
+
- whether repair retries materially improve alignment
|
|
8
|
+
|
|
9
|
+
Key fixtures:
|
|
10
|
+
- reproducible fix succeeds in one pass
|
|
11
|
+
- first pass fails, second pass succeeds
|
|
12
|
+
- safe completion is impossible and the blocker is reported clearly
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Framer, objectives and requirements
|
|
2
|
+
|
|
3
|
+
## Objective
|
|
4
|
+
|
|
5
|
+
Turn the user's messy request and relevant context into a bounded iterate task artifact that another role can safely act on.
|
|
6
|
+
|
|
7
|
+
## Requirements
|
|
8
|
+
|
|
9
|
+
- classify the request as error fix, quick change, or targeted improvement
|
|
10
|
+
- extract the most likely surface, route, file, component, or function hints when available
|
|
11
|
+
- distinguish current behavior from desired behavior
|
|
12
|
+
- write observable success criteria
|
|
13
|
+
- separate facts, assumptions, blocking unknowns, and nonblocking unknowns
|
|
14
|
+
- recommend one of: stay in iterate, investigate first, or promote to idea
|
|
15
|
+
|
|
16
|
+
## Derived non-goals
|
|
17
|
+
|
|
18
|
+
- do not perform observation work
|
|
19
|
+
- do not code
|
|
20
|
+
- do not broaden the task to make it sound more important
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# Framer evals
|
|
2
|
+
|
|
3
|
+
- turns a messy fix request into a bounded task artifact
|
|
4
|
+
- preserves partial location hints instead of discarding them
|
|
5
|
+
- keeps a tiny request small rather than inflating it
|
|
6
|
+
- marks blocking unknowns honestly when the ask is underspecified
|
|
7
|
+
- distinguishes current behavior and desired behavior clearly
|
|
8
|
+
- recommends promotion when the request is no longer task-scale
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Framer tools and boundaries
|
|
2
|
+
|
|
3
|
+
## Needed capabilities
|
|
4
|
+
|
|
5
|
+
- read conversation context and project hints
|
|
6
|
+
- inspect lightweight repo clues when necessary to localize the surface
|
|
7
|
+
- write the task artifact
|
|
8
|
+
|
|
9
|
+
## Boundary rules
|
|
10
|
+
|
|
11
|
+
- should not collect evidence that belongs in observation truth
|
|
12
|
+
- should not run implementation changes
|
|
13
|
+
- should not hide uncertainty behind overconfident framing prose
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Framer harness and playground
|
|
2
|
+
|
|
3
|
+
The harness should inspect whether Framer outputs:
|
|
4
|
+
- a coherent task type
|
|
5
|
+
- a clear current versus desired behavior split
|
|
6
|
+
- observable success criteria
|
|
7
|
+
- explicit unknowns and promotion guidance
|
|
8
|
+
|
|
9
|
+
Key fixtures:
|
|
10
|
+
- messy error report with partial location
|
|
11
|
+
- tiny copy or behavior tweak
|
|
12
|
+
- underspecified but repairable request
|
devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Iterator, objectives and requirements
|
|
2
|
+
|
|
3
|
+
## Objective
|
|
4
|
+
|
|
5
|
+
Own the iterate lane end to end and return one truthful outcome:
|
|
6
|
+
- completed
|
|
7
|
+
- blocked
|
|
8
|
+
- needs more context
|
|
9
|
+
- promote to idea
|
|
10
|
+
- route to insight
|
|
11
|
+
|
|
12
|
+
## Requirements
|
|
13
|
+
|
|
14
|
+
- synthesize Framer and Observer artifacts without collapsing their roles
|
|
15
|
+
- decide coding readiness before spawning Coder
|
|
16
|
+
- keep scope aligned to the original task contract
|
|
17
|
+
- issue repair-specific respawns when the first coding pass is close but incomplete
|
|
18
|
+
- refuse premature completion
|
|
19
|
+
- author the final lane verdict and rationale
|
|
20
|
+
|
|
21
|
+
## Derived non-goals
|
|
22
|
+
|
|
23
|
+
- do not serve as the primary coding worker
|
|
24
|
+
- do not hand-wave missing observation truth
|
|
25
|
+
- do not bury promotions or blockers inside optimistic language
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# Iterator evals
|
|
2
|
+
|
|
3
|
+
- declines to spawn Coder when task artifact is vague
|
|
4
|
+
- declines to spawn Coder when observation artifact is inconclusive for a required repro
|
|
5
|
+
- respawns Coder with a precise repair reason after a near miss
|
|
6
|
+
- refuses completion when the green condition is still red
|
|
7
|
+
- refuses completion when success criteria are only partially satisfied
|
|
8
|
+
- promotes to `idea` when scope expansion becomes necessary
|
|
9
|
+
- returns a blocked verdict when safe progress depends on missing evidence
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Iterator tools and boundaries
|
|
2
|
+
|
|
3
|
+
## Needed capabilities
|
|
4
|
+
|
|
5
|
+
- read framing, observation, and attempt artifacts
|
|
6
|
+
- supervise worker attempts
|
|
7
|
+
- run or request final narrow verification checks
|
|
8
|
+
- write final disposition and respawn rationale
|
|
9
|
+
|
|
10
|
+
## Boundary rules
|
|
11
|
+
|
|
12
|
+
- may coordinate all agents, but should not absorb coding as the default path
|
|
13
|
+
- may inspect verification evidence, but should not invent it
|
|
14
|
+
- may revise task handling, but should not casually overwrite Framer or Observer truth without explanation
|