devflow-engine 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devflow_engine/__init__.py +3 -0
- devflow_engine/agentic_prompts.py +100 -0
- devflow_engine/agentic_runtime.py +398 -0
- devflow_engine/api_key_flow_harness.py +539 -0
- devflow_engine/api_keys.py +357 -0
- devflow_engine/bootstrap/__init__.py +2 -0
- devflow_engine/bootstrap/provision_from_template.py +84 -0
- devflow_engine/cli/__init__.py +0 -0
- devflow_engine/cli/app.py +7270 -0
- devflow_engine/core/__init__.py +0 -0
- devflow_engine/core/config.py +86 -0
- devflow_engine/core/logging.py +29 -0
- devflow_engine/core/paths.py +45 -0
- devflow_engine/core/toml_kv.py +33 -0
- devflow_engine/devflow_event_worker.py +1292 -0
- devflow_engine/devflow_state.py +201 -0
- devflow_engine/devin2/__init__.py +9 -0
- devflow_engine/devin2/agent_definition.py +120 -0
- devflow_engine/devin2/pi_runner.py +204 -0
- devflow_engine/devin_orchestration.py +69 -0
- devflow_engine/docs/prompts/anti-patterns.md +42 -0
- devflow_engine/docs/prompts/devin-agent-prompt.md +55 -0
- devflow_engine/docs/prompts/devin2-agent-prompt.md +81 -0
- devflow_engine/docs/prompts/examples/devin-vapi-clone-reference-exchange.json +85 -0
- devflow_engine/doctor/__init__.py +2 -0
- devflow_engine/doctor/triage.py +140 -0
- devflow_engine/error/__init__.py +0 -0
- devflow_engine/error/remediation.py +21 -0
- devflow_engine/errors/error_solver_dag.py +522 -0
- devflow_engine/errors/runtime_observability.py +67 -0
- devflow_engine/idea/__init__.py +4 -0
- devflow_engine/idea/actors.py +481 -0
- devflow_engine/idea/agentic.py +465 -0
- devflow_engine/idea/analyze.py +93 -0
- devflow_engine/idea/devin_chat_dag.py +1 -0
- devflow_engine/idea/diff.py +99 -0
- devflow_engine/idea/drafts.py +446 -0
- devflow_engine/idea/idea_creation_dag.py +643 -0
- devflow_engine/idea/ideation_enrichment.py +355 -0
- devflow_engine/idea/ideation_enrichment_worker.py +19 -0
- devflow_engine/idea/paths.py +28 -0
- devflow_engine/idea/promote.py +53 -0
- devflow_engine/idea/redaction.py +27 -0
- devflow_engine/idea/repo_tools.py +1277 -0
- devflow_engine/idea/response_mode.py +30 -0
- devflow_engine/idea/story_pipeline.py +1585 -0
- devflow_engine/idea/sufficiency.py +376 -0
- devflow_engine/idea/traditional_stories.py +1257 -0
- devflow_engine/implementation/__init__.py +0 -0
- devflow_engine/implementation/alembic_preflight.py +700 -0
- devflow_engine/implementation/dag.py +8450 -0
- devflow_engine/implementation/green_gate.py +93 -0
- devflow_engine/implementation/prompts.py +108 -0
- devflow_engine/implementation/test_runtime.py +623 -0
- devflow_engine/integration/__init__.py +19 -0
- devflow_engine/integration/agentic.py +66 -0
- devflow_engine/integration/dag.py +3539 -0
- devflow_engine/integration/prompts.py +114 -0
- devflow_engine/integration/supabase_schema.sql +31 -0
- devflow_engine/integration/supabase_sync.py +177 -0
- devflow_engine/llm/__init__.py +1 -0
- devflow_engine/llm/cli_one_shot.py +84 -0
- devflow_engine/llm/cli_stream.py +371 -0
- devflow_engine/llm/execution_context.py +26 -0
- devflow_engine/llm/invoke.py +1322 -0
- devflow_engine/llm/provider_api.py +304 -0
- devflow_engine/llm/repo_knowledge.py +588 -0
- devflow_engine/llm_primitives.py +315 -0
- devflow_engine/orchestration.py +62 -0
- devflow_engine/planning/__init__.py +0 -0
- devflow_engine/planning/analyze_repo.py +92 -0
- devflow_engine/planning/render_drafts.py +133 -0
- devflow_engine/playground/__init__.py +0 -0
- devflow_engine/playground/hooks.py +26 -0
- devflow_engine/playwright_workflow/__init__.py +5 -0
- devflow_engine/playwright_workflow/dag.py +1317 -0
- devflow_engine/process/__init__.py +5 -0
- devflow_engine/process/dag.py +59 -0
- devflow_engine/project_registration/__init__.py +3 -0
- devflow_engine/project_registration/dag.py +1581 -0
- devflow_engine/project_registry.py +109 -0
- devflow_engine/prompts/devin/generic/prompt.md +6 -0
- devflow_engine/prompts/devin/ideation/prompt.md +263 -0
- devflow_engine/prompts/devin/ideation/scenarios.md +5 -0
- devflow_engine/prompts/devin/ideation_loop/prompt.md +6 -0
- devflow_engine/prompts/devin/insight/prompt.md +11 -0
- devflow_engine/prompts/devin/insight/scenarios.md +5 -0
- devflow_engine/prompts/devin/intake/prompt.md +15 -0
- devflow_engine/prompts/devin/iterate/prompt.md +12 -0
- devflow_engine/prompts/devin/shared/eval_doctrine.md +9 -0
- devflow_engine/prompts/devin/shared/principles.md +246 -0
- devflow_engine/prompts/devin_eval/assessment/prompt.md +18 -0
- devflow_engine/prompts/idea/api_ideation_agent/prompt.md +8 -0
- devflow_engine/prompts/idea/api_insight_agent/prompt.md +8 -0
- devflow_engine/prompts/idea/response_doctrine/prompt.md +18 -0
- devflow_engine/prompts/implementation/dependency_assessment/prompt.md +12 -0
- devflow_engine/prompts/implementation/green/green/prompt.md +11 -0
- devflow_engine/prompts/implementation/green/node_config/prompt.md +3 -0
- devflow_engine/prompts/implementation/green_review/outcome_review/prompt.md +5 -0
- devflow_engine/prompts/implementation/green_review/prior_run_review/prompt.md +5 -0
- devflow_engine/prompts/implementation/red/prompt.md +27 -0
- devflow_engine/prompts/implementation/redreview/prompt.md +23 -0
- devflow_engine/prompts/implementation/redreview_repair/prompt.md +16 -0
- devflow_engine/prompts/implementation/setupdoc/prompt.md +10 -0
- devflow_engine/prompts/implementation/story_planning/prompt.md +13 -0
- devflow_engine/prompts/implementation/test_design/prompt.md +27 -0
- devflow_engine/prompts/integration/README.md +185 -0
- devflow_engine/prompts/integration/green/example.md +67 -0
- devflow_engine/prompts/integration/green/green/prompt.md +10 -0
- devflow_engine/prompts/integration/green/node_config/prompt.md +42 -0
- devflow_engine/prompts/integration/green/past_prompts/20260417T212300/green/prompt.md +15 -0
- devflow_engine/prompts/integration/green/past_prompts/20260417T212300/node_config/prompt.md +42 -0
- devflow_engine/prompts/integration/green_enrich/example.md +79 -0
- devflow_engine/prompts/integration/green_enrich/green_enrich/prompt.md +9 -0
- devflow_engine/prompts/integration/green_enrich/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/green_enrich/prompt.md +14 -0
- devflow_engine/prompts/integration/green_enrich/past_prompts/20260417T212300/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/red/code_repair/prompt.md +12 -0
- devflow_engine/prompts/integration/red/example.md +152 -0
- devflow_engine/prompts/integration/red/node_config/prompt.md +86 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/node_config/prompt.md +84 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red/prompt.md +16 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T212300/red_repair/prompt.md +15 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T215032/node_config/prompt.md +84 -0
- devflow_engine/prompts/integration/red/past_prompts/20260417T215032/red_repair/prompt.md +11 -0
- devflow_engine/prompts/integration/red/red/prompt.md +11 -0
- devflow_engine/prompts/integration/red/red_repair/prompt.md +12 -0
- devflow_engine/prompts/integration/red_review/example.md +71 -0
- devflow_engine/prompts/integration/red_review/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/node_config/prompt.md +41 -0
- devflow_engine/prompts/integration/red_review/past_prompts/20260417T212300/red_review/prompt.md +15 -0
- devflow_engine/prompts/integration/red_review/red_review/prompt.md +9 -0
- devflow_engine/prompts/integration/resolve/example.md +111 -0
- devflow_engine/prompts/integration/resolve/node_config/prompt.md +64 -0
- devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/node_config/prompt.md +64 -0
- devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_implicated_users/prompt.md +15 -0
- devflow_engine/prompts/integration/resolve/past_prompts/20260417T212300/resolve_side_effects/prompt.md +15 -0
- devflow_engine/prompts/integration/resolve/resolve_implicated_users/prompt.md +10 -0
- devflow_engine/prompts/integration/resolve/resolve_side_effects/prompt.md +10 -0
- devflow_engine/prompts/integration/validate/build_idea_acceptance_coverage/prompt.md +12 -0
- devflow_engine/prompts/integration/validate/code_repair/prompt.md +13 -0
- devflow_engine/prompts/integration/validate/example.md +143 -0
- devflow_engine/prompts/integration/validate/node_config/prompt.md +87 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/code_repair/prompt.md +19 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/node_config/prompt.md +67 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_enrich_gate/prompt.md +17 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T212300/validate_repair/prompt.md +16 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/code_repair/prompt.md +10 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/node_config/prompt.md +67 -0
- devflow_engine/prompts/integration/validate/past_prompts/20260417T215032/validate_repair/prompt.md +9 -0
- devflow_engine/prompts/integration/validate/validate_enrich_gate/prompt.md +10 -0
- devflow_engine/prompts/integration/validate/validate_repair/prompt.md +20 -0
- devflow_engine/prompts/integration/write_workflows/example.md +100 -0
- devflow_engine/prompts/integration/write_workflows/node_config/prompt.md +44 -0
- devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/node_config/prompt.md +44 -0
- devflow_engine/prompts/integration/write_workflows/past_prompts/20260417T212300/write_workflows/prompt.md +17 -0
- devflow_engine/prompts/integration/write_workflows/write_workflows/prompt.md +11 -0
- devflow_engine/prompts/iterate/README.md +7 -0
- devflow_engine/prompts/iterate/coder/prompt.md +11 -0
- devflow_engine/prompts/iterate/framer/prompt.md +11 -0
- devflow_engine/prompts/iterate/iterator/prompt.md +13 -0
- devflow_engine/prompts/iterate/observer/prompt.md +11 -0
- devflow_engine/prompts/recovery/diagnosis/prompt.md +7 -0
- devflow_engine/prompts/recovery/execution/prompt.md +8 -0
- devflow_engine/prompts/recovery/execution_verification/prompt.md +7 -0
- devflow_engine/prompts/recovery/failure_investigation/prompt.md +10 -0
- devflow_engine/prompts/recovery/preflight_health_repo_repair/prompt.md +8 -0
- devflow_engine/prompts/recovery/remediation_execution/prompt.md +11 -0
- devflow_engine/prompts/recovery/root_cause_investigation/prompt.md +12 -0
- devflow_engine/prompts/scope_idea/doctrine/prompt.md +7 -0
- devflow_engine/prompts/source_doc_eval/document/prompt.md +6 -0
- devflow_engine/prompts/source_doc_eval/targeted_mutation/prompt.md +9 -0
- devflow_engine/prompts/source_doc_mutation/domain_entities/prompt.md +6 -0
- devflow_engine/prompts/source_doc_mutation/product_brief/prompt.md +6 -0
- devflow_engine/prompts/source_doc_mutation/project_doc_coherence/prompt.md +7 -0
- devflow_engine/prompts/source_doc_mutation/project_doc_render/prompt.md +9 -0
- devflow_engine/prompts/source_doc_mutation/source_doc_coherence/prompt.md +5 -0
- devflow_engine/prompts/source_doc_mutation/source_doc_enrichment_coherence/prompt.md +6 -0
- devflow_engine/prompts/source_doc_mutation/user_workflows/prompt.md +6 -0
- devflow_engine/prompts/source_scope/doctrine/prompt.md +10 -0
- devflow_engine/prompts/ui_grounding/doctrine/prompt.md +7 -0
- devflow_engine/recovery/__init__.py +3 -0
- devflow_engine/recovery/dag.py +2609 -0
- devflow_engine/recovery/models.py +220 -0
- devflow_engine/refactor.py +93 -0
- devflow_engine/registry/__init__.py +1 -0
- devflow_engine/registry/cards.py +238 -0
- devflow_engine/registry/domain_normalize.py +60 -0
- devflow_engine/registry/effects.py +65 -0
- devflow_engine/registry/enforce_report.py +150 -0
- devflow_engine/registry/module_cards_classify.py +164 -0
- devflow_engine/registry/module_cards_draft.py +184 -0
- devflow_engine/registry/module_cards_gate.py +59 -0
- devflow_engine/registry/packages.py +347 -0
- devflow_engine/registry/pathways.py +323 -0
- devflow_engine/review/__init__.py +11 -0
- devflow_engine/review/dag.py +588 -0
- devflow_engine/review/review_story.py +67 -0
- devflow_engine/scope_idea/__init__.py +3 -0
- devflow_engine/scope_idea/agentic.py +39 -0
- devflow_engine/scope_idea/dag.py +1069 -0
- devflow_engine/scope_idea/models.py +175 -0
- devflow_engine/skills/builtins/devflow/queue_failure_investigation/SKILL.md +112 -0
- devflow_engine/skills/builtins/devflow/queue_idea_to_story/SKILL.md +120 -0
- devflow_engine/skills/builtins/devflow/queue_integration/SKILL.md +105 -0
- devflow_engine/skills/builtins/devflow/queue_recovery/SKILL.md +108 -0
- devflow_engine/skills/builtins/devflow/queue_runtime_core/SKILL.md +155 -0
- devflow_engine/skills/builtins/devflow/queue_story_implementation/SKILL.md +122 -0
- devflow_engine/skills/builtins/devin/idea_to_story_handoff/SKILL.md +120 -0
- devflow_engine/skills/builtins/devin/ideation/SKILL.md +168 -0
- devflow_engine/skills/builtins/devin/ideation/state-and-phrasing-reference.md +18 -0
- devflow_engine/skills/builtins/devin/insight/SKILL.md +22 -0
- devflow_engine/skills/registry.example.yaml +42 -0
- devflow_engine/source_doc_assumptions.py +291 -0
- devflow_engine/source_doc_mutation_dag.py +1606 -0
- devflow_engine/source_doc_mutation_eval.py +417 -0
- devflow_engine/source_doc_mutation_worker.py +25 -0
- devflow_engine/source_docs_schema.py +207 -0
- devflow_engine/source_docs_updater.py +309 -0
- devflow_engine/source_scope/__init__.py +15 -0
- devflow_engine/source_scope/agentic.py +45 -0
- devflow_engine/source_scope/dag.py +1626 -0
- devflow_engine/source_scope/models.py +177 -0
- devflow_engine/stores/__init__.py +0 -0
- devflow_engine/stores/execution_store.py +3534 -0
- devflow_engine/story/__init__.py +0 -0
- devflow_engine/story/contracts.py +160 -0
- devflow_engine/story/discovery.py +47 -0
- devflow_engine/story/evidence.py +118 -0
- devflow_engine/story/hashing.py +27 -0
- devflow_engine/story/implemented_queue_purge.py +148 -0
- devflow_engine/story/indexer.py +105 -0
- devflow_engine/story/io.py +20 -0
- devflow_engine/story/markdown_contracts.py +298 -0
- devflow_engine/story/reconciliation.py +408 -0
- devflow_engine/story/validate_stories.py +149 -0
- devflow_engine/story/validate_tests_story.py +512 -0
- devflow_engine/story/validation.py +133 -0
- devflow_engine/ui_grounding/__init__.py +11 -0
- devflow_engine/ui_grounding/agentic.py +31 -0
- devflow_engine/ui_grounding/dag.py +874 -0
- devflow_engine/ui_grounding/models.py +224 -0
- devflow_engine/ui_grounding/pencil_bridge.py +247 -0
- devflow_engine/vendor/__init__.py +0 -0
- devflow_engine/vendor/datalumina_genai/__init__.py +11 -0
- devflow_engine/vendor/datalumina_genai/core/__init__.py +0 -0
- devflow_engine/vendor/datalumina_genai/core/exceptions.py +9 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/__init__.py +0 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/agent.py +48 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/agent_streaming_node.py +26 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/base.py +89 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/concurrent.py +30 -0
- devflow_engine/vendor/datalumina_genai/core/nodes/router.py +69 -0
- devflow_engine/vendor/datalumina_genai/core/schema.py +72 -0
- devflow_engine/vendor/datalumina_genai/core/task.py +52 -0
- devflow_engine/vendor/datalumina_genai/core/validate.py +139 -0
- devflow_engine/vendor/datalumina_genai/core/workflow.py +200 -0
- devflow_engine/worker.py +1086 -0
- devflow_engine/worker_guard.py +233 -0
- devflow_engine-1.0.0.dist-info/METADATA +235 -0
- devflow_engine-1.0.0.dist-info/RECORD +393 -0
- devflow_engine-1.0.0.dist-info/WHEEL +4 -0
- devflow_engine-1.0.0.dist-info/entry_points.txt +3 -0
- devin/__init__.py +6 -0
- devin/dag.py +58 -0
- devin/dag_two_arm.py +138 -0
- devin/devin_chat_scenario_catalog.json +588 -0
- devin/devin_eval.py +677 -0
- devin/nodes/__init__.py +0 -0
- devin/nodes/ideation/__init__.py +0 -0
- devin/nodes/ideation/node.py +195 -0
- devin/nodes/ideation/playground.py +267 -0
- devin/nodes/ideation/prompt.md +65 -0
- devin/nodes/ideation/scenarios/continue_refinement.py +13 -0
- devin/nodes/ideation/scenarios/continue_refinement_evals.py +18 -0
- devin/nodes/ideation/scenarios/idea_fits_existing_patterns.py +17 -0
- devin/nodes/ideation/scenarios/idea_fits_existing_patterns_evals.py +16 -0
- devin/nodes/ideation/scenarios/large_idea_split.py +4 -0
- devin/nodes/ideation/scenarios/large_idea_split_evals.py +17 -0
- devin/nodes/ideation/scenarios/source_documentation_added.py +4 -0
- devin/nodes/ideation/scenarios/source_documentation_added_evals.py +16 -0
- devin/nodes/ideation/scenarios/user_says_create_it.py +30 -0
- devin/nodes/ideation/scenarios/user_says_create_it_evals.py +23 -0
- devin/nodes/ideation/scenarios/vague_idea.py +16 -0
- devin/nodes/ideation/scenarios/vague_idea_evals.py +47 -0
- devin/nodes/ideation/tools.json +312 -0
- devin/nodes/insight/__init__.py +0 -0
- devin/nodes/insight/node.py +49 -0
- devin/nodes/insight/playground.py +154 -0
- devin/nodes/insight/prompt.md +61 -0
- devin/nodes/insight/scenarios/architecture_pattern_query.py +15 -0
- devin/nodes/insight/scenarios/architecture_pattern_query_evals.py +25 -0
- devin/nodes/insight/scenarios/codebase_exploration.py +15 -0
- devin/nodes/insight/scenarios/codebase_exploration_evals.py +23 -0
- devin/nodes/insight/scenarios/devin_ideation_routing.py +19 -0
- devin/nodes/insight/scenarios/devin_ideation_routing_evals.py +39 -0
- devin/nodes/insight/scenarios/devin_insight_routing.py +20 -0
- devin/nodes/insight/scenarios/devin_insight_routing_evals.py +40 -0
- devin/nodes/insight/scenarios/operational_debugging.py +15 -0
- devin/nodes/insight/scenarios/operational_debugging_evals.py +23 -0
- devin/nodes/insight/scenarios/operational_question.py +9 -0
- devin/nodes/insight/scenarios/operational_question_evals.py +8 -0
- devin/nodes/insight/scenarios/queue_status.py +15 -0
- devin/nodes/insight/scenarios/queue_status_evals.py +23 -0
- devin/nodes/insight/scenarios/source_doc_explanation.py +14 -0
- devin/nodes/insight/scenarios/source_doc_explanation_evals.py +21 -0
- devin/nodes/insight/scenarios/worker_state_check.py +15 -0
- devin/nodes/insight/scenarios/worker_state_check_evals.py +22 -0
- devin/nodes/insight/tools.json +126 -0
- devin/nodes/intake/__init__.py +0 -0
- devin/nodes/intake/node.py +27 -0
- devin/nodes/intake/playground.py +47 -0
- devin/nodes/intake/prompt.md +12 -0
- devin/nodes/intake/scenarios/ideation_routing.py +4 -0
- devin/nodes/intake/scenarios/ideation_routing_evals.py +5 -0
- devin/nodes/intake/scenarios/insight_routing.py +4 -0
- devin/nodes/intake/scenarios/insight_routing_evals.py +5 -0
- devin/nodes/iterate/README.md +44 -0
- devin/nodes/iterate/__init__.py +1 -0
- devin/nodes/iterate/_archived_design_stages/01-objectives-requirements.md +112 -0
- devin/nodes/iterate/_archived_design_stages/02-evals.md +131 -0
- devin/nodes/iterate/_archived_design_stages/03-tools-and-boundaries.md +110 -0
- devin/nodes/iterate/_archived_design_stages/04-harness-and-playground.md +32 -0
- devin/nodes/iterate/_archived_design_stages/05-prompt-deferred.md +11 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/01-objectives-requirements.md +20 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/02-evals.md +8 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/03-tools-and-boundaries.md +14 -0
- devin/nodes/iterate/_archived_design_stages/coder_agent_design/04-harness-and-playground.md +12 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/01-objectives-requirements.md +20 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/02-evals.md +8 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/03-tools-and-boundaries.md +13 -0
- devin/nodes/iterate/_archived_design_stages/framer_agent_design/04-harness-and-playground.md +12 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/01-objectives-requirements.md +25 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/02-evals.md +9 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/03-tools-and-boundaries.md +14 -0
- devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md +12 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md +20 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/02-evals.md +8 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/03-tools-and-boundaries.md +14 -0
- devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md +13 -0
- devin/nodes/iterate/agent-roles.md +89 -0
- devin/nodes/iterate/agents/README.md +10 -0
- devin/nodes/iterate/artifacts.md +504 -0
- devin/nodes/iterate/contract.md +100 -0
- devin/nodes/iterate/eval-plan.md +74 -0
- devin/nodes/iterate/node.py +100 -0
- devin/nodes/iterate/pipeline/README.md +13 -0
- devin/nodes/iterate/playground-contract.md +76 -0
- devin/nodes/iterate/prompt.md +11 -0
- devin/nodes/iterate/scenarios/README.md +38 -0
- devin/nodes/iterate/scenarios/artifact-and-loop-scenarios.md +101 -0
- devin/nodes/iterate/scenarios/coder_artifact_alignment.py +32 -0
- devin/nodes/iterate/scenarios/coder_artifact_alignment_evals.py +45 -0
- devin/nodes/iterate/scenarios/coder_bounded_fix.py +27 -0
- devin/nodes/iterate/scenarios/coder_bounded_fix_evals.py +45 -0
- devin/nodes/iterate/scenarios/devin_iterate_routing.py +21 -0
- devin/nodes/iterate/scenarios/devin_iterate_routing_evals.py +36 -0
- devin/nodes/iterate/scenarios/framer_scope_boundary.py +25 -0
- devin/nodes/iterate/scenarios/framer_scope_boundary_evals.py +57 -0
- devin/nodes/iterate/scenarios/framer_task_framing.py +25 -0
- devin/nodes/iterate/scenarios/framer_task_framing_evals.py +58 -0
- devin/nodes/iterate/scenarios/iterate_error_fix.py +21 -0
- devin/nodes/iterate/scenarios/iterate_error_fix_evals.py +39 -0
- devin/nodes/iterate/scenarios/iterate_quick_change.py +21 -0
- devin/nodes/iterate/scenarios/iterate_quick_change_evals.py +35 -0
- devin/nodes/iterate/scenarios/iterate_to_idea_promotion.py +23 -0
- devin/nodes/iterate/scenarios/iterate_to_idea_promotion_evals.py +53 -0
- devin/nodes/iterate/scenarios/iterate_to_insight_reroute.py +23 -0
- devin/nodes/iterate/scenarios/iterate_to_insight_reroute_evals.py +53 -0
- devin/nodes/iterate/scenarios/observer_evidence_seam.py +28 -0
- devin/nodes/iterate/scenarios/observer_evidence_seam_evals.py +55 -0
- devin/nodes/iterate/scenarios/observer_repro_creation.py +28 -0
- devin/nodes/iterate/scenarios/observer_repro_creation_evals.py +45 -0
- devin/nodes/iterate/scenarios/routing-matrix.md +45 -0
- devin/nodes/shared/__init__.py +0 -0
- devin/nodes/shared/filemaker_expert.md +80 -0
- devin/nodes/shared/filemaker_expert.py +354 -0
- devin/nodes/shared/filemaker_expert_eval/runner.py +176 -0
- devin/nodes/shared/filemaker_expert_eval/scenarios.json +65 -0
- devin/nodes/shared/goldilocks_advisor_eval/runner.py +214 -0
- devin/nodes/shared/goldilocks_advisor_eval/scenarios.json +58 -0
- devin/nodes/shared/helpers.py +156 -0
- devin/nodes/shared/idea_compliance_advisor_eval/runner.py +252 -0
- devin/nodes/shared/idea_compliance_advisor_eval/scenarios.json +75 -0
- devin/nodes/shared/models.py +44 -0
- devin/nodes/shared/post.py +40 -0
- devin/nodes/shared/router.py +107 -0
- devin/nodes/shared/tools.py +191 -0
- devin/shared/devin-chat-rubric.md +237 -0
- devin/shared/devin-chat-scenario-suite.md +90 -0
- devin/shared/eval_doctrine.md +9 -0
devin/nodes/iterate/_archived_design_stages/iterator_agent_design/04-harness-and-playground.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Iterator harness and playground
|
|
2
|
+
|
|
3
|
+
The harness should be able to inspect:
|
|
4
|
+
- readiness decisions
|
|
5
|
+
- respawn decisions
|
|
6
|
+
- terminal verdict selection
|
|
7
|
+
- consistency between verdict, artifacts, and observed verification state
|
|
8
|
+
|
|
9
|
+
Key fixtures:
|
|
10
|
+
- near miss then repair success
|
|
11
|
+
- missing truth blocks coding
|
|
12
|
+
- unauthorized scope growth forces promotion
|
devin/nodes/iterate/_archived_design_stages/observer_agent_design/01-objectives-requirements.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Observer objectives and requirements
|
|
2
|
+
|
|
3
|
+
## Objective
|
|
4
|
+
|
|
5
|
+
Convert the framed task into observable truth that can safely govern coding and completion judgment.
|
|
6
|
+
|
|
7
|
+
## Requirements
|
|
8
|
+
|
|
9
|
+
- inspect logs, traces, state, or repro surfaces relevant to the framed task
|
|
10
|
+
- confirm an error when evidence exists
|
|
11
|
+
- report `not_confirmed` or `inconclusive` honestly when no such evidence exists
|
|
12
|
+
- define a bounded failing seam for targeted improvements when direct repro is not the right frame
|
|
13
|
+
- document the expected green condition
|
|
14
|
+
- recommend whether the task is ready for Coder
|
|
15
|
+
|
|
16
|
+
## Derived non-goals
|
|
17
|
+
|
|
18
|
+
- do not implement fixes
|
|
19
|
+
- do not invent evidence
|
|
20
|
+
- do not claim completion
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# Observer evals
|
|
2
|
+
|
|
3
|
+
- confirms a reported failure when logs or repro evidence support it
|
|
4
|
+
- reports `not_confirmed` honestly when the issue cannot be reproduced
|
|
5
|
+
- creates a bounded red seam for a targeted improvement
|
|
6
|
+
- provides repro steps another role can execute
|
|
7
|
+
- records the expected green condition clearly
|
|
8
|
+
- returns `needs_more_context` when truth is genuinely insufficient
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Observer tools and boundaries
|
|
2
|
+
|
|
3
|
+
## Needed capabilities
|
|
4
|
+
|
|
5
|
+
- inspect logs and traces
|
|
6
|
+
- run bounded repro checks
|
|
7
|
+
- inspect relevant repo or runtime surfaces needed to define a failing seam
|
|
8
|
+
- write the observation artifact
|
|
9
|
+
|
|
10
|
+
## Boundary rules
|
|
11
|
+
|
|
12
|
+
- should not silently patch code while observing
|
|
13
|
+
- should not blur evidence with assumptions
|
|
14
|
+
- should not escalate weak signals into false certainty
|
devin/nodes/iterate/_archived_design_stages/observer_agent_design/04-harness-and-playground.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Observer harness and playground
|
|
2
|
+
|
|
3
|
+
The harness should inspect:
|
|
4
|
+
- evidence summaries
|
|
5
|
+
- repro steps
|
|
6
|
+
- repeatability status
|
|
7
|
+
- green-condition definition
|
|
8
|
+
- readiness verdict
|
|
9
|
+
|
|
10
|
+
Key fixtures:
|
|
11
|
+
- error confirmed by logs
|
|
12
|
+
- user-reported error not confirmed
|
|
13
|
+
- targeted improvement with a red seam
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# Iterate agent roles and boundaries
|
|
2
|
+
|
|
3
|
+
## System shape
|
|
4
|
+
|
|
5
|
+
The iterate lane has four named agents, but not four equal peers.
|
|
6
|
+
|
|
7
|
+
- `Iterator` is the accountable lane owner
|
|
8
|
+
- `Framer` and `Observer` are specialist advisors
|
|
9
|
+
- `Coder` is a supervised implementation worker
|
|
10
|
+
|
|
11
|
+
That role split should stay visible in every stage of the design pipeline.
|
|
12
|
+
|
|
13
|
+
## Iterator
|
|
14
|
+
|
|
15
|
+
Primary owner of the task.
|
|
16
|
+
|
|
17
|
+
Owns:
|
|
18
|
+
- iterate-lane accountability
|
|
19
|
+
- scope discipline
|
|
20
|
+
- advisor coordination
|
|
21
|
+
- readiness judgment before coding
|
|
22
|
+
- spawning and supervising Coder
|
|
23
|
+
- final validation and completion judgment
|
|
24
|
+
- promotion or blockage decisions
|
|
25
|
+
|
|
26
|
+
Must not:
|
|
27
|
+
- act as the primary code writer
|
|
28
|
+
- redefine Framer or Observer truth casually
|
|
29
|
+
- claim success without verification evidence
|
|
30
|
+
|
|
31
|
+
## Framer
|
|
32
|
+
|
|
33
|
+
Turns raw user text and relevant history into a bounded task artifact.
|
|
34
|
+
|
|
35
|
+
Owns:
|
|
36
|
+
- task typing
|
|
37
|
+
- locating the likely surface
|
|
38
|
+
- separating facts from assumptions
|
|
39
|
+
- writing explicit success criteria
|
|
40
|
+
- identifying blocking versus nonblocking unknowns
|
|
41
|
+
- recommending whether to stay in iterate, investigate first, or promote to idea
|
|
42
|
+
|
|
43
|
+
Must not:
|
|
44
|
+
- do observation work that belongs to Observer
|
|
45
|
+
- implement code
|
|
46
|
+
- broaden scope beyond the ask
|
|
47
|
+
|
|
48
|
+
## Observer
|
|
49
|
+
|
|
50
|
+
Converts the framed task into observable truth.
|
|
51
|
+
|
|
52
|
+
Owns:
|
|
53
|
+
- log inspection
|
|
54
|
+
- minimal repro attempts for errors
|
|
55
|
+
- bounded failing seams for improvements
|
|
56
|
+
- repeatability judgment
|
|
57
|
+
- explicit green-condition definition
|
|
58
|
+
- recommendation on coding readiness
|
|
59
|
+
|
|
60
|
+
Must not:
|
|
61
|
+
- silently fix the issue
|
|
62
|
+
- invent evidence
|
|
63
|
+
- claim completion
|
|
64
|
+
|
|
65
|
+
## Coder
|
|
66
|
+
|
|
67
|
+
Supervised implementation worker.
|
|
68
|
+
|
|
69
|
+
Owns:
|
|
70
|
+
- implementing the scoped delta
|
|
71
|
+
- using task and observation artifacts as the contract
|
|
72
|
+
- running the narrowest valid verification seam
|
|
73
|
+
- reporting what changed, what passed, what failed, and what remains blocked
|
|
74
|
+
|
|
75
|
+
Must not:
|
|
76
|
+
- rewrite the task contract
|
|
77
|
+
- broaden scope because cleanup seems appealing
|
|
78
|
+
- self-certify completion without Iterator validation
|
|
79
|
+
|
|
80
|
+
## Ownership rule
|
|
81
|
+
|
|
82
|
+
Iterator owns truth and completion judgment.
|
|
83
|
+
Coder owns implementation attempts.
|
|
84
|
+
Framer and Observer are specialized advisors whose artifacts constrain the coding loop.
|
|
85
|
+
|
|
86
|
+
## Where the detailed design now lives
|
|
87
|
+
|
|
88
|
+
- cross-agent stage docs: `pipeline/`
|
|
89
|
+
- per-agent stage docs: `agents/`
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Per-agent design docs
|
|
2
|
+
|
|
3
|
+
Each agent has its own folder with the same ordered design stages:
|
|
4
|
+
|
|
5
|
+
1. objectives and requirements
|
|
6
|
+
2. evals
|
|
7
|
+
3. tools and boundaries
|
|
8
|
+
4. harness and playground
|
|
9
|
+
|
|
10
|
+
This keeps review aligned with Marcus's pipeline while still making each role inspectable on its own.
|
|
@@ -0,0 +1,504 @@
|
|
|
1
|
+
# Iterate artifact contracts
|
|
2
|
+
|
|
3
|
+
## Why artifacts matter in this lane
|
|
4
|
+
|
|
5
|
+
The iterate lane is not a loose conversation between four peers. It is an advisor-primary loop with one accountable owner.
|
|
6
|
+
|
|
7
|
+
Artifacts are what keep that structure real:
|
|
8
|
+
- `Framer` converts the ask into a bounded task contract
|
|
9
|
+
- `Observer` converts the task into observable truth
|
|
10
|
+
- `Coder` records supervised implementation attempts
|
|
11
|
+
- `Iterator` decides readiness, respawns, blockage, promotion, and final disposition
|
|
12
|
+
|
|
13
|
+
If those judgments are not expressed in shared artifacts, the lane will drift into hidden state and role collapse.
|
|
14
|
+
|
|
15
|
+
## Pipeline position
|
|
16
|
+
|
|
17
|
+
These artifact contracts support Marcus's pipeline order:
|
|
18
|
+
1. objectives and requirements define what truth the lane needs
|
|
19
|
+
2. evals define how weak or dishonest artifacts would fail
|
|
20
|
+
3. tools and boundaries define who can create and modify which artifact
|
|
21
|
+
4. harness and playground later test the artifact flow
|
|
22
|
+
|
|
23
|
+
This file therefore describes the shared contract surface that the four-agent model depends on.
|
|
24
|
+
|
|
25
|
+
## Canonical task primitive
|
|
26
|
+
|
|
27
|
+
`IterateTask` is smaller than a story. It is a targeted change packet for work that still fits iterate.
|
|
28
|
+
|
|
29
|
+
Suggested base shape:
|
|
30
|
+
|
|
31
|
+
```yaml
|
|
32
|
+
IterateTask:
|
|
33
|
+
task_id: string
|
|
34
|
+
project_id: string
|
|
35
|
+
source_message: string
|
|
36
|
+
turn_history: []
|
|
37
|
+
task_type: error_fix | quick_change | targeted_improvement
|
|
38
|
+
where:
|
|
39
|
+
surface: string | null
|
|
40
|
+
route_hint: string | null
|
|
41
|
+
component_hint: string | null
|
|
42
|
+
file_hint: string | null
|
|
43
|
+
function_hint: string | null
|
|
44
|
+
current_behavior: string
|
|
45
|
+
desired_behavior: string
|
|
46
|
+
success_criteria: string[]
|
|
47
|
+
constraints: string[]
|
|
48
|
+
assumptions: string[]
|
|
49
|
+
blocking_unknowns: string[]
|
|
50
|
+
nonblocking_unknowns: string[]
|
|
51
|
+
promotion_recommendation: stay_iterate | investigate_first | promote_to_idea
|
|
52
|
+
expected_user_outcome: string | null
|
|
53
|
+
what_happened: string | null
|
|
54
|
+
log_hint: string | null
|
|
55
|
+
repro_hint: string | null
|
|
56
|
+
task_details:
|
|
57
|
+
error_fix?:
|
|
58
|
+
suspected_failure_mode: string | null
|
|
59
|
+
user_reported_error_text: string | null
|
|
60
|
+
quick_change?:
|
|
61
|
+
requested_delta_summary: string
|
|
62
|
+
acceptance_examples: string[]
|
|
63
|
+
targeted_improvement?:
|
|
64
|
+
target_metric_or_quality: string | null
|
|
65
|
+
bounded_red_seam_hint: string | null
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
The payload above is the business body of the task. Durable artifact files should wrap that body in a small revision envelope rather than inventing a different schema per write.
|
|
69
|
+
## Persisted location decision
|
|
70
|
+
|
|
71
|
+
Iterate artifacts should live under a lane-owned root:
|
|
72
|
+
|
|
73
|
+
```text
|
|
74
|
+
.devflow/iterate/<task_id>/task_artifact.json
|
|
75
|
+
.devflow/iterate/<task_id>/observation_artifact.json
|
|
76
|
+
.devflow/iterate/<task_id>/iterator_run.json
|
|
77
|
+
.devflow/iterate/<task_id>/promotion_handoff.json
|
|
78
|
+
.devflow/iterate/<task_id>/attempts/<attempt_id>/verification_summary.json
|
|
79
|
+
.devflow/iterate/<task_id>/attempts/<attempt_id>/verifier_output.json
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Where `attempt_id` uses the exact format `attempt-<NNN...>`:
|
|
83
|
+
- prefix is always the literal `attempt-`
|
|
84
|
+
- suffix is a zero-padded decimal ordinal starting at `001`
|
|
85
|
+
- the numeric portion is at least 3 digits and may widen past 3 digits only after `999`
|
|
86
|
+
- ids are assigned monotonically within one `task_id` and are never reused
|
|
87
|
+
|
|
88
|
+
Examples:
|
|
89
|
+
- first attempt: `attempt-001`
|
|
90
|
+
- second attempt: `attempt-002`
|
|
91
|
+
- thousandth attempt: `attempt-1000`
|
|
92
|
+
|
|
93
|
+
This is now the preferred contract, not just a placeholder suggestion.
|
|
94
|
+
|
|
95
|
+
Why this root is the right default:
|
|
96
|
+
- it matches the existing repo convention of lane-specific durable artifacts under `.devflow/<lane>/...`
|
|
97
|
+
- the iterate lane owns a task-scale execution record, not an arbitrary conversation transcript
|
|
98
|
+
- `task_id` can carry or reference session lineage without making artifact lookup conversation-scoped
|
|
99
|
+
- promotion out of iterate should preserve the iterate record as the historical source of the attempted task, rather than relocating it mid-run
|
|
100
|
+
|
|
101
|
+
If cross-lane lineage matters, artifacts should point to upstream or downstream ids in metadata. The storage root should stay stable.
|
|
102
|
+
|
|
103
|
+
## Shared artifact chain
|
|
104
|
+
|
|
105
|
+
The four-agent lane should share this progression:
|
|
106
|
+
1. `Framer` writes or amends `task_artifact`
|
|
107
|
+
2. `Observer` writes `observation_artifact` against that task contract
|
|
108
|
+
3. `Iterator` records a readiness decision in `iterator_run`
|
|
109
|
+
4. `Coder` appends attempt records under `iterator_run`
|
|
110
|
+
5. per-attempt verifier details are stored under `attempts/<attempt_id>/`
|
|
111
|
+
6. `Iterator` records respawn reasons, blockage, promotion, or verified completion
|
|
112
|
+
7. if the lane exits to `idea` or `insight`, `Iterator` also writes `promotion_handoff.json`
|
|
113
|
+
|
|
114
|
+
This progression is the operating contract. It keeps advisors constraining the worker and keeps Iterator accountable for the final truth claim.
|
|
115
|
+
|
|
116
|
+
## Shared revision policy
|
|
117
|
+
|
|
118
|
+
Artifact revisioning should use explicit monotonic revision numbers, not content hashes and not opaque version strings.
|
|
119
|
+
|
|
120
|
+
Recommended envelope for durable top-level iterate artifacts:
|
|
121
|
+
|
|
122
|
+
```yaml
|
|
123
|
+
artifact_envelope:
|
|
124
|
+
artifact_kind: task_artifact | observation_artifact | iterator_run | promotion_handoff
|
|
125
|
+
artifact_id: string
|
|
126
|
+
task_id: string
|
|
127
|
+
revision: integer
|
|
128
|
+
supersedes_revision: integer | null
|
|
129
|
+
updated_at: string
|
|
130
|
+
updated_by: Framer | Observer | Iterator | Coder
|
|
131
|
+
payload: {...}
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Policy decisions:
|
|
135
|
+
- `revision` starts at `1` and increments by `1` each time the same top-level artifact file is rewritten
|
|
136
|
+
- `supersedes_revision` is `null` on first write and otherwise points to the immediately prior revision number
|
|
137
|
+
- readers should treat `(artifact_kind, task_id, revision)` as the stable version identity for iterate artifacts
|
|
138
|
+
- `task_artifact.json`, `observation_artifact.json`, `iterator_run.json`, and `promotion_handoff.json` are revised in place using this envelope
|
|
139
|
+
- attempt-scoped files under `attempts/<attempt_id>/` are immutable per attempt by default and should not grow their own revision ladder unless a later design proves that necessary
|
|
140
|
+
|
|
141
|
+
Why this is the right default:
|
|
142
|
+
- monotonic integers are easy for agents, harnesses, and reviewers to compare
|
|
143
|
+
- revision numbers make readiness and promotion decisions auditable without introducing content-addressing complexity
|
|
144
|
+
- immutable attempt artifacts avoid accidental rewrites of verifier evidence
|
|
145
|
+
- the policy matches the lane's need for clear supervision history more than sophisticated storage deduplication
|
|
146
|
+
|
|
147
|
+
## Task artifact
|
|
148
|
+
|
|
149
|
+
### Purpose
|
|
150
|
+
The `task_artifact` defines the bounded ask before coding begins.
|
|
151
|
+
|
|
152
|
+
It should let any reviewer answer:
|
|
153
|
+
- what existing surface is being changed
|
|
154
|
+
- what is happening now
|
|
155
|
+
- what should happen instead
|
|
156
|
+
- how success will be judged
|
|
157
|
+
- what uncertainty still exists
|
|
158
|
+
- whether the task still belongs in iterate
|
|
159
|
+
|
|
160
|
+
### Required base fields
|
|
161
|
+
Inside the `payload` body, the required task fields are:
|
|
162
|
+
- `task_id`
|
|
163
|
+
- `task_type`
|
|
164
|
+
- `project_id`
|
|
165
|
+
- `source_message`
|
|
166
|
+
- `where`
|
|
167
|
+
- `current_behavior`
|
|
168
|
+
- `desired_behavior`
|
|
169
|
+
- `success_criteria`
|
|
170
|
+
- `constraints`
|
|
171
|
+
- `assumptions`
|
|
172
|
+
- `blocking_unknowns`
|
|
173
|
+
- `nonblocking_unknowns`
|
|
174
|
+
- `promotion_recommendation`
|
|
175
|
+
- `task_details`
|
|
176
|
+
|
|
177
|
+
At the envelope level, `task_artifact.json` also requires:
|
|
178
|
+
- `artifact_kind=task_artifact`
|
|
179
|
+
- `artifact_id`
|
|
180
|
+
- `revision`
|
|
181
|
+
- `supersedes_revision`
|
|
182
|
+
- `updated_at`
|
|
183
|
+
- `updated_by=Framer`
|
|
184
|
+
|
|
185
|
+
### Schema strategy decision
|
|
186
|
+
The task contract should use one shared base schema with a discriminated `task_type` plus a nested `task_details` section for type-specific requirements.
|
|
187
|
+
|
|
188
|
+
Why this is the right shape:
|
|
189
|
+
- all iterate tasks still need the same cross-agent spine for framing, observation, supervision, and completion
|
|
190
|
+
- `Framer`, `Observer`, and `Iterator` need one stable place to read core fields regardless of subtype
|
|
191
|
+
- the lane currently has a small, known subtype set, so a discriminator is simpler than maintaining separate top-level schemas
|
|
192
|
+
- subtype-specific strictness can still grow without fragmenting the shared contract
|
|
193
|
+
|
|
194
|
+
The design should therefore avoid separate `error_fix_task_artifact`, `quick_change_task_artifact`, and `targeted_improvement_task_artifact` roots unless the lane later proves the shared spine is breaking down.
|
|
195
|
+
|
|
196
|
+
### Quality bar
|
|
197
|
+
A valid `task_artifact` is:
|
|
198
|
+
- specific enough that Coder does not need to reinterpret the request
|
|
199
|
+
- narrow enough to stay task-scale
|
|
200
|
+
- explicit about facts versus assumptions
|
|
201
|
+
- honest about missing information
|
|
202
|
+
- written so Observer can derive a real verification seam
|
|
203
|
+
|
|
204
|
+
## Observation artifact
|
|
205
|
+
|
|
206
|
+
### Purpose
|
|
207
|
+
The `observation_artifact` turns the framed task into observable truth.
|
|
208
|
+
|
|
209
|
+
It should let any reviewer answer:
|
|
210
|
+
- what evidence exists
|
|
211
|
+
- whether the failure or gap was confirmed
|
|
212
|
+
- how repeatable the issue is
|
|
213
|
+
- what exact condition must turn green for Iterator to approve completion
|
|
214
|
+
|
|
215
|
+
### Required fields
|
|
216
|
+
Inside the `payload` body, the required observation fields are:
|
|
217
|
+
- `task_id`
|
|
218
|
+
- `mode`
|
|
219
|
+
- `evidence_summary`
|
|
220
|
+
- `log_sources`
|
|
221
|
+
- `log_evidence`
|
|
222
|
+
- `repro_steps`
|
|
223
|
+
- `repro_artifacts`
|
|
224
|
+
- `red_test_paths`
|
|
225
|
+
- `repeatability_status`
|
|
226
|
+
- `current_failure`
|
|
227
|
+
- `expected_green_condition`
|
|
228
|
+
- `confidence`
|
|
229
|
+
- `observer_verdict`
|
|
230
|
+
|
|
231
|
+
At the envelope level, `observation_artifact.json` also requires:
|
|
232
|
+
- `artifact_kind=observation_artifact`
|
|
233
|
+
- `artifact_id`
|
|
234
|
+
- `revision`
|
|
235
|
+
- `supersedes_revision`
|
|
236
|
+
- `updated_at`
|
|
237
|
+
- `updated_by=Observer`
|
|
238
|
+
|
|
239
|
+
### Quality bar
|
|
240
|
+
A valid `observation_artifact` is:
|
|
241
|
+
- evidence-based rather than purely interpretive
|
|
242
|
+
- explicit when repro is confirmed, not confirmed, or partially confirmed
|
|
243
|
+
- concrete enough that Iterator can later compare final state against the same seam
|
|
244
|
+
- honest when truth is insufficient for safe completion claims
|
|
245
|
+
|
|
246
|
+
## Iterator run record
|
|
247
|
+
|
|
248
|
+
### Purpose
|
|
249
|
+
`iterator_run` preserves supervision truth across attempts.
|
|
250
|
+
|
|
251
|
+
It should let any reviewer answer:
|
|
252
|
+
- when Iterator judged the task ready
|
|
253
|
+
- what each coder attempt changed
|
|
254
|
+
- why respawns happened
|
|
255
|
+
- whether the terminal outcome was completed, blocked, or promoted
|
|
256
|
+
|
|
257
|
+
### Required fields
|
|
258
|
+
Inside the `payload` body, `iterator_run` requires:
|
|
259
|
+
- `task_id`
|
|
260
|
+
- `run_state`
|
|
261
|
+
- `readiness`
|
|
262
|
+
- `attempts`
|
|
263
|
+
- `latest_attempt`
|
|
264
|
+
- `respawn_count`
|
|
265
|
+
- `promotion`
|
|
266
|
+
- `final_verdict`
|
|
267
|
+
|
|
268
|
+
At the envelope level, `iterator_run.json` also requires:
|
|
269
|
+
- `artifact_kind=iterator_run`
|
|
270
|
+
- `artifact_id`
|
|
271
|
+
- `revision`
|
|
272
|
+
- `supersedes_revision`
|
|
273
|
+
- `updated_at`
|
|
274
|
+
- `updated_by`, usually `Iterator` and sometimes `Coder` for attempt append operations under Iterator supervision
|
|
275
|
+
|
|
276
|
+
### Readiness state decision
|
|
277
|
+
Iterator readiness should be a first-class top-level structure, not just an event hidden inside attempt history.
|
|
278
|
+
|
|
279
|
+
Recommended shape:
|
|
280
|
+
|
|
281
|
+
```yaml
|
|
282
|
+
iterator_run:
|
|
283
|
+
task_id: string
|
|
284
|
+
run_state: framing | observing | blocked_pre_coding | ready_for_coder | coding_in_progress | awaiting_iterator_review | needs_respawn | completed | blocked | promoted
|
|
285
|
+
readiness:
|
|
286
|
+
status: not_ready | ready_for_coder | blocked | promoted
|
|
287
|
+
decided_at: string
|
|
288
|
+
decided_by: Iterator
|
|
289
|
+
based_on:
|
|
290
|
+
task_artifact_revision: integer
|
|
291
|
+
observation_artifact_revision: integer
|
|
292
|
+
reason: string
|
|
293
|
+
attempts: []
|
|
294
|
+
latest_attempt: string | null
|
|
295
|
+
respawn_count: integer
|
|
296
|
+
promotion:
|
|
297
|
+
status: none | to_idea | to_insight
|
|
298
|
+
decided_at: string | null
|
|
299
|
+
decided_by: Iterator | null
|
|
300
|
+
reason: string | null
|
|
301
|
+
based_on:
|
|
302
|
+
task_artifact_revision: integer
|
|
303
|
+
observation_artifact_revision: integer | null
|
|
304
|
+
iterator_run_revision: integer
|
|
305
|
+
handoff_ref: string | null
|
|
306
|
+
downstream:
|
|
307
|
+
lane: idea | insight | null
|
|
308
|
+
downstream_id: string | null
|
|
309
|
+
downstream_artifact_ref: string | null
|
|
310
|
+
final_verdict: null | completed | blocked | promoted
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
Why this should be explicit:
|
|
314
|
+
- readiness is an Iterator-owned gate, not merely another attempt note
|
|
315
|
+
- the harness needs to inspect whether coding started too early
|
|
316
|
+
- blocked or promoted outcomes can happen before any coder attempt exists
|
|
317
|
+
- top-level state makes pre-coding and post-attempt transitions auditable
|
|
318
|
+
|
|
319
|
+
Attempt history should still record when readiness changed, but the canonical current state belongs at top level.
|
|
320
|
+
|
|
321
|
+
### Promotion linkage decision
|
|
322
|
+
When iterate exits to `idea` or `insight`, the durable linkage contract should be split into two parts:
|
|
323
|
+
1. `iterator_run.payload.promotion`, which records the lane decision inline in the supervisory spine
|
|
324
|
+
2. `promotion_handoff.json`, which stores the actual handoff payload that the downstream lane can consume
|
|
325
|
+
|
|
326
|
+
Recommended `promotion_handoff` payload shape:
|
|
327
|
+
|
|
328
|
+
```yaml
|
|
329
|
+
promotion_handoff:
|
|
330
|
+
task_id: string
|
|
331
|
+
target_lane: idea | insight
|
|
332
|
+
decided_at: string
|
|
333
|
+
decided_by: Iterator
|
|
334
|
+
reason: string
|
|
335
|
+
handoff_summary: string
|
|
336
|
+
based_on:
|
|
337
|
+
task_artifact_revision: integer
|
|
338
|
+
observation_artifact_revision: integer | null
|
|
339
|
+
iterator_run_revision: integer
|
|
340
|
+
source_refs:
|
|
341
|
+
task_artifact_ref: string
|
|
342
|
+
observation_artifact_ref: string | null
|
|
343
|
+
iterator_run_ref: string
|
|
344
|
+
downstream:
|
|
345
|
+
downstream_id: string | null
|
|
346
|
+
downstream_artifact_ref: string | null
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
At the envelope level, `promotion_handoff.json` should also use:
|
|
350
|
+
- `artifact_kind=promotion_handoff`
|
|
351
|
+
- `artifact_id`
|
|
352
|
+
- `revision`, normally `1` unless the handoff record itself is amended
|
|
353
|
+
- `supersedes_revision`
|
|
354
|
+
- `updated_at`
|
|
355
|
+
- `updated_by=Iterator`
|
|
356
|
+
|
|
357
|
+
Mandatory linkage fields for every promotion or reroute:
|
|
358
|
+
- in `iterator_run.payload.promotion`: `status`, `reason`, `based_on`, `handoff_ref`, and `downstream.lane`
|
|
359
|
+
- in `promotion_handoff.json`: `target_lane`, `handoff_summary`, `based_on`, and `source_refs`
|
|
360
|
+
|
|
361
|
+
Decision on downstream refs:
|
|
362
|
+
- `handoff_ref` to the iterate-owned `promotion_handoff.json` is mandatory
|
|
363
|
+
- direct downstream refs such as `downstream_id` or `downstream_artifact_ref` are optional at promotion time and may remain `null` if the downstream lane has not yet allocated durable state
|
|
364
|
+
- if the downstream lane does allocate an artifact or session synchronously, that ref should be filled in, but iterate should not block truthful promotion on that allocation
|
|
365
|
+
|
|
366
|
+
Why this is the right default:
|
|
367
|
+
- iterate can close truthfully without depending on downstream side effects
|
|
368
|
+
- the handoff remains durable and inspectable even if downstream work starts later
|
|
369
|
+
- the linkage gives `idea` and `insight` enough upstream provenance without forcing a cross-lane transaction
|
|
370
|
+
|
|
371
|
+
### Attempt record expectations
|
|
372
|
+
Each attempt entry should summarize supervised coding work without swallowing raw verifier detail.
|
|
373
|
+
|
|
374
|
+
`attempt_id` should be a human-readable, sortable sequence id, not a timestamp and not an opaque UUID.
|
|
375
|
+
|
|
376
|
+
Exact policy:
|
|
377
|
+
- format is `attempt-<NNN...>`
|
|
378
|
+
- numbering starts at `attempt-001`
|
|
379
|
+
- numbering advances by `1` for each new coder spawn under the same `task_id`
|
|
380
|
+
- ids remain stable even if a later attempt is blocked, superseded, or leads to promotion
|
|
381
|
+
- `latest_attempt` in `iterator_run` should point to the highest assigned ordinal, not the most recently successful attempt
|
|
382
|
+
|
|
383
|
+
Why this is the right default:
|
|
384
|
+
- iterate runs are linear supervisory loops, so ordinal identity matches the mental model better than random ids
|
|
385
|
+
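- lexical sort and chronological sort stay aligned through `attempt-999`; once ids widen past three digits (for example `attempt-1000`), consumers must order by the numeric ordinal rather than by plain string comparison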
|
|
386
|
+
- reviewers can discuss respawns unambiguously as "attempt-002 failed verification" without decoding timestamps
|
|
387
|
+
- harness fixtures can assert attempt order deterministically
|
|
388
|
+
|
|
389
|
+
Suggested attempt shape:
|
|
390
|
+
|
|
391
|
+
```yaml
|
|
392
|
+
attempt:
|
|
393
|
+
attempt_id: attempt-001
|
|
394
|
+
spawned_at: string
|
|
395
|
+
coder_summary: string
|
|
396
|
+
changed_surfaces: string[]
|
|
397
|
+
verification_summary_ref: string
|
|
398
|
+
verifier_output_ref: string | null
|
|
399
|
+
iterator_review:
|
|
400
|
+
disposition: success | respawn | blocked
|
|
401
|
+
reason: string
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
### Verifier output placement decision
|
|
405
|
+
`iterator_run.json` should contain durable summaries and file references, while verbose verifier evidence should live in attempt-scoped files.
|
|
406
|
+
|
|
407
|
+
Normalization decision:
|
|
408
|
+
- all verifier types must emit the same top-level JSON envelope in both `verification_summary.json` and `verifier_output.json`
|
|
409
|
+
- verifier-specific structure is allowed only inside a dedicated nested payload field (`native_payload` in `verifier_output.json`)
|
|
410
|
+
- consumers should be able to inspect verdict, seam alignment, and artifact refs without knowing which verifier produced the record
|
|
411
|
+
|
|
412
|
+
Why this split is the right default:
|
|
413
|
+
- `iterator_run` is the audit spine and should stay readable across multiple retries
|
|
414
|
+
- raw verifier evidence can be large and verifier-specific
|
|
415
|
+
- a shared envelope gives Iterator, harnesses, and downstream reviewers one stable parsing contract
|
|
416
|
+
- nested verifier-specific payloads preserve useful detail without forcing false uniformity across test runners, screenshots, logs, or manual checks
|
|
417
|
+
|
|
418
|
+
The run record should therefore keep:
|
|
419
|
+
- concise attempt-level verification summaries
|
|
420
|
+
- stable refs to attempt-scoped verifier files
|
|
421
|
+
- Iterator's disposition against that evidence
|
|
422
|
+
|
|
423
|
+
It should not try to inline full test logs, stack traces, or tool-native output blobs by default.
|
|
424
|
+
|
|
425
|
+
Recommended normalized `verification_summary.json` shape:
|
|
426
|
+
|
|
427
|
+
```yaml
|
|
428
|
+
verification_summary:
|
|
429
|
+
artifact_kind: verification_summary
|
|
430
|
+
task_id: string
|
|
431
|
+
attempt_id: attempt-001
|
|
432
|
+
verifier_kind: test_run | browser_check | screenshot_diff | log_check | manual_probe | mixed
|
|
433
|
+
generated_at: string
|
|
434
|
+
produced_by: Coder
|
|
435
|
+
overall_result: pass | fail | inconclusive | not_run
|
|
436
|
+
green_condition_alignment:
|
|
437
|
+
status: satisfied | not_satisfied | unknown
|
|
438
|
+
against: string
|
|
439
|
+
notes: string
|
|
440
|
+
checks:
|
|
441
|
+
- check_id: string
|
|
442
|
+
label: string
|
|
443
|
+
result: pass | fail | inconclusive | not_run
|
|
444
|
+
summary: string
|
|
445
|
+
artifact_refs: string[]
|
|
446
|
+
summary: string
|
|
447
|
+
blocker_notes: string[]
|
|
448
|
+
output_ref: string | null
|
|
449
|
+
```
|
|
450
|
+
|
|
451
|
+
Recommended normalized `verifier_output.json` shape:
|
|
452
|
+
|
|
453
|
+
```yaml
|
|
454
|
+
verifier_output:
|
|
455
|
+
artifact_kind: verifier_output
|
|
456
|
+
task_id: string
|
|
457
|
+
attempt_id: attempt-001
|
|
458
|
+
verifier_kind: test_run | browser_check | screenshot_diff | log_check | manual_probe | mixed
|
|
459
|
+
generated_at: string
|
|
460
|
+
produced_by: Coder
|
|
461
|
+
overall_result: pass | fail | inconclusive | not_run
|
|
462
|
+
green_condition_alignment:
|
|
463
|
+
status: satisfied | not_satisfied | unknown
|
|
464
|
+
against: string
|
|
465
|
+
notes: string
|
|
466
|
+
evidence_refs: string[]
|
|
467
|
+
native_payload:
|
|
468
|
+
# verifier-specific structure lives only here
|
|
469
|
+
```
|
|
470
|
+
|
|
471
|
+
Normalization rules that should stay stable:
|
|
472
|
+
- `overall_result` uses the shared enum above for every verifier type
|
|
473
|
+
- `green_condition_alignment` is mandatory even when the verifier cannot decide, in which case `status=unknown`
|
|
474
|
+
- `checks` belongs in `verification_summary.json` because Iterator often needs a readable rollup rather than only a raw blob
|
|
475
|
+
- `native_payload` in `verifier_output.json` may contain tool-specific objects, arrays, text blocks, or structured traces
|
|
476
|
+
- additional attempt-scoped files such as screenshots, traces, or junit XML may exist, but they should be referenced from the normalized JSON rather than replacing it
|
|
477
|
+
|
|
478
|
+
### Quality bar
|
|
479
|
+
A valid `iterator_run`:
|
|
480
|
+
- captures attempt history rather than only the final state
|
|
481
|
+
- records Iterator-owned judgments distinctly from Coder-authored notes
|
|
482
|
+
- explains terminal disposition in a way that can be audited later
|
|
483
|
+
- keeps the canonical state readable even when several attempts accumulate
|
|
484
|
+
|
|
485
|
+
## Agent-to-artifact mapping
|
|
486
|
+
|
|
487
|
+
- `Framer` authors and amends `task_artifact`
|
|
488
|
+
- `Observer` authors `observation_artifact`
|
|
489
|
+
- `Coder` contributes attempt records and verification summaries under `iterator_run`, with raw verifier detail stored in attempt-scoped files
|
|
490
|
+
- `Iterator` owns `run_state`, `readiness`, respawn reasons, promotion linkage, and the final disposition in `iterator_run`
|
|
491
|
+
- `Iterator` authors `promotion_handoff.json` whenever work exits iterate for `idea` or `insight`
|
|
492
|
+
|
|
493
|
+
This mapping should remain stable even if runtime execution details change.
|
|
494
|
+
|
|
495
|
+
## Remaining open seams
|
|
496
|
+
|
|
497
|
+
At this stage, the two previously open seams are now closed for the pre-prompt design pass:
|
|
498
|
+
- `attempt_id` format is fixed to monotonic ordinal ids of the form `attempt-<NNN...>`, for example `attempt-001`
|
|
499
|
+
- attempt-scoped verifier artifacts use a shared top-level JSON envelope, with verifier-specific structure nested under `native_payload`
|
|
500
|
+
|
|
501
|
+
Still intentionally open for a later design pass:
|
|
502
|
+
1. whether `checks[].check_id` should follow a repo-wide naming convention shared with other lanes
|
|
503
|
+
2. whether `verifier_kind=mixed` should stay a single artifact or fan out into multiple verifier records when one attempt runs several distinct verification modes
|
|
504
|
+
3. whether any attempt-scoped artifacts beyond the normalized JSON pair should become mandatory for certain verifier kinds, such as screenshots for UI checks or junit XML for test suites
|