@exaudeus/workrail 0.5.0 → 0.6.1-beta.0
package/package.json CHANGED
@@ -72,7 +72,28 @@
  "INTELLIGENT QUESTIONING: Use analysis findings to ask targeted, valuable clarifying questions.",
  "ADAPTIVE STRUCTURE: Adjust documentation format and depth based on scope complexity and user needs.",
  "CONTEXT PRESERVATION: Maintain detailed workflow context for seamless resumption across sessions.",
- "COLLABORATIVE READY: Structure workflow for easy handoffs and team collaboration on large scopes."
+ "COLLABORATIVE READY: Structure workflow for easy handoffs and team collaboration on large scopes.",
+ "**VERTICAL SLICE FUNCTIONS:**",
+ "fun findSliceCandidates(scope, analysis) = 'Propose slice candidates grouped by feature/aspect (e.g., Realtime, Sending, Notifications, Storage, API). Use dependency clusters, call graphs, and data-flow boundaries. Return sliceCandidates[].'",
+ "fun chooseSliceHeuristics(preferences) = 'Select grouping heuristics: user-journey, bounded-context, subsystem, data-flow, interface-surface. Set sliceHeuristics context variable.'",
+ "fun deriveSlices(candidates, heuristics, components) = 'Generate slice definitions with: name, rationale, includedComponents[], entryPoints[], dependencies[], dataFlows[], risks[], plannedDocSections[]. Set sliceDocuments context variable.'",
+ "fun mapComponentsToSlices(components, slices) = 'Assign each component to ≥1 slice (allow overlaps if justified). Produce coverage map and set orphanComponents[]. Update Coverage Matrix by slice.'",
+ "fun checkSliceCoverageGate() = 'Ensure 100% component coverage across slices; no orphanComponents; overlaps have rationale. Update Quality Gates with status.'",
+ "fun optimizeSliceDocStructure(slices) = 'For each slice, create doc skeleton: Overview, Responsibilities, Public APIs, Data Flow, Dependencies, Error Handling, Edge Cases, Troubleshooting, Quality Signals.'",
+ "**DIVIO/TEMPLATE & QUALITY FUNCTIONS:**",
+ "fun createSliceTemplates(slices) = 'Create per-slice pages: Tutorial.md, HowTo.md, Concepts.md, Reference.md with standard headers and placeholders.'",
+ "fun enforceDivioSections(slice) = 'Verify required sections exist per page. Missing sections become blockers.'",
+ "fun generateSliceQuickstart(slice) = 'Produce a runnable Quickstart achieving first success in <5 minutes. Record commands and expected outputs.'",
+ "fun importReferenceArtifacts(slice) = 'Import OpenAPI/GraphQL/TypeDoc/Sphinx outputs into Reference.md with anchors and language tabs.'",
+ "fun addInteractiveSamples(slice) = 'Add Mermaid flow diagrams and Postman/cURL/API console samples for key paths.'",
+ "fun generateDiagrams(slice) = 'Create Mermaid diagrams: sequence (key flows), component (architecture), state (lifecycles) as applicable; save under docs/diagrams.'",
+ "fun generateTables(slice) = 'Produce tables for API endpoints, config options, errors, metrics; include columns for version, deprecation, owners.'",
+ "fun lintLinksAndCrossRefs(docs) = 'Check all intra/inter-doc links, anchors, and references; produce report and fix list.'",
+ "fun requireOwnership(slice) = 'Ensure owner metadata (team, maintainers) exists for each slice; update governance table.'",
+ "fun checkHelloWorldGate(slice) = 'Validate Quickstart is runnable, complete, and <5 minutes including prerequisites.'",
+ "fun checkLinkLintGate(docs) = 'Block progression if any broken links/anchors remain. Append evidence to progress doc.'",
+ "fun checkOwnershipGate(slices) = 'Block progression if any slice lacks owners; show missing entries and required actions.'",
+ "fun checkVisualsGate(slice) = 'Enforce presence of required visuals: ≥1 sequence diagram, ≥1 component diagram, and ≥1 key table per slice unless not applicable with rationale.'"
  ],

  "steps": [
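The new metaGuidance entries above describe the vertical-slice helpers only in prose. As a rough illustration of the contract they imply, a coverage check along the lines of mapComponentsToSlices and checkSliceCoverageGate might look like the sketch below; the shapes and return type are assumptions for illustration, not part of the package.

```typescript
// Hypothetical shapes: the workflow only describes these functions in prose.
interface Slice {
  name: string;
  includedComponents: string[];
}

interface CoverageReport {
  coverage: Record<string, string[]>; // component -> slices that document it
  orphanComponents: string[];         // components not mapped to any slice
  overlaps: Record<string, string[]>; // components mapped to more than one slice
}

function mapComponentsToSlices(components: string[], slices: Slice[]): CoverageReport {
  const coverage: Record<string, string[]> = {};
  for (const component of components) {
    coverage[component] = slices
      .filter((s) => s.includedComponents.includes(component))
      .map((s) => s.name);
  }
  const orphanComponents = components.filter((c) => coverage[c].length === 0);
  const overlaps = Object.fromEntries(
    Object.entries(coverage).filter(([, owners]) => owners.length > 1),
  );
  return { coverage, orphanComponents, overlaps };
}

// The slice coverage gate passes only when every component is mapped somewhere.
function checkSliceCoverageGate(report: CoverageReport): boolean {
  return report.orphanComponents.length === 0;
}
```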
@@ -267,6 +288,34 @@
  ],
  "requireConfirmation": true
  },
+ {
+ "id": "phase-4a-vertical-slice-derivation",
+ "title": "Phase 4a: Vertical Slice Derivation & Mapping",
+ "prompt": "**VERTICAL SLICE PLANNING** - Derive a set of focused, standalone documents grouped by aspect/feature of the scope.\n\n**INPUTS**:\n- Use results from Phases 1-2 (analysis, existing docs) and answers from Phase 3 clarifications\n\n**TASKS**:\n1) Use findSliceCandidates(scope, analysis) to propose candidate slices (e.g., Realtime, Sending, Receiving, Storage, API, Admin, Observability)\n2) Use chooseSliceHeuristics(userPreferences) to pick grouping strategy (bounded context, user journey, subsystem, data flow)\n3) Use deriveSlices(sliceCandidates, sliceHeuristics, registeredComponents) to generate sliceDocuments[] with: name, rationale, includedComponents[], entryPoints[], dependencies[], dataFlows[], risks[], plannedDocSections[]\n4) Use mapComponentsToSlices(registeredComponents, sliceDocuments) to ensure every component is assigned; set orphanComponents[] if any\n5) Use optimizeSliceDocStructure(sliceDocuments) to define consistent per-slice document skeleton\n\n**OUTPUTS**:\n- Set context: useSlices=true, sliceHeuristics, sliceDocuments[]\n- Create SlicePlan.md summarizing slices with coverage table and rationales\n- Update DOCUMENTATION_CONTEXT.md with slice plan and navigation\n- If useSlices=true, plan to set documentationStructure = sliceDocuments in Phase 4\n\n**PROGRESS TRACKING**:\n- Log analysis step for slice planning\n- Update Coverage Matrix by slice and component\n- Calculate preliminary documentation units count",
+ "agentRole": "You are designing vertical slices that are cohesive, maintainable, and map cleanly to code boundaries for standalone docs that compose the whole.",
+ "guidance": [
+ "Favor slices that align with dependency clusters and clear data-flow boundaries",
+ "Avoid giant slices; keep each doc focused and navigable",
+ "Allow limited overlap only when justified by shared cross-cutting code",
+ "Ensure each slice can be read as 'every page is page one'"
+ ],
+ "requireConfirmation": true
+ },
+ {
+ "id": "phase-4b-slice-coverage-gate",
+ "title": "Phase 4b: Slice Coverage Gate",
+ "prompt": "**SLICE COVERAGE GATE** - Enforce that all components are covered by at least one slice and no orphans remain.\n\n**VALIDATION**:\nUse checkSliceCoverageGate() to verify:\n- 100% of registered components mapped to sliceDocuments\n- Overlaps (components in >1 slice) have recorded rationale\n- No orphanComponents remain\n- Slice skeletons defined (optimizeSliceDocStructure)\n\n**ENFORCEMENT**:\nUse enforceProgressGates() to block if any condition fails. List specific missing components or rationale.\n\n**OUTPUT**:\n- Gate status recorded in progress doc\n- Proceed only when coverage is complete\n- Set documentationStructure = sliceDocuments when useSlices=true",
+ "agentRole": "You are enforcing coverage so vertical slices truly partition or appropriately overlay the scope without gaps.",
+ "guidance": [
+ "No component may remain unmapped",
+ "Keep slices cohesive and justifiable",
+ "Document any intentional overlaps with reasons"
+ ],
+ "validationCriteria": [
+ { "type": "contains", "value": "Slice Coverage: ✅", "message": "All components must be mapped to slices" }
+ ],
+ "requireConfirmation": true
+ },

  {
  "id": "phase-4-documentation-planning",
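Phase 4a enumerates the fields each derived slice document should carry. A minimal sketch of that shape, with field names taken from the prompt and types assumed for illustration:

```typescript
// Field names come from the Phase 4a prompt; the types are guesses for illustration.
interface SliceDocument {
  name: string;
  rationale: string;
  includedComponents: string[];
  entryPoints: string[];
  dependencies: string[];
  dataFlows: string[];
  risks: string[];
  plannedDocSections: string[];
}

// Context variables the step says it sets before Phase 4b runs its gate.
interface SlicePlanningContext {
  useSlices: boolean;
  sliceHeuristics: string;          // e.g. "bounded-context" or "user-journey"
  sliceDocuments: SliceDocument[];
  orphanComponents: string[];       // should be empty before the coverage gate passes
}
```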
@@ -285,86 +334,83 @@
  {
  "id": "phase-5-comprehensive-documentation",
  "type": "loop",
- "title": "Phase 5: Comprehensive Documentation Creation",
+ "title": "Phase 5: Comprehensive Documentation Creation (Per-Slice Divio Pages)",
  "loop": {
- "type": "forEach",
+ "type": "forEach",
  "items": "documentationStructure",
- "itemVar": "
- "indexVar": "
- "maxIterations":
+ "itemVar": "currentSlice",
+ "indexVar": "sliceIndex",
+ "maxIterations": 30
  },
  "body": [
  {
- "id": "
- "title": "
- "prompt": "
- "agentRole": "You
+ "id": "prepare-slice-templates",
+ "title": "Prepare Slice Templates",
+ "prompt": "Use createSliceTemplates([currentSlice]) and enforceDivioSections(currentSlice).\nCreate files: Tutorial.md, HowTo.md, Concepts.md, Reference.md with headers and placeholders.\nRecord ownership metadata and navigation links.",
+ "agentRole": "You scaffold Divio-compliant pages for the slice.",
  "guidance": [
- "
- "
- "Ensure structure supports both human and agent consumption",
- "Initialize progress tracking for this document"
+ "Ensure required sections exist on each page",
+ "Set up standard headers and page navigation"
  ],
  "requireConfirmation": false
  },
-
  {
- "id": "
- "title": "
- "prompt": "
- "agentRole": "You
+ "id": "generate-quickstart-and-hello-world",
+ "title": "Generate Quickstart and Hello World Gate",
+ "prompt": "Run generateSliceQuickstart(currentSlice). Produce a minimal runnable path to success in <5 minutes.\nThen run checkHelloWorldGate(currentSlice). Block if unmet and list fixes.",
+ "agentRole": "You ensure each slice has a fast, runnable quickstart.",
+ "guidance": [
+ "Prioritize real, copy-pasteable commands",
+ "Call out prerequisites explicitly"
+ ],
+ "requireConfirmation": true
+ },
+ {
+ "id": "synthesize-concepts-and-howto",
+ "title": "Write Concepts and How‑to Pages",
+ "prompt": "Populate Concepts.md with architecture, responsibilities, data flow, dependencies, and design rationale from analysis.\nPopulate HowTo.md with the top 3-7 tasks developers perform, each step-by-step with code. Cross-link to Tutorial and Reference.",
+ "agentRole": "You synthesize analysis into clear concepts and actionable how‑tos.",
  "guidance": [
- "
- "
- "Integrate user clarifications naturally",
- "Balance depth with clarity for the intended audience"
+ "Ground every assertion in code evidence",
+ "Prefer tasks developers actually do"
  ],
  "requireConfirmation": false
  },
-
  {
- "id": "
- "title": "
- "prompt": "
- "agentRole": "You
+ "id": "import-reference-and-samples",
+ "title": "Import Reference and Add Interactive Samples",
+ "prompt": "Use importReferenceArtifacts(currentSlice) to generate Reference.md from OpenAPI/TypeDoc/etc. Add language tabs.\nUse addInteractiveSamples(currentSlice) to include Mermaid diagrams and Postman/cURL.",
+ "agentRole": "You generate authoritative, current reference and interactive artifacts.",
  "guidance": [
- "
- "
- "Reference specific analysis sources for credibility",
- "Maintain consistency across all sections"
+ "Reference must be source-of-truth and up-to-date",
+ "Include at least one sequence or flow diagram"
  ],
  "requireConfirmation": false
  },
-
  {
- "id": "
- "title": "
- "prompt": "
- "agentRole": "You
+ "id": "generate-visuals-and-tables",
+ "title": "Generate Required Visuals and Tables",
+ "prompt": "Run generateDiagrams(currentSlice) to produce sequence/component/state diagrams as applicable.\nRun generateTables(currentSlice) to create API/config/errors/metrics tables with required columns.",
+ "agentRole": "You produce comprehensive visuals and structured tables for clarity.",
  "guidance": [
- "
- "
- "Ensure consistent terminology throughout",
- "Complete all progress tracking requirements"
+ "Prefer Mermaid for diagrams stored as text",
+ "Ensure tables are complete and normalized"
  ],
  "requireConfirmation": false
  },
-
  {
- "id": "
- "title": "Internal
- "prompt": "
- "agentRole": "You
+ "id": "link-lint-and-review",
+ "title": "Link Lint and Internal Review",
+ "prompt": "Run lintLinksAndCrossRefs(currentSlice.docs). Block via checkLinkLintGate(currentSlice.docs) if any broken links.\nRun checkVisualsGate(currentSlice) to enforce diagrams/tables presence.\nPerform internal review; rate documentQualityScore (≥8 required).",
+ "agentRole": "You enforce link integrity and quality standards.",
  "guidance": [
- "
- "
- "Verify technical accuracy by checking against code analysis",
- "Assess value from both human and agent user perspectives"
+ "All links and anchors must resolve",
+ "Quality score below 8 triggers fixes"
  ],
  "requireConfirmation": {
  "or": [
- {"var": "documentQualityScore", "lt": 8},
- {"var": "automationLevel", "equals": "Low"}
+ { "var": "documentQualityScore", "lt": 8 },
+ { "var": "automationLevel", "equals": "Low" }
  ]
  }
  }
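The reworked Phase 5 loop runs once per slice and scaffolds four Divio-style pages before filling them in. A toy sketch of what the scaffolding step describes, where the docs root, directory layout, and header text are illustrative assumptions rather than package behavior:

```typescript
import { mkdirSync, writeFileSync } from "node:fs";
import { join } from "node:path";

// Create Tutorial/HowTo/Concepts/Reference placeholders for one slice,
// mirroring what createSliceTemplates is described as doing in the prompt.
function createSliceTemplates(sliceName: string, docsRoot = "docs"): string[] {
  const dir = join(docsRoot, sliceName);
  mkdirSync(dir, { recursive: true });
  const pages = ["Tutorial.md", "HowTo.md", "Concepts.md", "Reference.md"];
  for (const page of pages) {
    const title = page.replace(".md", "");
    writeFileSync(join(dir, page), `# ${sliceName} ${title}\n\n<!-- TODO: fill in -->\n`);
  }
  return pages.map((p) => join(dir, p));
}

// Usage: createSliceTemplates("Realtime") creates docs/Realtime/{Tutorial,HowTo,Concepts,Reference}.md
```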
@@ -375,7 +421,7 @@
  {
  "id": "phase-5a-documentation-completion-gate",
  "title": "Phase 5a: Documentation Completion Gate",
- "prompt": "**DOCUMENTATION COMPLETION GATE** - Verify all components are fully documented before final integration.\n\n**COMPLETION VALIDATION**:\nUse checkCoverageGate('documentation-complete') to verify:\n- ✅ Documentation Coverage: 100%
+ "prompt": "**DOCUMENTATION COMPLETION GATE** - Verify all components and slices are fully documented before final integration.\n\n**COMPLETION VALIDATION**:\nUse checkCoverageGate('documentation-complete') and checkSliceCoverageGate() to verify:\n- ✅ Documentation Coverage: 100% components documented across slices\n- ✅ Slice Quality: All slice pages (Tutorial/How‑to/Concepts/Reference) ≥8/10\n- ✅ Quickstart: All slices pass Hello World gate\n- ✅ Links: Link-lint gate passes for all docs\n- ✅ Agent Optimization: Documents structured for agent consumption\n\n**COMPREHENSIVE CHECK**:\nUse enforceProgressGates() to ensure:\n- No components or slices remain incomplete\n- All planned documents created and validated\n- Ownership metadata present per slice\n\n**FINAL METRICS**:\nUse calculateCompletionMetrics() to compute:\n- `documentationComplete` = 100% (required)\n- `overallQualityScore` = weighted average across slices\n- `documentsAtRisk` = count of pages below quality threshold\n\n**BLOCKING CONDITIONS**:\nCannot proceed if any slice fails quickstart/links/quality or any component unmapped.\n\n**OUTPUT**: Documentation completion gate status with specific requirements if blocked",
  "agentRole": "You are enforcing complete documentation coverage with quality standards before allowing final integration.",
  "guidance": [
  "No component can remain undocumented",
@@ -391,7 +437,7 @@
  },
  {
  "type": "contains",
- "value": "Quality
+ "value": "Slice Quality: ✅",
  "message": "Cannot proceed until all documents meet quality standards (≥8/10)"
  }
  ],
@@ -401,7 +447,7 @@
  {
  "id": "phase-6-final-integration",
  "title": "Phase 6: Final Integration & Navigation Setup",
- "prompt": "**FINAL INTEGRATION** - Create navigation, cross-references, and final documentation package.\n\n**INTEGRATION TASKS**:\n\n1. **Create
+ "prompt": "**FINAL INTEGRATION** - Create hub navigation, cross-references, and final documentation package.\n\n**INTEGRATION TASKS**:\n\n1. **Create Documentation Hub**:\n - Index page listing all slices (Tutorial, How‑to, Concepts, Reference per slice)\n - Global glossary and common patterns\n - Version switcher and changelog links\n\n2. **Cross-Reference Validation**:\n - Verify inter-slice links and \"See also\" references\n - Ensure bidirectional links where appropriate\n\n3. **Consistency Pass**:\n - Standardize terminology, style, and visuals across slices\n - Ensure code and samples use consistent languages/tabs\n\n4. **Completeness Verification**:\n - Confirm all slice gates passed (quickstart, links, quality)\n - Validate coverage against original scope boundaries\n\n5. **Agent Optimization**:\n - Add per-slice summaries and navigation metadata\n - Ensure each page is \"Every Page is Page One\" compliant\n\n**CREATE DOCUMENTATION PACKAGE**:\n- Hub index\n- Per-slice pages\n- Diagrams and samples\n- Change log\n\n**FINAL VALIDATION**: Use validateDocumentation(docs, scope)\n- Completeness and accuracy\n- Usability and maintainability\n\n**OUTPUT**: Complete, integrated documentation package ready for use",
  "agentRole": "You are finalizing the documentation package, ensuring it works as a cohesive whole and serves its intended purpose effectively.",
  "guidance": [
  "Focus on the user experience of consuming this documentation",
@@ -9,6 +9,8 @@
  "What was the last known working version or state if applicable?",
  "Are there any time constraints or urgency factors for this investigation?",
  "What level of system access do you have? (full codebase, limited access, production logs only)",
+ "What existing documentation is available? (README files, architecture docs, API docs, design documents, runbooks)",
+ "Do you have access to existing logs? (production logs, error logs, debug logs, metrics, traces)",
  "Do you have preferences for handling large log volumes? (sub-chat analysis, inline summaries only, or no preference for automatic decision)"
  ],
  "preconditions": [
@@ -109,61 +111,8 @@
  "requireConfirmation": false
  },
  {
- "id": "phase-0c-
- "
- "title": "Phase 0c: Reproducibility Verification Loop",
- "loop": {
- "type": "for",
- "count": 3,
- "maxIterations": 3,
- "iterationVar": "reproductionAttempt"
- },
- "body": [
- {
- "id": "reproduce-bug",
- "title": "Reproduction Attempt {{reproductionAttempt}}/3",
- "prompt": "**REPRODUCTION ATTEMPT {{reproductionAttempt}}/3**\n\nExecute the provided reproduction steps:\n1. Follow exact steps from bug report\n2. Document outcome (Success/Failure)\n3. Note any variations in behavior\n4. Capture error messages/stack traces\n\n**Update context:**\n- Set `reproductionResults[{{reproductionAttempt - 1}}]` = true/false\n- If failed, document why\n- Track any intermittent patterns",
- "agentRole": "You are systematically verifying bug reproducibility to ensure solid investigation foundation.",
- "guidance": [
- "Execute exactly as specified",
- "Document any deviations",
- "Capture all error details"
- ],
- "requireConfirmation": false
- }
- ],
- "requireConfirmation": false
- },
- {
- "id": "phase-0d-reproducibility-assessment",
- "title": "Phase 0d: Reproducibility Assessment",
- "prompt": "**ASSESS REPRODUCIBILITY**\n\nBased on 3 reproduction attempts:\n- **Success Rate**: Calculate percentage\n- **Pattern Analysis**: Identify any intermittent patterns\n- **Minimal Reproduction**: Create simplified test case if needed\n\n**DECISION:**\n- If 100% reproducible: Proceed to Phase 1\n- If intermittent: Apply stress techniques and document patterns\n- If 0% reproducible: Request more information from user\n\n**Set `isReproducible` = true/false based on assessment**",
- "agentRole": "You are assessing reproduction results to determine investigation viability.",
- "guidance": [
- "100% reproduction is ideal but not always required",
- "Document intermittent patterns for investigation",
- "Create minimal test case for complex scenarios"
- ],
- "validationCriteria": [
- {
- "type": "contains",
- "value": "reproducib",
- "message": "Must make reproducibility determination"
- }
- ],
- "hasValidation": true,
- "runCondition": {
- "var": "reproductionAttempt",
- "equals": 3
- }
- },
- {
- "id": "phase-0e-tool-check",
- "title": "Phase 0e: Tool Availability Verification",
- "runCondition": {
- "var": "isReproducible",
- "equals": true
- },
+ "id": "phase-0c-tool-check",
+ "title": "Phase 0c: Tool Availability Verification",
  "prompt": "**TOOL AVAILABILITY CHECK** - Verify required debugging tools before investigation.\n\n**CORE TOOLS CHECK:**\n1. **Analysis Tools**:\n - grep_search: Text pattern searching\n - read_file: File content reading\n - codebase_search: Semantic code search\n - Test availability, note any failures\n\n2. **Git Operations**:\n - Check git availability: `git --version`\n - If unavailable, set `gitAvailable = false`\n - Plan fallback: manual change tracking\n\n3. **Build/Test Tools** (based on projectType):\n - npm/yarn for JavaScript\n - Maven/Gradle for Java\n - pytest/unittest for Python\n - Document which are available\n\n4. **Debugging Tools**:\n - Language-specific debuggers\n - Profilers if needed\n - Log aggregation tools\n\n**FALLBACK STRATEGIES:**\n- grep_search fails → use file_search\n- codebase_search fails → use grep_search with context\n- Git unavailable → track changes in INVESTIGATION_CONTEXT.md\n- Build tools missing → focus on static analysis\n\n**OUTPUT**:\n- Set `availableTools` context variable\n- Set `toolLimitations` with any restrictions\n- Document fallback strategies in context\n\n**ADAPTATION**: Adjust investigation approach based on available tools.",
  "agentRole": "You are a tool availability specialist ensuring the investigation can proceed smoothly with available resources. You excel at creating fallback strategies.",
  "guidance": [
@@ -175,13 +124,9 @@
  "requireConfirmation": false
  },
  {
- "id": "phase-
- "title": "Phase
- "
- "var": "isReproducible",
- "equals": true
- },
- "prompt": "**CREATE INVESTIGATION CONTEXT**\n\nUse createInvestigationBranch(), then create INVESTIGATION_CONTEXT.md with:\n\n1. **Bug Summary**: ID, description, complexity, reproducibility, status, automation level\n2. **Progress Tracking**: Use visualProgress() to show phases completed/remaining\n3. **Environment**: Project type, debugging mechanism, architecture, tools, user preferences\n4-8. **Section Placeholders**: Analysis, Hypotheses, Evidence, Experiments, Dead Ends\n9. **Function Definitions**: Include all from metaGuidance\n10. **Resumption Instructions**:\n - workflow_get: id=\"systematic-bug-investigation-with-loops\", mode=\"preview\"\n - workflow_next: JSON with workflowId, completedSteps, context variables\n\n**Key Variables**: bugComplexity, projectType, isReproducible, debuggingMechanism, isDistributed, automationLevel, userDebugPreferences, availableTools\n\n**Set contextInitialized = true**",
+ "id": "phase-0d-create-context",
+ "title": "Phase 0d: Initialize Investigation Context",
+ "prompt": "**CREATE INVESTIGATION CONTEXT** - Initialize comprehensive tracking document.\n\nUse createInvestigationBranch() to set up version control, then create INVESTIGATION_CONTEXT.md:\n\n```markdown\n# Investigation Context\n\n## 1. Bug Summary\n- **ID**: {{bugId || 'investigation-' + Date.now()}}\n- **Description**: [from bug report]\n- **Complexity**: {{bugComplexity}}\n- **Started**: {{new Date().toISOString()}}\n- **Status**: Phase 0d - Context Initialization\n- **Automation Level**: {{automationLevel}}\n\n## 2. Progress Tracking\n{{visualProgress()}}\n✅ Completed: Phase 0 (Triage), Phase 0a (Assumptions), Phase 0b (User Preferences), Phase 0c (Tools)\n🔄 Current: Phase 0d (Context Creation)\n⏳ Remaining: Phase 1 (Analysis), Phase 2 (Hypotheses), Phase 3-5 (Validation), Phase 6 (Writeup)\n📊 Confidence: 0/10\n\n## 3. Environment & Setup\n- **Project Type**: {{projectType}}\n- **Debugging Mechanism**: {{debuggingMechanism}}\n- **Architecture**: {{isDistributed ? 'Distributed' : 'Monolithic'}}\n- **User Preferences**: {{userDebugPreferences}}\n- **Available Tools**: {{availableTools}}\n- **Tool Limitations**: {{toolLimitations || 'None'}}\n\n## 4. Analysis Findings\n*To be populated during Phase 1*\n\n## 5. Hypothesis Registry\n*To be populated during Phase 2*\n\n## 6. Evidence Log\n*To be populated during validation*\n\n## 7. Experiment Results\n*To be populated if experiments conducted*\n\n## 8. Dead Ends & Lessons\n*Track approaches that didn't work*\n\n## 9. Function Definitions\n[Include all function definitions from metaGuidance for reference]\n\n## 10. Resumption Instructions\n\n### How to Resume This Investigation\n\n1. **Get the workflow**: Call `workflow_get` with:\n - id: \"systematic-bug-investigation-with-loops\"\n - mode: \"preview\" (to see next step)\n\n2. **Resume from saved state**: Call `workflow_next` with the JSON below:\n\n```json\n{\n \"workflowId\": \"systematic-bug-investigation-with-loops\",\n \"completedSteps\": [\"phase-0-triage\", \"phase-0a-assumption-check\", \"phase-0b-user-preferences\", \"phase-0c-tool-check\", \"phase-0d-create-context\"],\n \"context\": {\n \"bugComplexity\": \"{{bugComplexity}}\",\n \"projectType\": \"{{projectType}}\",\n \"debuggingMechanism\": \"{{debuggingMechanism}}\",\n \"isDistributed\": {{isDistributed || false}},\n \"automationLevel\": \"{{automationLevel}}\",\n \"userDebugPreferences\": {{JSON.stringify(userDebugPreferences)}},\n \"availableTools\": {{JSON.stringify(availableTools)}},\n \"toolLimitations\": {{JSON.stringify(toolLimitations)}}\n }\n}\n```\n\n3. **Continue investigation**: The workflow will pick up from where it left off\n\n### Important Notes\n- Update `completedSteps` array after completing each phase\n- Preserve all context variables for proper state restoration\n- This JSON should be updated after major milestones\n```\n\n**Set `contextInitialized` = true**",
  "agentRole": "You are creating the central documentation hub for this investigation. This document will track all progress, findings, and enable seamless handoffs.",
  "guidance": [
  "Create a comprehensive but scannable document",
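The resumption instructions in the new Phase 0d prompt revolve around a `workflow_next` call carrying the completed steps plus saved context. A small sketch of assembling that payload; the helper and example values here are hypothetical, only the field names and the workflow id come from the prompt:

```typescript
interface ResumePayload {
  workflowId: string;
  completedSteps: string[];
  context: Record<string, unknown>;
}

// Build the JSON body the prompt says to pass to workflow_next when resuming.
function buildResumePayload(
  context: Record<string, unknown>,
  completedSteps: string[],
): ResumePayload {
  return {
    workflowId: "systematic-bug-investigation-with-loops",
    completedSteps,
    context,
  };
}

// Example: resuming right after Phase 0d (illustrative context values).
const payload = buildResumePayload(
  { bugComplexity: "medium", projectType: "node", automationLevel: "High" },
  [
    "phase-0-triage",
    "phase-0a-assumption-check",
    "phase-0b-user-preferences",
    "phase-0c-tool-check",
    "phase-0d-create-context",
  ],
);
console.log(JSON.stringify(payload, null, 2));
```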
@@ -197,10 +142,6 @@
  "id": "phase-1-iterative-analysis",
  "type": "loop",
  "title": "Phase 1: Multi-Dimensional Codebase Analysis",
- "runCondition": {
- "var": "isReproducible",
- "equals": true
- },
  "loop": {
  "type": "for",
  "count": 4,
@@ -211,15 +152,22 @@
  {
  "id": "analysis-breadth-scan",
  "title": "Analysis 1/4: Breadth Scan",
- "prompt": "**BREADTH SCAN
+ "prompt": "**BREADTH SCAN - Cast Wide Net**\n\nGoal: Understand full system impact and identify all potentially involved components.\n\nPerform: Error propagation mapping, Component discovery, Data flow mapping, Recent changes analysis, and Historical pattern search.\n\n**Output**: Complete BreadthAnalysis.md with component interaction map, data flow diagram, suspicious areas ranked by likelihood, and list of all potentially related files and functions.",
  "agentRole": "You are performing systematic analysis phase 1 of 4. Your focus is casting a wide net to find all potentially related components.",
  "guidance": [
  "This is analysis phase 1 of 4 total phases",
  "Phase 1 = Breadth Scan - Cast wide net for all related components",
  "Create BreadthAnalysis.md with structured findings",
+ "ERROR PROPAGATION MAPPING: Use grep_search for all error occurrences, trace error messages across all log files, map all stack traces to identify call chains, document every point where error appears or is handled",
+ "COMPONENT DISCOVERY: Find ALL components that interact with failing area, use codebase_search \"How is [failing component] used?\", identify all callers and callees, build component interaction map, note both direct and indirect relationships",
+ "DATA FLOW MAPPING: Trace data that flows through bug area, identify all transformations applied to data, find all persistence points (database, cache, files), document complete data journey, note where data could be corrupted or lost",
+ "RECENT CHANGES ANALYSIS: Git history for all identified components, check last 10 commits affecting these areas, identify when bug likely appeared, look for related PRs or issues, note any configuration or dependency changes",
+ "HISTORICAL PATTERN SEARCH: Use findSimilarBugs() to search for similar error patterns in codebase, previous fixes to related components, related test failures in history",
  "Use findSimilarBugs() to search for historical patterns",
  "Use the function definitions for standardized operations",
- "Update INVESTIGATION_CONTEXT.md after completion"
+ "Update INVESTIGATION_CONTEXT.md after completion",
+ "Be thorough - it's better to include too much than miss something critical",
+ "Document your reasoning for why each component is potentially involved"
  ],
  "runCondition": {"var": "analysisPhase", "equals": 1},
  "requireConfirmation": false
@@ -227,7 +175,7 @@
  {
  "id": "analysis-deep-dive",
  "title": "Analysis 2/4: Component Deep Dive",
- "prompt": "**COMPONENT DEEP DIVE**\n\
+ "prompt": "**COMPONENT DEEP DIVE - Understand Internals**\n\nGoal: Deep understanding of top 5 suspicious components from breadth scan.\n\nFor each component, use recursiveAnalysis(component, 3) to perform 3-level analysis: Direct Implementation (Level 1), Direct Dependencies (Level 2), and Integration Points (Level 3). Document likelihood scores, suspicious code sections, failure modes, and red flags.\n\n**Output**: ComponentAnalysis.md with deep insights for top 5 components, ranked list of most likely root cause locations, detailed notes on internals, and dependency graph showing relationships.",
  "agentRole": "You are performing systematic analysis phase 2 of 4. Your focus is deep diving into the most suspicious components to understand their internals.",
  "guidance": [
  "This is analysis phase 2 of 4 total phases",
@@ -235,7 +183,13 @@
  "Build on findings from Phase 1 Breadth Scan",
  "Create ComponentAnalysis.md with structured findings",
  "Use recursiveAnalysis() for systematic exploration",
- "
+ "LEVEL 1 - DIRECT IMPLEMENTATION: Read COMPLETE file including private methods, understand state management and data structures, analyze error handling patterns, check initialization and cleanup logic, document all public/private APIs, identify assumptions or invariants, note TODO/FIXME comments",
+ "LEVEL 2 - DIRECT DEPENDENCIES: Follow all imports and their usage, understand dependency contracts and interfaces, check version compatibility and breaking changes, analyze coupling points and data exchange, look for shared mutable state, identify circular dependencies, document how failures could propagate",
+ "LEVEL 3 - INTEGRATION POINTS: How component fits in larger system architecture, side effects and external calls (DB, API, file system), concurrency and threading concerns, resource management (memory, connections, handles), caching and state synchronization, event handling and callbacks, configuration and environment dependencies",
+ "FOR EACH COMPONENT DOCUMENT: Likelihood score (1-10) of being root cause, specific suspicious code sections with line numbers, potential failure modes and their symptoms, dependencies that could be sources of issues, red flags (complex logic, error handling gaps, race conditions)",
+ "Update INVESTIGATION_CONTEXT.md after completion",
+ "Go deep - read entire files, not just the obvious parts",
+ "Look for subtle issues like race conditions, edge cases, and assumptions"
  ],
  "runCondition": {"var": "analysisPhase", "equals": 2},
  "requireConfirmation": false
@@ -243,15 +197,21 @@
  {
  "id": "analysis-dependencies",
  "title": "Analysis 3/4: Dependencies & Flow",
- "prompt": "**DEPENDENCY & FLOW ANALYSIS**\n\
+ "prompt": "**DEPENDENCY & FLOW ANALYSIS - Trace Connections**\n\nGoal: Understand how components interact and data flows between them.\n\nPerform: Static dependency graph analysis, Runtime flow analysis, Data transformation pipeline tracing, and Integration analysis.\n\n**Output**: FlowAnalysis.md with sequence diagrams showing execution flow, data flow maps with transformation points, complete dependency graph, list of all integration points and failure modes, and timeline showing order of operations.",
  "agentRole": "You are performing systematic analysis phase 3 of 4. Your focus is tracing how components connect and data flows between them.",
  "guidance": [
  "This is analysis phase 3 of 4 total phases",
  "Phase 3 = Dependencies - Trace connections and data flows",
  "Build on component understanding from Phase 2",
  "Create FlowAnalysis.md with diagrams and flow charts",
+ "STATIC DEPENDENCY GRAPH: Build complete import/dependency tree, identify circular dependencies, find hidden dependencies (reflection, dynamic loading, DI), map version constraints and compatibility, document shared libraries and utilities, note tight coupling or fragile dependencies",
+ "RUNTIME FLOW ANALYSIS: Trace execution paths to bug, identify async/concurrent flows and coordination, map state changes through execution, document control flow (conditionals, loops, exceptions), track callback chains and event handlers, identify divergence points, note timing dependencies and race conditions",
+ "DATA TRANSFORMATION PIPELINE: Track data from input to error point, document each transformation with input/output types, identify validation points and what they check, find where data could be corrupted/lost, note serialization/deserialization boundaries, track data format conversions, document enrichment/filtering steps",
+ "INTEGRATION ANALYSIS: External service calls and failure modes, database interactions (reads/writes/transactions), message queue operations and formats, file system operations and error handling, network calls and timeout handling, cache usage and invalidation, third-party library calls",
  "Focus on runtime behavior and integration points",
- "Update INVESTIGATION_CONTEXT.md after completion"
+ "Update INVESTIGATION_CONTEXT.md after completion",
+ "Pay special attention to async boundaries and error propagation",
+ "Look for implicit dependencies that aren't obvious from imports"
  ],
  "runCondition": {"var": "analysisPhase", "equals": 3},
  "requireConfirmation": false
@@ -259,15 +219,22 @@
  {
  "id": "analysis-test-coverage",
  "title": "Analysis 4/4: Test Coverage",
- "prompt": "**TEST COVERAGE ANALYSIS
+ "prompt": "**TEST COVERAGE ANALYSIS - Leverage Existing Knowledge**\n\nGoal: Use existing tests as source of truth about system behavior.\n\nFor each suspicious component, use analyzeTests(component) to perform: Direct test coverage analysis, Integration test analysis, Test history investigation, Test execution with debugging, and Coverage gap analysis.\n\n**Output**: TestAnalysis.md with coverage gaps matrix, suspicious test patterns, test evidence for hypotheses, recommendations for tests to add, and complete test inventory for affected components.",
  "agentRole": "You are performing systematic analysis phase 4 of 4. Your focus is leveraging existing tests to understand expected behavior and find coverage gaps.",
  "guidance": [
  "This is analysis phase 4 of 4 total phases",
  "Phase 4 = Tests - Analyze test coverage and quality",
  "Build on all previous analysis phases",
  "Create TestAnalysis.md with coverage gap matrix",
+ "DIRECT TEST COVERAGE: Find all tests using grep/test discovery, analyze what's tested (happy/edge/error cases), identify what's NOT tested, check test quality and assertion strength, note mocking/stubbing that might hide issues, review test names and docs",
+ "INTEGRATION TEST ANALYSIS: Find end-to-end tests for bug area, analyze assumptions/preconditions, check for flaky tests, review disabled/skipped tests and why, look for TODO/incomplete tests, identify multi-component tests, verify if tests cover failing scenario",
+ "TEST HISTORY: When were tests added/modified? Do test changes correlate with bug appearance? Were tests removed/disabled recently? Use git blame for authors and context, look for related PRs/issues, review test evolution",
+ "TEST EXECUTION WITH DEBUGGING: Run tests with debug flags (--verbose, --debug), add instrumentation to tests themselves, compare expected vs actual in detail, run in isolation and in suite, try different orderings to check dependencies, monitor resource usage",
+ "COVERAGE GAP ANALYSIS: Use coverage tools for untested code paths, map coverage to bug components, identify branches/conditions never exercised, note error handling without tests, document missing edge cases, recommend tests to add",
  "Run tests with debug flags for additional insights",
- "After completion, use trackInvestigation('Phase 1 Complete', 'Moving to Hypothesis Development')"
+ "After completion, use trackInvestigation('Phase 1 Complete', 'Moving to Hypothesis Development')",
+ "Tests often reveal the 'expected' behavior - compare with actual behavior",
+ "Missing tests often indicate areas where bugs hide"
  ],
  "runCondition": {"var": "analysisPhase", "equals": 4},
  "requireConfirmation": false
@@ -360,8 +327,29 @@
  "hasValidation": true
  },
  {
- "id": "phase-2c-
- "title": "Phase 2c:
+ "id": "phase-2c-hypothesis-assumptions",
+ "title": "Phase 2c: Hypothesis Assumption Audit",
+ "prompt": "**AUDIT** each hypothesis for hidden assumptions:\n\n**FOR EACH HYPOTHESIS**:\n- List implicit assumptions\n- Rate assumption confidence (1-10)\n- Identify verification approach\n\n**REJECT** hypotheses built on unverified assumptions.",
+ "agentRole": "You are a rigorous scientist who rejects any hypothesis not grounded in verified facts.",
+ "guidance": [
+ "EXPLICIT LISTING: Write out every assumption, no matter how obvious it seems",
+ "CONFIDENCE SCORING: Rate 1-10 based on evidence quality, not intuition",
+ "VERIFICATION PLAN: For each assumption, specify how it can be tested",
+ "REJECTION CRITERIA: Any assumption with confidence <7 requires verification",
+ "DOCUMENT RATIONALE: Explain why each assumption is accepted or needs testing"
+ ],
+ "validationCriteria": [
+ {
+ "type": "contains",
+ "value": "Assumption confidence",
+ "message": "Must rate assumption confidence for each hypothesis"
+ }
+ ],
+ "hasValidation": true
+ },
+ {
+ "id": "phase-2d-prepare-validation",
+ "title": "Phase 2d: Prepare Hypothesis Validation",
  "prompt": "**PREPARE VALIDATION ARRAY** - Extract the top 3 hypotheses for systematic validation.\n\n**Create `hypothesesToValidate` array with:**\n```json\n[\n {\n \"id\": \"H1\",\n \"description\": \"[Hypothesis description]\",\n \"evidenceStrength\": [score],\n \"testability\": [score],\n \"validationPlan\": \"[Specific testing approach]\"\n },\n // ... H2, H3\n]\n```\n\n**Set context variables:**\n- `hypothesesToValidate`: Array of top 3 hypotheses\n- `currentConfidence`: 0 (will be updated during validation)\n- `validationIterations`: 0 (tracks validation cycles)",
  "agentRole": "You are preparing the systematic validation process by structuring hypotheses for iteration.",
  "guidance": [
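Phase 2d sketches the `hypothesesToValidate` entries as JSON inside its prompt. The same structure as a type, for reference; the field names follow the prompt, while treating the scores as plain numbers is an assumption:

```typescript
interface HypothesisToValidate {
  id: string;               // "H1", "H2", "H3"
  description: string;
  evidenceStrength: number; // 1-10
  testability: number;      // 1-10
  validationPlan: string;   // specific testing approach
}

// Context variables the step initializes alongside the array.
interface ValidationContext {
  hypothesesToValidate: HypothesisToValidate[];
  currentConfidence: number;    // starts at 0, updated during validation
  validationIterations: number; // starts at 0, tracks validation cycles
}
```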
@@ -372,8 +360,8 @@
  "requireConfirmation": false
  },
  {
- "id": "phase-
- "title": "Phase
+ "id": "phase-2e-test-evidence-gathering",
+ "title": "Phase 2e: Test-Based Hypothesis Evidence",
  "runCondition": {
  "var": "hypothesesToValidate",
  "not_equals": null
@@ -389,9 +377,9 @@
  "requireConfirmation": false
  },
  {
- "id": "phase-
+ "id": "phase-2f-hypothesis-verification",
  "type": "loop",
- "title": "Phase
+ "title": "Phase 2f: Hypothesis Verification & Refinement",
  "runCondition": {
  "var": "hypothesesToValidate",
  "not_equals": null
@@ -444,9 +432,67 @@
  "requireConfirmation": false
  },
  {
- "id": "phase-
+ "id": "phase-2g-instrumentation-planning",
+ "title": "Phase 2g: Unified Instrumentation Planning",
+ "prompt": "**UNIFIED INSTRUMENTATION PLANNING** - Plan comprehensive logging strategy for all hypotheses before implementation.\n\n**GOAL**: Create a coordinated instrumentation plan that efficiently captures evidence for all hypotheses in a single execution.\n\n**STEP 1: Hypothesis Review**\nFor each hypothesis (H1, H2, H3):\n- **Component(s)**: Which components need instrumentation?\n- **Critical Paths**: Which execution paths must be logged?\n- **Key Variables**: What state/data must be captured?\n- **Decision Points**: What conditionals/branches matter?\n- **Timing Concerns**: Any concurrency or timing-sensitive areas?\n\n**STEP 2: Identify Instrumentation Locations**\n\nFor each hypothesis, list specific locations:\n```\nH1 Instrumentation Needs:\n - File: auth/login.ts, Function: validateCredentials, Lines: 45-67\n What to log: input credentials format, validation result, error conditions\n - File: auth/session.ts, Function: createSession, Lines: 23-34\n What to log: session creation parameters, user context\n\nH2 Instrumentation Needs:\n - File: auth/session.ts, Function: createSession, Lines: 23-34 [OVERLAP with H1]\n What to log: session storage backend, timing\n - File: database/connection.ts, Function: getConnection, Lines: 89-102\n What to log: connection pool state, timeout settings\n\nH3 Instrumentation Needs:\n - File: cache/redis.ts, Function: set, Lines: 156-178\n What to log: cache key, TTL, success/failure\n```\n\n**STEP 3: Identify Overlaps**\n\nWhere do multiple hypotheses need logging at the same location?\n```\nOverlapping Instrumentation:\n - auth/session.ts:23-34: Both H1 and H2 need logs here\n Strategy: Single log point with both [H1] and [H2] prefixes capturing all needed data\n \n - No other overlaps identified\n```\n\n**STEP 4: Plan Log Format & Structure**\n\nDefine what each log should contain:\n```\nLog Format Standard:\n [HX] ClassName.methodName:{lineNum} | timestamp | specific-data\n\nH1 Log Examples:\n [H1] LoginValidator.validateCredentials:45 | 2025-10-02T10:23:45.123Z | input={email: user@example.com, hasPassword: true}\n [H1] LoginValidator.validateCredentials:52 | 2025-10-02T10:23:45.145Z | validation=FAILED reason=\"invalid format\"\n\nH2 Log Examples:\n [H2] SessionManager.createSession:23 | 2025-10-02T10:23:45.167Z | backend=redis poolSize=10\n [H2] SessionManager.createSession:28 | 2025-10-02T10:23:45.189Z | sessionId=abc123 stored=true latency=22ms\n```\n\n**STEP 5: Plan Data Capture Strategy**\n\nWhat specific data values need to be captured:\n- **H1 requires**: Credential format, validation results, error messages\n- **H2 requires**: Backend type, connection timing, pool state\n- **H3 requires**: Cache keys, TTL values, hit/miss rates\n\n**STEP 6: Consider Edge Cases**\n\n- **High-frequency locations**: Plan aggregation (e.g., log every 10th iteration)\n- **Sensitive data**: Plan redaction (e.g., mask passwords, PII)\n- **Large data structures**: Plan summarization (e.g., object size, key count, not full dump)\n- **Error paths**: Ensure error cases are logged, not just happy path\n\n**STEP 7: Create Instrumentation Implementation Plan**\n\nProduce structured plan:\n```markdown\n# Instrumentation Implementation Plan\n\n## Summary\n- Total instrumentation points: [count]\n- Overlapping locations: [count]\n- Estimated log volume: [low/medium/high]\n- Sensitive data handling: [yes/no - describe]\n\n## H1 Instrumentation (Priority: High, Evidence Strength: 8/10)\n1. Location: auth/login.ts:45-67\n Function: validateCredentials\n Log: [H1] Input format and validation result\n Frequency: Per-call (not high-frequency)\n Data: {email format, hasPassword, validation result, error}\n\n2. Location: auth/session.ts:23-34 [SHARED with H2]\n Function: createSession \n Log: [H1] Session creation context\n Frequency: Per-call\n Data: {userContext, sessionType}\n\n## H2 Instrumentation (Priority: High, Evidence Strength: 7/10)\n[Similar detailed breakdown]\n\n## H3 Instrumentation (Priority: Medium, Evidence Strength: 6/10)\n[Similar detailed breakdown]\n\n## Implementation Order\n1. Shared locations first (avoid duplication)\n2. H1 specific locations\n3. H2 specific locations\n4. H3 specific locations\n\n## Validation Checklist\n- [ ] All hypotheses have instrumentation coverage\n- [ ] Overlaps identified and coordinated\n- [ ] Log format is consistent\n- [ ] Sensitive data is handled\n- [ ] High-frequency points have aggregation\n- [ ] Edge cases considered\n```\n\n**OUTPUT**:\n- Complete instrumentation implementation plan\n- Set `instrumentationPlanReady` = true\n- Create InstrumentationPlan.md file with detailed plan\n- Update INVESTIGATION_CONTEXT.md with plan summary",
+ "agentRole": "You are an instrumentation architect planning a comprehensive logging strategy. Your goal is to design efficient, coordinated instrumentation that captures all needed evidence in a single execution.",
+ "guidance": [
+ "Review ALL hypotheses together to identify synergies",
+ "Be specific about locations (file, function, line numbers)",
+ "Identify and optimize overlapping instrumentation needs",
+ "Plan log format for consistency and parseability",
+ "Consider practical concerns (volume, sensitivity, performance)",
+ "Create actionable implementation plan, not just theory",
+ "This plan will guide Phase 3 implementation"
+ ],
+ "requireConfirmation": false
+ },
+ {
+ "id": "phase-2h-cognitive-reset",
+ "title": "Phase 2h: Cognitive Reset & Plan Review",
+ "prompt": "**COGNITIVE RESET** - Take a mental step back before implementing instrumentation.\n\n**GOAL**: Review the investigation with fresh eyes and validate the plan before execution.\n\n**STEP 1: Progress Summary**\n- What have we learned so far? (3-5 key insights)\n- What are our top hypotheses? (brief recap)\n- What's our instrumentation strategy? (high-level summary)\n\n**STEP 2: Critical Questions**\n- Are we missing any obvious alternative explanations?\n- Are our hypotheses too similar or too narrow?\n- Is our instrumentation plan efficient and comprehensive?\n- Are we making any unwarranted assumptions?\n- Is there a simpler approach we haven't considered?\n\n**STEP 3: Bias Check**\n- First impression bias: Are we anchored to initial theories?\n- Confirmation bias: Are we seeking evidence that confirms our beliefs?\n- Complexity bias: Are we overcomplicating a simple issue?\n- Recency bias: Are we over-weighting recent findings?\n\n**STEP 4: Sanity Checks**\n- Does the timeline make sense? (When did bug appear vs when hypothesized causes were introduced)\n- Do the symptoms match our theories? (All symptoms explained, no contradictions)\n- Are we investigating the right level? (Too high-level or too low-level)\n- Have we consulted existing documentation/logs adequately?\n\n**STEP 5: Plan Validation**\n- Review the instrumentation plan from Phase 2g\n- Will it actually answer our questions?\n- Are there any gaps or redundancies?\n- Is it safe to execute? (no production impacts, no data corruption risks)\n\n**STEP 6: Proceed or Pivot Decision**\n- **PROCEED**: Plan is sound, move to implementation\n- **REFINE**: Minor adjustments needed (update plan)\n- **PIVOT**: Major issues found (return to earlier phase)\n\n**OUTPUT**:\n- Cognitive reset complete with decision (PROCEED/REFINE/PIVOT)\n- Any plan adjustments documented\n- Set `resetComplete` = true",
+ "agentRole": "You are a senior debugger reviewing the investigation plan with fresh, critical eyes before committing to implementation.",
+ "guidance": [
+ "Be honest about potential biases and blind spots",
+ "Look for simpler explanations we might have missed",
+ "Validate the plan will actually answer our questions",
+ "Don't skip this - catching issues now saves hours later",
+ "It's okay to pivot if major issues are found"
+ ],
+ "requireConfirmation": false
+ },
+ {
+ "id": "phase-3-comprehensive-instrumentation",
+ "title": "Phase 3: Comprehensive Debug Instrumentation",
+ "prompt": "**COMPREHENSIVE DEBUGGING INSTRUMENTATION** - Implement the instrumentation plan from Phase 2g.\n\n**FOLLOW THE PLAN**: Use the instrumentation plan created in Phase 2f as your implementation guide.\n\n**For each hypothesis in hypothesesToValidate, add targeted instrumentation:**\n\n**IMPLEMENTATION STRATEGY**:\n\n1. **Hypothesis-Specific Prefixes**: Each hypothesis gets unique logging prefix\n - H1: `[H1]` prefix for all H1-related logs\n - H2: `[H2]` prefix for all H2-related logs\n - H3: `[H3]` prefix for all H3-related logs\n\n2. **Standard Format for ALL hypotheses**:\n ```javascript\n className.methodName [HX] {timestamp}: Hypothesis-specific message\n ```\n\n3. **Smart Logging Implementation** (apply once, works for all hypotheses):\n ```javascript\n const debugState = { lastMsg: '', count: 0 };\n function smartLog(hypothesisId, msg) {\n const fullMsg = `[${hypothesisId}] ${msg}`;\n if (debugState.lastMsg === fullMsg) {\n debugState.count++;\n if (debugState.count % 10 === 0) {\n console.log(`${fullMsg} x${debugState.count}`);\n }\n } else {\n if (debugState.count > 1) {\n console.log(`Previous message x${debugState.count}`);\n }\n console.log(fullMsg);\n debugState.lastMsg = fullMsg;\n debugState.count = 1;\n }\n }\n ```\n\n4. **Instrumentation Points** for each hypothesis:\n - Add H1 logging at H1-relevant locations\n - Add H2 logging at H2-relevant locations\n - Add H3 logging at H3-relevant locations\n - Locations may overlap - that's fine, both will log\n\n5. **Operation Grouping** (for all hypotheses):\n ```javascript\n console.log(`=== [H1] Operation ${opName} Start ===`);\n // ... H1-relevant code ...\n console.log(`=== [H1] Operation ${opName} End ===`);\n ```\n\n**INSTRUMENTATION CHECKLIST**:\n- [ ] H1 instrumentation added at identified locations\n- [ ] H2 instrumentation added at identified locations \n- [ ] H3 instrumentation added at identified locations\n- [ ] Test instrumentation for hypothesis validation\n- [ ] Deduplication logic implemented\n- [ ] All logs use correct [HX] prefixes\n\n**OUTPUT**:\n- Comprehensive instrumented code with logging for ALL hypotheses\n- Set `allHypothesesInstrumented` = true\n- Document instrumentation locations in INVESTIGATION_CONTEXT.md",
+ "agentRole": "You are instrumenting code to validate ALL hypotheses simultaneously. Your goal is comprehensive, non-redundant logging that enables efficient evidence collection in a single execution.",
+ "guidance": [
+ "Add instrumentation for ALL hypotheses at once",
+ "Use unique [HX] prefixes to distinguish hypothesis-specific logs",
+ "Overlapping instrumentation is acceptable - multiple hypotheses can log at same location",
+ "Ensure non-intrusive implementation that doesn't change behavior",
+ "Single execution will produce logs for all hypotheses"
+ ],
+ "requireConfirmation": false
+ },
+ {
+ "id": "phase-4-unified-evidence-collection",
+ "title": "Phase 4: Unified Evidence Collection",
+ "prompt": "**UNIFIED EVIDENCE COLLECTION** - Run instrumented code ONCE and collect all evidence.\n\n**EXECUTION**:\n1. **Single Test/Reproduction Run**:\n - Execute the reproduction steps with ALL instrumentation active\n - All hypotheses are tested in the same execution\n - Capture complete log output\n\n2. **Log Collection**:\n - Collect ALL debug logs from the single run\n - Logs will contain [H1], [H2], [H3] prefixed messages\n - Save complete log output for analysis\n\n3. **Log Organization**:\n - Parse logs by hypothesis prefix:\n - Extract all [H1] logs → H1 evidence\n - Extract all [H2] logs → H2 evidence \n - Extract all [H3] logs → H3 evidence\n - Preserve chronological order within each hypothesis\n - Note any cross-hypothesis interactions\n\n4. **Test Execution Evidence**:\n - Run instrumented tests\n - Collect test debug output\n - Note any test failures or unexpected behavior\n - Compare with production bug behavior\n\n5. **Evidence Quality Assessment**:\n - Rate overall log quality (1-10)\n - Note if execution reproduced the bug\n - Document any execution issues\n - Identify if additional instrumentation needed\n\n**If log volume >500 lines:**\n- Use aggregateDebugLogs() to create summaries\n- Group by hypothesis and operation\n- Create structured sub-analysis\n\n**OUTPUT**:\n- Complete log output with all hypothesis evidence\n- Organized evidence by hypothesis (H1, H2, H3)\n- Set `evidenceCollected` = true\n- Overall execution quality score",
+ "agentRole": "You are collecting comprehensive evidence from a single instrumented execution. Your goal is to capture all hypothesis-relevant data in one efficient run.",
+ "guidance": [
+ "Single execution tests all hypotheses simultaneously",
+ "Organize evidence by [HX] prefix for analysis",
+ "Preserve complete chronological log for cross-hypothesis insights",
+ "Note any unexpected behaviors or patterns",
+ "If execution fails, document why and attempt to collect partial evidence"
+ ],
+ "requireConfirmation": false
+ },
+ {
+ "id": "phase-5-hypothesis-analysis-loop",
  "type": "loop",
- "title": "
+ "title": "Phase 5: Individual Hypothesis Analysis",
  "loop": {
  "type": "forEach",
  "items": "hypothesesToValidate",
@@ -456,48 +502,20 @@
|
|
|
456
502
|
},
|
|
457
503
|
"body": [
|
|
458
504
|
{
|
|
459
|
-
"id": "
|
|
460
|
-
"title": "
|
|
461
|
-
"prompt": "**
|
|
462
|
-
"agentRole": "You are
|
|
463
|
-
"guidance": [
|
|
464
|
-
"This is hypothesis {{hypothesisIndex + 1}} of 3",
|
|
465
|
-
"Tailor instrumentation to the specific hypothesis",
|
|
466
|
-
"Ensure non-intrusive implementation"
|
|
467
|
-
],
|
|
468
|
-
"requireConfirmation": false
|
|
469
|
-
},
|
|
470
|
-
{
|
|
471
|
-
"id": "loop-phase-4-evidence",
|
|
472
|
-
"title": "Phase 4: Evidence Collection for {{currentHypothesis.id}}",
|
|
473
|
-
"prompt": "**EVIDENCE COLLECTION for {{currentHypothesis.id}}**\n\n**Execute instrumented code and collect evidence:**\n1. Run the instrumented test/reproduction\n2. Collect all {{currentHypothesis.id}}_DEBUG logs\n3. Analyze results against validation criteria\n4. Document evidence quality and relevance\n\n**TEST EXECUTION EVIDENCE**:\n- Run instrumented tests for {{currentHypothesis.id}}\n- Collect test debug output\n- Note any test failures or unexpected behavior\n- Compare with production bug behavior\n\n**EVIDENCE ASSESSMENT:**\n- Does evidence support {{currentHypothesis.id}}? (Yes/No/Partial)\n- Evidence quality score (1-10)\n- Contradicting evidence found?\n- Additional evidence needed?\n\n**If log volume >500 lines, use aggregateDebugLogs() and create sub-analysis prompt.**\n\n**OUTPUT**: Evidence assessment for {{currentHypothesis.id}} with quality scoring",
|
|
474
|
-
"agentRole": "You are collecting and analyzing evidence specifically for hypothesis {{currentHypothesis.id}}.",
|
|
475
|
-
"guidance": [
|
|
476
|
-
"Focus on evidence directly related to this hypothesis",
|
|
477
|
-
"Be objective in assessment - negative evidence is valuable",
|
|
478
|
-
"Track evidence quality quantitatively"
|
|
479
|
-
],
|
|
480
|
-
"requireConfirmation": false
|
|
481
|
-
},
|
|
482
|
-
{
|
|
483
|
-
"id": "loop-phase-5-synthesis",
|
|
484
|
-
"title": "Phase 5: Evidence Synthesis for {{currentHypothesis.id}}",
|
|
485
|
-
"prompt": "**EVIDENCE SYNTHESIS for {{currentHypothesis.id}}**\n\n**Synthesize findings:**\n1. **Evidence Summary**: What did we learn about {{currentHypothesis.id}}?\n2. **Confidence Update**: Based on evidence, rate confidence this is the root cause (0-10)\n3. **Status Update**: Mark hypothesis as Confirmed/Refuted/Needs-More-Evidence\n\n**If {{currentHypothesis.id}} is confirmed with high confidence (>8.0):**\n- Set `rootCauseFound` = true\n- Set `rootCauseHypothesis` = {{currentHypothesis.id}}\n- Update `currentConfidence` with confidence score\n\n**If all hypotheses validated but confidence <9.0:**\n- Consider additional investigation needs\n- Document what evidence is still missing\n\n**Context Update**:\n- Use updateInvestigationContext('Evidence Log', evidence summary for {{currentHypothesis.id}})\n- Every 3 iterations: Use trackInvestigation('Validation Progress', '{{hypothesisIndex + 1}}/3 hypotheses validated')",
|
|
486
|
-
"agentRole": "You are synthesizing evidence to determine if {{currentHypothesis.id}} is the root cause.",
|
|
505
|
+
"id": "analyze-hypothesis-evidence",
|
|
506
|
+
"title": "Analyze Evidence for {{currentHypothesis.id}}",
|
|
507
|
+
"prompt": "**EVIDENCE ANALYSIS for {{currentHypothesis.id}}**\n\n**Hypothesis**: {{currentHypothesis.description}}\n\n**ANALYZE {{currentHypothesis.id}} LOGS**:\n\n1. **Extract Relevant Logs**:\n - Review all [{{currentHypothesis.id}}] prefixed logs from Phase 4\n - Examine log sequence and timing\n - Look for patterns supporting or refuting the hypothesis\n\n2. **Evidence Assessment**:\n - Does evidence support {{currentHypothesis.id}}? (Yes/No/Partial)\n - Evidence quality score (1-10)\n - Contradicting evidence found?\n - Unexpected behaviors observed?\n\n3. **Cross-Hypothesis Insights**:\n - Did other hypothesis logs reveal relevant information?\n - Are there interactions between suspected components?\n - Does timeline analysis suggest different root cause?\n\n4. **Confidence Update**:\n - Based on evidence, rate confidence this is root cause (0-10)\n - What additional evidence would increase confidence?\n - Are there alternative explanations for the observed evidence?\n\n5. **Status Determination**:\n - Mark hypothesis as: Confirmed / Refuted / Needs-More-Evidence / Partially-Confirmed\n - If Confirmed with high confidence (>8.0):\n - Set `rootCauseFound` = true\n - Set `rootCauseHypothesis` = {{currentHypothesis.id}}\n - Set `currentConfidence` = confidence score\n\n**CONTEXT UPDATE**:\n- Use updateInvestigationContext('Evidence Log', evidence summary for {{currentHypothesis.id}})\n- Use trackInvestigation('Validation Progress', '{{hypothesisIndex + 1}}/3 hypotheses analyzed')\n\n**OUTPUT**: Complete evidence analysis and status for {{currentHypothesis.id}}",
|
|
508
|
+
"agentRole": "You are analyzing evidence collected from the unified execution to determine if {{currentHypothesis.id}} is the root cause.",
|
|
487
509
|
"guidance": [
|
|
488
|
-
"
|
|
489
|
-
"
|
|
490
|
-
"Be
|
|
510
|
+
"Analyze logs specific to this hypothesis ({{hypothesisIndex + 1}} of 3)",
|
|
511
|
+
"Consider evidence from all hypotheses - may reveal interactions",
|
|
512
|
+
"Be objective - negative evidence is valuable",
|
|
513
|
+
"Update hypothesis status based on concrete evidence",
|
|
514
|
+
"If high confidence root cause found, document thoroughly"
|
|
491
515
|
],
|
|
492
516
|
"requireConfirmation": false
|
|
493
517
|
}
|
|
494
518
|
],
|
|
495
|
-
"runCondition": {
|
|
496
|
-
"and": [
|
|
497
|
-
{ "var": "rootCauseFound", "not_equals": true },
|
|
498
|
-
{ "var": "validationIterations", "lt": 3 }
|
|
499
|
-
]
|
|
500
|
-
},
|
|
501
519
|
"requireConfirmation": false
|
|
502
520
|
},
|
|
503
521
|
{
|
|
@@ -507,7 +525,7 @@
|
|
|
507
525
|
"var": "currentConfidence",
|
|
508
526
|
"lt": 8.0
|
|
509
527
|
},
|
|
510
|
-
"prompt": "**CONTROLLED EXPERIMENTATION** - When observation isn't enough, experiment!\n\n**Current Investigation Status**: Leading hypothesis (Confidence: {{currentConfidence}}/10)\n\n**EXPERIMENT TYPES** (use controlledModification()):\n\n1. **Guard Additions (Non-Breaking)**:\n ```javascript\n // Add defensive check that logs but doesn't change behavior\n if (unexpectedCondition) {\n console.error('[H1_GUARD] Unexpected state detected:', state);\n // Continue normal execution\n }\n ```\n\n2. **Assertion Injections**:\n ```javascript\n // Add assertion that would fail if hypothesis is correct\n console.assert(expectedCondition, '[H1_ASSERT] Hypothesis H1 violated!');\n ```\n\n3. **Minimal Fix Test**:\n ```javascript\n // Apply minimal fix for hypothesis, see if bug disappears\n if (process.env.DEBUG_FIX_H1 === 'true') {\n // Apply hypothesized fix\n return fixedBehavior();\n }\n ```\n\n4. **Controlled Breaking**:\n ```javascript\n // Temporarily break suspected component to verify involvement\n if (process.env.DEBUG_BREAK_H1 === 'true') {\n throw new Error('[H1_BREAK] Intentionally breaking to test hypothesis');\n }\n ```\n\n**PROTOCOL**:\n1. Choose experiment type based on confidence and risk\n2. Implement modification with clear DEBUG markers\n3. Use createInvestigationBranch() if not already on investigation branch\n4. Commit: `git commit -m \"DEBUG: {{experiment_type}} for hypothesis investigation\"`\n5. Run reproduction steps\n6. Use collectEvidence() to gather results\n7. Revert changes: `git revert HEAD`\n8. Document results in ExperimentResults/hypothesis-experiment.md\n\n**SAFETY LIMITS**:\n- Max 3 experiments per hypothesis\n- Each experiment in separate commit\n- Always revert after evidence collection\n- Document everything in INVESTIGATION_CONTEXT.md\n\n**UPDATE**:\n- Hypothesis confidence based on experimental results\n- Use updateInvestigationContext('Experiment Results', experiment details and outcomes)\n- Track failed experiments in 'Dead Ends & Lessons' section",
|
|
528
|
+
"prompt": "**CONTROLLED EXPERIMENTATION** - When observation isn't enough, experiment!\n\n**Current Investigation Status**: Leading hypothesis (Confidence: {{currentConfidence}}/10)\n\n**⚠️ SAFETY PROTOCOLS (MANDATORY)**:\n\n1. **Git Branch Required**:\n - MUST be on investigation branch (use createInvestigationBranch() if not)\n - Verify with `git branch --show-current`\n - NEVER experiment directly on main/master\n\n2. **Pre-Experiment Baseline**:\n - Commit clean state: `git commit -m \"PRE-EXPERIMENT: baseline for {{hypothesis.id}}\"`\n - Record current test results\n - Document baseline behavior\n\n3. **Environment Restriction**:\n - ONLY run in test/dev environment\n - NEVER in production or staging\n - Set environment check: `if (process.env.NODE_ENV !== 'development') { throw new Error('Experiments only in dev'); }`\n\n4. **Automatic Revert**:\n - After evidence collection: `git revert HEAD --no-edit`\n - Verify code returned to baseline\n - Run tests to confirm clean state\n\n5. **Approval Gates**:\n - Low automation: Require approval for ALL experiments\n - Medium automation: Require approval for breaking/minimal-fix experiments\n - High automation: Auto-approve guards/logs only\n\n6. **Documentation**:\n - Create ExperimentLog.md entry with:\n - Timestamp, experiment type, hypothesis ID\n - Rationale and expected outcome\n - Actual outcome and evidence\n - Revert status (confirmed/failed)\n\n7. **Hard Limits**:\n - Max 3 experiments total (prevent endless experimentation)\n - Track with `experimentCount` context variable\n - Exit if limit reached, recommend different approach\n\n8. **Rollback Verification**:\n - After revert, run full test suite\n - Verify no unintended changes remain\n - Check git status is clean\n\n**EXPERIMENT TYPES** (use controlledModification()):\n\n1. **Guard Additions (Non-Breaking)**:\n ```javascript\n // Add defensive check that logs but doesn't change behavior\n if (unexpectedCondition) {\n console.error('[H1_GUARD] Unexpected state detected:', state);\n // Continue normal execution\n }\n ```\n\n2. **Assertion Injections**:\n ```javascript\n // Add assertion that would fail if hypothesis is correct\n console.assert(expectedCondition, '[H1_ASSERT] Hypothesis H1 violated!');\n ```\n\n3. **Minimal Fix Test**:\n ```javascript\n // Apply minimal fix for hypothesis, see if bug disappears\n if (process.env.DEBUG_FIX_H1 === 'true') {\n // Apply hypothesized fix\n return fixedBehavior();\n }\n ```\n\n4. **Controlled Breaking**:\n ```javascript\n // Temporarily break suspected component to verify involvement\n if (process.env.DEBUG_BREAK_H1 === 'true') {\n throw new Error('[H1_BREAK] Intentionally breaking to test hypothesis');\n }\n ```\n\n**PROTOCOL**:\n1. Choose experiment type based on confidence and risk\n2. Implement modification with clear DEBUG markers\n3. Use createInvestigationBranch() if not already on investigation branch\n4. Commit: `git commit -m \"DEBUG: {{experiment_type}} for hypothesis investigation\"`\n5. Run reproduction steps\n6. Use collectEvidence() to gather results\n7. Revert changes: `git revert HEAD`\n8. 
Document results in ExperimentResults/hypothesis-experiment.md\n\n**SAFETY LIMITS**:\n- Max 3 experiments per hypothesis\n- Each experiment in separate commit\n- Always revert after evidence collection\n- Document everything in INVESTIGATION_CONTEXT.md\n\n**UPDATE**:\n- Hypothesis confidence based on experimental results\n- Use updateInvestigationContext('Experiment Results', experiment details and outcomes)\n- Track failed experiments in 'Dead Ends & Lessons' section",
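To make the "minimal fix test" and the dev-only environment guard concrete, here is a minimal sketch under stated assumptions: DEBUG_FIX_H1, resolveValue, and fixedBehavior are hypothetical names used only for illustration, and the default path leaves behavior unchanged when the experiment flag is off.

```javascript
// Hypothetical experiment toggle combining the dev-only guard (safety protocol 3)
// with the minimal-fix experiment type.
function fixedBehavior(value) {
  // Placeholder for the hypothesized minimal fix.
  return value;
}

function resolveValue(original) {
  if (process.env.DEBUG_FIX_H1 === 'true') {
    if (process.env.NODE_ENV !== 'development') {
      throw new Error('Experiments only in dev');
    }
    console.log('[H1_FIX] applying hypothesized fix');
    return fixedBehavior(original);
  }
  return original; // normal execution when the experiment flag is off
}

console.log(resolveValue(42));
```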
|
|
511
529
|
"agentRole": "You are a careful experimenter using controlled code modifications to validate hypotheses. Safety and reversibility are paramount.",
|
|
512
530
|
"guidance": [
|
|
513
531
|
"Start with non-breaking experiments (guards, logs)",
|
|
@@ -535,8 +553,8 @@
|
|
|
535
553
|
]
|
|
536
554
|
},
|
|
537
555
|
{
|
|
538
|
-
"id": "phase-
|
|
539
|
-
"title": "Phase
|
|
556
|
+
"id": "phase-3b-observability-setup",
|
|
557
|
+
"title": "Phase 3b: Distributed System Observability",
|
|
540
558
|
"runCondition": {
|
|
541
559
|
"var": "isDistributed",
|
|
542
560
|
"equals": true
|
|
@@ -552,8 +570,8 @@
|
|
|
552
570
|
]
|
|
553
571
|
},
|
|
554
572
|
{
|
|
555
|
-
"id": "phase-
|
|
556
|
-
"title": "Phase
|
|
573
|
+
"id": "phase-4c-distributed-evidence",
|
|
574
|
+
"title": "Phase 4c: Multi-Service Evidence Collection",
|
|
557
575
|
"runCondition": {
|
|
558
576
|
"var": "isDistributed",
|
|
559
577
|
"equals": true
|
|
@@ -604,31 +622,10 @@
|
|
|
604
622
|
],
|
|
605
623
|
"hasValidation": true
|
|
606
624
|
},
|
|
607
|
-
{
|
|
608
|
-
"id": "phase-2c-hypothesis-assumptions",
|
|
609
|
-
"title": "Phase 2c: Hypothesis Assumption Audit",
|
|
610
|
-
"prompt": "**AUDIT** each hypothesis for hidden assumptions:\n\n**FOR EACH HYPOTHESIS**:\n- List implicit assumptions\n- Rate assumption confidence (1-10)\n- Identify verification approach\n\n**REJECT** hypotheses built on unverified assumptions.",
|
|
611
|
-
"agentRole": "You are a rigorous scientist who rejects any hypothesis not grounded in verified facts.",
|
|
612
|
-
"guidance": [
|
|
613
|
-
"EXPLICIT LISTING: Write out every assumption, no matter how obvious it seems",
|
|
614
|
-
"CONFIDENCE SCORING: Rate 1-10 based on evidence quality, not intuition",
|
|
615
|
-
"VERIFICATION PLAN: For each assumption, specify how it can be tested",
|
|
616
|
-
"REJECTION CRITERIA: Any assumption with confidence <7 requires verification",
|
|
617
|
-
"DOCUMENT RATIONALE: Explain why each assumption is accepted or needs testing"
|
|
618
|
-
],
|
|
619
|
-
"validationCriteria": [
|
|
620
|
-
{
|
|
621
|
-
"type": "contains",
|
|
622
|
-
"value": "Assumption confidence",
|
|
623
|
-
"message": "Must rate assumption confidence for each hypothesis"
|
|
624
|
-
}
|
|
625
|
-
],
|
|
626
|
-
"hasValidation": true
|
|
627
|
-
},
|
|
628
625
|
{
|
|
629
626
|
"id": "phase-6-diagnostic-writeup",
|
|
630
627
|
"title": "Phase 6: Comprehensive Diagnostic Writeup",
|
|
631
|
-
"prompt": "**DIAGNOSTIC
|
|
628
|
+
"prompt": "**FINAL DIAGNOSTIC DOCUMENTATION** - I will create comprehensive writeup enabling effective bug fixing and knowledge transfer.\n\n**STEP 1: Executive Summary**\n- **Bug Summary**: Concise description of issue and impact\n- **Root Cause**: Clear, non-technical explanation of what is happening\n- **Confidence Level**: Final confidence assessment with calculation methodology\n- **Scope**: What systems, users, or scenarios are affected\n\n**STEP 2: Technical Deep Dive**\n- **Root Cause Analysis**: Detailed technical explanation of failure mechanism\n- **Code Component Analysis**: Specific files, functions, and lines with exact locations\n- **Execution Flow**: Step-by-step sequence of events leading to bug\n- **State Analysis**: How system state contributes to failure\n\n**STEP 3: Investigation Methodology**\n- **Investigation Timeline**: Chronological summary with phase time investments\n- **Hypothesis Evolution**: Complete record of hypotheses (H1-H5) with status changes\n- **Evidence Assessment**: Rating and reliability of evidence sources with key citations\n\n**STEP 4: Historical Context & Patterns**\n- **Similar Bugs**: Reference findings from findSimilarBugs() and SimilarPatterns.md\n- **Previous Fixes**: How similar issues were resolved\n- **Recurring Patterns**: Identify if this is part of a larger pattern\n- **Lessons Learned**: What can be applied from past experiences\n\n**STEP 5: Knowledge Transfer & Action Plan**\n- **Skill Requirements**: Technical expertise needed for understanding and fixing\n- **Prevention & Review**: Specific measures and code review checklist items\n- **Action Items**: Immediate mitigation steps and permanent fix areas with timelines\n- **Testing Strategy**: Comprehensive verification approach for fixes\n- **Recommended Next Investigations** (if confidence < 9.0):\n - Additional instrumentation locations and data points not yet captured\n - Alternative hypotheses to explore (theories that were deprioritized)\n - External expertise to consult (domain experts, similar bugs)\n - Environmental factors to test (load, concurrency, timing, config variations)\n - Expanded scope (related components, upstream/downstream systems)\n - Prioritized next steps based on evidence gaps\n\n**STEP 6: Context Finalization**\n- **Final Update**: Use updateInvestigationContext('Final Report', link to diagnostic report)\n- **Archive Context**: Ensure INVESTIGATION_CONTEXT.md is complete for future reference\n- **Knowledge Base**: Consider key findings for team knowledge base\n\n**DELIVERABLE**: Enterprise-grade diagnostic report enabling confident bug fixing, knowledge transfer, and organizational learning.",
|
|
632
629
|
"agentRole": "You are a senior technical writer and diagnostic documentation specialist with expertise in creating comprehensive, actionable bug reports for enterprise environments. Your strength lies in translating complex technical investigations into clear, structured documentation that enables effective problem resolution, knowledge transfer, and organizational learning. You excel at creating reports that serve immediate fixing needs, long-term system improvement, and team collaboration.",
|
|
633
630
|
"guidance": [
|
|
634
631
|
"ENTERPRISE FOCUS: Write for multiple stakeholders including developers, managers, and future team members",
|