@exaudeus/workrail 0.6.1-beta.8 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/dist/application/services/workflow-service.js +41 -11
  2. package/dist/cli.js +44 -0
  3. package/dist/config/feature-flags.d.ts +33 -0
  4. package/dist/config/feature-flags.js +106 -0
  5. package/dist/container.d.ts +2 -0
  6. package/dist/container.js +3 -0
  7. package/dist/mcp-server.js +31 -6
  8. package/package.json +1 -1
  9. package/web/assets/images/favicon-amber-16.png +0 -0
  10. package/web/assets/images/favicon-amber-32.png +0 -0
  11. package/web/assets/images/favicon-white-16-clean.png +0 -0
  12. package/web/assets/images/favicon-white-32-clean.png +0 -0
  13. package/web/assets/images/icon-amber-192.png +0 -0
  14. package/web/assets/images/icon-amber-512.png +0 -0
  15. package/web/assets/images/icon-amber.svg +27 -0
  16. package/web/assets/images/icon-white-192-clean.png +0 -0
  17. package/web/assets/images/icon-white-512-clean.png +0 -0
  18. package/web/assets/images/icon-white.svg +27 -0
  19. package/web/manifest.json +1 -1
  20. package/workflows/CHANGELOG-bug-investigation.md +167 -85
  21. package/workflows/documentation-update-workflow.json +334 -345
  22. package/workflows/examples/dashboard-template-workflow.json +176 -0
  23. package/workflows/systematic-bug-investigation-with-loops.backup-20251106-125543.json +751 -0
  24. package/workflows/systematic-bug-investigation-with-loops.json +727 -664
  25. package/web/ADAPTIVE_BACKGROUND_SYSTEM.md +0 -523
  26. package/web/BACKGROUND_ENHANCEMENTS.md +0 -419
  27. package/web/COMPONENT_LIBRARY.md +0 -755
  28. package/web/COMPONENT_MIGRATION_GUIDE.md +0 -537
  29. package/web/assets/images/favicon-white-16.png +0 -0
  30. package/web/assets/images/favicon-white-32.png +0 -0
  31. package/web/assets/images/icon-white-192.png +0 -0
  32. package/web/assets/images/icon-white-512.png +0 -0
  33. package/web/assets/images/icon-white.png +0 -0
  34. package/workflows/dashboard-template-workflow.json +0 -337
  35. package/workflows/deep-documentation-workflow.json +0 -0
  36. package/workflows/systemic-bug-investigation-with-loops.json +0 -645
@@ -1,645 +0,0 @@
1
- {
2
- "id": "systematic-bug-investigation-with-loops",
3
- "name": "Systematic Bug Investigation Workflow",
4
- "version": "1.1.0-beta.1",
5
- "description": "A comprehensive workflow for systematic bug and failing test investigation that prevents LLMs from jumping to conclusions. Enforces thorough evidence gathering, hypothesis formation, debugging instrumentation, and validation to achieve near 100% certainty about root causes. This workflow does NOT fix bugs - it produces detailed diagnostic writeups that enable effective fixing by providing complete understanding of what is happening, why it's happening, and supporting evidence.",
6
- "clarificationPrompts": [
7
- "What type of system is this? (web app, mobile app, backend service, desktop app, etc.)",
8
- "How consistently can you reproduce this bug? (always reproducible, sometimes reproducible, rarely reproducible)",
9
- "What was the last known working version or state if applicable?",
10
- "Are there any time constraints or urgency factors for this investigation?",
11
- "What level of system access do you have? (full codebase, limited access, production logs only)",
12
- "What existing documentation is available? (README files, architecture docs, API docs, design documents, runbooks)",
13
- "Do you have access to existing logs? (production logs, error logs, debug logs, metrics, traces)",
14
- "Do you have preferences for handling large log volumes? (sub-chat analysis, inline summaries only, or no preference for automatic decision)"
15
- ],
16
- "preconditions": [
17
- "User has identified a specific bug or failing test to investigate",
18
- "Agent has access to codebase analysis tools (grep, file readers, etc.)",
19
- "Agent has access to build/test execution tools for the project type",
20
- "User can provide error messages, stack traces, or test failure output",
21
- "Bug is reproducible with specific steps or a minimal test case"
22
- ],
23
- "metaGuidance": [
24
- "**CRITICAL WORKFLOW DISCIPLINE:**",
25
- "HIGH CONFIDENCE ≠ INVESTIGATION COMPLETE: Achieving 8-10/10 confidence in a hypothesis is excellent progress but does NOT mean the workflow is done.",
26
- "COMPLETE ALL PHASES: You MUST complete ALL phases (0 through 6) regardless of confidence level. Each phase builds critical evidence and documentation.",
27
- "WORKFLOW COMPLETION FLAG: Only set isWorkflowComplete=true when you complete Phase 6 (Comprehensive Diagnostic Writeup) AND produce the full deliverable.",
28
- "DO NOT SKIP PHASES: Even with high confidence, you must complete instrumentation (Phase 3), evidence collection (Phase 4), analysis (Phase 5), and writeup (Phase 6).",
29
- "PHASE PROGRESSION: An investigation that stops at hypothesis formation (Phase 2) or evidence collection (Phase 4) is INCOMPLETE - the diagnostic writeup is the required deliverable.",
30
- "**FUNCTION DEFINITIONS:**",
31
- "fun instrumentCode(location, hypothesis) = 'Add debug logs at {location} for {hypothesis}. Format: ClassName.method [{hypothesis}]: message. Include timestamp, thread ID if concurrent.'",
32
- "fun collectEvidence(hypothesis) = 'Run instrumented code, collect logs, analyze results. Score evidence quality 1-10. Document in Evidence/{hypothesis}.md.'",
33
- "fun updateHypothesisLog(id, status, evidence) = 'Update INVESTIGATION_CONTEXT.md section {id} with {status} and {evidence}. Include confidence score.'",
34
- "fun analyzeTests(component) = 'Find all tests for {component} using grep_search. Check coverage, recent changes, what they validate vs miss. Run with --debug flag.'",
35
- "fun recursiveAnalysis(component, depth=3) = 'Analyze {component} to {depth} levels. L1: implementation, L2: direct deps, L3: transitive deps. Document each level.'",
36
- "fun controlledModification(type, location) = 'Make {type} change at {location}. Types: guard (add logging), assert (add assertion), fix (minimal fix), break (controlled failure). Commit: DEBUG: {type} at {location}'",
37
- "fun checkHypothesisInTests(hypothesis) = 'Search existing tests for evidence. Direct: tests of suspected components. Indirect: tests that would fail if true. Document in TestEvidence/{hypothesis}.md'",
38
- "fun aggregateDebugLogs(pattern, timeWindow=100) = 'Deduplicate logs matching {pattern}. Output: {pattern} x{count} in {timeWindow}ms, variations: {unique_values}'",
39
- "fun createInvestigationBranch() = 'git checkout -b investigate/{bug-id}-{timestamp}. If git unavailable, create Investigation/{timestamp}/ directory for artifacts.'",
40
- "fun trackInvestigation(phase, status) = 'Update INVESTIGATION_CONTEXT.md progress: ✅ {completed}, 🔄 {phase}, ⏳ Remaining: {list}, 📊 Confidence: {score}/10'",
41
- "fun updateInvestigationContext(section, content) = 'Update INVESTIGATION_CONTEXT.md {section} with {content}. Include timestamp. If section does not exist, create it. Preserve all other sections.'",
42
- "fun findSimilarBugs() = 'Search for: 1) Similar error patterns in codebase, 2) Previous fixes in git history, 3) Related test cases. Document in SimilarPatterns.md'",
43
- "fun visualProgress() = 'Show: ✅ Phase 0 | ✅ Phase 1 | 🔄 Phase 2 | ⏳ Phase 3-5 | ⏳ Phase 6 | 📊 35% Complete. Include time spent per phase.'",
44
- "fun applyDebugPreferences() = 'Apply user debugging preferences from userDebugPreferences context variable. Adapt logging verbosity, tool selection, output format.'",
45
- "fun addResumptionJson(phase) = 'Update INVESTIGATION_CONTEXT.md resumption section with: workflowId, completedSteps up to {phase}, all context variables. Include workflow_get and workflow_next instructions.'",
46
- "**USAGE:** When you see function calls like instrumentCode() or analyzeTests(), execute the full instructions defined above.",
47
- "INVESTIGATION DISCIPLINE: Never propose fixes or solutions until Phase 6 (Comprehensive Diagnostic Writeup). Focus entirely on systematic evidence gathering and analysis.",
48
- "HYPOTHESIS RIGOR: All hypotheses must be based on concrete evidence from code analysis with quantified scoring (1-10 scales). Maximum 5 hypotheses per investigation.",
49
- "DEBUGGING INSTRUMENTATION: Always implement debugging mechanisms before running tests - logs, print statements, or test modifications that will provide evidence.",
50
- "EVIDENCE THRESHOLD: Require minimum 3 independent sources of evidence before confirming any hypothesis. Use objective verification criteria.",
51
- "SYSTEMATIC PROGRESSION: Complete each investigation phase fully before proceeding. Each phase builds critical context for the next with structured documentation.",
52
- "CONFIDENCE CALIBRATION: Use mathematical confidence framework with 9.0/10 minimum threshold. Actively challenge conclusions with adversarial analysis.",
53
- "UNCERTAINTY ACKNOWLEDGMENT: Explicitly document all remaining unknowns and their potential impact. No subjective confidence assessments.",
54
- "THOROUGHNESS: For complex bugs, recursively analyze dependencies and internals of identified components to ensure full picture.",
55
- "TEST INTEGRATION: Leverage existing tests to validate hypotheses where possible.",
56
- "**LOGGING STANDARDS:**",
57
- "LOG FORMAT: Always use 'ClassName.methodName [hypothesisId] {timestamp}: message'. For concurrent code, add thread/worker ID.",
58
- "LOG DEDUPLICATION: Implement in debug code: if (lastMsg === currentMsg) { count++; if (count % 10 === 0) log(`${currentMsg} x${count}`); } else { if (count > 1) log(`Previous: x${count}`); log(currentMsg); lastMsg = currentMsg; count = 1; }",
59
- "LOG AGGREGATION: For high-frequency events, create summaries: 'Event X occurred 847 times between 10:23:45-10:23:47, unique values: [val1: 623, val2: 224]'",
60
- "LOG WINDOWS: Group related logs within 50-100ms. Mark groups with '=== Operation: XYZ Start ===' and '=== Operation: XYZ End (duration: 73ms) ==='",
61
- "LOG CONTEXT: Include hypothesis ID in all debug logs. Use prefixes like 'H1_DEBUG:', 'H2_TRACE:', 'H3_ERROR:'",
62
- "LOG ANALYSIS OFFLOADING: For voluminous logs (>500 lines), offload analysis to sub-chats with structured prompts. See Phase 4 for detailed sub-analysis implementation.",
63
- "RECURSION DEPTH: Limit recursive analysis to 3 levels deep to prevent analysis paralysis while ensuring thoroughness.",
64
- "INVESTIGATION BOUNDS: If investigation exceeds 20 steps or 4 hours without root cause, pause and reassess approach with user.",
65
- "AUTOMATION LEVELS: High=auto-approve >8.0 confidence decisions, Medium=standard confirmations, Low=extra confirmations for safety. Control workflow autonomy based on user preference.",
66
- "CONTEXT DOCUMENTATION: Maintain INVESTIGATION_CONTEXT.md throughout. Update after major milestones, failures, or user interventions to enable seamless handoffs between sessions. Include explicit resumption instructions using workflow_get and workflow_next.",
67
- "GIT FALLBACK STRATEGY: If git unavailable, gracefully skip commits/branches, log changes manually in CONTEXT.md with timestamps, warn user, document modifications for manual control.",
68
- "GIT ERROR HANDLING: Use run_terminal_cmd for git operations; if fails, output exact command for user manual execution. Never halt investigation due to git unavailability.",
69
- "TOOL AVAILABILITY AWARENESS: Check debugging tool availability before investigation design. Have fallbacks for when primary tools unavailable (grep→file_search, etc).",
70
- "SECURITY PROTOCOLS: Sanitize sensitive data in logs/reproduction steps. Be mindful of exposing credentials, PII, or system internals during evidence collection phases.",
71
- "DYNAMIC RE-TRIAGE: Allow complexity upgrades during investigation if evidence reveals deeper issues. Safe downgrades only with explicit user confirmation after evidence review.",
72
- "DEVIL'S ADVOCATE REVIEW: Actively challenge primary hypothesis with available evidence. Seek alternative explanations and rate alternative likelihood before final confidence assessment.",
73
- "COLLABORATIVE HANDOFFS: Structure documentation for peer review and team coordination. Include methodology, reasoning, and complete evidence chain for knowledge transfer.",
74
- "FAILURE BOUNDS: Track investigation progress. If >20 steps or >4 hours without breakthrough, pause for user guidance. Document dead ends to prevent redundant work in future sessions.",
75
- "COGNITIVE BREAKS: After 10 investigation steps, pause and summarize progress to reset perspective.",
76
- "RUBBER DUCK: Verbalize hypotheses in sub-prompts to externalize reasoning and catch logical gaps.",
77
- "COLLABORATION READY: Document clearly for handoffs when stuck beyond iteration limits."
78
- ],
79
- "steps": [
80
- {
81
- "id": "phase-0-triage",
82
- "title": "Phase 0: Initial Triage & Context Gathering",
83
- "prompt": "**SYSTEMATIC INVESTIGATION BEGINS** - Your mission is to achieve near 100% certainty about this bug's root cause through systematic evidence gathering. NO FIXES will be proposed until Phase 6.\n\n**STEP 1: Bug Report Analysis**\nPlease provide the complete bug context:\n- **Bug Description**: What is the observed behavior vs expected behavior?\n- **Error Messages/Stack Traces**: Paste the complete error output\n- **Reproduction Steps**: How can this bug be consistently reproduced?\n- **Environment Details**: OS, language version, framework version, etc.\n- **Recent Changes**: Any recent commits, deployments, or configuration changes?\n\n**STEP 2: Project Type Classification**\nBased on the information provided, I will classify the project type and set debugging strategies:\n- **Languages/Frameworks**: Primary tech stack\n- **Build System**: Maven, Gradle, npm, etc.\n- **Testing Framework**: JUnit, Jest, pytest, etc.\n- **Logging System**: Available logging mechanisms\n- **Architecture**: Monolithic, microservices, distributed, serverless, etc.\n\n**STEP 3: Complexity Assessment**\nI will analyze the bug complexity using these criteria:\n- **Simple**: Single function/method, clear error path, minimal dependencies\n- **Standard**: Multiple components, moderate investigation required\n- **Complex**: Cross-system issues, race conditions, complex state management\n\n**STEP 4: Automation Level Selection**\nAsk the user: \"What automation level would you prefer for this investigation?\"\n- **High**: Auto-approve decisions with confidence >8.0, minimal confirmations\n- **Medium**: Standard confirmations for key decisions\n- **Low**: Extra confirmations for safety, manual approval for all changes\n\n**OUTPUTS**: Set context variables:\n- `projectType`, `bugComplexity`, `debuggingMechanism`\n- `isDistributed` (true if architecture involves microservices/distributed systems)\n- `automationLevel` (High/Medium/Low based on user preference)",
84
- "agentRole": "You are a senior debugging specialist and bug triage expert with 15+ years of experience across multiple technology stacks. Your expertise lies in quickly classifying bugs, understanding project architectures, and determining appropriate investigation strategies. You excel at extracting critical information from bug reports and setting up systematic investigation approaches.",
85
- "guidance": [
86
- "CLASSIFICATION ACCURACY: Proper complexity assessment determines investigation depth - be thorough but decisive",
87
- "CONTEXT CAPTURE: Gather complete environmental and situational context now to avoid gaps later",
88
- "DEBUGGING STRATEGY: Choose debugging mechanisms appropriate for the project type and bug complexity",
89
- "NO ASSUMPTIONS: If critical information is missing, explicitly request it before proceeding"
90
- ]
91
- },
92
- {
93
- "id": "phase-0a-assumption-check",
94
- "title": "Phase 0a: Assumption Verification Checkpoint",
95
- "prompt": "**ASSUMPTION CHECK** - Before proceeding, verify key assumptions to prevent bias.\n\n**VERIFY**:\n1. **Data State**: Confirm variable types and null handling\n2. **API/Library**: Check documentation for actual vs assumed behavior\n3. **Environment**: Verify bug exists in clean environment\n4. **Recent Changes**: Review last 5 commits for relevance\n\n**OUTPUT**: List verified assumptions with evidence sources.",
96
- "agentRole": "You are a skeptical analyst who challenges every assumption. Question everything that hasn't been explicitly verified.",
97
- "guidance": [
98
- "Use analysis tools to verify, don't assume",
99
- "Document each assumption with its verification method",
100
- "Flag any unverifiable assumptions for tracking",
101
- "CHECK API DOCS: Never assume function behavior from names - verify actual documentation",
102
- "VERIFY DATA TYPES: Use debugger or logs to confirm actual runtime types and values",
103
- "TEST ENVIRONMENT: Reproduce in minimal environment to rule out configuration issues"
104
- ]
105
- },
106
- {
107
- "id": "phase-0b-user-preferences",
108
- "title": "Phase 0b: Identify User Debugging Preferences",
109
- "prompt": "**USER DEBUGGING PREFERENCES** - Identify and document user-specific debugging preferences.\n\n**CHECK FOR PREFERENCES IN:**\n1. **User Settings/Memory**: Any stored debugging preferences\n2. **Project Documentation**: Team debugging standards\n3. **Previous Instructions**: Past user guidance on debugging approach\n\n**CATEGORIZE PREFERENCES:**\n- **Debugging Tools**: Preference for debugger vs logs vs traces\n- **Log Verbosity**: Detailed vs concise output\n- **Output Format**: Structured logs vs human-readable\n- **Testing Approach**: Unit tests vs integration tests focus\n- **Commit Style**: Conventional commits vs descriptive\n- **Documentation**: Inline comments vs separate docs\n- **Error Handling**: Fail fast vs defensive programming\n\n**IF NO EXPLICIT PREFERENCES:**\nAsk user:\n- \"Do you prefer verbose logging or concise summaries?\"\n- \"Should I use interactive debuggers or rely on log analysis?\"\n- \"Any specific tools or approaches your team prefers?\"\n\n**OUTPUT**: Set `userDebugPreferences` context variable with categorized preferences.\n\n**APPLY**: Use applyDebugPreferences() throughout investigation to adapt approach.",
110
- "agentRole": "You are a debugging preferences specialist who understands how different teams and developers approach problem-solving. You excel at identifying and applying user-specific debugging styles.",
111
- "guidance": [
112
- "This step ensures the investigation aligns with user/team practices",
113
- "Capture both explicit and implicit preferences",
114
- "Default to standard practices if no preferences found",
115
- "These preferences will be applied throughout the workflow"
116
- ],
117
- "requireConfirmation": false
118
- },
119
- {
120
- "id": "phase-0c-tool-check",
121
- "title": "Phase 0c: Tool Availability Verification",
122
- "prompt": "**TOOL AVAILABILITY CHECK** - Verify required debugging tools before investigation.\n\n**CORE TOOLS CHECK:**\n1. **Analysis Tools**:\n - grep_search: Text pattern searching\n - read_file: File content reading\n - codebase_search: Semantic code search\n - Test availability, note any failures\n\n2. **Git Operations**:\n - Check git availability: `git --version`\n - If unavailable, set `gitAvailable = false`\n - Plan fallback: manual change tracking\n\n3. **Build/Test Tools** (based on projectType):\n - npm/yarn for JavaScript\n - Maven/Gradle for Java\n - pytest/unittest for Python\n - Document which are available\n\n4. **Debugging Tools**:\n - Language-specific debuggers\n - Profilers if needed\n - Log aggregation tools\n\n**FALLBACK STRATEGIES:**\n- grep_search fails → use file_search\n- codebase_search fails → use grep_search with context\n- Git unavailable → track changes in INVESTIGATION_CONTEXT.md\n- Build tools missing → focus on static analysis\n\n**OUTPUT**:\n- Set `availableTools` context variable\n- Set `toolLimitations` with any restrictions\n- Document fallback strategies in context\n\n**ADAPTATION**: Adjust investigation approach based on available tools.",
123
- "agentRole": "You are a tool availability specialist ensuring the investigation can proceed smoothly with available resources. You excel at creating fallback strategies.",
124
- "guidance": [
125
- "Test each tool category systematically",
126
- "Don't fail if some tools are unavailable - adapt",
127
- "Document limitations clearly for user awareness",
128
- "Prefer degraded functionality over investigation failure"
129
- ],
130
- "requireConfirmation": false
131
- },
132
- {
133
- "id": "phase-0d-create-context",
134
- "title": "Phase 0d: Initialize Investigation Context",
135
- "prompt": "**CREATE INVESTIGATION CONTEXT** - Initialize comprehensive tracking document.\n\nUse createInvestigationBranch() to set up version control, then create INVESTIGATION_CONTEXT.md:\n\n```markdown\n# Investigation Context\n\n## 1. Bug Summary\n- **ID**: {{bugId || 'investigation-' + Date.now()}}\n- **Description**: [from bug report]\n- **Complexity**: {{bugComplexity}}\n- **Started**: {{new Date().toISOString()}}\n- **Status**: Phase 0d - Context Initialization\n- **Automation Level**: {{automationLevel}}\n\n## 2. Progress Tracking\n{{visualProgress()}}\n✅ Completed: Phase 0 (Triage), Phase 0a (Assumptions), Phase 0b (User Preferences), Phase 0c (Tools)\n🔄 Current: Phase 0d (Context Creation)\n⏳ Remaining: Phase 1 (Analysis), Phase 2 (Hypotheses), Phase 3-5 (Validation), Phase 6 (Writeup)\n📊 Confidence: 0/10\n\n## 3. Environment & Setup\n- **Project Type**: {{projectType}}\n- **Debugging Mechanism**: {{debuggingMechanism}}\n- **Architecture**: {{isDistributed ? 'Distributed' : 'Monolithic'}}\n- **User Preferences**: {{userDebugPreferences}}\n- **Available Tools**: {{availableTools}}\n- **Tool Limitations**: {{toolLimitations || 'None'}}\n\n## 4. Analysis Findings\n*To be populated during Phase 1*\n\n## 5. Hypothesis Registry\n*To be populated during Phase 2*\n\n## 6. Evidence Log\n*To be populated during validation*\n\n## 7. Experiment Results\n*To be populated if experiments conducted*\n\n## 8. Dead Ends & Lessons\n*Track approaches that didn't work*\n\n## 9. Function Definitions\n[Include all function definitions from metaGuidance for reference]\n\n## 10. Resumption Instructions\n\n### How to Resume This Investigation\n\n1. **Get the workflow**: Call `workflow_get` with:\n - id: \"systematic-bug-investigation-with-loops\"\n - mode: \"preview\" (to see next step)\n\n2. 
**Resume from saved state**: Call `workflow_next` with the JSON below:\n\n```json\n{\n \"workflowId\": \"systematic-bug-investigation-with-loops\",\n \"completedSteps\": [\"phase-0-triage\", \"phase-0a-assumption-check\", \"phase-0b-user-preferences\", \"phase-0c-tool-check\", \"phase-0d-create-context\"],\n \"context\": {\n \"bugComplexity\": \"{{bugComplexity}}\",\n \"projectType\": \"{{projectType}}\",\n \"debuggingMechanism\": \"{{debuggingMechanism}}\",\n \"isDistributed\": {{isDistributed || false}},\n \"automationLevel\": \"{{automationLevel}}\",\n \"userDebugPreferences\": {{JSON.stringify(userDebugPreferences)}},\n \"availableTools\": {{JSON.stringify(availableTools)}},\n \"toolLimitations\": {{JSON.stringify(toolLimitations)}}\n }\n}\n```\n\n3. **Continue investigation**: The workflow will pick up from where it left off\n\n### Important Notes\n- Update `completedSteps` array after completing each phase\n- Preserve all context variables for proper state restoration\n- This JSON should be updated after major milestones\n```\n\n**Set `contextInitialized` = true**",
136
- "agentRole": "You are creating the central documentation hub for this investigation. This document will track all progress, findings, and enable seamless handoffs.",
137
- "guidance": [
138
- "Create a comprehensive but scannable document",
139
- "Include all context variables discovered so far",
140
- "Set up structure for future updates",
141
- "Include function definitions for reference",
142
- "Update the resumption JSON after each major phase using addResumptionJson()",
143
- "Always include the workflow_get and workflow_next instructions for proper resumption"
144
- ],
145
- "requireConfirmation": false
146
- },
147
- {
148
- "id": "phase-1-iterative-analysis",
149
- "type": "loop",
150
- "title": "Phase 1: Multi-Dimensional Codebase Analysis",
151
- "loop": {
152
- "type": "for",
153
- "count": 4,
154
- "maxIterations": 4,
155
- "iterationVar": "analysisPhase"
156
- },
157
- "body": [
158
- {
159
- "id": "analysis-breadth-scan",
160
- "title": "Analysis 1/4: Breadth Scan",
161
- "prompt": "**BREADTH SCAN - Cast Wide Net**\n\nGoal: Understand full system impact and identify all potentially involved components.\n\nPerform: Error propagation mapping, Component discovery, Data flow mapping, Recent changes analysis, and Historical pattern search.\n\n**Output**: Complete BreadthAnalysis.md with component interaction map, data flow diagram, suspicious areas ranked by likelihood, and list of all potentially related files and functions.",
162
- "agentRole": "You are performing systematic analysis phase 1 of 4. Your focus is casting a wide net to find all potentially related components.",
163
- "guidance": [
164
- "This is analysis phase 1 of 4 total phases",
165
- "Phase 1 = Breadth Scan - Cast wide net for all related components",
166
- "Create BreadthAnalysis.md with structured findings",
167
- "ERROR PROPAGATION MAPPING: Use grep_search for all error occurrences, trace error messages across all log files, map all stack traces to identify call chains, document every point where error appears or is handled",
168
- "COMPONENT DISCOVERY: Find ALL components that interact with failing area, use codebase_search \"How is [failing component] used?\", identify all callers and callees, build component interaction map, note both direct and indirect relationships",
169
- "DATA FLOW MAPPING: Trace data that flows through bug area, identify all transformations applied to data, find all persistence points (database, cache, files), document complete data journey, note where data could be corrupted or lost",
170
- "RECENT CHANGES ANALYSIS: Git history for all identified components, check last 10 commits affecting these areas, identify when bug likely appeared, look for related PRs or issues, note any configuration or dependency changes",
171
- "HISTORICAL PATTERN SEARCH: Use findSimilarBugs() to search for similar error patterns in codebase, previous fixes to related components, related test failures in history",
172
- "Use findSimilarBugs() to search for historical patterns",
173
- "Use the function definitions for standardized operations",
174
- "Update INVESTIGATION_CONTEXT.md after completion",
175
- "Be thorough - it's better to include too much than miss something critical",
176
- "Document your reasoning for why each component is potentially involved"
177
- ],
178
- "runCondition": {"var": "analysisPhase", "equals": 1},
179
- "requireConfirmation": false
180
- },
181
- {
182
- "id": "analysis-deep-dive",
183
- "title": "Analysis 2/4: Component Deep Dive",
184
- "prompt": "**COMPONENT DEEP DIVE - Understand Internals**\n\nGoal: Deep understanding of top 5 suspicious components from breadth scan.\n\nFor each component, use recursiveAnalysis(component, 3) to perform 3-level analysis: Direct Implementation (Level 1), Direct Dependencies (Level 2), and Integration Points (Level 3). Document likelihood scores, suspicious code sections, failure modes, and red flags.\n\n**Output**: ComponentAnalysis.md with deep insights for top 5 components, ranked list of most likely root cause locations, detailed notes on internals, and dependency graph showing relationships.",
185
- "agentRole": "You are performing systematic analysis phase 2 of 4. Your focus is deep diving into the most suspicious components to understand their internals.",
186
- "guidance": [
187
- "This is analysis phase 2 of 4 total phases",
188
- "Phase 2 = Deep Dive - Analyze suspicious components 3 levels deep",
189
- "Build on findings from Phase 1 Breadth Scan",
190
- "Create ComponentAnalysis.md with structured findings",
191
- "Use recursiveAnalysis() for systematic exploration",
192
- "LEVEL 1 - DIRECT IMPLEMENTATION: Read COMPLETE file including private methods, understand state management and data structures, analyze error handling patterns, check initialization and cleanup logic, document all public/private APIs, identify assumptions or invariants, note TODO/FIXME comments",
193
- "LEVEL 2 - DIRECT DEPENDENCIES: Follow all imports and their usage, understand dependency contracts and interfaces, check version compatibility and breaking changes, analyze coupling points and data exchange, look for shared mutable state, identify circular dependencies, document how failures could propagate",
194
- "LEVEL 3 - INTEGRATION POINTS: How component fits in larger system architecture, side effects and external calls (DB, API, file system), concurrency and threading concerns, resource management (memory, connections, handles), caching and state synchronization, event handling and callbacks, configuration and environment dependencies",
195
- "FOR EACH COMPONENT DOCUMENT: Likelihood score (1-10) of being root cause, specific suspicious code sections with line numbers, potential failure modes and their symptoms, dependencies that could be sources of issues, red flags (complex logic, error handling gaps, race conditions)",
196
- "Update INVESTIGATION_CONTEXT.md after completion",
197
- "Go deep - read entire files, not just the obvious parts",
198
- "Look for subtle issues like race conditions, edge cases, and assumptions"
199
- ],
200
- "runCondition": {"var": "analysisPhase", "equals": 2},
201
- "requireConfirmation": false
202
- },
203
- {
204
- "id": "analysis-dependencies",
205
- "title": "Analysis 3/4: Dependencies & Flow",
206
- "prompt": "**DEPENDENCY & FLOW ANALYSIS - Trace Connections**\n\nGoal: Understand how components interact and data flows between them.\n\nPerform: Static dependency graph analysis, Runtime flow analysis, Data transformation pipeline tracing, and Integration analysis.\n\n**Output**: FlowAnalysis.md with sequence diagrams showing execution flow, data flow maps with transformation points, complete dependency graph, list of all integration points and failure modes, and timeline showing order of operations.",
207
- "agentRole": "You are performing systematic analysis phase 3 of 4. Your focus is tracing how components connect and data flows between them.",
208
- "guidance": [
209
- "This is analysis phase 3 of 4 total phases",
210
- "Phase 3 = Dependencies - Trace connections and data flows",
211
- "Build on component understanding from Phase 2",
212
- "Create FlowAnalysis.md with diagrams and flow charts",
213
- "STATIC DEPENDENCY GRAPH: Build complete import/dependency tree, identify circular dependencies, find hidden dependencies (reflection, dynamic loading, DI), map version constraints and compatibility, document shared libraries and utilities, note tight coupling or fragile dependencies",
214
- "RUNTIME FLOW ANALYSIS: Trace execution paths to bug, identify async/concurrent flows and coordination, map state changes through execution, document control flow (conditionals, loops, exceptions), track callback chains and event handlers, identify divergence points, note timing dependencies and race conditions",
215
- "DATA TRANSFORMATION PIPELINE: Track data from input to error point, document each transformation with input/output types, identify validation points and what they check, find where data could be corrupted/lost, note serialization/deserialization boundaries, track data format conversions, document enrichment/filtering steps",
216
- "INTEGRATION ANALYSIS: External service calls and failure modes, database interactions (reads/writes/transactions), message queue operations and formats, file system operations and error handling, network calls and timeout handling, cache usage and invalidation, third-party library calls",
217
- "Focus on runtime behavior and integration points",
218
- "Update INVESTIGATION_CONTEXT.md after completion",
219
- "Pay special attention to async boundaries and error propagation",
220
- "Look for implicit dependencies that aren't obvious from imports"
221
- ],
222
- "runCondition": {"var": "analysisPhase", "equals": 3},
223
- "requireConfirmation": false
224
- },
225
- {
226
- "id": "analysis-test-coverage",
227
- "title": "Analysis 4/4: Test Coverage",
228
- "prompt": "**TEST COVERAGE ANALYSIS - Leverage Existing Knowledge**\n\nGoal: Use existing tests as source of truth about system behavior.\n\nFor each suspicious component, use analyzeTests(component) to perform: Direct test coverage analysis, Integration test analysis, Test history investigation, Test execution with debugging, and Coverage gap analysis.\n\n**Output**: TestAnalysis.md with coverage gaps matrix, suspicious test patterns, test evidence for hypotheses, recommendations for tests to add, and complete test inventory for affected components.",
229
- "agentRole": "You are performing systematic analysis phase 4 of 4. Your focus is leveraging existing tests to understand expected behavior and find coverage gaps.",
230
- "guidance": [
231
- "This is analysis phase 4 of 4 total phases",
232
- "Phase 4 = Tests - Analyze test coverage and quality",
233
- "Build on all previous analysis phases",
234
- "Create TestAnalysis.md with coverage gap matrix",
235
- "DIRECT TEST COVERAGE: Find all tests using grep/test discovery, analyze what's tested (happy/edge/error cases), identify what's NOT tested, check test quality and assertion strength, note mocking/stubbing that might hide issues, review test names and docs",
236
- "INTEGRATION TEST ANALYSIS: Find end-to-end tests for bug area, analyze assumptions/preconditions, check for flaky tests, review disabled/skipped tests and why, look for TODO/incomplete tests, identify multi-component tests, verify if tests cover failing scenario",
237
- "TEST HISTORY: When were tests added/modified? Do test changes correlate with bug appearance? Were tests removed/disabled recently? Use git blame for authors and context, look for related PRs/issues, review test evolution",
238
- "TEST EXECUTION WITH DEBUGGING: Run tests with debug flags (--verbose, --debug), add instrumentation to tests themselves, compare expected vs actual in detail, run in isolation and in suite, try different orderings to check dependencies, monitor resource usage",
239
- "COVERAGE GAP ANALYSIS: Use coverage tools for untested code paths, map coverage to bug components, identify branches/conditions never exercised, note error handling without tests, document missing edge cases, recommend tests to add",
240
- "Run tests with debug flags for additional insights",
241
- "After completion, use trackInvestigation('Phase 1 Complete', 'Moving to Hypothesis Development')",
242
- "Tests often reveal the 'expected' behavior - compare with actual behavior",
243
- "Missing tests often indicate areas where bugs hide"
244
- ],
245
- "runCondition": {"var": "analysisPhase", "equals": 4},
246
- "requireConfirmation": false
247
- }
248
- ],
249
- "requireConfirmation": false
250
- },
251
- {
252
- "id": "phase-1a-binary-search",
253
- "title": "Phase 1a: Binary Search Isolation",
254
- "runCondition": {
255
- "or": [
256
- {"var": "bugType", "equals": "regression"},
257
- {"var": "searchSpace", "equals": "large"}
258
- ]
259
- },
260
- "prompt": "**BINARY SEARCH** - Apply divide-and-conquer:\n\n1. Identify GOOD state (working) and BAD state (broken)\n2. Find midpoint in history/code/data\n3. Test midpoint state\n4. Narrow to relevant half\n5. Document reduced search space\n\n**OUTPUT**: Narrowed location with evidence.",
261
- "agentRole": "You are a systematic investigator using algorithmic search to efficiently isolate issues.",
262
- "guidance": [
263
- "VERSION CONTROL: Use 'git bisect' or equivalent for commit history searches",
264
- "DATA PIPELINE: Test data at pipeline midpoints to isolate transformation issues",
265
- "TIME WINDOWS: For time-based issues, binary search through timestamps",
266
- "DOCUMENT BOUNDARIES: Clearly record each tested boundary and result",
267
- "EFFICIENCY: Each test should eliminate ~50% of remaining search space"
268
- ]
269
- },
270
- {
271
- "id": "phase-1b-test-reduction",
272
- "title": "Phase 1b: Test Case Minimization",
273
- "runCondition": {
274
- "var": "bugSource",
275
- "equals": "failing_test"
276
- },
277
- "prompt": "**TEST REDUCTION** - Simplify failing test:\n\n1. Inline called methods into test\n2. Add earlier assertion to fail sooner\n3. Remove code after new failure point\n4. Repeat until minimal\n\n**OUTPUT**: Minimal failing test case.",
278
- "agentRole": "You are a surgical debugger who strips away layers to reveal core issues.",
279
- "guidance": [
280
- "PRESERVE FAILURE: Each reduction must maintain the original failure mode",
281
- "INLINE AGGRESSIVELY: Replace method calls with their actual implementation",
282
- "FAIL EARLY: Move assertions up to find earliest deviation from expected state",
283
- "REMOVE RUTHLESSLY: Delete all code that doesn't contribute to the failure",
284
- "CLARITY GOAL: Final test should make the bug obvious to any reader"
285
- ]
286
- },
287
- {
288
- "id": "phase-2a-hypothesis-development",
289
- "title": "Phase 2a: Hypothesis Development & Prioritization",
290
- "prompt": "**HYPOTHESIS GENERATION** - Based on codebase analysis, formulate testable hypotheses about the bug's root cause.\n\n**STEP 1: Evidence-Based Hypothesis Development**\nCreate maximum 5 prioritized hypotheses. Each includes:\n- **Root Cause Theory**: Specific technical explanation\n- **Supporting Evidence**: Code patterns/logic flows supporting this theory\n- **Failure Mechanism**: Exact sequence leading to observed bug\n- **Testability Score**: Quantified assessment (1-10) of validation ease\n- **Evidence Strength Score**: Quantified assessment (1-10) based on code findings\n\n**STEP 2: Hypothesis Prioritization Matrix**\nRank hypotheses using weighted scoring:\n- **Evidence Strength** (40%): Code analysis support for theory\n- **Testability** (35%): Validation ease with debugging instruments\n- **Impact Scope** (25%): How well this explains all symptoms\n\n**STEP 3: Pattern Integration**\nIncorporate findings from findSimilarBugs():\n- **Historical Patterns**: Similar bugs fixed previously\n- **Known Issues**: Related problems in the codebase\n- **Test Failures**: Similar test failure patterns\n- Adjust hypothesis confidence based on pattern matches\n\n**CRITICAL RULE**: All hypotheses must be based on concrete evidence from code analysis.\n\n**OUTPUTS**: Maximum 5 hypotheses with quantified scoring, ranked by priority.\n\n**⚠️ INVESTIGATION NOT COMPLETE**: Developing hypotheses with high evidence scores is excellent progress, but represents only ~35% of the investigation. Even if you have a hypothesis with 9-10/10 evidence strength:\n\n- You are NOT done with the investigation\n- You MUST continue to Phase 2b-2h to refine and validate hypotheses\n- You MUST continue to Phase 3 to implement instrumentation\n- You MUST continue to Phase 4-5 to collect and analyze evidence\n- You MUST continue to Phase 6 to produce the comprehensive diagnostic writeup\n\n**DO NOT set isWorkflowComplete=true at this stage.** The workflow requires completing all phases.",
291
- "agentRole": "You are a senior software detective and root cause analysis expert with deep expertise in systematic hypothesis formation. Your strength lies in connecting code evidence to potential failure mechanisms and creating testable theories. You excel at logical reasoning and evidence-based deduction. You must maintain rigorous quantitative standards and reject any hypothesis not grounded in concrete code evidence.",
292
- "guidance": [
293
- "EVIDENCE-BASED ONLY: Every hypothesis must be grounded in concrete code analysis findings with quantified evidence scores",
294
- "HYPOTHESIS LIMITS: Generate maximum 5 hypotheses to prevent analysis paralysis",
295
- "QUANTIFIED SCORING: Use 1-10 scales for evidence strength and testability with clear criteria"
296
- ],
297
- "validationCriteria": [
298
- {
299
- "type": "contains",
300
- "value": "Evidence Strength Score",
301
- "message": "Must include quantified evidence strength scoring (1-10) for each hypothesis"
302
- },
303
- {
304
- "type": "contains",
305
- "value": "Testability Score",
306
- "message": "Must include quantified testability scoring (1-10) for each hypothesis"
307
- }
308
- ],
309
- "hasValidation": true
310
- },
311
- {
312
- "id": "phase-2b-hypothesis-validation-strategy",
313
- "title": "Phase 2b: Hypothesis Validation Strategy & Documentation",
314
- "prompt": "**HYPOTHESIS VALIDATION PLANNING** - For the top 3 hypotheses, create validation strategies and documentation.\n\n**STEP 1: Hypothesis Validation Strategy**\nFor top 3 hypotheses, define:\n- **Required Evidence**: Specific evidence to confirm/refute hypothesis\n- **Debugging Approach**: Instrumentation/tests providing evidence\n- **Success Criteria**: Results proving hypothesis correct\n- **Confidence Threshold**: Minimum evidence quality needed\n\n**STEP 2: Hypothesis Documentation**\nCreate structured registry:\n- **Hypothesis ID**: H1, H2, H3 for tracking\n- **Status**: Active, Refuted, Confirmed\n- **Evidence Log**: Supporting and contradicting evidence\n- **Validation Plan**: Specific testing approach\n\n**STEP 3: Coverage Check**\nEnsure hypotheses cover diverse categories (logic, state, dependencies) with deep analysis.\n\n**STEP 4: Update Investigation Context**\nUse updateInvestigationContext('Hypothesis Registry', formatted hypothesis table with all details)\n\n**OUTPUTS**: Top 3 hypotheses selected for validation with structured documentation and validation plans.",
315
- "agentRole": "You are a systematic testing strategist and documentation expert. Your strength lies in creating clear validation plans and maintaining rigorous documentation standards for hypothesis tracking and evidence collection.",
316
- "guidance": [
317
- "STRUCTURED DOCUMENTATION: Create formal hypothesis registry with tracking IDs and status",
318
- "VALIDATION RIGOR: Only proceed with top 3 hypotheses that meet minimum evidence thresholds",
319
- "COMPREHENSIVE PLANNING: Each hypothesis must have clear validation approach and success criteria"
320
- ],
321
- "validationCriteria": [
322
- {
323
- "type": "contains",
324
- "value": "Hypothesis ID",
325
- "message": "Must assign tracking IDs (H1, H2, H3) to each hypothesis"
326
- },
327
- {
328
- "type": "regex",
329
- "pattern": "H[1-3]",
330
- "message": "Must use proper hypothesis ID format (H1, H2, H3)"
331
- }
332
- ],
333
- "hasValidation": true
334
- },
335
- {
336
- "id": "phase-2c-hypothesis-assumptions",
337
- "title": "Phase 2c: Hypothesis Assumption Audit",
338
- "prompt": "**AUDIT** each hypothesis for hidden assumptions:\n\n**FOR EACH HYPOTHESIS**:\n- List implicit assumptions\n- **Assumption confidence**: Rate each assumption (1-10)\n- Identify verification approach\n\n**REJECT** hypotheses built on unverified assumptions.",
339
- "agentRole": "You are a rigorous scientist who rejects any hypothesis not grounded in verified facts.",
340
- "guidance": [
341
- "EXPLICIT LISTING: Write out every assumption, no matter how obvious it seems",
342
- "CONFIDENCE SCORING: Rate 1-10 based on evidence quality, not intuition",
343
- "VERIFICATION PLAN: For each assumption, specify how it can be tested",
344
- "REJECTION CRITERIA: Any assumption with confidence <7 requires verification",
345
- "DOCUMENT RATIONALE: Explain why each assumption is accepted or needs testing"
346
- ],
347
- "validationCriteria": [
348
- {
349
- "type": "contains",
350
- "value": "Assumption confidence",
351
- "message": "Must rate assumption confidence for each hypothesis"
352
- }
353
- ],
354
- "hasValidation": true
355
- },
356
- {
357
- "id": "phase-2d-prepare-validation",
358
- "title": "Phase 2d: Prepare Hypothesis Validation",
359
- "prompt": "**PREPARE VALIDATION ARRAY** - Extract the top 3 hypotheses for systematic validation.\n\n**Create `hypothesesToValidate` array with:**\n```json\n[\n {\n \"id\": \"H1\",\n \"description\": \"[Hypothesis description]\",\n \"evidenceStrength\": [score],\n \"testability\": [score],\n \"validationPlan\": \"[Specific testing approach]\"\n },\n // ... H2, H3\n]\n```\n\n**Set context variables:**\n- `hypothesesToValidate`: Array of top 3 hypotheses\n- `currentConfidence`: 0 (will be updated during validation)\n- `validationIterations`: 0 (tracks validation cycles)",
360
- "agentRole": "You are preparing the systematic validation process by structuring hypotheses for iteration.",
361
- "guidance": [
362
- "Extract only the top 3 hypotheses from Phase 2b",
363
- "Ensure each has complete validation information",
364
- "Initialize tracking variables for the validation loop"
365
- ],
366
- "requireConfirmation": false
367
- },
368
- {
369
- "id": "phase-2e-test-evidence-gathering",
370
- "title": "Phase 2e: Test-Based Hypothesis Evidence",
371
- "runCondition": {
372
- "var": "hypothesesToValidate",
373
- "not_equals": null
374
- },
375
- "prompt": "**TEST-DRIVEN HYPOTHESIS VALIDATION**\n\nFor each hypothesis in hypothesesToValidate, use checkHypothesisInTests(hypothesis):\n\n**1. Direct Test Evidence**:\n- Find tests that directly test suspected components\n- Analyze test names, descriptions, and assertions\n- Check if tests actually validate what we think\n\n**2. Indirect Test Evidence**:\n- Find tests that would fail if hypothesis is true\n- Look for integration tests touching the area\n- Check for tests that assume opposite behavior\n\n**3. Test Coverage Gaps**:\n- What aspects of hypothesis are NOT tested?\n- Where would a test have caught this bug?\n- What assumptions do tests make?\n\n**4. Test Execution Analysis**:\n- Run tests with debug instrumentation\n- Add temporary logging to tests\n- Compare test expectations vs reality\n\n**5. Historical Test Analysis**:\n- When were relevant tests last modified?\n- Were any tests disabled recently?\n- Do test changes correlate with bug appearance?\n\n**Create TestEvidence Matrix**:\n```\n| Hypothesis | Supporting Tests | Contradicting Tests | Coverage Gaps | Confidence Impact |\n|------------|------------------|---------------------|---------------|-------------------|\n| H1 | TestA, TestB | TestC (partially) | Edge case X | +2 confidence |\n```\n\n**Update each hypothesis** with test evidence findings.",
376
- "agentRole": "You are a test analysis specialist validating hypotheses against the existing test suite. Your goal is to use tests as objective evidence for or against each hypothesis.",
377
- "guidance": [
378
- "Tests are the codified understanding of system behavior",
379
- "A hypothesis contradicted by passing tests needs reconsideration",
380
- "Missing test coverage often indicates where bugs hide",
381
- "Update hypothesis confidence based on test evidence"
382
- ],
383
- "requireConfirmation": false
384
- },
385
- {
386
- "id": "phase-2f-hypothesis-verification",
387
- "type": "loop",
388
- "title": "Phase 2f: Hypothesis Verification & Refinement",
389
- "runCondition": {
390
- "var": "hypothesesToValidate",
391
- "not_equals": null
392
- },
393
- "loop": {
394
- "type": "forEach",
395
- "items": "hypothesesToValidate",
396
- "itemVar": "hypothesis",
397
- "indexVar": "hypothesisIndex",
398
- "maxIterations": 10
399
- },
400
- "body": [
401
- {
402
- "id": "verify-against-code",
403
- "title": "Deep Code Verification for {{hypothesis.id}}",
404
- "prompt": "**DEEP VERIFICATION for {{hypothesis.id}}**\n\n**Goal**: Verify hypothesis assumptions through deep code analysis.\n\nUse recursiveAnalysis() on key components:\n\n1. **Component Analysis (3 levels deep)**:\n - Level 1: Direct implementation of suspected component\n - Level 2: All direct dependencies and callers\n - Level 3: Transitive dependencies and integration points\n\n2. **State & Data Flow Verification**:\n - How does data actually flow through this component?\n - What state transformations occur?\n - Are there hidden side effects?\n\n3. **Error Path Analysis**:\n - Trace all error handling paths\n - Find where errors could originate\n - Check error propagation matches hypothesis\n\n4. **Concurrency Check** (if applicable):\n - Race conditions possible?\n - Shared state issues?\n - Timing dependencies?\n\n**Output**: Deep verification findings for {{hypothesis.id}}",
405
- "agentRole": "You are performing deep verification of hypothesis {{hypothesis.id}}, diving 3+ levels deep to ensure thorough understanding.",
406
- "guidance": [
407
- "This is verification step 1 of 3 for {{hypothesis.id}}",
408
- "Go deeper than the initial analysis - follow every lead",
409
- "Document any new discoveries that affect the hypothesis"
410
- ],
411
- "requireConfirmation": false
412
- },
413
- {
414
- "id": "check-contradictions",
415
- "title": "Search for Contradicting Evidence",
416
- "prompt": "**CONTRADICTION SEARCH for {{hypothesis.id}}**\n\n**Goal**: Actively search for evidence that contradicts this hypothesis.\n\n1. **Code Pattern Contradictions**:\n - Search for code that assumes opposite behavior\n - Find defensive checks that prevent this scenario\n - Look for comments indicating different understanding\n\n2. **Test Contradictions**:\n - Tests that would fail if hypothesis were true\n - Tests that explicitly verify opposite behavior\n - Integration tests showing different flow\n\n3. **Historical Contradictions**:\n - Git history showing intentional design decisions\n - PRs or issues discussing this behavior\n - Documentation stating different intent\n\n4. **Runtime Contradictions**:\n - Logs showing successful execution through suspected path\n - Metrics indicating normal behavior\n - Other systems depending on current behavior\n\n**Be a skeptic** - try to disprove {{hypothesis.id}}",
417
- "agentRole": "You are a skeptical investigator trying to find flaws in hypothesis {{hypothesis.id}}.",
418
- "guidance": [
419
- "Actively search for contradicting evidence",
420
- "Check assumptions against reality",
421
- "Consider alternative explanations"
422
- ],
423
- "requireConfirmation": false
424
- },
425
- {
426
- "id": "refine-or-replace",
427
- "title": "Refine Hypothesis {{hypothesis.id}}",
428
- "prompt": "**REFINEMENT DECISION for {{hypothesis.id}}**\n\nBased on deep verification and contradiction search:\n\n1. **Assessment**:\n - New evidence supporting: [list]\n - New evidence contradicting: [list]\n - Unverified assumptions: [list]\n - Confidence change: [+/- points]\n\n2. **Refinement Options**:\n - **Keep as-is**: Evidence strongly supports current formulation\n - **Refine**: Adjust hypothesis based on new understanding\n - **Replace**: Fundamentally flawed, create new hypothesis\n - **Merge**: Combine with another hypothesis\n\n3. **If Refining/Replacing**:\n - Update hypothesis description\n - Adjust evidence strength score\n - Revise validation plan\n - Document why changed\n\n4. **Update Context**:\n - Use updateInvestigationContext('Hypothesis Registry', updated hypothesis)\n - Note verification findings\n\n**Output**: Updated hypothesis with refined understanding",
429
- "agentRole": "You are making the final decision on hypothesis {{hypothesis.id}} based on verification findings.",
430
- "guidance": [
431
- "Be willing to change hypotheses based on evidence",
432
- "Document all changes and reasoning",
433
- "Update confidence scores appropriately"
434
- ],
435
- "requireConfirmation": false
436
- }
437
- ],
438
- "requireConfirmation": false
439
- },
440
- {
441
- "id": "phase-2g-instrumentation-planning",
442
- "title": "Phase 2g: Unified Instrumentation Planning",
443
- "prompt": "**UNIFIED INSTRUMENTATION PLANNING** - Plan comprehensive logging strategy for all hypotheses before implementation.\n\n**GOAL**: Create a coordinated instrumentation plan that efficiently captures evidence for all hypotheses in a single execution.\n\n**STEP 1: Hypothesis Review**\nFor each hypothesis (H1, H2, H3):\n- **Component(s)**: Which components need instrumentation?\n- **Critical Paths**: Which execution paths must be logged?\n- **Key Variables**: What state/data must be captured?\n- **Decision Points**: What conditionals/branches matter?\n- **Timing Concerns**: Any concurrency or timing-sensitive areas?\n\n**STEP 2: Identify Instrumentation Locations**\n\nFor each hypothesis, list specific locations:\n```\nH1 Instrumentation Needs:\n - File: auth/login.ts, Function: validateCredentials, Lines: 45-67\n What to log: input credentials format, validation result, error conditions\n - File: auth/session.ts, Function: createSession, Lines: 23-34\n What to log: session creation parameters, user context\n\nH2 Instrumentation Needs:\n - File: auth/session.ts, Function: createSession, Lines: 23-34 [OVERLAP with H1]\n What to log: session storage backend, timing\n - File: database/connection.ts, Function: getConnection, Lines: 89-102\n What to log: connection pool state, timeout settings\n\nH3 Instrumentation Needs:\n - File: cache/redis.ts, Function: set, Lines: 156-178\n What to log: cache key, TTL, success/failure\n```\n\n**STEP 3: Identify Overlaps**\n\nWhere do multiple hypotheses need logging at the same location?\n```\nOverlapping Instrumentation:\n - auth/session.ts:23-34: Both H1 and H2 need logs here\n Strategy: Single log point with both [H1] and [H2] prefixes capturing all needed data\n \n - No other overlaps identified\n```\n\n**STEP 4: Plan Log Format & Structure**\n\nDefine what each log should contain:\n```\nLog Format Standard:\n [HX] ClassName.methodName:{lineNum} | timestamp | specific-data\n\nH1 Log Examples:\n [H1] 
LoginValidator.validateCredentials:45 | 2025-10-02T10:23:45.123Z | input={email: user@example.com, hasPassword: true}\n [H1] LoginValidator.validateCredentials:52 | 2025-10-02T10:23:45.145Z | validation=FAILED reason=\"invalid format\"\n\nH2 Log Examples:\n [H2] SessionManager.createSession:23 | 2025-10-02T10:23:45.167Z | backend=redis poolSize=10\n [H2] SessionManager.createSession:28 | 2025-10-02T10:23:45.189Z | sessionId=abc123 stored=true latency=22ms\n```\n\n**STEP 5: Plan Data Capture Strategy**\n\nWhat specific data values need to be captured:\n- **H1 requires**: Credential format, validation results, error messages\n- **H2 requires**: Backend type, connection timing, pool state\n- **H3 requires**: Cache keys, TTL values, hit/miss rates\n\n**STEP 6: Consider Edge Cases**\n\n- **High-frequency locations**: Plan aggregation (e.g., log every 10th iteration)\n- **Sensitive data**: Plan redaction (e.g., mask passwords, PII)\n- **Large data structures**: Plan summarization (e.g., object size, key count, not full dump)\n- **Error paths**: Ensure error cases are logged, not just happy path\n\n**STEP 7: Create Instrumentation Implementation Plan**\n\nProduce structured plan:\n```markdown\n# Instrumentation Implementation Plan\n\n## Summary\n- Total instrumentation points: [count]\n- Overlapping locations: [count]\n- Estimated log volume: [low/medium/high]\n- Sensitive data handling: [yes/no - describe]\n\n## H1 Instrumentation (Priority: High, Evidence Strength: 8/10)\n1. Location: auth/login.ts:45-67\n Function: validateCredentials\n Log: [H1] Input format and validation result\n Frequency: Per-call (not high-frequency)\n Data: {email format, hasPassword, validation result, error}\n\n2. 
Location: auth/session.ts:23-34 [SHARED with H2]\n Function: createSession \n Log: [H1] Session creation context\n Frequency: Per-call\n Data: {userContext, sessionType}\n\n## H2 Instrumentation (Priority: High, Evidence Strength: 7/10)\n[Similar detailed breakdown]\n\n## H3 Instrumentation (Priority: Medium, Evidence Strength: 6/10)\n[Similar detailed breakdown]\n\n## Implementation Order\n1. Shared locations first (avoid duplication)\n2. H1 specific locations\n3. H2 specific locations\n4. H3 specific locations\n\n## Validation Checklist\n- [ ] All hypotheses have instrumentation coverage\n- [ ] Overlaps identified and coordinated\n- [ ] Log format is consistent\n- [ ] Sensitive data is handled\n- [ ] High-frequency points have aggregation\n- [ ] Edge cases considered\n```\n\n**OUTPUT**:\n- Complete instrumentation implementation plan\n- Set `instrumentationPlanReady` = true\n- Create InstrumentationPlan.md file with detailed plan\n- Update INVESTIGATION_CONTEXT.md with plan summary",
444
- "agentRole": "You are an instrumentation architect planning a comprehensive logging strategy. Your goal is to design efficient, coordinated instrumentation that captures all needed evidence in a single execution.",
445
- "guidance": [
446
- "Review ALL hypotheses together to identify synergies",
447
- "Be specific about locations (file, function, line numbers)",
448
- "Identify and optimize overlapping instrumentation needs",
449
- "Plan log format for consistency and parseability",
450
- "Consider practical concerns (volume, sensitivity, performance)",
451
- "Create actionable implementation plan, not just theory",
452
- "This plan will guide Phase 3 implementation"
453
- ],
454
- "requireConfirmation": false
455
- },
456
- {
457
- "id": "phase-2h-cognitive-reset",
458
- "title": "Phase 2h: Cognitive Reset & Plan Review",
459
- "prompt": "**COGNITIVE RESET** - Take a mental step back before implementing instrumentation.\n\n**GOAL**: Review the investigation with fresh eyes and validate the plan before execution.\n\n**STEP 1: Progress Summary**\n- What have we learned so far? (3-5 key insights)\n- What are our top hypotheses? (brief recap)\n- What's our instrumentation strategy? (high-level summary)\n\n**STEP 2: Critical Questions**\n- Are we missing any obvious alternative explanations?\n- Are our hypotheses too similar or too narrow?\n- Is our instrumentation plan efficient and comprehensive?\n- Are we making any unwarranted assumptions?\n- Is there a simpler approach we haven't considered?\n\n**STEP 3: Bias Check**\n- First impression bias: Are we anchored to initial theories?\n- Confirmation bias: Are we seeking evidence that confirms our beliefs?\n- Complexity bias: Are we overcomplicating a simple issue?\n- Recency bias: Are we over-weighting recent findings?\n\n**STEP 4: Sanity Checks**\n- Does the timeline make sense? (When did bug appear vs when hypothesized causes were introduced)\n- Do the symptoms match our theories? (All symptoms explained, no contradictions)\n- Are we investigating the right level? (Too high-level or too low-level)\n- Have we consulted existing documentation/logs adequately?\n\n**STEP 5: Plan Validation**\n- Review the instrumentation plan from Phase 2g\n- Will it actually answer our questions?\n- Are there any gaps or redundancies?\n- Is it safe to execute? (no production impacts, no data corruption risks)\n\n**STEP 6: Proceed or Pivot Decision**\n- **PROCEED**: Plan is sound, move to implementation\n- **REFINE**: Minor adjustments needed (update plan)\n- **PIVOT**: Major issues found (return to earlier phase)\n\n**OUTPUT**:\n- Cognitive reset complete with decision (PROCEED/REFINE/PIVOT)\n- Any plan adjustments documented\n- Set `resetComplete` = true",
460
- "agentRole": "You are a senior debugger reviewing the investigation plan with fresh, critical eyes before committing to implementation.",
461
- "guidance": [
462
- "Be honest about potential biases and blind spots",
463
- "Look for simpler explanations we might have missed",
464
- "Validate the plan will actually answer our questions",
465
- "Don't skip this - catching issues now saves hours later",
466
- "It's okay to pivot if major issues are found"
467
- ],
468
- "requireConfirmation": false
469
- },
470
- {
471
- "id": "phase-3-comprehensive-instrumentation",
472
- "title": "Phase 3: Comprehensive Debug Instrumentation",
473
- "prompt": "**COMPREHENSIVE DEBUGGING INSTRUMENTATION** - Implement the instrumentation plan from Phase 2g.\n\n**FOLLOW THE PLAN**: Use the instrumentation plan created in Phase 2g as your implementation guide.\n\n**For each hypothesis in hypothesesToValidate, add targeted instrumentation:**\n\n**IMPLEMENTATION STRATEGY**:\n\n1. **Hypothesis-Specific Prefixes**: Each hypothesis gets unique logging prefix\n - H1: `[H1]` prefix for all H1-related logs\n - H2: `[H2]` prefix for all H2-related logs\n - H3: `[H3]` prefix for all H3-related logs\n\n2. **Standard Format for ALL hypotheses**:\n ```javascript\n className.methodName [HX] {timestamp}: Hypothesis-specific message\n ```\n\n3. **Smart Logging Implementation** (apply once, works for all hypotheses):\n ```javascript\n const debugState = { lastMsg: '', count: 0 };\n function smartLog(hypothesisId, msg) {\n const fullMsg = `[${hypothesisId}] ${msg}`;\n if (debugState.lastMsg === fullMsg) {\n debugState.count++;\n if (debugState.count % 10 === 0) {\n console.log(`${fullMsg} x${debugState.count}`);\n }\n } else {\n if (debugState.count > 1) {\n console.log(`Previous message x${debugState.count}`);\n }\n console.log(fullMsg);\n debugState.lastMsg = fullMsg;\n debugState.count = 1;\n }\n }\n ```\n\n4. **Instrumentation Points** for each hypothesis:\n - Add H1 logging at H1-relevant locations\n - Add H2 logging at H2-relevant locations\n - Add H3 logging at H3-relevant locations\n - Locations may overlap - that's fine, both will log\n\n5. **Operation Grouping** (for all hypotheses):\n ```javascript\n console.log(`=== [H1] Operation ${opName} Start ===`);\n // ... 
H1-relevant code ...\n console.log(`=== [H1] Operation ${opName} End ===`);\n ```\n\n**INSTRUMENTATION CHECKLIST**:\n- [ ] H1 instrumentation added at identified locations\n- [ ] H2 instrumentation added at identified locations \n- [ ] H3 instrumentation added at identified locations\n- [ ] Test instrumentation for hypothesis validation\n- [ ] Deduplication logic implemented\n- [ ] All logs use correct [HX] prefixes\n\n**OUTPUT**:\n- Comprehensive instrumented code with logging for ALL hypotheses\n- Set `allHypothesesInstrumented` = true\n- Document instrumentation locations in INVESTIGATION_CONTEXT.md",
474
- "agentRole": "You are instrumenting code to validate ALL hypotheses simultaneously. Your goal is comprehensive, non-redundant logging that enables efficient evidence collection in a single execution.",
475
- "guidance": [
476
- "Add instrumentation for ALL hypotheses at once",
477
- "Use unique [HX] prefixes to distinguish hypothesis-specific logs",
478
- "Overlapping instrumentation is acceptable - multiple hypotheses can log at same location",
479
- "Ensure non-intrusive implementation that doesn't change behavior",
480
- "Single execution will produce logs for all hypotheses"
481
- ],
482
- "requireConfirmation": false
483
- },
484
- {
485
- "id": "phase-4-unified-evidence-collection",
486
- "title": "Phase 4: Unified Evidence Collection",
487
- "prompt": "**UNIFIED EVIDENCE COLLECTION** - Run instrumented code ONCE and collect all evidence.\n\n**EXECUTION**:\n1. **Single Test/Reproduction Run**:\n - Execute the reproduction steps with ALL instrumentation active\n - All hypotheses are tested in the same execution\n - Capture complete log output\n\n2. **Log Collection**:\n - Collect ALL debug logs from the single run\n - Logs will contain [H1], [H2], [H3] prefixed messages\n - Save complete log output for analysis\n\n3. **Log Organization**:\n - Parse logs by hypothesis prefix:\n - Extract all [H1] logs → H1 evidence\n - Extract all [H2] logs → H2 evidence \n - Extract all [H3] logs → H3 evidence\n - Preserve chronological order within each hypothesis\n - Note any cross-hypothesis interactions\n\n4. **Test Execution Evidence**:\n - Run instrumented tests\n - Collect test debug output\n - Note any test failures or unexpected behavior\n - Compare with production bug behavior\n\n5. **Evidence Quality Assessment**:\n - Rate overall log quality (1-10)\n - Note if execution reproduced the bug\n - Document any execution issues\n - Identify if additional instrumentation needed\n\n**If log volume >500 lines:**\n- Use aggregateDebugLogs() to create summaries\n- Group by hypothesis and operation\n- Create structured sub-analysis\n\n**OUTPUT**:\n- Complete log output with all hypothesis evidence\n- Organized evidence by hypothesis (H1, H2, H3)\n- Set `evidenceCollected` = true\n- Overall execution quality score",
488
- "agentRole": "You are collecting comprehensive evidence from a single instrumented execution. Your goal is to capture all hypothesis-relevant data in one efficient run.",
489
- "guidance": [
490
- "Single execution tests all hypotheses simultaneously",
491
- "Organize evidence by [HX] prefix for analysis",
492
- "Preserve complete chronological log for cross-hypothesis insights",
493
- "Note any unexpected behaviors or patterns",
494
- "If execution fails, document why and attempt to collect partial evidence"
495
- ],
496
- "requireConfirmation": false
497
- },
498
- {
499
- "id": "phase-5-hypothesis-analysis-loop",
500
- "type": "loop",
501
- "title": "Phase 5: Individual Hypothesis Analysis",
502
- "loop": {
503
- "type": "forEach",
504
- "items": "hypothesesToValidate",
505
- "itemVar": "currentHypothesis",
506
- "indexVar": "hypothesisIndex",
507
- "maxIterations": 5
508
- },
509
- "body": [
510
- {
511
- "id": "analyze-hypothesis-evidence",
512
- "title": "Analyze Evidence for {{currentHypothesis.id}}",
513
- "prompt": "**EVIDENCE ANALYSIS for {{currentHypothesis.id}}**\n\n**Hypothesis**: {{currentHypothesis.description}}\n\n**ANALYZE {{currentHypothesis.id}} LOGS**:\n\n1. **Extract Relevant Logs**:\n - Review all [{{currentHypothesis.id}}] prefixed logs from Phase 4\n - Examine log sequence and timing\n - Look for patterns supporting or refuting the hypothesis\n\n2. **Evidence Assessment**:\n - Does evidence support {{currentHypothesis.id}}? (Yes/No/Partial)\n - Evidence quality score (1-10)\n - Contradicting evidence found?\n - Unexpected behaviors observed?\n\n3. **Cross-Hypothesis Insights**:\n - Did other hypothesis logs reveal relevant information?\n - Are there interactions between suspected components?\n - Does timeline analysis suggest different root cause?\n\n4. **Confidence Update**:\n - Based on evidence, rate confidence this is root cause (0-10)\n - What additional evidence would increase confidence?\n - Are there alternative explanations for the observed evidence?\n\n5. **Status Determination**:\n - Mark hypothesis as: Confirmed / Refuted / Needs-More-Evidence / Partially-Confirmed\n - If Confirmed with high confidence (>8.0):\n - Set `rootCauseFound` = true\n - Set `rootCauseHypothesis` = {{currentHypothesis.id}}\n - Set `currentConfidence` = confidence score\n\n**CONTEXT UPDATE**:\n- Use updateInvestigationContext('Evidence Log', evidence summary for {{currentHypothesis.id}})\n- Use trackInvestigation('Validation Progress', '{{hypothesisIndex + 1}}/3 hypotheses analyzed')\n\n**OUTPUT**: Complete evidence analysis and status for {{currentHypothesis.id}}",
514
- "agentRole": "You are analyzing evidence collected from the unified execution to determine if {{currentHypothesis.id}} is the root cause.",
515
- "guidance": [
516
- "Analyze logs specific to this hypothesis ({{hypothesisIndex + 1}} of 3)",
517
- "Consider evidence from all hypotheses - may reveal interactions",
518
- "Be objective - negative evidence is valuable",
519
- "Update hypothesis status based on concrete evidence",
520
- "If high confidence root cause found, document thoroughly"
521
- ],
522
- "requireConfirmation": false
523
- }
524
- ],
525
- "requireConfirmation": false
526
- },
527
- {
528
- "id": "phase-4a-controlled-experimentation",
529
- "title": "Phase 4a: Controlled Code Experiments",
530
- "runCondition": {
531
- "var": "currentConfidence",
532
- "lt": 8.0
533
- },
534
- "prompt": "**CONTROLLED EXPERIMENTATION** - When observation isn't enough, experiment!\n\n**Current Investigation Status**: Leading hypothesis (Confidence: {{currentConfidence}}/10)\n\n**⚠️ SAFETY PROTOCOLS (MANDATORY)**:\n\n1. **Git Branch Required**:\n - MUST be on investigation branch (use createInvestigationBranch() if not)\n - Verify with `git branch --show-current`\n - NEVER experiment directly on main/master\n\n2. **Pre-Experiment Baseline**:\n - Commit clean state: `git commit -m \"PRE-EXPERIMENT: baseline for {{hypothesis.id}}\"`\n - Record current test results\n - Document baseline behavior\n\n3. **Environment Restriction**:\n - ONLY run in test/dev environment\n - NEVER in production or staging\n - Set environment check: `if (process.env.NODE_ENV !== 'development') { throw new Error('Experiments only in dev'); }`\n\n4. **Automatic Revert**:\n - After evidence collection: `git revert HEAD --no-edit`\n - Verify code returned to baseline\n - Run tests to confirm clean state\n\n5. **Approval Gates**:\n - Low automation: Require approval for ALL experiments\n - Medium automation: Require approval for breaking/minimal-fix experiments\n - High automation: Auto-approve guards/logs only\n\n6. **Documentation**:\n - Create ExperimentLog.md entry with:\n - Timestamp, experiment type, hypothesis ID\n - Rationale and expected outcome\n - Actual outcome and evidence\n - Revert status (confirmed/failed)\n\n7. **Hard Limits**:\n - Max 3 experiments total (prevent endless experimentation)\n - Track with `experimentCount` context variable\n - Exit if limit reached, recommend different approach\n\n8. **Rollback Verification**:\n - After revert, run full test suite\n - Verify no unintended changes remain\n - Check git status is clean\n\n**EXPERIMENT TYPES** (use controlledModification()):\n\n1. 
**Guard Additions (Non-Breaking)**:\n ```javascript\n // Add defensive check that logs but doesn't change behavior\n if (unexpectedCondition) {\n console.error('[H1_GUARD] Unexpected state detected:', state);\n // Continue normal execution\n }\n ```\n\n2. **Assertion Injections**:\n ```javascript\n // Add assertion that would fail if hypothesis is correct\n console.assert(expectedCondition, '[H1_ASSERT] Hypothesis H1 violated!');\n ```\n\n3. **Minimal Fix Test**:\n ```javascript\n // Apply minimal fix for hypothesis, see if bug disappears\n if (process.env.DEBUG_FIX_H1 === 'true') {\n // Apply hypothesized fix\n return fixedBehavior();\n }\n ```\n\n4. **Controlled Breaking**:\n ```javascript\n // Temporarily break suspected component to verify involvement\n if (process.env.DEBUG_BREAK_H1 === 'true') {\n throw new Error('[H1_BREAK] Intentionally breaking to test hypothesis');\n }\n ```\n\n**PROTOCOL**:\n1. Choose experiment type based on confidence and risk\n2. Implement modification with clear DEBUG markers\n3. Use createInvestigationBranch() if not already on investigation branch\n4. Commit: `git commit -m \"DEBUG: {{experiment_type}} for hypothesis investigation\"`\n5. Run reproduction steps\n6. Use collectEvidence() to gather results\n7. Revert changes: `git revert HEAD --no-edit`\n8. Document results in ExperimentResults/hypothesis-experiment.md\n\n**SAFETY LIMITS**:\n- Max 3 experiments total\n- Each experiment in separate commit\n- Always revert after evidence collection\n- Document everything in INVESTIGATION_CONTEXT.md\n\n**UPDATE**:\n- Hypothesis confidence based on experimental results\n- Use updateInvestigationContext('Experiment Results', experiment details and outcomes)\n- Track failed experiments in 'Dead Ends & Lessons' section",
535
- "agentRole": "You are a careful experimenter using controlled code modifications to validate hypotheses. Safety and reversibility are paramount.",
536
- "guidance": [
537
- "Start with non-breaking experiments (guards, logs)",
538
- "Only use breaking experiments if essential",
539
- "Every change must be easily reversible",
540
- "Document rationale for each experiment type",
541
- "Consider test environment experiments first"
542
- ],
543
- "requireConfirmation": {
544
- "or": [
545
- {"var": "automationLevel", "equals": "Low"},
546
- {"var": "automationLevel", "equals": "Medium"},
547
- {"and": [
548
- {"var": "automationLevel", "equals": "High"},
549
- {"var": "currentConfidence", "lt": 6.0}
550
- ]}
551
- ]
552
- },
553
- "validationCriteria": [
554
- {
555
- "type": "contains",
556
- "value": "commit",
557
- "message": "Must specify commit message for experiment"
558
- }
559
- ]
560
- },
561
- {
562
- "id": "phase-3b-observability-setup",
563
- "title": "Phase 3b: Distributed System Observability",
564
- "runCondition": {
565
- "var": "isDistributed",
566
- "equals": true
567
- },
568
- "prompt": "**OBSERVABILITY** - Set up three-pillar strategy:\n\n**METRICS**: Identify key indicators (latency, errors)\n**TRACES**: Enable request path tracking\n**LOGS**: Ensure correlation IDs present\n\n**OUTPUT**: Observability checklist completed.",
569
- "agentRole": "You are a distributed systems expert who thinks in terms of emergent behaviors and system-wide patterns.",
570
- "guidance": [
571
- "METRICS SELECTION: Focus on RED metrics (Rate, Errors, Duration) for each service",
572
- "TRACE COVERAGE: Ensure spans cover all service boundaries and key operations",
573
- "CORRELATION IDS: Verify IDs propagate through entire request lifecycle",
574
- "AGGREGATION READY: Set up centralized collection for cross-service analysis",
575
- "BASELINE ESTABLISHMENT: Capture normal behavior metrics for comparison"
576
- ]
577
- },
578
- {
579
- "id": "phase-4c-distributed-evidence",
580
- "title": "Phase 4c: Multi-Service Evidence Collection",
581
- "runCondition": {
582
- "var": "isDistributed",
583
- "equals": true
584
- },
585
- "prompt": "**DISTRIBUTED ANALYSIS**:\n\n1. Check METRICS for anomalies\n2. Follow TRACES for request path\n3. Correlate LOGS across services\n4. Identify cascade points\n\n**OUTPUT**: Service interaction map with failure points.",
586
- "agentRole": "You are a systems detective who can trace failures across service boundaries.",
587
- "guidance": [
588
- "ANOMALY DETECTION: Look for deviations in latency, error rates, or traffic patterns",
589
- "TRACE ANALYSIS: Follow request ID through all services to find failure point",
590
- "LOG CORRELATION: Use timestamp windows and correlation IDs to link events",
591
- "CASCADE IDENTIFICATION: Look for timeout chains or error propagation patterns",
592
- "VISUAL MAPPING: Create service dependency diagram with failure annotations"
593
- ]
594
- },
595
- {
596
- "id": "phase-4b-cognitive-reset",
597
- "title": "Phase 4b: Cognitive Reset & Progress Review",
598
- "runCondition": {
599
- "var": "validationIterations",
600
- "gte": 2
601
- },
602
- "prompt": "**COGNITIVE RESET** - Step back and review:\n\n1. Summarize findings so far\n2. List eliminated possibilities\n3. Identify investigation blind spots\n4. Reformulate approach if needed\n\n**DECIDE**: Continue current path or pivot strategy?",
603
- "agentRole": "You are a strategic advisor who helps maintain perspective during complex investigations.",
604
- "guidance": [
605
- "PROGRESS SUMMARY: Write concise bullet points of key findings and eliminations",
606
- "BLIND SPOT CHECK: What areas haven't been investigated? What assumptions remain?",
607
- "PATTERN RECOGNITION: Look for investigation loops or repeated dead ends",
608
- "STRATEGY EVALUATION: Is current approach yielding diminishing returns?",
609
- "PIVOT CRITERIA: Consider new approach if last 3 iterations provided no new insights"
610
- ]
611
- },
612
- {
613
- "id": "phase-5a-final-confidence",
614
- "title": "Phase 5a: Final Confidence Assessment",
615
- "prompt": "**FINAL CONFIDENCE ASSESSMENT** - Evaluate the investigation results.\n\n**If root cause found (rootCauseFound = true):**\n- Review all evidence for {{rootCauseHypothesis}}\n- Perform adversarial challenge\n- Calculate final confidence score\n\n**If no high-confidence root cause:**\n- Document what was learned\n- Identify remaining unknowns\n- Recommend next investigation steps\n\n**CONFIDENCE CALCULATION:**\n- Evidence Quality (1-10)\n- Explanation Completeness (1-10)\n- Alternative Likelihood (1-10, inverted)\n- Final = (Quality × 0.4) + (Completeness × 0.4) + (Alternative × 0.2)\n\n**CONTEXT UPDATE**:\n- Use trackInvestigation('Investigation Complete', 'Confidence: {{finalConfidence}}/10')\n- Use addResumptionJson('phase-5a-final-confidence')\n- Document lessons learned in 'Dead Ends & Lessons' section\n\n**⚠️ ONE PHASE REMAINING**: Even if you have achieved 9-10/10 confidence in the root cause with strong supporting evidence:\n\n- The investigation is NOT complete yet\n- You MUST proceed to Phase 6 to create the comprehensive diagnostic writeup\n- Phase 6 is the REQUIRED DELIVERABLE that makes all your investigation work actionable\n- High confidence means you've identified the root cause, but the writeup translates that into actionable documentation\n\n**DO NOT set isWorkflowComplete=true yet.** You are at ~90% completion. Phase 6 is required.\n\n**OUTPUT**: Final confidence assessment with recommendations",
616
- "agentRole": "You are making the final determination about the root cause with rigorous confidence assessment.",
617
- "guidance": [
618
- "Be honest about confidence levels",
619
- "Document all remaining uncertainties",
620
- "Provide clear next steps if confidence is low"
621
- ],
622
- "validationCriteria": [
623
- {
624
- "type": "regex",
625
- "pattern": "Final.*=.*[0-9\\.]+",
626
- "message": "Must calculate final confidence score"
627
- }
628
- ],
629
- "hasValidation": true
630
- },
631
- {
632
- "id": "phase-6-diagnostic-writeup",
633
- "title": "Phase 6: Comprehensive Diagnostic Writeup",
634
- "prompt": "**FINAL DIAGNOSTIC DOCUMENTATION** - I will create comprehensive writeup enabling effective bug fixing and knowledge transfer.\n\n**STEP 1: Executive Summary**\n- **Bug Summary**: Concise description of issue and impact\n- **Root Cause**: Clear, non-technical explanation of what is happening\n- **Confidence Level**: Final confidence assessment with calculation methodology\n- **Scope**: What systems, users, or scenarios are affected\n\n**STEP 2: Technical Deep Dive**\n- **Root Cause Analysis**: Detailed technical explanation of failure mechanism\n- **Code Component Analysis**: Specific files, functions, and lines with exact locations\n- **Execution Flow**: Step-by-step sequence of events leading to bug\n- **State Analysis**: How system state contributes to failure\n\n**STEP 3: Investigation Methodology**\n- **Investigation Timeline**: Chronological summary with phase time investments\n- **Hypothesis Evolution**: Complete record of hypotheses (H1-H5) with status changes\n- **Evidence Assessment**: Rating and reliability of evidence sources with key citations\n\n**STEP 4: Historical Context & Patterns**\n- **Similar Bugs**: Reference findings from findSimilarBugs() and SimilarPatterns.md\n- **Previous Fixes**: How similar issues were resolved\n- **Recurring Patterns**: Identify if this is part of a larger pattern\n- **Lessons Learned**: What can be applied from past experiences\n\n**STEP 5: Knowledge Transfer & Action Plan**\n- **Skill Requirements**: Technical expertise needed for understanding and fixing\n- **Prevention & Review**: Specific measures and code review checklist items\n- **Action Items**: Immediate mitigation steps and permanent fix areas with timelines\n- **Testing Strategy**: Comprehensive verification approach for fixes\n- **Recommended Next Investigations** (if confidence < 9.0):\n - Additional instrumentation locations and data points not yet captured\n - Alternative hypotheses to explore (theories that were deprioritized)\n - External 
expertise to consult (domain experts, similar bugs)\n - Environmental factors to test (load, concurrency, timing, config variations)\n - Expanded scope (related components, upstream/downstream systems)\n - Prioritized next steps based on evidence gaps\n\n**STEP 6: Context Finalization**\n- **Final Update**: Use updateInvestigationContext('Final Report', link to diagnostic report)\n- **Archive Context**: Ensure INVESTIGATION_CONTEXT.md is complete for future reference\n- **Knowledge Base**: Consider key findings for team knowledge base\n\n**DELIVERABLE**: Enterprise-grade diagnostic report enabling confident bug fixing, knowledge transfer, and organizational learning.\n\n**✅ WORKFLOW COMPLETION**: After producing the comprehensive diagnostic writeup with all required sections:\n\n1. Verify the writeup includes:\n - Executive Summary with root cause and confidence\n - Technical Deep Dive with code analysis\n - Investigation Methodology and timeline\n - Historical Context from similar bugs\n - Knowledge Transfer and Action Plan\n - All 6 sections fully documented\n\n2. Update INVESTIGATION_CONTEXT.md with final status and handoff information\n\n3. **Set isWorkflowComplete = true** to indicate the investigation is finished\n\nThis is the ONLY step where isWorkflowComplete should be set to true.",
635
- "agentRole": "You are a senior technical writer and diagnostic documentation specialist with expertise in creating comprehensive, actionable bug reports for enterprise environments. Your strength lies in translating complex technical investigations into clear, structured documentation that enables effective problem resolution, knowledge transfer, and organizational learning. You excel at creating reports that serve immediate fixing needs, long-term system improvement, and team collaboration.",
636
- "guidance": [
637
- "ENTERPRISE FOCUS: Write for multiple stakeholders including developers, managers, and future team members",
638
- "KNOWLEDGE TRANSFER: Include methodology and reasoning, not just conclusions",
639
- "COLLABORATIVE DESIGN: Structure content for peer review and team coordination",
640
- "COMPREHENSIVE COVERAGE: Include all information needed for resolution and prevention",
641
- "ACTIONABLE DOCUMENTATION: Provide specific, concrete next steps with clear ownership"
642
- ]
643
- }
644
- ]
645
- }