@exaudeus/workrail 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/README.md +153 -189
  2. package/dist/application/services/classification-engine.d.ts +33 -0
  3. package/dist/application/services/classification-engine.js +258 -0
  4. package/dist/application/services/compression-service.d.ts +20 -0
  5. package/dist/application/services/compression-service.js +312 -0
  6. package/dist/application/services/context-management-service.d.ts +38 -0
  7. package/dist/application/services/context-management-service.js +301 -0
  8. package/dist/application/services/context-persistence-service.d.ts +45 -0
  9. package/dist/application/services/context-persistence-service.js +273 -0
  10. package/dist/cli/migrate-workflow.js +3 -2
  11. package/dist/infrastructure/storage/context-storage.d.ts +150 -0
  12. package/dist/infrastructure/storage/context-storage.js +40 -0
  13. package/dist/infrastructure/storage/filesystem-blob-storage.d.ts +27 -0
  14. package/dist/infrastructure/storage/filesystem-blob-storage.js +363 -0
  15. package/dist/infrastructure/storage/hybrid-context-storage.d.ts +29 -0
  16. package/dist/infrastructure/storage/hybrid-context-storage.js +400 -0
  17. package/dist/infrastructure/storage/migrations/001_initial_schema.sql +38 -0
  18. package/dist/infrastructure/storage/migrations/002_context_concurrency_enhancements.sql +234 -0
  19. package/dist/infrastructure/storage/migrations/003_classification_overrides.sql +20 -0
  20. package/dist/infrastructure/storage/sqlite-metadata-storage.d.ts +35 -0
  21. package/dist/infrastructure/storage/sqlite-metadata-storage.js +410 -0
  22. package/dist/infrastructure/storage/sqlite-migrator.d.ts +46 -0
  23. package/dist/infrastructure/storage/sqlite-migrator.js +293 -0
  24. package/dist/types/context-types.d.ts +236 -0
  25. package/dist/types/context-types.js +10 -0
  26. package/dist/utils/storage-security.js +1 -1
  27. package/package.json +3 -1
  28. package/workflows/coding-task-workflow-with-loops.json +84 -49
  29. package/workflows/mr-review-workflow.json +75 -26
  30. package/workflows/systemic-bug-investigation-with-loops.json +596 -0
  31. package/workflows/workflow-for-workflows.json +197 -27
@@ -0,0 +1,596 @@
1
+ {
2
+ "id": "systematic-bug-investigation-with-loops",
3
+ "name": "Systematic Bug Investigation Workflow",
4
+ "version": "1.0.0",
5
+ "description": "A comprehensive workflow for systematic bug and failing test investigation that prevents LLMs from jumping to conclusions. Enforces thorough evidence gathering, hypothesis formation, debugging instrumentation, and validation to achieve near 100% certainty about root causes. This workflow does NOT fix bugs - it produces detailed diagnostic writeups that enable effective fixing by providing complete understanding of what is happening, why it's happening, and supporting evidence.",
6
+ "clarificationPrompts": [
7
+ "What type of system is this? (web app, mobile app, backend service, desktop app, etc.)",
8
+ "How consistently can you reproduce this bug? (always reproducible, sometimes reproducible, rarely reproducible)",
9
+ "What was the last known working version or state if applicable?",
10
+ "Are there any time constraints or urgency factors for this investigation?",
11
+ "What level of system access do you have? (full codebase, limited access, production logs only)",
12
+ "Do you have preferences for handling large log volumes? (sub-chat analysis, inline summaries only, or no preference for automatic decision)"
13
+ ],
14
+ "preconditions": [
15
+ "User has identified a specific bug or failing test to investigate",
16
+ "Agent has access to codebase analysis tools (grep, file readers, etc.)",
17
+ "Agent has access to build/test execution tools for the project type",
18
+ "User can provide error messages, stack traces, or test failure output",
19
+ "Bug is reproducible with specific steps or a minimal test case"
20
+ ],
21
+ "metaGuidance": [
22
+ "**FUNCTION DEFINITIONS:**",
23
+ "fun instrumentCode(location, hypothesis) = 'Add debug logs at {location} for {hypothesis}. Format: ClassName.method [{hypothesis}]: message. Include timestamp, thread ID if concurrent.'",
24
+ "fun collectEvidence(hypothesis) = 'Run instrumented code, collect logs, analyze results. Score evidence quality 1-10. Document in Evidence/{hypothesis}.md.'",
25
+ "fun updateHypothesisLog(id, status, evidence) = 'Update INVESTIGATION_CONTEXT.md section {id} with {status} and {evidence}. Include confidence score.'",
26
+ "fun analyzeTests(component) = 'Find all tests for {component} using grep_search. Check coverage, recent changes, what they validate vs miss. Run with --debug flag.'",
27
+ "fun recursiveAnalysis(component, depth=3) = 'Analyze {component} to {depth} levels. L1: implementation, L2: direct deps, L3: transitive deps. Document each level.'",
28
+ "fun controlledModification(type, location) = 'Make {type} change at {location}. Types: guard (add logging), assert (add assertion), fix (minimal fix), break (controlled failure). Commit: DEBUG: {type} at {location}'",
29
+ "fun checkHypothesisInTests(hypothesis) = 'Search existing tests for evidence. Direct: tests of suspected components. Indirect: tests that would fail if true. Document in TestEvidence/{hypothesis}.md'",
30
+ "fun aggregateDebugLogs(pattern, timeWindow=100) = 'Deduplicate logs matching {pattern}. Output: {pattern} x{count} in {timeWindow}ms, variations: {unique_values}'",
31
+ "fun createInvestigationBranch() = 'git checkout -b investigate/{bug-id}-{timestamp}. If git unavailable, create Investigation/{timestamp}/ directory for artifacts.'",
32
+ "fun trackInvestigation(phase, status) = 'Update INVESTIGATION_CONTEXT.md progress: ✅ {completed}, 🔄 {phase}, ⏳ Remaining: {list}, 📊 Confidence: {score}/10'",
33
+ "fun updateInvestigationContext(section, content) = 'Update INVESTIGATION_CONTEXT.md {section} with {content}. Include timestamp. If section doesn\\'t exist, create it. Preserve all other sections.'",
34
+ "fun findSimilarBugs() = 'Search for: 1) Similar error patterns in codebase, 2) Previous fixes in git history, 3) Related test cases. Document in SimilarPatterns.md'",
35
+ "fun visualProgress() = 'Show: ✅ Phase 0 | ✅ Phase 1 | 🔄 Phase 2 | ⏳ Phase 3-5 | ⏳ Phase 6 | 📊 35% Complete. Include time spent per phase.'",
36
+ "fun applyDebugPreferences() = 'Apply user debugging preferences from userDebugPreferences context variable. Adapt logging verbosity, tool selection, output format.'",
37
+ "fun addResumptionJson(phase) = 'Update INVESTIGATION_CONTEXT.md resumption section with: workflowId, completedSteps up to {phase}, all context variables. Include workflow_get and workflow_next instructions.'",
38
+ "**USAGE:** When you see function calls like instrumentCode() or analyzeTests(), execute the full instructions defined above.",
39
+ "INVESTIGATION DISCIPLINE: Never propose fixes or solutions until Phase 6 (Comprehensive Diagnostic Writeup). Focus entirely on systematic evidence gathering and analysis.",
40
+ "HYPOTHESIS RIGOR: All hypotheses must be based on concrete evidence from code analysis with quantified scoring (1-10 scales). Maximum 5 hypotheses per investigation.",
41
+ "DEBUGGING INSTRUMENTATION: Always implement debugging mechanisms before running tests - logs, print statements, or test modifications that will provide evidence.",
42
+ "EVIDENCE THRESHOLD: Require minimum 3 independent sources of evidence before confirming any hypothesis. Use objective verification criteria.",
43
+ "SYSTEMATIC PROGRESSION: Complete each investigation phase fully before proceeding. Each phase builds critical context for the next with structured documentation.",
44
+ "CONFIDENCE CALIBRATION: Use mathematical confidence framework with 9.0/10 minimum threshold. Actively challenge conclusions with adversarial analysis.",
45
+ "UNCERTAINTY ACKNOWLEDGMENT: Explicitly document all remaining unknowns and their potential impact. No subjective confidence assessments.",
46
+ "THOROUGHNESS: For complex bugs, recursively analyze dependencies and internals of identified components to ensure full picture.",
47
+ "TEST INTEGRATION: Leverage existing tests to validate hypotheses where possible.",
48
+ "**LOGGING STANDARDS:**",
49
+ "LOG FORMAT: Always use 'ClassName.methodName [hypothesisId] {timestamp}: message'. For concurrent code, add thread/worker ID.",
50
+ "LOG DEDUPLICATION: Implement in debug code: if (lastMsg === currentMsg) { count++; if (count % 10 === 0) log(`${msg} x${count}`); } else { if (count > 1) log(`Previous: x${count}`); log(currentMsg); count = 1; }",
51
+ "LOG AGGREGATION: For high-frequency events, create summaries: 'Event X occurred 847 times between 10:23:45-10:23:47, unique values: [val1: 623, val2: 224]'",
52
+ "LOG WINDOWS: Group related logs within 50-100ms. Mark groups with '=== Operation: XYZ Start ===' and '=== Operation: XYZ End (duration: 73ms) ==='",
53
+ "LOG CONTEXT: Include hypothesis ID in all debug logs. Use prefixes like 'H1_DEBUG:', 'H2_TRACE:', 'H3_ERROR:'",
54
+ "LOG ANALYSIS OFFLOADING: For voluminous logs (>500 lines), offload analysis to sub-chats with structured prompts. See Phase 4 for detailed sub-analysis implementation.",
55
+ "RECURSION DEPTH: Limit recursive analysis to 3 levels deep to prevent analysis paralysis while ensuring thoroughness.",
56
+ "INVESTIGATION BOUNDS: If investigation exceeds 20 steps or 4 hours without root cause, pause and reassess approach with user.",
57
+ "AUTOMATION LEVELS: High=auto-approve >8.0 confidence decisions, Medium=standard confirmations, Low=extra confirmations for safety. Control workflow autonomy based on user preference.",
58
+ "CONTEXT DOCUMENTATION: Maintain INVESTIGATION_CONTEXT.md throughout. Update after major milestones, failures, or user interventions to enable seamless handoffs between sessions. Include explicit resumption instructions using workflow_get and workflow_next.",
59
+ "GIT FALLBACK STRATEGY: If git unavailable, gracefully skip commits/branches, log changes manually in INVESTIGATION_CONTEXT.md with timestamps, warn user, document modifications for manual control.",
60
+ "GIT ERROR HANDLING: Use run_terminal_cmd for git operations; if fails, output exact command for user manual execution. Never halt investigation due to git unavailability.",
61
+ "TOOL AVAILABILITY AWARENESS: Check debugging tool availability before investigation design. Have fallbacks for when primary tools unavailable (grep→file_search, etc).",
62
+ "SECURITY PROTOCOLS: Sanitize sensitive data in logs/reproduction steps. Be mindful of exposing credentials, PII, or system internals during evidence collection phases.",
63
+ "DYNAMIC RE-TRIAGE: Allow complexity upgrades during investigation if evidence reveals deeper issues. Safe downgrades only with explicit user confirmation after evidence review.",
64
+ "DEVIL'S ADVOCATE REVIEW: Actively challenge primary hypothesis with available evidence. Seek alternative explanations and rate alternative likelihood before final confidence assessment.",
65
+ "COLLABORATIVE HANDOFFS: Structure documentation for peer review and team coordination. Include methodology, reasoning, and complete evidence chain for knowledge transfer.",
66
+ "FAILURE BOUNDS: Track investigation progress. If >20 steps or >4 hours without breakthrough, pause for user guidance. Document dead ends to prevent redundant work in future sessions.",
67
+ "COGNITIVE BREAKS: After 10 investigation steps, pause and summarize progress to reset perspective.",
68
+ "RUBBER DUCK: Verbalize hypotheses in sub-prompts to externalize reasoning and catch logical gaps.",
69
+ "COLLABORATION READY: Document clearly for handoffs when stuck beyond iteration limits."
70
+ ],
71
+ "steps": [
72
+ {
73
+ "id": "phase-0-triage",
74
+ "title": "Phase 0: Initial Triage & Context Gathering",
75
+ "prompt": "**SYSTEMATIC INVESTIGATION BEGINS** - Your mission is to achieve near 100% certainty about this bug's root cause through systematic evidence gathering. NO FIXES will be proposed until Phase 6.\n\n**STEP 1: Bug Report Analysis**\nPlease provide the complete bug context:\n- **Bug Description**: What is the observed behavior vs expected behavior?\n- **Error Messages/Stack Traces**: Paste the complete error output\n- **Reproduction Steps**: How can this bug be consistently reproduced?\n- **Environment Details**: OS, language version, framework version, etc.\n- **Recent Changes**: Any recent commits, deployments, or configuration changes?\n\n**STEP 2: Project Type Classification**\nBased on the information provided, I will classify the project type and set debugging strategies:\n- **Languages/Frameworks**: Primary tech stack\n- **Build System**: Maven, Gradle, npm, etc.\n- **Testing Framework**: JUnit, Jest, pytest, etc.\n- **Logging System**: Available logging mechanisms\n- **Architecture**: Monolithic, microservices, distributed, serverless, etc.\n\n**STEP 3: Complexity Assessment**\nI will analyze the bug complexity using these criteria:\n- **Simple**: Single function/method, clear error path, minimal dependencies\n- **Standard**: Multiple components, moderate investigation required\n- **Complex**: Cross-system issues, race conditions, complex state management\n\n**STEP 4: Automation Level Selection**\nAsk the user: \"What automation level would you prefer for this investigation?\"\n- **High**: Auto-approve decisions with confidence >8.0, minimal confirmations\n- **Medium**: Standard confirmations for key decisions\n- **Low**: Extra confirmations for safety, manual approval for all changes\n\n**OUTPUTS**: Set context variables:\n- `projectType`, `bugComplexity`, `debuggingMechanism`\n- `isDistributed` (true if architecture involves microservices/distributed systems)\n- `automationLevel` (High/Medium/Low based on user preference)",
76
+ "agentRole": "You are a senior debugging specialist and bug triage expert with 15+ years of experience across multiple technology stacks. Your expertise lies in quickly classifying bugs, understanding project architectures, and determining appropriate investigation strategies. You excel at extracting critical information from bug reports and setting up systematic investigation approaches.",
77
+ "guidance": [
78
+ "CLASSIFICATION ACCURACY: Proper complexity assessment determines investigation depth - be thorough but decisive",
79
+ "CONTEXT CAPTURE: Gather complete environmental and situational context now to avoid gaps later",
80
+ "DEBUGGING STRATEGY: Choose debugging mechanisms appropriate for the project type and bug complexity",
81
+ "NO ASSUMPTIONS: If critical information is missing, explicitly request it before proceeding"
82
+ ]
83
+ },
84
+ {
85
+ "id": "phase-0a-assumption-check",
86
+ "title": "Phase 0a: Assumption Verification Checkpoint",
87
+ "prompt": "**ASSUMPTION CHECK** - Before proceeding, verify key assumptions to prevent bias.\n\n**VERIFY**:\n1. **Data State**: Confirm variable types and null handling\n2. **API/Library**: Check documentation for actual vs assumed behavior\n3. **Environment**: Verify bug exists in clean environment\n4. **Recent Changes**: Review last 5 commits for relevance\n\n**OUTPUT**: List verified assumptions with evidence sources.",
88
+ "agentRole": "You are a skeptical analyst who challenges every assumption. Question everything that hasn't been explicitly verified.",
89
+ "guidance": [
90
+ "Use analysis tools to verify, don't assume",
91
+ "Document each assumption with its verification method",
92
+ "Flag any unverifiable assumptions for tracking",
93
+ "CHECK API DOCS: Never assume function behavior from names - verify actual documentation",
94
+ "VERIFY DATA TYPES: Use debugger or logs to confirm actual runtime types and values",
95
+ "TEST ENVIRONMENT: Reproduce in minimal environment to rule out configuration issues"
96
+ ]
97
+ },
98
+ {
99
+ "id": "phase-0b-user-preferences",
100
+ "title": "Phase 0b: Identify User Debugging Preferences",
101
+ "prompt": "**USER DEBUGGING PREFERENCES** - Identify and document user-specific debugging preferences.\n\n**CHECK FOR PREFERENCES IN:**\n1. **User Settings/Memory**: Any stored debugging preferences\n2. **Project Documentation**: Team debugging standards\n3. **Previous Instructions**: Past user guidance on debugging approach\n\n**CATEGORIZE PREFERENCES:**\n- **Debugging Tools**: Preference for debugger vs logs vs traces\n- **Log Verbosity**: Detailed vs concise output\n- **Output Format**: Structured logs vs human-readable\n- **Testing Approach**: Unit tests vs integration tests focus\n- **Commit Style**: Conventional commits vs descriptive\n- **Documentation**: Inline comments vs separate docs\n- **Error Handling**: Fail fast vs defensive programming\n\n**IF NO EXPLICIT PREFERENCES:**\nAsk user:\n- \"Do you prefer verbose logging or concise summaries?\"\n- \"Should I use interactive debuggers or rely on log analysis?\"\n- \"Any specific tools or approaches your team prefers?\"\n\n**OUTPUT**: Set `userDebugPreferences` context variable with categorized preferences.\n\n**APPLY**: Use applyDebugPreferences() throughout investigation to adapt approach.",
102
+ "agentRole": "You are a debugging preferences specialist who understands how different teams and developers approach problem-solving. You excel at identifying and applying user-specific debugging styles.",
103
+ "guidance": [
104
+ "This step ensures the investigation aligns with user/team practices",
105
+ "Capture both explicit and implicit preferences",
106
+ "Default to standard practices if no preferences found",
107
+ "These preferences will be applied throughout the workflow"
108
+ ],
109
+ "requireConfirmation": false
110
+ },
111
+ {
112
+ "id": "phase-0c-reproducibility-loop",
113
+ "type": "loop",
114
+ "title": "Phase 0c: Reproducibility Verification Loop",
115
+ "loop": {
116
+ "type": "for",
117
+ "count": 3,
118
+ "maxIterations": 3,
119
+ "iterationVar": "reproductionAttempt"
120
+ },
121
+ "body": [
122
+ {
123
+ "id": "reproduce-bug",
124
+ "title": "Reproduction Attempt {{reproductionAttempt}}/3",
125
+ "prompt": "**REPRODUCTION ATTEMPT {{reproductionAttempt}}/3**\n\nExecute the provided reproduction steps:\n1. Follow exact steps from bug report\n2. Document outcome (Success/Failure)\n3. Note any variations in behavior\n4. Capture error messages/stack traces\n\n**Update context:**\n- Set `reproductionResults[{{reproductionAttempt - 1}}]` = true/false\n- If failed, document why\n- Track any intermittent patterns",
126
+ "agentRole": "You are systematically verifying bug reproducibility to ensure solid investigation foundation.",
127
+ "guidance": [
128
+ "Execute exactly as specified",
129
+ "Document any deviations",
130
+ "Capture all error details"
131
+ ],
132
+ "requireConfirmation": false
133
+ }
134
+ ],
135
+ "requireConfirmation": false
136
+ },
137
+ {
138
+ "id": "phase-0d-reproducibility-assessment",
139
+ "title": "Phase 0d: Reproducibility Assessment",
140
+ "prompt": "**ASSESS REPRODUCIBILITY**\n\nBased on 3 reproduction attempts:\n- **Success Rate**: Calculate percentage\n- **Pattern Analysis**: Identify any intermittent patterns\n- **Minimal Reproduction**: Create simplified test case if needed\n\n**DECISION:**\n- If 100% reproducible: Proceed to Phase 1\n- If intermittent: Apply stress techniques and document patterns\n- If 0% reproducible: Request more information from user\n\n**Set `isReproducible` = true/false based on assessment**",
141
+ "agentRole": "You are assessing reproduction results to determine investigation viability.",
142
+ "guidance": [
143
+ "100% reproduction is ideal but not always required",
144
+ "Document intermittent patterns for investigation",
145
+ "Create minimal test case for complex scenarios"
146
+ ],
147
+ "validationCriteria": [
148
+ {
149
+ "type": "contains",
150
+ "value": "reproducib",
151
+ "message": "Must make reproducibility determination"
152
+ }
153
+ ],
154
+ "hasValidation": true,
155
+ "runCondition": {
156
+ "var": "reproductionAttempt",
157
+ "equals": 3
158
+ }
159
+ },
160
+ {
161
+ "id": "phase-0e-tool-check",
162
+ "title": "Phase 0e: Tool Availability Verification",
163
+ "runCondition": {
164
+ "var": "isReproducible",
165
+ "equals": true
166
+ },
167
+ "prompt": "**TOOL AVAILABILITY CHECK** - Verify required debugging tools before investigation.\n\n**CORE TOOLS CHECK:**\n1. **Analysis Tools**:\n - grep_search: Text pattern searching\n - read_file: File content reading\n - codebase_search: Semantic code search\n - Test availability, note any failures\n\n2. **Git Operations**:\n - Check git availability: `git --version`\n - If unavailable, set `gitAvailable = false`\n - Plan fallback: manual change tracking\n\n3. **Build/Test Tools** (based on projectType):\n - npm/yarn for JavaScript\n - Maven/Gradle for Java\n - pytest/unittest for Python\n - Document which are available\n\n4. **Debugging Tools**:\n - Language-specific debuggers\n - Profilers if needed\n - Log aggregation tools\n\n**FALLBACK STRATEGIES:**\n- grep_search fails → use file_search\n- codebase_search fails → use grep_search with context\n- Git unavailable → track changes in INVESTIGATION_CONTEXT.md\n- Build tools missing → focus on static analysis\n\n**OUTPUT**:\n- Set `availableTools` context variable\n- Set `toolLimitations` with any restrictions\n- Document fallback strategies in context\n\n**ADAPTATION**: Adjust investigation approach based on available tools.",
168
+ "agentRole": "You are a tool availability specialist ensuring the investigation can proceed smoothly with available resources. You excel at creating fallback strategies.",
169
+ "guidance": [
170
+ "Test each tool category systematically",
171
+ "Don't fail if some tools are unavailable - adapt",
172
+ "Document limitations clearly for user awareness",
173
+ "Prefer degraded functionality over investigation failure"
174
+ ],
175
+ "requireConfirmation": false
176
+ },
177
+ {
178
+ "id": "phase-0f-create-context",
179
+ "title": "Phase 0f: Initialize Investigation Context",
180
+ "runCondition": {
181
+ "var": "isReproducible",
182
+ "equals": true
183
+ },
184
+ "prompt": "**CREATE INVESTIGATION CONTEXT**\n\nUse createInvestigationBranch(), then create INVESTIGATION_CONTEXT.md with:\n\n1. **Bug Summary**: ID, description, complexity, reproducibility, status, automation level\n2. **Progress Tracking**: Use visualProgress() to show phases completed/remaining\n3. **Environment**: Project type, debugging mechanism, architecture, tools, user preferences\n4-8. **Section Placeholders**: Analysis, Hypotheses, Evidence, Experiments, Dead Ends\n9. **Function Definitions**: Include all from metaGuidance\n10. **Resumption Instructions**:\n - workflow_get: id=\"systematic-bug-investigation-with-loops\", mode=\"preview\"\n - workflow_next: JSON with workflowId, completedSteps, context variables\n\n**Key Variables**: bugComplexity, projectType, isReproducible, debuggingMechanism, isDistributed, automationLevel, userDebugPreferences, availableTools\n\n**Set contextInitialized = true**",
185
+ "agentRole": "You are creating the central documentation hub for this investigation. This document will track all progress, findings, and enable seamless handoffs.",
186
+ "guidance": [
187
+ "Create a comprehensive but scannable document",
188
+ "Include all context variables discovered so far",
189
+ "Set up structure for future updates",
190
+ "Include function definitions for reference",
191
+ "Update the resumption JSON after each major phase using addResumptionJson()",
192
+ "Always include the workflow_get and workflow_next instructions for proper resumption"
193
+ ],
194
+ "requireConfirmation": false
195
+ },
196
+ {
197
+ "id": "phase-1-iterative-analysis",
198
+ "type": "loop",
199
+ "title": "Phase 1: Multi-Dimensional Codebase Analysis",
200
+ "runCondition": {
201
+ "var": "isReproducible",
202
+ "equals": true
203
+ },
204
+ "loop": {
205
+ "type": "for",
206
+ "count": 4,
207
+ "maxIterations": 4,
208
+ "iterationVar": "analysisPhase"
209
+ },
210
+ "body": [
211
+ {
212
+ "id": "analysis-iteration",
213
+ "title": "Analysis {{analysisPhase}}/4",
214
+ "prompt": "{{analysisPhase === 1 ? '**BREADTH SCAN**\\n\\n1. **Error Mapping**: grep_search errors, trace logs, map stack traces\\n2. **Component Discovery**: Find all interacting components using codebase_search\\n3. **Data Flow**: Trace data through bug area, transformations, persistence\\n4. **Recent Changes**: Git history last 10 commits\\n\\n**Output**: BreadthAnalysis.md with interaction map' : analysisPhase === 2 ? '**COMPONENT DEEP DIVE**\\n\\nUse recursiveAnalysis(component, 3) on top 5 suspicious components:\\n\\n1. **L1 Direct**: Read complete file, state management, error handling\\n2. **L2 Dependencies**: Follow imports, contracts, version compatibility\\n3. **L3 Integration**: System fit, side effects, concurrency, resources\\n\\n**Output**: ComponentAnalysis.md with deep insights' : analysisPhase === 3 ? '**DEPENDENCY & FLOW ANALYSIS**\\n\\n1. **Static Graph**: Import tree, circular deps, hidden dependencies\\n2. **Runtime Flow**: Execution paths, async flows, state changes\\n3. **Data Pipeline**: Track transformations, validation, corruption points\\n4. **Integration**: External services, DB, queues, filesystem\\n\\n**Output**: FlowAnalysis.md with diagrams' : '**TEST COVERAGE ANALYSIS**\\n\\nUse analyzeTests(component) for each suspicious component:\\n\\n1. **Direct Coverage**: Find tests, analyze coverage gaps, quality\\n2. **Integration Tests**: Bug area tests, assumptions, flaky tests\\n3. **History**: When added/modified, correlation with bug\\n4. **Debug Execution**: Run with debug flags, instrument, compare\\n\\n**Output**: TestAnalysis.md with coverage gaps matrix'}}",
215
+ "agentRole": "You are performing systematic analysis phase {{analysisPhase}} of 4. Your focus is {{analysisPhase === 1 ? 'casting a wide net to find all potentially related components' : analysisPhase === 2 ? 'deep diving into the most suspicious components to understand their internals' : analysisPhase === 3 ? 'tracing how components connect and data flows between them' : 'leveraging existing tests to understand expected behavior and find coverage gaps'}}.",
216
+ "guidance": [
217
+ "This is analysis phase {{analysisPhase}} of 4 total phases",
218
+ "Phase 1 = Breadth Scan, Phase 2 = Deep Dive, Phase 3 = Dependencies, Phase 4 = Tests",
219
+ "Each phase builds on previous findings",
220
+ "Create a structured markdown file for each phase output",
221
+ "Use the function definitions for standardized operations",
222
+ "If you discover the bug's root cause with high confidence, note it but complete all analysis phases for thoroughness",
223
+ "Update INVESTIGATION_CONTEXT.md after each phase: use updateInvestigationContext('Analysis Findings', phase-specific findings)",
224
+ "In Phase 1 (Breadth Scan): Use findSimilarBugs() to search for historical patterns",
225
+ "After all 4 phases complete, use trackInvestigation('Phase 1 Complete', 'Moving to Hypothesis Development')"
226
+ ],
227
+ "requireConfirmation": false
228
+ }
229
+ ],
230
+ "requireConfirmation": false
231
+ },
232
+ {
233
+ "id": "phase-1a-binary-search",
234
+ "title": "Phase 1a: Binary Search Isolation",
235
+ "runCondition": {
236
+ "or": [
237
+ {"var": "bugType", "equals": "regression"},
238
+ {"var": "searchSpace", "equals": "large"}
239
+ ]
240
+ },
241
+ "prompt": "**BINARY SEARCH** - Apply divide-and-conquer:\n\n1. Identify GOOD state (working) and BAD state (broken)\n2. Find midpoint in history/code/data\n3. Test midpoint state\n4. Narrow to relevant half\n5. Document reduced search space\n\n**OUTPUT**: Narrowed location with evidence.",
242
+ "agentRole": "You are a systematic investigator using algorithmic search to efficiently isolate issues.",
243
+ "guidance": [
244
+ "VERSION CONTROL: Use 'git bisect' or equivalent for commit history searches",
245
+ "DATA PIPELINE: Test data at pipeline midpoints to isolate transformation issues",
246
+ "TIME WINDOWS: For time-based issues, binary search through timestamps",
247
+ "DOCUMENT BOUNDARIES: Clearly record each tested boundary and result",
248
+ "EFFICIENCY: Each test should eliminate ~50% of remaining search space"
249
+ ]
250
+ },
251
+ {
252
+ "id": "phase-1b-test-reduction",
253
+ "title": "Phase 1b: Test Case Minimization",
254
+ "runCondition": {
255
+ "var": "bugSource",
256
+ "equals": "failing_test"
257
+ },
258
+ "prompt": "**TEST REDUCTION** - Simplify failing test:\n\n1. Inline called methods into test\n2. Add earlier assertion to fail sooner\n3. Remove code after new failure point\n4. Repeat until minimal\n\n**OUTPUT**: Minimal failing test case.",
259
+ "agentRole": "You are a surgical debugger who strips away layers to reveal core issues.",
260
+ "guidance": [
261
+ "PRESERVE FAILURE: Each reduction must maintain the original failure mode",
262
+ "INLINE AGGRESSIVELY: Replace method calls with their actual implementation",
263
+ "FAIL EARLY: Move assertions up to find earliest deviation from expected state",
264
+ "REMOVE RUTHLESSLY: Delete all code that doesn't contribute to the failure",
265
+ "CLARITY GOAL: Final test should make the bug obvious to any reader"
266
+ ]
267
+ },
268
+ {
269
+ "id": "phase-2a-hypothesis-development",
270
+ "title": "Phase 2a: Hypothesis Development & Prioritization",
271
+ "prompt": "**HYPOTHESIS GENERATION** - Based on codebase analysis, formulate testable hypotheses about the bug's root cause.\n\n**STEP 1: Evidence-Based Hypothesis Development**\nCreate maximum 5 prioritized hypotheses. Each includes:\n- **Root Cause Theory**: Specific technical explanation\n- **Supporting Evidence**: Code patterns/logic flows supporting this theory\n- **Failure Mechanism**: Exact sequence leading to observed bug\n- **Testability Score**: Quantified assessment (1-10) of validation ease\n- **Evidence Strength Score**: Quantified assessment (1-10) based on code findings\n\n**STEP 2: Hypothesis Prioritization Matrix**\nRank hypotheses using weighted scoring:\n- **Evidence Strength** (40%): Code analysis support for theory\n- **Testability** (35%): Validation ease with debugging instruments\n- **Impact Scope** (25%): How well this explains all symptoms\n\n**STEP 3: Pattern Integration**\nIncorporate findings from findSimilarBugs():\n- **Historical Patterns**: Similar bugs fixed previously\n- **Known Issues**: Related problems in the codebase\n- **Test Failures**: Similar test failure patterns\n- Adjust hypothesis confidence based on pattern matches\n\n**CRITICAL RULE**: All hypotheses must be based on concrete evidence from code analysis.\n\n**OUTPUTS**: Maximum 5 hypotheses with quantified scoring, ranked by priority.",
272
+ "agentRole": "You are a senior software detective and root cause analysis expert with deep expertise in systematic hypothesis formation. Your strength lies in connecting code evidence to potential failure mechanisms and creating testable theories. You excel at logical reasoning and evidence-based deduction. You must maintain rigorous quantitative standards and reject any hypothesis not grounded in concrete code evidence.",
273
+ "guidance": [
274
+ "EVIDENCE-BASED ONLY: Every hypothesis must be grounded in concrete code analysis findings with quantified evidence scores",
275
+ "HYPOTHESIS LIMITS: Generate maximum 5 hypotheses to prevent analysis paralysis",
276
+ "QUANTIFIED SCORING: Use 1-10 scales for evidence strength and testability with clear criteria"
277
+ ],
278
+ "validationCriteria": [
279
+ {
280
+ "type": "contains",
281
+ "value": "Evidence Strength Score",
282
+ "message": "Must include quantified evidence strength scoring (1-10) for each hypothesis"
283
+ },
284
+ {
285
+ "type": "contains",
286
+ "value": "Testability Score",
287
+ "message": "Must include quantified testability scoring (1-10) for each hypothesis"
288
+ }
289
+ ],
290
+ "hasValidation": true
291
+ },
292
+ {
293
+ "id": "phase-2b-hypothesis-validation-strategy",
294
+ "title": "Phase 2b: Hypothesis Validation Strategy & Documentation",
295
+ "prompt": "**HYPOTHESIS VALIDATION PLANNING** - For the top 3 hypotheses, create validation strategies and documentation.\n\n**STEP 1: Hypothesis Validation Strategy**\nFor top 3 hypotheses, define:\n- **Required Evidence**: Specific evidence to confirm/refute hypothesis\n- **Debugging Approach**: Instrumentation/tests providing evidence\n- **Success Criteria**: Results proving hypothesis correct\n- **Confidence Threshold**: Minimum evidence quality needed\n\n**STEP 2: Hypothesis Documentation**\nCreate structured registry:\n- **Hypothesis ID**: H1, H2, H3 for tracking\n- **Status**: Active, Refuted, Confirmed\n- **Evidence Log**: Supporting and contradicting evidence\n- **Validation Plan**: Specific testing approach\n\n**STEP 3: Coverage Check**\nEnsure hypotheses cover diverse categories (logic, state, dependencies) with deep analysis.\n\n**STEP 4: Update Investigation Context**\nUse updateInvestigationContext('Hypothesis Registry', formatted hypothesis table with all details)\n\n**OUTPUTS**: Top 3 hypotheses selected for validation with structured documentation and validation plans.",
296
+ "agentRole": "You are a systematic testing strategist and documentation expert. Your strength lies in creating clear validation plans and maintaining rigorous documentation standards for hypothesis tracking and evidence collection.",
297
+ "guidance": [
298
+ "STRUCTURED DOCUMENTATION: Create formal hypothesis registry with tracking IDs and status",
299
+ "VALIDATION RIGOR: Only proceed with top 3 hypotheses that meet minimum evidence thresholds",
300
+ "COMPREHENSIVE PLANNING: Each hypothesis must have clear validation approach and success criteria"
301
+ ],
302
+ "validationCriteria": [
303
+ {
304
+ "type": "contains",
305
+ "value": "Hypothesis ID",
306
+ "message": "Must assign tracking IDs (H1, H2, H3) to each hypothesis"
307
+ },
308
+ {
309
+ "type": "regex",
310
+ "pattern": "H[1-3]",
311
+ "message": "Must use proper hypothesis ID format (H1, H2, H3)"
312
+ }
313
+ ],
314
+ "hasValidation": true
315
+ },
316
+ {
317
+ "id": "phase-2c-prepare-validation",
318
+ "title": "Phase 2c: Prepare Hypothesis Validation",
319
+ "prompt": "**PREPARE VALIDATION ARRAY** - Extract the top 3 hypotheses for systematic validation.\n\n**Create `hypothesesToValidate` array with:**\n```json\n[\n {\n \"id\": \"H1\",\n \"description\": \"[Hypothesis description]\",\n \"evidenceStrength\": [score],\n \"testability\": [score],\n \"validationPlan\": \"[Specific testing approach]\"\n },\n // ... H2, H3\n]\n```\n\n**Set context variables:**\n- `hypothesesToValidate`: Array of top 3 hypotheses\n- `currentConfidence`: 0 (will be updated during validation)\n- `validationIterations`: 0 (tracks validation cycles)",
320
+ "agentRole": "You are preparing the systematic validation process by structuring hypotheses for iteration.",
321
+ "guidance": [
322
+ "Extract only the top 3 hypotheses from Phase 2b",
323
+ "Ensure each has complete validation information",
324
+ "Initialize tracking variables for the validation loop"
325
+ ],
326
+ "requireConfirmation": false
327
+ },
328
+ {
329
+ "id": "phase-2d-test-evidence-gathering",
330
+ "title": "Phase 2d: Test-Based Hypothesis Evidence",
331
+ "runCondition": {
332
+ "var": "hypothesesToValidate",
333
+ "not_equals": null
334
+ },
335
+ "prompt": "**TEST-DRIVEN HYPOTHESIS VALIDATION**\n\nFor each hypothesis in hypothesesToValidate, use checkHypothesisInTests(hypothesis):\n\n**1. Direct Test Evidence**:\n- Find tests that directly test suspected components\n- Analyze test names, descriptions, and assertions\n- Check if tests actually validate what we think\n\n**2. Indirect Test Evidence**:\n- Find tests that would fail if hypothesis is true\n- Look for integration tests touching the area\n- Check for tests that assume opposite behavior\n\n**3. Test Coverage Gaps**:\n- What aspects of hypothesis are NOT tested?\n- Where would a test have caught this bug?\n- What assumptions do tests make?\n\n**4. Test Execution Analysis**:\n- Run tests with debug instrumentation\n- Add temporary logging to tests\n- Compare test expectations vs reality\n\n**5. Historical Test Analysis**:\n- When were relevant tests last modified?\n- Were any tests disabled recently?\n- Do test changes correlate with bug appearance?\n\n**Create TestEvidence Matrix**:\n```\n| Hypothesis | Supporting Tests | Contradicting Tests | Coverage Gaps | Confidence Impact |\n|------------|------------------|---------------------|---------------|-------------------|\n| H1 | TestA, TestB | TestC (partially) | Edge case X | +2 confidence |\n```\n\n**Update each hypothesis** with test evidence findings.",
336
+ "agentRole": "You are a test analysis specialist validating hypotheses against the existing test suite. Your goal is to use tests as objective evidence for or against each hypothesis.",
337
+ "guidance": [
338
+ "Tests are the codified understanding of system behavior",
339
+ "A hypothesis contradicted by passing tests needs reconsideration",
340
+ "Missing test coverage often indicates where bugs hide",
341
+ "Update hypothesis confidence based on test evidence"
342
+ ],
343
+ "requireConfirmation": false
344
+ },
345
+ {
346
+ "id": "phase-2e-hypothesis-verification",
347
+ "type": "loop",
348
+ "title": "Phase 2e: Hypothesis Verification & Refinement",
349
+ "runCondition": {
350
+ "var": "hypothesesToValidate",
351
+ "not_equals": null
352
+ },
353
+ "loop": {
354
+ "type": "forEach",
355
+ "items": "hypothesesToValidate",
356
+ "itemVar": "hypothesis",
357
+ "indexVar": "hypothesisIndex",
358
+ "maxIterations": 10
359
+ },
360
+ "body": [
361
+ {
362
+ "id": "verify-against-code",
363
+ "title": "Deep Code Verification for {{hypothesis.id}}",
364
+ "prompt": "**DEEP VERIFICATION for {{hypothesis.id}}**\n\n**Goal**: Verify hypothesis assumptions through deep code analysis.\n\nUse recursiveAnalysis() on key components:\n\n1. **Component Analysis (3 levels deep)**:\n - Level 1: Direct implementation of suspected component\n - Level 2: All direct dependencies and callers\n - Level 3: Transitive dependencies and integration points\n\n2. **State & Data Flow Verification**:\n - How does data actually flow through this component?\n - What state transformations occur?\n - Are there hidden side effects?\n\n3. **Error Path Analysis**:\n - Trace all error handling paths\n - Find where errors could originate\n - Check error propagation matches hypothesis\n\n4. **Concurrency Check** (if applicable):\n - Race conditions possible?\n - Shared state issues?\n - Timing dependencies?\n\n**Output**: Deep verification findings for {{hypothesis.id}}",
365
+ "agentRole": "You are performing deep verification of hypothesis {{hypothesis.id}}, diving 3+ levels deep to ensure thorough understanding.",
366
+ "guidance": [
367
+ "This is verification step 1 of 3 for {{hypothesis.id}}",
368
+ "Go deeper than the initial analysis - follow every lead",
369
+ "Document any new discoveries that affect the hypothesis"
370
+ ],
371
+ "requireConfirmation": false
372
+ },
373
+ {
374
+ "id": "check-contradictions",
375
+ "title": "Search for Contradicting Evidence",
376
+ "prompt": "**CONTRADICTION SEARCH for {{hypothesis.id}}**\n\n**Goal**: Actively search for evidence that contradicts this hypothesis.\n\n1. **Code Pattern Contradictions**:\n - Search for code that assumes opposite behavior\n - Find defensive checks that prevent this scenario\n - Look for comments indicating different understanding\n\n2. **Test Contradictions**:\n - Tests that would fail if hypothesis were true\n - Tests that explicitly verify opposite behavior\n - Integration tests showing different flow\n\n3. **Historical Contradictions**:\n - Git history showing intentional design decisions\n - PRs or issues discussing this behavior\n - Documentation stating different intent\n\n4. **Runtime Contradictions**:\n - Logs showing successful execution through suspected path\n - Metrics indicating normal behavior\n - Other systems depending on current behavior\n\n**Be a skeptic** - try to disprove {{hypothesis.id}}",
377
+ "agentRole": "You are a skeptical investigator trying to find flaws in hypothesis {{hypothesis.id}}.",
378
+ "guidance": [
379
+ "Actively search for contradicting evidence",
380
+ "Check assumptions against reality",
381
+ "Consider alternative explanations"
382
+ ],
383
+ "requireConfirmation": false
384
+ },
385
+ {
386
+ "id": "refine-or-replace",
387
+ "title": "Refine Hypothesis {{hypothesis.id}}",
388
+ "prompt": "**REFINEMENT DECISION for {{hypothesis.id}}**\n\nBased on deep verification and contradiction search:\n\n1. **Assessment**:\n - New evidence supporting: [list]\n - New evidence contradicting: [list]\n - Unverified assumptions: [list]\n - Confidence change: [+/- points]\n\n2. **Refinement Options**:\n - **Keep as-is**: Evidence strongly supports current formulation\n - **Refine**: Adjust hypothesis based on new understanding\n - **Replace**: Fundamentally flawed, create new hypothesis\n - **Merge**: Combine with another hypothesis\n\n3. **If Refining/Replacing**:\n - Update hypothesis description\n - Adjust evidence strength score\n - Revise validation plan\n - Document why changed\n\n4. **Update Context**:\n - Use updateInvestigationContext('Hypothesis Registry', updated hypothesis)\n - Note verification findings\n\n**Output**: Updated hypothesis with refined understanding",
389
+ "agentRole": "You are making the final decision on hypothesis {{hypothesis.id}} based on verification findings.",
390
+ "guidance": [
391
+ "Be willing to change hypotheses based on evidence",
392
+ "Document all changes and reasoning",
393
+ "Update confidence scores appropriately"
394
+ ],
395
+ "requireConfirmation": false
396
+ }
397
+ ],
398
+ "requireConfirmation": false
399
+ },
400
+ {
401
+ "id": "phase-3-4-5-validation-loop",
402
+ "type": "loop",
403
+ "title": "Hypothesis Validation Loop (Phases 3-4-5)",
404
+ "loop": {
405
+ "type": "forEach",
406
+ "items": "hypothesesToValidate",
407
+ "itemVar": "currentHypothesis",
408
+ "indexVar": "hypothesisIndex",
409
+ "maxIterations": 5
410
+ },
411
+ "body": [
412
+ {
413
+ "id": "loop-phase-3-instrumentation",
414
+ "title": "Phase 3: Debug Instrumentation for {{currentHypothesis.id}}",
415
+ "prompt": "**DEBUGGING INSTRUMENTATION for {{currentHypothesis.id}}**\n\n**Hypothesis**: {{currentHypothesis.description}}\n\n**IMPLEMENT SMART LOGGING**:\n\n1. **Standard Format**: Use instrumentCode(location, '{{currentHypothesis.id}}')\n ```\n className.methodName [{{currentHypothesis.id}}] {timestamp}: Specific message\n ```\n\n2. **Deduplication Implementation**:\n ```javascript\n // Add to each instrumentation point\n const debugState = { lastMsg: '', count: 0 };\n function smartLog(msg) {\n if (debugState.lastMsg === msg) {\n debugState.count++;\n if (debugState.count % 10 === 0) {\n console.log(`[{{currentHypothesis.id}}] ${msg} x${debugState.count}`);\n }\n } else {\n if (debugState.count > 1) {\n console.log(`[{{currentHypothesis.id}}] Previous message x${debugState.count}`);\n }\n console.log(`[{{currentHypothesis.id}}] ${msg}`);\n debugState.lastMsg = msg;\n debugState.count = 1;\n }\n }\n ```\n\n3. **Operation Grouping**:\n ```javascript\n console.log(`=== {{currentHypothesis.id}}: Operation ${opName} Start ===`);\n const startTime = Date.now();\n // ... operation code with smartLog() calls ...\n console.log(`=== {{currentHypothesis.id}}: Operation ${opName} End (${Date.now() - startTime}ms) ===`);\n ```\n\n4. **Test Instrumentation**:\n - Add debugging to relevant test files\n - Instrument test setup/teardown\n - Log test assumptions vs actual behavior\n\n5. **High-Frequency Aggregation**:\n - For loops/iterations, log summary every 100 iterations\n - For events, create time-window summaries\n - Track unique values and their counts\n\n**OUTPUT**: Instrumented code ready to produce clean, manageable logs for {{currentHypothesis.id}}",
416
+ "agentRole": "You are instrumenting code specifically to validate hypothesis {{currentHypothesis.id}}. Focus on targeted evidence collection.",
417
+ "guidance": [
418
+ "This is hypothesis {{hypothesisIndex + 1}} of {{hypothesesToValidate.length}}",
419
+ "Tailor instrumentation to the specific hypothesis",
420
+ "Ensure non-intrusive implementation"
421
+ ],
422
+ "requireConfirmation": false
423
+ },
424
+ {
425
+ "id": "loop-phase-4-evidence",
426
+ "title": "Phase 4: Evidence Collection for {{currentHypothesis.id}}",
427
+ "prompt": "**EVIDENCE COLLECTION for {{currentHypothesis.id}}**\n\n**Execute instrumented code and collect evidence:**\n1. Run the instrumented test/reproduction\n2. Collect all {{currentHypothesis.id}}_DEBUG logs\n3. Analyze results against validation criteria\n4. Document evidence quality and relevance\n\n**TEST EXECUTION EVIDENCE**:\n- Run instrumented tests for {{currentHypothesis.id}}\n- Collect test debug output\n- Note any test failures or unexpected behavior\n- Compare with production bug behavior\n\n**EVIDENCE ASSESSMENT:**\n- Does evidence support {{currentHypothesis.id}}? (Yes/No/Partial)\n- Evidence quality score (1-10)\n- Contradicting evidence found?\n- Additional evidence needed?\n\n**If log volume >500 lines, use aggregateDebugLogs() and create sub-analysis prompt.**\n\n**OUTPUT**: Evidence assessment for {{currentHypothesis.id}} with quality scoring",
428
+ "agentRole": "You are collecting and analyzing evidence specifically for hypothesis {{currentHypothesis.id}}.",
429
+ "guidance": [
430
+ "Focus on evidence directly related to this hypothesis",
431
+ "Be objective in assessment - negative evidence is valuable",
432
+ "Track evidence quality quantitatively"
433
+ ],
434
+ "requireConfirmation": false
435
+ },
436
+ {
437
+ "id": "loop-phase-5-synthesis",
438
+ "title": "Phase 5: Evidence Synthesis for {{currentHypothesis.id}}",
439
+ "prompt": "**EVIDENCE SYNTHESIS for {{currentHypothesis.id}}**\n\n**Synthesize findings:**\n1. **Evidence Summary**: What did we learn about {{currentHypothesis.id}}?\n2. **Confidence Update**: Based on evidence, rate confidence this is the root cause (0-10)\n3. **Status Update**: Mark hypothesis as Confirmed/Refuted/Needs-More-Evidence\n\n**If {{currentHypothesis.id}} is confirmed with high confidence (>8.0):**\n- Set `rootCauseFound` = true\n- Set `rootCauseHypothesis` = {{currentHypothesis.id}}\n- Update `currentConfidence` with confidence score\n\n**If all hypotheses validated but confidence <9.0:**\n- Consider additional investigation needs\n- Document what evidence is still missing\n\n**Context Update**:\n- Use updateInvestigationContext('Evidence Log', evidence summary for {{currentHypothesis.id}})\n- Every 3 iterations: Use trackInvestigation('Validation Progress', '{{hypothesisIndex + 1}}/{{hypothesesToValidate.length}} hypotheses validated')",
440
+ "agentRole": "You are synthesizing evidence to determine if {{currentHypothesis.id}} is the root cause.",
441
+ "guidance": [
442
+ "Update hypothesis status based on evidence",
443
+ "Track overall investigation confidence",
444
+ "Be ready to exit loop if root cause found with high confidence"
445
+ ],
446
+ "requireConfirmation": false
447
+ }
448
+ ],
449
+ "runCondition": {
450
+ "and": [
451
+ { "var": "rootCauseFound", "not_equals": true },
452
+ { "var": "validationIterations", "lt": 3 }
453
+ ]
454
+ },
455
+ "requireConfirmation": false
456
+ },
457
+ {
458
+ "id": "phase-4a-controlled-experimentation",
459
+ "title": "Phase 4a: Controlled Code Experiments",
460
+ "runCondition": {
461
+ "var": "currentConfidence",
462
+ "lt": 8.0
463
+ },
464
+ "prompt": "**CONTROLLED EXPERIMENTATION** - When observation isn't enough, experiment!\n\n**Current Top Hypothesis**: {{hypothesesToValidate[0].id}} (Confidence: {{currentConfidence}}/10)\n\n**EXPERIMENT TYPES** (use controlledModification()):\n\n1. **Guard Additions (Non-Breaking)**:\n ```javascript\n // Add defensive check that logs but doesn't change behavior\n if (unexpectedCondition) {\n console.error('[H1_GUARD] Unexpected state detected:', state);\n // Continue normal execution\n }\n ```\n\n2. **Assertion Injections**:\n ```javascript\n // Add assertion that would fail if hypothesis is correct\n console.assert(expectedCondition, '[H1_ASSERT] Hypothesis H1 violated!');\n ```\n\n3. **Minimal Fix Test**:\n ```javascript\n // Apply minimal fix for hypothesis, see if bug disappears\n if (process.env.DEBUG_FIX_H1 === 'true') {\n // Apply hypothesized fix\n return fixedBehavior();\n }\n ```\n\n4. **Controlled Breaking**:\n ```javascript\n // Temporarily break suspected component to verify involvement\n if (process.env.DEBUG_BREAK_H1 === 'true') {\n throw new Error('[H1_BREAK] Intentionally breaking to test hypothesis');\n }\n ```\n\n**PROTOCOL**:\n1. Choose experiment type based on confidence and risk\n2. Implement modification with clear DEBUG markers\n3. Use createInvestigationBranch() if not already on investigation branch\n4. Commit: `git commit -m \"DEBUG: {{experiment_type}} for {{currentHypothesis.id}}\"`\n5. Run reproduction steps\n6. Use collectEvidence() to gather results\n7. Revert changes: `git revert HEAD`\n8. Document results in ExperimentResults/{{currentHypothesis.id}}.md\n\n**SAFETY LIMITS**:\n- Max 3 experiments per hypothesis\n- Each experiment in separate commit\n- Always revert after evidence collection\n- Document everything in INVESTIGATION_CONTEXT.md\n\n**UPDATE**:\n- Hypothesis confidence based on experimental results\n- Use updateInvestigationContext('Experiment Results', experiment details and outcomes)\n- Track failed experiments in 'Dead Ends & Lessons' section",
465
+ "agentRole": "You are a careful experimenter using controlled code modifications to validate hypotheses. Safety and reversibility are paramount.",
466
+ "guidance": [
467
+ "Start with non-breaking experiments (guards, logs)",
468
+ "Only use breaking experiments if essential",
469
+ "Every change must be easily reversible",
470
+ "Document rationale for each experiment type",
471
+ "Consider test environment experiments first"
472
+ ],
473
+ "requireConfirmation": {
474
+ "or": [
475
+ {"var": "automationLevel", "equals": "Low"},
476
+ {"var": "automationLevel", "equals": "Medium"},
477
+ {"and": [
478
+ {"var": "automationLevel", "equals": "High"},
479
+ {"var": "currentConfidence", "lt": 6.0}
480
+ ]}
481
+ ]
482
+ },
483
+ "validationCriteria": [
484
+ {
485
+ "type": "contains",
486
+ "value": "commit",
487
+ "message": "Must specify commit message for experiment"
488
+ }
489
+ ]
490
+ },
491
+ {
492
+ "id": "phase-3a-observability-setup",
493
+ "title": "Phase 3a: Distributed System Observability",
494
+ "runCondition": {
495
+ "var": "isDistributed",
496
+ "equals": true
497
+ },
498
+ "prompt": "**OBSERVABILITY** - Set up three-pillar strategy:\n\n**METRICS**: Identify key indicators (latency, errors)\n**TRACES**: Enable request path tracking\n**LOGS**: Ensure correlation IDs present\n\n**OUTPUT**: Observability checklist completed.",
499
+ "agentRole": "You are a distributed systems expert who thinks in terms of emergent behaviors and system-wide patterns.",
500
+ "guidance": [
501
+ "METRICS SELECTION: Focus on RED metrics (Rate, Errors, Duration) for each service",
502
+ "TRACE COVERAGE: Ensure spans cover all service boundaries and key operations",
503
+ "CORRELATION IDS: Verify IDs propagate through entire request lifecycle",
504
+ "AGGREGATION READY: Set up centralized collection for cross-service analysis",
505
+ "BASELINE ESTABLISHMENT: Capture normal behavior metrics for comparison"
506
+ ]
507
+ },
508
+ {
509
+ "id": "phase-4a-distributed-evidence",
510
+ "title": "Phase 4a: Multi-Service Evidence Collection",
511
+ "runCondition": {
512
+ "var": "isDistributed",
513
+ "equals": true
514
+ },
515
+ "prompt": "**DISTRIBUTED ANALYSIS**:\n\n1. Check METRICS for anomalies\n2. Follow TRACES for request path\n3. Correlate LOGS across services\n4. Identify cascade points\n\n**OUTPUT**: Service interaction map with failure points.",
516
+ "agentRole": "You are a systems detective who can trace failures across service boundaries.",
517
+ "guidance": [
518
+ "ANOMALY DETECTION: Look for deviations in latency, error rates, or traffic patterns",
519
+ "TRACE ANALYSIS: Follow request ID through all services to find failure point",
520
+ "LOG CORRELATION: Use timestamp windows and correlation IDs to link events",
521
+ "CASCADE IDENTIFICATION: Look for timeout chains or error propagation patterns",
522
+ "VISUAL MAPPING: Create service dependency diagram with failure annotations"
523
+ ]
524
+ },
525
+ {
526
+ "id": "phase-4b-cognitive-reset",
527
+ "title": "Phase 4b: Cognitive Reset & Progress Review",
528
+ "runCondition": {
529
+ "var": "validationIterations",
530
+ "gte": 2
531
+ },
532
+ "prompt": "**COGNITIVE RESET** - Step back and review:\n\n1. Summarize findings so far\n2. List eliminated possibilities\n3. Identify investigation blind spots\n4. Reformulate approach if needed\n\n**DECIDE**: Continue current path or pivot strategy?",
533
+ "agentRole": "You are a strategic advisor who helps maintain perspective during complex investigations.",
534
+ "guidance": [
535
+ "PROGRESS SUMMARY: Write concise bullet points of key findings and eliminations",
536
+ "BLIND SPOT CHECK: What areas haven't been investigated? What assumptions remain?",
537
+ "PATTERN RECOGNITION: Look for investigation loops or repeated dead ends",
538
+ "STRATEGY EVALUATION: Is current approach yielding diminishing returns?",
539
+ "PIVOT CRITERIA: Consider new approach if last 3 iterations provided no new insights"
540
+ ]
541
+ },
542
+ {
543
+ "id": "phase-5a-final-confidence",
544
+ "title": "Phase 5a: Final Confidence Assessment",
545
+ "prompt": "**FINAL CONFIDENCE ASSESSMENT** - Evaluate the investigation results.\n\n**If root cause found (rootCauseFound = true):**\n- Review all evidence for {{rootCauseHypothesis}}\n- Perform adversarial challenge\n- Calculate final confidence score\n\n**If no high-confidence root cause:**\n- Document what was learned\n- Identify remaining unknowns\n- Recommend next investigation steps\n\n**CONFIDENCE CALCULATION:**\n- Evidence Quality (1-10)\n- Explanation Completeness (1-10)\n- Alternative Likelihood (1-10, inverted)\n- Final = (Quality × 0.4) + (Completeness × 0.4) + (Alternative × 0.2)\n\n**CONTEXT UPDATE**:\n- Use trackInvestigation('Investigation Complete', 'Confidence: {{finalConfidence}}/10')\n- Use addResumptionJson('phase-5a-final-confidence')\n- Document lessons learned in 'Dead Ends & Lessons' section\n\n**OUTPUT**: Final confidence assessment with recommendations",
546
+ "agentRole": "You are making the final determination about the root cause with rigorous confidence assessment.",
547
+ "guidance": [
548
+ "Be honest about confidence levels",
549
+ "Document all remaining uncertainties",
550
+ "Provide clear next steps if confidence is low"
551
+ ],
552
+ "validationCriteria": [
553
+ {
554
+ "type": "regex",
555
+ "pattern": "Final.*=.*[0-9\\.]+",
556
+ "message": "Must calculate final confidence score"
557
+ }
558
+ ],
559
+ "hasValidation": true
560
+ },
561
+ {
562
+ "id": "phase-2c-hypothesis-assumptions",
563
+ "title": "Phase 2c: Hypothesis Assumption Audit",
564
+ "prompt": "**AUDIT** each hypothesis for hidden assumptions:\n\n**FOR EACH HYPOTHESIS**:\n- List implicit assumptions\n- Rate assumption confidence (1-10)\n- Identify verification approach\n\n**REJECT** hypotheses built on unverified assumptions.",
565
+ "agentRole": "You are a rigorous scientist who rejects any hypothesis not grounded in verified facts.",
566
+ "guidance": [
567
+ "EXPLICIT LISTING: Write out every assumption, no matter how obvious it seems",
568
+ "CONFIDENCE SCORING: Rate 1-10 based on evidence quality, not intuition",
569
+ "VERIFICATION PLAN: For each assumption, specify how it can be tested",
570
+ "REJECTION CRITERIA: Any assumption with confidence <7 requires verification",
571
+ "DOCUMENT RATIONALE: Explain why each assumption is accepted or needs testing"
572
+ ],
573
+ "validationCriteria": [
574
+ {
575
+ "type": "contains",
576
+ "value": "Assumption confidence",
577
+ "message": "Must rate assumption confidence for each hypothesis"
578
+ }
579
+ ],
580
+ "hasValidation": true
581
+ },
582
+ {
583
+ "id": "phase-6-diagnostic-writeup",
584
+ "title": "Phase 6: Comprehensive Diagnostic Writeup",
585
+ "prompt": "**DIAGNOSTIC WRITEUP** - Create DIAGNOSTIC_REPORT.md:\n\n1. **Executive Summary**: Bug description, root cause, confidence, scope\n2. **Technical Deep Dive**: Root cause analysis, code locations, execution flow, state\n3. **Investigation Methodology**: Timeline, hypothesis evolution (H1-H5), evidence ratings\n4. **Historical Context**: findSimilarBugs() results, previous fixes, patterns, lessons\n5. **Knowledge Transfer**: Skills needed, prevention measures, action items, testing strategy\n6. **Context Finalization**: updateInvestigationContext('Final'), archive complete context\n\n**Format**: Clear sections, code snippets, 1500-3000 words\n**Goal**: Enable bug fixing, knowledge transfer, and organizational learning",
586
+ "agentRole": "You are a senior technical writer and diagnostic documentation specialist with expertise in creating comprehensive, actionable bug reports for enterprise environments. Your strength lies in translating complex technical investigations into clear, structured documentation that enables effective problem resolution, knowledge transfer, and organizational learning. You excel at creating reports that serve immediate fixing needs, long-term system improvement, and team collaboration.",
587
+ "guidance": [
588
+ "ENTERPRISE FOCUS: Write for multiple stakeholders including developers, managers, and future team members",
589
+ "KNOWLEDGE TRANSFER: Include methodology and reasoning, not just conclusions",
590
+ "COLLABORATIVE DESIGN: Structure content for peer review and team coordination",
591
+ "COMPREHENSIVE COVERAGE: Include all information needed for resolution and prevention",
592
+ "ACTIONABLE DOCUMENTATION: Provide specific, concrete next steps with clear ownership"
593
+ ]
594
+ }
595
+ ]
596
+ }