@exaudeus/workrail 0.5.0 → 0.6.1-beta.0
package/package.json CHANGED
@@ -72,7 +72,28 @@
  "INTELLIGENT QUESTIONING: Use analysis findings to ask targeted, valuable clarifying questions.",
  "ADAPTIVE STRUCTURE: Adjust documentation format and depth based on scope complexity and user needs.",
  "CONTEXT PRESERVATION: Maintain detailed workflow context for seamless resumption across sessions.",
- "COLLABORATIVE READY: Structure workflow for easy handoffs and team collaboration on large scopes."
+ "COLLABORATIVE READY: Structure workflow for easy handoffs and team collaboration on large scopes.",
+ "**VERTICAL SLICE FUNCTIONS:**",
+ "fun findSliceCandidates(scope, analysis) = 'Propose slice candidates grouped by feature/aspect (e.g., Realtime, Sending, Notifications, Storage, API). Use dependency clusters, call graphs, and data-flow boundaries. Return sliceCandidates[].'",
+ "fun chooseSliceHeuristics(preferences) = 'Select grouping heuristics: user-journey, bounded-context, subsystem, data-flow, interface-surface. Set sliceHeuristics context variable.'",
+ "fun deriveSlices(candidates, heuristics, components) = 'Generate slice definitions with: name, rationale, includedComponents[], entryPoints[], dependencies[], dataFlows[], risks[], plannedDocSections[]. Set sliceDocuments context variable.'",
+ "fun mapComponentsToSlices(components, slices) = 'Assign each component to ≥1 slice (allow overlaps if justified). Produce coverage map and set orphanComponents[]. Update Coverage Matrix by slice.'",
+ "fun checkSliceCoverageGate() = 'Ensure 100% component coverage across slices; no orphanComponents; overlaps have rationale. Update Quality Gates with status.'",
+ "fun optimizeSliceDocStructure(slices) = 'For each slice, create doc skeleton: Overview, Responsibilities, Public APIs, Data Flow, Dependencies, Error Handling, Edge Cases, Troubleshooting, Quality Signals.'",
+ "**DIVIO/TEMPLATE & QUALITY FUNCTIONS:**",
+ "fun createSliceTemplates(slices) = 'Create per-slice pages: Tutorial.md, HowTo.md, Concepts.md, Reference.md with standard headers and placeholders.'",
+ "fun enforceDivioSections(slice) = 'Verify required sections exist per page. Missing sections become blockers.'",
+ "fun generateSliceQuickstart(slice) = 'Produce a runnable Quickstart achieving first success in <5 minutes. Record commands and expected outputs.'",
+ "fun importReferenceArtifacts(slice) = 'Import OpenAPI/GraphQL/TypeDoc/Sphinx outputs into Reference.md with anchors and language tabs.'",
+ "fun addInteractiveSamples(slice) = 'Add Mermaid flow diagrams and Postman/cURL/API console samples for key paths.'",
+ "fun generateDiagrams(slice) = 'Create Mermaid diagrams: sequence (key flows), component (architecture), state (lifecycles) as applicable; save under docs/diagrams.'",
+ "fun generateTables(slice) = 'Produce tables for API endpoints, config options, errors, metrics; include columns for version, deprecation, owners.'",
+ "fun lintLinksAndCrossRefs(docs) = 'Check all intra/inter-doc links, anchors, and references; produce report and fix list.'",
+ "fun requireOwnership(slice) = 'Ensure owner metadata (team, maintainers) exists for each slice; update governance table.'",
+ "fun checkHelloWorldGate(slice) = 'Validate Quickstart is runnable, complete, and <5 minutes including prerequisites.'",
+ "fun checkLinkLintGate(docs) = 'Block progression if any broken links/anchors remain. Append evidence to progress doc.'",
+ "fun checkOwnershipGate(slices) = 'Block progression if any slice lacks owners; show missing entries and required actions.'",
+ "fun checkVisualsGate(slice) = 'Enforce presence of required visuals: ≥1 sequence diagram, ≥1 component diagram, and ≥1 key table per slice unless not applicable with rationale.'"
  ],

  "steps": [
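The new metaGuidance entries above describe the vertical-slice helpers only in prose. As a rough illustration of the contract they imply, a coverage check along the lines of mapComponentsToSlices and checkSliceCoverageGate might look like the sketch below; the shapes and return type are assumptions for illustration, not part of the package.

```typescript
// Hypothetical shapes: the workflow only describes these functions in prose.
interface Slice {
  name: string;
  includedComponents: string[];
}

interface CoverageReport {
  coverage: Record<string, string[]>; // component -> slices that document it
  orphanComponents: string[];         // components not mapped to any slice
  overlaps: Record<string, string[]>; // components mapped to more than one slice
}

function mapComponentsToSlices(components: string[], slices: Slice[]): CoverageReport {
  const coverage: Record<string, string[]> = {};
  for (const component of components) {
    coverage[component] = slices
      .filter((s) => s.includedComponents.includes(component))
      .map((s) => s.name);
  }
  const orphanComponents = components.filter((c) => coverage[c].length === 0);
  const overlaps = Object.fromEntries(
    Object.entries(coverage).filter(([, owners]) => owners.length > 1),
  );
  return { coverage, orphanComponents, overlaps };
}

// The slice coverage gate passes only when every component is mapped somewhere.
function checkSliceCoverageGate(report: CoverageReport): boolean {
  return report.orphanComponents.length === 0;
}
```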
@@ -267,6 +288,34 @@
  ],
  "requireConfirmation": true
  },
+ {
+ "id": "phase-4a-vertical-slice-derivation",
+ "title": "Phase 4a: Vertical Slice Derivation & Mapping",
+ "prompt": "**VERTICAL SLICE PLANNING** - Derive a set of focused, standalone documents grouped by aspect/feature of the scope.\n\n**INPUTS**:\n- Use results from Phases 1-2 (analysis, existing docs) and answers from Phase 3 clarifications\n\n**TASKS**:\n1) Use findSliceCandidates(scope, analysis) to propose candidate slices (e.g., Realtime, Sending, Receiving, Storage, API, Admin, Observability)\n2) Use chooseSliceHeuristics(userPreferences) to pick grouping strategy (bounded context, user journey, subsystem, data flow)\n3) Use deriveSlices(sliceCandidates, sliceHeuristics, registeredComponents) to generate sliceDocuments[] with: name, rationale, includedComponents[], entryPoints[], dependencies[], dataFlows[], risks[], plannedDocSections[]\n4) Use mapComponentsToSlices(registeredComponents, sliceDocuments) to ensure every component is assigned; set orphanComponents[] if any\n5) Use optimizeSliceDocStructure(sliceDocuments) to define consistent per-slice document skeleton\n\n**OUTPUTS**:\n- Set context: useSlices=true, sliceHeuristics, sliceDocuments[]\n- Create SlicePlan.md summarizing slices with coverage table and rationales\n- Update DOCUMENTATION_CONTEXT.md with slice plan and navigation\n- If useSlices=true, plan to set documentationStructure = sliceDocuments in Phase 4\n\n**PROGRESS TRACKING**:\n- Log analysis step for slice planning\n- Update Coverage Matrix by slice and component\n- Calculate preliminary documentation units count",
+ "agentRole": "You are designing vertical slices that are cohesive, maintainable, and map cleanly to code boundaries for standalone docs that compose the whole.",
+ "guidance": [
+ "Favor slices that align with dependency clusters and clear data-flow boundaries",
+ "Avoid giant slices; keep each doc focused and navigable",
+ "Allow limited overlap only when justified by shared cross-cutting code",
+ "Ensure each slice can be read as 'every page is page one'"
+ ],
+ "requireConfirmation": true
+ },
+ {
+ "id": "phase-4b-slice-coverage-gate",
+ "title": "Phase 4b: Slice Coverage Gate",
+ "prompt": "**SLICE COVERAGE GATE** - Enforce that all components are covered by at least one slice and no orphans remain.\n\n**VALIDATION**:\nUse checkSliceCoverageGate() to verify:\n- 100% of registered components mapped to sliceDocuments\n- Overlaps (components in >1 slice) have recorded rationale\n- No orphanComponents remain\n- Slice skeletons defined (optimizeSliceDocStructure)\n\n**ENFORCEMENT**:\nUse enforceProgressGates() to block if any condition fails. List specific missing components or rationale.\n\n**OUTPUT**:\n- Gate status recorded in progress doc\n- Proceed only when coverage is complete\n- Set documentationStructure = sliceDocuments when useSlices=true",
+ "agentRole": "You are enforcing coverage so vertical slices truly partition or appropriately overlay the scope without gaps.",
+ "guidance": [
+ "No component may remain unmapped",
+ "Keep slices cohesive and justifiable",
+ "Document any intentional overlaps with reasons"
+ ],
+ "validationCriteria": [
+ { "type": "contains", "value": "Slice Coverage: ✅", "message": "All components must be mapped to slices" }
+ ],
+ "requireConfirmation": true
+ },

  {
  "id": "phase-4-documentation-planning",
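Phase 4a enumerates the fields each derived slice document should carry. A minimal sketch of that shape, with field names taken from the prompt and types assumed for illustration:

```typescript
// Field names come from the Phase 4a prompt; the types are guesses for illustration.
interface SliceDocument {
  name: string;
  rationale: string;
  includedComponents: string[];
  entryPoints: string[];
  dependencies: string[];
  dataFlows: string[];
  risks: string[];
  plannedDocSections: string[];
}

// Context variables the step says it sets before Phase 4b runs its gate.
interface SlicePlanningContext {
  useSlices: boolean;
  sliceHeuristics: string;          // e.g. "bounded-context" or "user-journey"
  sliceDocuments: SliceDocument[];
  orphanComponents: string[];       // should be empty before the coverage gate passes
}
```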
@@ -285,86 +334,83 @@
  {
  "id": "phase-5-comprehensive-documentation",
  "type": "loop",
- "title": "Phase 5: Comprehensive Documentation Creation",
+ "title": "Phase 5: Comprehensive Documentation Creation (Per-Slice Divio Pages)",
  "loop": {
- "type": "forEach",
+ "type": "forEach",
  "items": "documentationStructure",
- "itemVar": "
- "indexVar": "
- "maxIterations":
+ "itemVar": "currentSlice",
+ "indexVar": "sliceIndex",
+ "maxIterations": 30
  },
  "body": [
  {
- "id": "
- "title": "
- "prompt": "
- "agentRole": "You
+ "id": "prepare-slice-templates",
+ "title": "Prepare Slice Templates",
+ "prompt": "Use createSliceTemplates([currentSlice]) and enforceDivioSections(currentSlice).\nCreate files: Tutorial.md, HowTo.md, Concepts.md, Reference.md with headers and placeholders.\nRecord ownership metadata and navigation links.",
+ "agentRole": "You scaffold Divio-compliant pages for the slice.",
  "guidance": [
- "
- "
- "Ensure structure supports both human and agent consumption",
- "Initialize progress tracking for this document"
+ "Ensure required sections exist on each page",
+ "Set up standard headers and page navigation"
  ],
  "requireConfirmation": false
  },
-
  {
- "id": "
- "title": "
- "prompt": "
- "agentRole": "You
+ "id": "generate-quickstart-and-hello-world",
+ "title": "Generate Quickstart and Hello World Gate",
+ "prompt": "Run generateSliceQuickstart(currentSlice). Produce a minimal runnable path to success in <5 minutes.\nThen run checkHelloWorldGate(currentSlice). Block if unmet and list fixes.",
+ "agentRole": "You ensure each slice has a fast, runnable quickstart.",
+ "guidance": [
+ "Prioritize real, copy-pasteable commands",
+ "Call out prerequisites explicitly"
+ ],
+ "requireConfirmation": true
+ },
+ {
+ "id": "synthesize-concepts-and-howto",
+ "title": "Write Concepts and How‑to Pages",
+ "prompt": "Populate Concepts.md with architecture, responsibilities, data flow, dependencies, and design rationale from analysis.\nPopulate HowTo.md with the top 3-7 tasks developers perform, each step-by-step with code. Cross-link to Tutorial and Reference.",
+ "agentRole": "You synthesize analysis into clear concepts and actionable how‑tos.",
  "guidance": [
- "
- "
- "Integrate user clarifications naturally",
- "Balance depth with clarity for the intended audience"
+ "Ground every assertion in code evidence",
+ "Prefer tasks developers actually do"
  ],
  "requireConfirmation": false
  },
-
  {
- "id": "
- "title": "
- "prompt": "
- "agentRole": "You
+ "id": "import-reference-and-samples",
+ "title": "Import Reference and Add Interactive Samples",
+ "prompt": "Use importReferenceArtifacts(currentSlice) to generate Reference.md from OpenAPI/TypeDoc/etc. Add language tabs.\nUse addInteractiveSamples(currentSlice) to include Mermaid diagrams and Postman/cURL.",
+ "agentRole": "You generate authoritative, current reference and interactive artifacts.",
  "guidance": [
- "
- "
- "Reference specific analysis sources for credibility",
- "Maintain consistency across all sections"
+ "Reference must be source-of-truth and up-to-date",
+ "Include at least one sequence or flow diagram"
  ],
  "requireConfirmation": false
  },
-
  {
- "id": "
- "title": "
- "prompt": "
- "agentRole": "You
+ "id": "generate-visuals-and-tables",
+ "title": "Generate Required Visuals and Tables",
+ "prompt": "Run generateDiagrams(currentSlice) to produce sequence/component/state diagrams as applicable.\nRun generateTables(currentSlice) to create API/config/errors/metrics tables with required columns.",
+ "agentRole": "You produce comprehensive visuals and structured tables for clarity.",
  "guidance": [
- "
- "
- "Ensure consistent terminology throughout",
- "Complete all progress tracking requirements"
+ "Prefer Mermaid for diagrams stored as text",
+ "Ensure tables are complete and normalized"
  ],
  "requireConfirmation": false
  },
-
  {
- "id": "
- "title": "Internal
- "prompt": "
- "agentRole": "You
+ "id": "link-lint-and-review",
+ "title": "Link Lint and Internal Review",
+ "prompt": "Run lintLinksAndCrossRefs(currentSlice.docs). Block via checkLinkLintGate(currentSlice.docs) if any broken links.\nRun checkVisualsGate(currentSlice) to enforce diagrams/tables presence.\nPerform internal review; rate documentQualityScore (≥8 required).",
+ "agentRole": "You enforce link integrity and quality standards.",
  "guidance": [
- "
- "
- "Verify technical accuracy by checking against code analysis",
- "Assess value from both human and agent user perspectives"
+ "All links and anchors must resolve",
+ "Quality score below 8 triggers fixes"
  ],
  "requireConfirmation": {
  "or": [
- {"var": "documentQualityScore", "lt": 8},
- {"var": "automationLevel", "equals": "Low"}
+ { "var": "documentQualityScore", "lt": 8 },
+ { "var": "automationLevel", "equals": "Low" }
  ]
  }
  }
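The reworked Phase 5 loop runs once per slice and scaffolds four Divio-style pages before filling them in. A toy sketch of what the scaffolding step describes, where the docs root, directory layout, and header text are illustrative assumptions rather than package behavior:

```typescript
import { mkdirSync, writeFileSync } from "node:fs";
import { join } from "node:path";

// Create Tutorial/HowTo/Concepts/Reference placeholders for one slice,
// mirroring what createSliceTemplates is described as doing in the prompt.
function createSliceTemplates(sliceName: string, docsRoot = "docs"): string[] {
  const dir = join(docsRoot, sliceName);
  mkdirSync(dir, { recursive: true });
  const pages = ["Tutorial.md", "HowTo.md", "Concepts.md", "Reference.md"];
  for (const page of pages) {
    const title = page.replace(".md", "");
    writeFileSync(join(dir, page), `# ${sliceName} ${title}\n\n<!-- TODO: fill in -->\n`);
  }
  return pages.map((p) => join(dir, p));
}

// Usage: createSliceTemplates("Realtime") creates docs/Realtime/{Tutorial,HowTo,Concepts,Reference}.md
```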
@@ -375,7 +421,7 @@
  {
  "id": "phase-5a-documentation-completion-gate",
  "title": "Phase 5a: Documentation Completion Gate",
- "prompt": "**DOCUMENTATION COMPLETION GATE** - Verify all components are fully documented before final integration.\n\n**COMPLETION VALIDATION**:\nUse checkCoverageGate('documentation-complete') to verify:\n- ✅ Documentation Coverage: 100%
+ "prompt": "**DOCUMENTATION COMPLETION GATE** - Verify all components and slices are fully documented before final integration.\n\n**COMPLETION VALIDATION**:\nUse checkCoverageGate('documentation-complete') and checkSliceCoverageGate() to verify:\n- ✅ Documentation Coverage: 100% components documented across slices\n- ✅ Slice Quality: All slice pages (Tutorial/How‑to/Concepts/Reference) ≥8/10\n- ✅ Quickstart: All slices pass Hello World gate\n- ✅ Links: Link-lint gate passes for all docs\n- ✅ Agent Optimization: Documents structured for agent consumption\n\n**COMPREHENSIVE CHECK**:\nUse enforceProgressGates() to ensure:\n- No components or slices remain incomplete\n- All planned documents created and validated\n- Ownership metadata present per slice\n\n**FINAL METRICS**:\nUse calculateCompletionMetrics() to compute:\n- `documentationComplete` = 100% (required)\n- `overallQualityScore` = weighted average across slices\n- `documentsAtRisk` = count of pages below quality threshold\n\n**BLOCKING CONDITIONS**:\nCannot proceed if any slice fails quickstart/links/quality or any component unmapped.\n\n**OUTPUT**: Documentation completion gate status with specific requirements if blocked",
  "agentRole": "You are enforcing complete documentation coverage with quality standards before allowing final integration.",
  "guidance": [
  "No component can remain undocumented",
@@ -391,7 +437,7 @@
  },
  {
  "type": "contains",
- "value": "Quality
+ "value": "Slice Quality: ✅",
  "message": "Cannot proceed until all documents meet quality standards (≥8/10)"
  }
  ],
@@ -401,7 +447,7 @@
  {
  "id": "phase-6-final-integration",
  "title": "Phase 6: Final Integration & Navigation Setup",
- "prompt": "**FINAL INTEGRATION** - Create navigation, cross-references, and final documentation package.\n\n**INTEGRATION TASKS**:\n\n1. **Create
+ "prompt": "**FINAL INTEGRATION** - Create hub navigation, cross-references, and final documentation package.\n\n**INTEGRATION TASKS**:\n\n1. **Create Documentation Hub**:\n - Index page listing all slices (Tutorial, How‑to, Concepts, Reference per slice)\n - Global glossary and common patterns\n - Version switcher and changelog links\n\n2. **Cross-Reference Validation**:\n - Verify inter-slice links and \"See also\" references\n - Ensure bidirectional links where appropriate\n\n3. **Consistency Pass**:\n - Standardize terminology, style, and visuals across slices\n - Ensure code and samples use consistent languages/tabs\n\n4. **Completeness Verification**:\n - Confirm all slice gates passed (quickstart, links, quality)\n - Validate coverage against original scope boundaries\n\n5. **Agent Optimization**:\n - Add per-slice summaries and navigation metadata\n - Ensure each page is \"Every Page is Page One\" compliant\n\n**CREATE DOCUMENTATION PACKAGE**:\n- Hub index\n- Per-slice pages\n- Diagrams and samples\n- Change log\n\n**FINAL VALIDATION**: Use validateDocumentation(docs, scope)\n- Completeness and accuracy\n- Usability and maintainability\n\n**OUTPUT**: Complete, integrated documentation package ready for use",
  "agentRole": "You are finalizing the documentation package, ensuring it works as a cohesive whole and serves its intended purpose effectively.",
  "guidance": [
  "Focus on the user experience of consuming this documentation",
@@ -9,6 +9,8 @@
  "What was the last known working version or state if applicable?",
  "Are there any time constraints or urgency factors for this investigation?",
  "What level of system access do you have? (full codebase, limited access, production logs only)",
+ "What existing documentation is available? (README files, architecture docs, API docs, design documents, runbooks)",
+ "Do you have access to existing logs? (production logs, error logs, debug logs, metrics, traces)",
  "Do you have preferences for handling large log volumes? (sub-chat analysis, inline summaries only, or no preference for automatic decision)"
  ],
  "preconditions": [
@@ -109,61 +111,8 @@
  "requireConfirmation": false
  },
  {
- "id": "phase-0c-
- "
- "title": "Phase 0c: Reproducibility Verification Loop",
- "loop": {
- "type": "for",
- "count": 3,
- "maxIterations": 3,
- "iterationVar": "reproductionAttempt"
- },
- "body": [
- {
- "id": "reproduce-bug",
- "title": "Reproduction Attempt {{reproductionAttempt}}/3",
- "prompt": "**REPRODUCTION ATTEMPT {{reproductionAttempt}}/3**\n\nExecute the provided reproduction steps:\n1. Follow exact steps from bug report\n2. Document outcome (Success/Failure)\n3. Note any variations in behavior\n4. Capture error messages/stack traces\n\n**Update context:**\n- Set `reproductionResults[{{reproductionAttempt - 1}}]` = true/false\n- If failed, document why\n- Track any intermittent patterns",
- "agentRole": "You are systematically verifying bug reproducibility to ensure solid investigation foundation.",
- "guidance": [
- "Execute exactly as specified",
- "Document any deviations",
- "Capture all error details"
- ],
- "requireConfirmation": false
- }
- ],
- "requireConfirmation": false
- },
- {
- "id": "phase-0d-reproducibility-assessment",
- "title": "Phase 0d: Reproducibility Assessment",
- "prompt": "**ASSESS REPRODUCIBILITY**\n\nBased on 3 reproduction attempts:\n- **Success Rate**: Calculate percentage\n- **Pattern Analysis**: Identify any intermittent patterns\n- **Minimal Reproduction**: Create simplified test case if needed\n\n**DECISION:**\n- If 100% reproducible: Proceed to Phase 1\n- If intermittent: Apply stress techniques and document patterns\n- If 0% reproducible: Request more information from user\n\n**Set `isReproducible` = true/false based on assessment**",
- "agentRole": "You are assessing reproduction results to determine investigation viability.",
- "guidance": [
- "100% reproduction is ideal but not always required",
- "Document intermittent patterns for investigation",
- "Create minimal test case for complex scenarios"
- ],
- "validationCriteria": [
- {
- "type": "contains",
- "value": "reproducib",
- "message": "Must make reproducibility determination"
- }
- ],
- "hasValidation": true,
- "runCondition": {
- "var": "reproductionAttempt",
- "equals": 3
- }
- },
- {
- "id": "phase-0e-tool-check",
- "title": "Phase 0e: Tool Availability Verification",
- "runCondition": {
- "var": "isReproducible",
- "equals": true
- },
+ "id": "phase-0c-tool-check",
+ "title": "Phase 0c: Tool Availability Verification",
  "prompt": "**TOOL AVAILABILITY CHECK** - Verify required debugging tools before investigation.\n\n**CORE TOOLS CHECK:**\n1. **Analysis Tools**:\n - grep_search: Text pattern searching\n - read_file: File content reading\n - codebase_search: Semantic code search\n - Test availability, note any failures\n\n2. **Git Operations**:\n - Check git availability: `git --version`\n - If unavailable, set `gitAvailable = false`\n - Plan fallback: manual change tracking\n\n3. **Build/Test Tools** (based on projectType):\n - npm/yarn for JavaScript\n - Maven/Gradle for Java\n - pytest/unittest for Python\n - Document which are available\n\n4. **Debugging Tools**:\n - Language-specific debuggers\n - Profilers if needed\n - Log aggregation tools\n\n**FALLBACK STRATEGIES:**\n- grep_search fails → use file_search\n- codebase_search fails → use grep_search with context\n- Git unavailable → track changes in INVESTIGATION_CONTEXT.md\n- Build tools missing → focus on static analysis\n\n**OUTPUT**:\n- Set `availableTools` context variable\n- Set `toolLimitations` with any restrictions\n- Document fallback strategies in context\n\n**ADAPTATION**: Adjust investigation approach based on available tools.",
  "agentRole": "You are a tool availability specialist ensuring the investigation can proceed smoothly with available resources. You excel at creating fallback strategies.",
  "guidance": [
@@ -175,13 +124,9 @@
  "requireConfirmation": false
  },
  {
- "id": "phase-
- "title": "Phase
- "
- "var": "isReproducible",
- "equals": true
- },
- "prompt": "**CREATE INVESTIGATION CONTEXT**\n\nUse createInvestigationBranch(), then create INVESTIGATION_CONTEXT.md with:\n\n1. **Bug Summary**: ID, description, complexity, reproducibility, status, automation level\n2. **Progress Tracking**: Use visualProgress() to show phases completed/remaining\n3. **Environment**: Project type, debugging mechanism, architecture, tools, user preferences\n4-8. **Section Placeholders**: Analysis, Hypotheses, Evidence, Experiments, Dead Ends\n9. **Function Definitions**: Include all from metaGuidance\n10. **Resumption Instructions**:\n - workflow_get: id=\"systematic-bug-investigation-with-loops\", mode=\"preview\"\n - workflow_next: JSON with workflowId, completedSteps, context variables\n\n**Key Variables**: bugComplexity, projectType, isReproducible, debuggingMechanism, isDistributed, automationLevel, userDebugPreferences, availableTools\n\n**Set contextInitialized = true**",
+ "id": "phase-0d-create-context",
+ "title": "Phase 0d: Initialize Investigation Context",
+ "prompt": "**CREATE INVESTIGATION CONTEXT** - Initialize comprehensive tracking document.\n\nUse createInvestigationBranch() to set up version control, then create INVESTIGATION_CONTEXT.md:\n\n```markdown\n# Investigation Context\n\n## 1. Bug Summary\n- **ID**: {{bugId || 'investigation-' + Date.now()}}\n- **Description**: [from bug report]\n- **Complexity**: {{bugComplexity}}\n- **Started**: {{new Date().toISOString()}}\n- **Status**: Phase 0d - Context Initialization\n- **Automation Level**: {{automationLevel}}\n\n## 2. Progress Tracking\n{{visualProgress()}}\n✅ Completed: Phase 0 (Triage), Phase 0a (Assumptions), Phase 0b (User Preferences), Phase 0c (Tools)\n🔄 Current: Phase 0d (Context Creation)\n⏳ Remaining: Phase 1 (Analysis), Phase 2 (Hypotheses), Phase 3-5 (Validation), Phase 6 (Writeup)\n📊 Confidence: 0/10\n\n## 3. Environment & Setup\n- **Project Type**: {{projectType}}\n- **Debugging Mechanism**: {{debuggingMechanism}}\n- **Architecture**: {{isDistributed ? 'Distributed' : 'Monolithic'}}\n- **User Preferences**: {{userDebugPreferences}}\n- **Available Tools**: {{availableTools}}\n- **Tool Limitations**: {{toolLimitations || 'None'}}\n\n## 4. Analysis Findings\n*To be populated during Phase 1*\n\n## 5. Hypothesis Registry\n*To be populated during Phase 2*\n\n## 6. Evidence Log\n*To be populated during validation*\n\n## 7. Experiment Results\n*To be populated if experiments conducted*\n\n## 8. Dead Ends & Lessons\n*Track approaches that didn't work*\n\n## 9. Function Definitions\n[Include all function definitions from metaGuidance for reference]\n\n## 10. Resumption Instructions\n\n### How to Resume This Investigation\n\n1. **Get the workflow**: Call `workflow_get` with:\n - id: \"systematic-bug-investigation-with-loops\"\n - mode: \"preview\" (to see next step)\n\n2. **Resume from saved state**: Call `workflow_next` with the JSON below:\n\n```json\n{\n \"workflowId\": \"systematic-bug-investigation-with-loops\",\n \"completedSteps\": [\"phase-0-triage\", \"phase-0a-assumption-check\", \"phase-0b-user-preferences\", \"phase-0c-tool-check\", \"phase-0d-create-context\"],\n \"context\": {\n \"bugComplexity\": \"{{bugComplexity}}\",\n \"projectType\": \"{{projectType}}\",\n \"debuggingMechanism\": \"{{debuggingMechanism}}\",\n \"isDistributed\": {{isDistributed || false}},\n \"automationLevel\": \"{{automationLevel}}\",\n \"userDebugPreferences\": {{JSON.stringify(userDebugPreferences)}},\n \"availableTools\": {{JSON.stringify(availableTools)}},\n \"toolLimitations\": {{JSON.stringify(toolLimitations)}}\n }\n}\n```\n\n3. **Continue investigation**: The workflow will pick up from where it left off\n\n### Important Notes\n- Update `completedSteps` array after completing each phase\n- Preserve all context variables for proper state restoration\n- This JSON should be updated after major milestones\n```\n\n**Set `contextInitialized` = true**",
  "agentRole": "You are creating the central documentation hub for this investigation. This document will track all progress, findings, and enable seamless handoffs.",
  "guidance": [
  "Create a comprehensive but scannable document",
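The resumption instructions in the new Phase 0d prompt revolve around a `workflow_next` call carrying the completed steps plus saved context. A small sketch of assembling that payload; the helper and example values here are hypothetical, only the field names and the workflow id come from the prompt:

```typescript
interface ResumePayload {
  workflowId: string;
  completedSteps: string[];
  context: Record<string, unknown>;
}

// Build the JSON body the prompt says to pass to workflow_next when resuming.
function buildResumePayload(
  context: Record<string, unknown>,
  completedSteps: string[],
): ResumePayload {
  return {
    workflowId: "systematic-bug-investigation-with-loops",
    completedSteps,
    context,
  };
}

// Example: resuming right after Phase 0d (illustrative context values).
const payload = buildResumePayload(
  { bugComplexity: "medium", projectType: "node", automationLevel: "High" },
  [
    "phase-0-triage",
    "phase-0a-assumption-check",
    "phase-0b-user-preferences",
    "phase-0c-tool-check",
    "phase-0d-create-context",
  ],
);
console.log(JSON.stringify(payload, null, 2));
```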
@@ -197,10 +142,6 @@
  "id": "phase-1-iterative-analysis",
  "type": "loop",
  "title": "Phase 1: Multi-Dimensional Codebase Analysis",
- "runCondition": {
- "var": "isReproducible",
- "equals": true
- },
  "loop": {
  "type": "for",
  "count": 4,
@@ -211,15 +152,22 @@
  {
  "id": "analysis-breadth-scan",
  "title": "Analysis 1/4: Breadth Scan",
- "prompt": "**BREADTH SCAN
+ "prompt": "**BREADTH SCAN - Cast Wide Net**\n\nGoal: Understand full system impact and identify all potentially involved components.\n\nPerform: Error propagation mapping, Component discovery, Data flow mapping, Recent changes analysis, and Historical pattern search.\n\n**Output**: Complete BreadthAnalysis.md with component interaction map, data flow diagram, suspicious areas ranked by likelihood, and list of all potentially related files and functions.",
  "agentRole": "You are performing systematic analysis phase 1 of 4. Your focus is casting a wide net to find all potentially related components.",
  "guidance": [
  "This is analysis phase 1 of 4 total phases",
  "Phase 1 = Breadth Scan - Cast wide net for all related components",
  "Create BreadthAnalysis.md with structured findings",
+ "ERROR PROPAGATION MAPPING: Use grep_search for all error occurrences, trace error messages across all log files, map all stack traces to identify call chains, document every point where error appears or is handled",
+ "COMPONENT DISCOVERY: Find ALL components that interact with failing area, use codebase_search \"How is [failing component] used?\", identify all callers and callees, build component interaction map, note both direct and indirect relationships",
+ "DATA FLOW MAPPING: Trace data that flows through bug area, identify all transformations applied to data, find all persistence points (database, cache, files), document complete data journey, note where data could be corrupted or lost",
+ "RECENT CHANGES ANALYSIS: Git history for all identified components, check last 10 commits affecting these areas, identify when bug likely appeared, look for related PRs or issues, note any configuration or dependency changes",
+ "HISTORICAL PATTERN SEARCH: Use findSimilarBugs() to search for similar error patterns in codebase, previous fixes to related components, related test failures in history",
  "Use findSimilarBugs() to search for historical patterns",
  "Use the function definitions for standardized operations",
- "Update INVESTIGATION_CONTEXT.md after completion"
+ "Update INVESTIGATION_CONTEXT.md after completion",
+ "Be thorough - it's better to include too much than miss something critical",
+ "Document your reasoning for why each component is potentially involved"
  ],
  "runCondition": {"var": "analysisPhase", "equals": 1},
  "requireConfirmation": false
@@ -227,7 +175,7 @@
  {
  "id": "analysis-deep-dive",
  "title": "Analysis 2/4: Component Deep Dive",
- "prompt": "**COMPONENT DEEP DIVE**\n\
+ "prompt": "**COMPONENT DEEP DIVE - Understand Internals**\n\nGoal: Deep understanding of top 5 suspicious components from breadth scan.\n\nFor each component, use recursiveAnalysis(component, 3) to perform 3-level analysis: Direct Implementation (Level 1), Direct Dependencies (Level 2), and Integration Points (Level 3). Document likelihood scores, suspicious code sections, failure modes, and red flags.\n\n**Output**: ComponentAnalysis.md with deep insights for top 5 components, ranked list of most likely root cause locations, detailed notes on internals, and dependency graph showing relationships.",
  "agentRole": "You are performing systematic analysis phase 2 of 4. Your focus is deep diving into the most suspicious components to understand their internals.",
  "guidance": [
  "This is analysis phase 2 of 4 total phases",
@@ -235,7 +183,13 @@
  "Build on findings from Phase 1 Breadth Scan",
  "Create ComponentAnalysis.md with structured findings",
  "Use recursiveAnalysis() for systematic exploration",
- "
+ "LEVEL 1 - DIRECT IMPLEMENTATION: Read COMPLETE file including private methods, understand state management and data structures, analyze error handling patterns, check initialization and cleanup logic, document all public/private APIs, identify assumptions or invariants, note TODO/FIXME comments",
+ "LEVEL 2 - DIRECT DEPENDENCIES: Follow all imports and their usage, understand dependency contracts and interfaces, check version compatibility and breaking changes, analyze coupling points and data exchange, look for shared mutable state, identify circular dependencies, document how failures could propagate",
+ "LEVEL 3 - INTEGRATION POINTS: How component fits in larger system architecture, side effects and external calls (DB, API, file system), concurrency and threading concerns, resource management (memory, connections, handles), caching and state synchronization, event handling and callbacks, configuration and environment dependencies",
+ "FOR EACH COMPONENT DOCUMENT: Likelihood score (1-10) of being root cause, specific suspicious code sections with line numbers, potential failure modes and their symptoms, dependencies that could be sources of issues, red flags (complex logic, error handling gaps, race conditions)",
+ "Update INVESTIGATION_CONTEXT.md after completion",
+ "Go deep - read entire files, not just the obvious parts",
+ "Look for subtle issues like race conditions, edge cases, and assumptions"
  ],
  "runCondition": {"var": "analysisPhase", "equals": 2},
  "requireConfirmation": false
@@ -243,15 +197,21 @@
  {
  "id": "analysis-dependencies",
  "title": "Analysis 3/4: Dependencies & Flow",
- "prompt": "**DEPENDENCY & FLOW ANALYSIS**\n\
+ "prompt": "**DEPENDENCY & FLOW ANALYSIS - Trace Connections**\n\nGoal: Understand how components interact and data flows between them.\n\nPerform: Static dependency graph analysis, Runtime flow analysis, Data transformation pipeline tracing, and Integration analysis.\n\n**Output**: FlowAnalysis.md with sequence diagrams showing execution flow, data flow maps with transformation points, complete dependency graph, list of all integration points and failure modes, and timeline showing order of operations.",
  "agentRole": "You are performing systematic analysis phase 3 of 4. Your focus is tracing how components connect and data flows between them.",
  "guidance": [
  "This is analysis phase 3 of 4 total phases",
  "Phase 3 = Dependencies - Trace connections and data flows",
  "Build on component understanding from Phase 2",
  "Create FlowAnalysis.md with diagrams and flow charts",
+ "STATIC DEPENDENCY GRAPH: Build complete import/dependency tree, identify circular dependencies, find hidden dependencies (reflection, dynamic loading, DI), map version constraints and compatibility, document shared libraries and utilities, note tight coupling or fragile dependencies",
+ "RUNTIME FLOW ANALYSIS: Trace execution paths to bug, identify async/concurrent flows and coordination, map state changes through execution, document control flow (conditionals, loops, exceptions), track callback chains and event handlers, identify divergence points, note timing dependencies and race conditions",
+ "DATA TRANSFORMATION PIPELINE: Track data from input to error point, document each transformation with input/output types, identify validation points and what they check, find where data could be corrupted/lost, note serialization/deserialization boundaries, track data format conversions, document enrichment/filtering steps",
+ "INTEGRATION ANALYSIS: External service calls and failure modes, database interactions (reads/writes/transactions), message queue operations and formats, file system operations and error handling, network calls and timeout handling, cache usage and invalidation, third-party library calls",
  "Focus on runtime behavior and integration points",
- "Update INVESTIGATION_CONTEXT.md after completion"
+ "Update INVESTIGATION_CONTEXT.md after completion",
+ "Pay special attention to async boundaries and error propagation",
+ "Look for implicit dependencies that aren't obvious from imports"
  ],
  "runCondition": {"var": "analysisPhase", "equals": 3},
  "requireConfirmation": false
@@ -259,15 +219,22 @@
  {
  "id": "analysis-test-coverage",
  "title": "Analysis 4/4: Test Coverage",
- "prompt": "**TEST COVERAGE ANALYSIS
+ "prompt": "**TEST COVERAGE ANALYSIS - Leverage Existing Knowledge**\n\nGoal: Use existing tests as source of truth about system behavior.\n\nFor each suspicious component, use analyzeTests(component) to perform: Direct test coverage analysis, Integration test analysis, Test history investigation, Test execution with debugging, and Coverage gap analysis.\n\n**Output**: TestAnalysis.md with coverage gaps matrix, suspicious test patterns, test evidence for hypotheses, recommendations for tests to add, and complete test inventory for affected components.",
  "agentRole": "You are performing systematic analysis phase 4 of 4. Your focus is leveraging existing tests to understand expected behavior and find coverage gaps.",
  "guidance": [
  "This is analysis phase 4 of 4 total phases",
  "Phase 4 = Tests - Analyze test coverage and quality",
  "Build on all previous analysis phases",
  "Create TestAnalysis.md with coverage gap matrix",
+ "DIRECT TEST COVERAGE: Find all tests using grep/test discovery, analyze what's tested (happy/edge/error cases), identify what's NOT tested, check test quality and assertion strength, note mocking/stubbing that might hide issues, review test names and docs",
+ "INTEGRATION TEST ANALYSIS: Find end-to-end tests for bug area, analyze assumptions/preconditions, check for flaky tests, review disabled/skipped tests and why, look for TODO/incomplete tests, identify multi-component tests, verify if tests cover failing scenario",
+ "TEST HISTORY: When were tests added/modified? Do test changes correlate with bug appearance? Were tests removed/disabled recently? Use git blame for authors and context, look for related PRs/issues, review test evolution",
+ "TEST EXECUTION WITH DEBUGGING: Run tests with debug flags (--verbose, --debug), add instrumentation to tests themselves, compare expected vs actual in detail, run in isolation and in suite, try different orderings to check dependencies, monitor resource usage",
+ "COVERAGE GAP ANALYSIS: Use coverage tools for untested code paths, map coverage to bug components, identify branches/conditions never exercised, note error handling without tests, document missing edge cases, recommend tests to add",
  "Run tests with debug flags for additional insights",
- "After completion, use trackInvestigation('Phase 1 Complete', 'Moving to Hypothesis Development')"
+ "After completion, use trackInvestigation('Phase 1 Complete', 'Moving to Hypothesis Development')",
+ "Tests often reveal the 'expected' behavior - compare with actual behavior",
+ "Missing tests often indicate areas where bugs hide"
  ],
  "runCondition": {"var": "analysisPhase", "equals": 4},
  "requireConfirmation": false
@@ -360,8 +327,29 @@
  "hasValidation": true
  },
  {
- "id": "phase-2c-
- "title": "Phase 2c:
+ "id": "phase-2c-hypothesis-assumptions",
+ "title": "Phase 2c: Hypothesis Assumption Audit",
+ "prompt": "**AUDIT** each hypothesis for hidden assumptions:\n\n**FOR EACH HYPOTHESIS**:\n- List implicit assumptions\n- Rate assumption confidence (1-10)\n- Identify verification approach\n\n**REJECT** hypotheses built on unverified assumptions.",
+ "agentRole": "You are a rigorous scientist who rejects any hypothesis not grounded in verified facts.",
+ "guidance": [
+ "EXPLICIT LISTING: Write out every assumption, no matter how obvious it seems",
+ "CONFIDENCE SCORING: Rate 1-10 based on evidence quality, not intuition",
+ "VERIFICATION PLAN: For each assumption, specify how it can be tested",
+ "REJECTION CRITERIA: Any assumption with confidence <7 requires verification",
+ "DOCUMENT RATIONALE: Explain why each assumption is accepted or needs testing"
+ ],
+ "validationCriteria": [
+ {
+ "type": "contains",
+ "value": "Assumption confidence",
+ "message": "Must rate assumption confidence for each hypothesis"
+ }
+ ],
+ "hasValidation": true
+ },
+ {
+ "id": "phase-2d-prepare-validation",
+ "title": "Phase 2d: Prepare Hypothesis Validation",
  "prompt": "**PREPARE VALIDATION ARRAY** - Extract the top 3 hypotheses for systematic validation.\n\n**Create `hypothesesToValidate` array with:**\n```json\n[\n {\n \"id\": \"H1\",\n \"description\": \"[Hypothesis description]\",\n \"evidenceStrength\": [score],\n \"testability\": [score],\n \"validationPlan\": \"[Specific testing approach]\"\n },\n // ... H2, H3\n]\n```\n\n**Set context variables:**\n- `hypothesesToValidate`: Array of top 3 hypotheses\n- `currentConfidence`: 0 (will be updated during validation)\n- `validationIterations`: 0 (tracks validation cycles)",
  "agentRole": "You are preparing the systematic validation process by structuring hypotheses for iteration.",
  "guidance": [
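Phase 2d sketches the `hypothesesToValidate` entries as JSON inside its prompt. The same structure as a type, for reference; the field names follow the prompt, while treating the scores as plain numbers is an assumption:

```typescript
interface HypothesisToValidate {
  id: string;               // "H1", "H2", "H3"
  description: string;
  evidenceStrength: number; // 1-10
  testability: number;      // 1-10
  validationPlan: string;   // specific testing approach
}

// Context variables the step initializes alongside the array.
interface ValidationContext {
  hypothesesToValidate: HypothesisToValidate[];
  currentConfidence: number;    // starts at 0, updated during validation
  validationIterations: number; // starts at 0, tracks validation cycles
}
```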
@@ -372,8 +360,8 @@
  "requireConfirmation": false
  },
  {
- "id": "phase-
- "title": "Phase
+ "id": "phase-2e-test-evidence-gathering",
+ "title": "Phase 2e: Test-Based Hypothesis Evidence",
  "runCondition": {
  "var": "hypothesesToValidate",
  "not_equals": null
@@ -389,9 +377,9 @@
  "requireConfirmation": false
  },
  {
- "id": "phase-
+ "id": "phase-2f-hypothesis-verification",
  "type": "loop",
- "title": "Phase
+ "title": "Phase 2f: Hypothesis Verification & Refinement",
  "runCondition": {
  "var": "hypothesesToValidate",
  "not_equals": null
@@ -444,9 +432,67 @@
  "requireConfirmation": false
  },
  {
- "id": "phase-
+ "id": "phase-2g-instrumentation-planning",
+ "title": "Phase 2g: Unified Instrumentation Planning",
+ "prompt": "**UNIFIED INSTRUMENTATION PLANNING** - Plan comprehensive logging strategy for all hypotheses before implementation.\n\n**GOAL**: Create a coordinated instrumentation plan that efficiently captures evidence for all hypotheses in a single execution.\n\n**STEP 1: Hypothesis Review**\nFor each hypothesis (H1, H2, H3):\n- **Component(s)**: Which components need instrumentation?\n- **Critical Paths**: Which execution paths must be logged?\n- **Key Variables**: What state/data must be captured?\n- **Decision Points**: What conditionals/branches matter?\n- **Timing Concerns**: Any concurrency or timing-sensitive areas?\n\n**STEP 2: Identify Instrumentation Locations**\n\nFor each hypothesis, list specific locations:\n```\nH1 Instrumentation Needs:\n - File: auth/login.ts, Function: validateCredentials, Lines: 45-67\n What to log: input credentials format, validation result, error conditions\n - File: auth/session.ts, Function: createSession, Lines: 23-34\n What to log: session creation parameters, user context\n\nH2 Instrumentation Needs:\n - File: auth/session.ts, Function: createSession, Lines: 23-34 [OVERLAP with H1]\n What to log: session storage backend, timing\n - File: database/connection.ts, Function: getConnection, Lines: 89-102\n What to log: connection pool state, timeout settings\n\nH3 Instrumentation Needs:\n - File: cache/redis.ts, Function: set, Lines: 156-178\n What to log: cache key, TTL, success/failure\n```\n\n**STEP 3: Identify Overlaps**\n\nWhere do multiple hypotheses need logging at the same location?\n```\nOverlapping Instrumentation:\n - auth/session.ts:23-34: Both H1 and H2 need logs here\n Strategy: Single log point with both [H1] and [H2] prefixes capturing all needed data\n \n - No other overlaps identified\n```\n\n**STEP 4: Plan Log Format & Structure**\n\nDefine what each log should contain:\n```\nLog Format Standard:\n [HX] ClassName.methodName:{lineNum} | timestamp | specific-data\n\nH1 Log Examples:\n [H1] LoginValidator.validateCredentials:45 | 2025-10-02T10:23:45.123Z | input={email: user@example.com, hasPassword: true}\n [H1] LoginValidator.validateCredentials:52 | 2025-10-02T10:23:45.145Z | validation=FAILED reason=\"invalid format\"\n\nH2 Log Examples:\n [H2] SessionManager.createSession:23 | 2025-10-02T10:23:45.167Z | backend=redis poolSize=10\n [H2] SessionManager.createSession:28 | 2025-10-02T10:23:45.189Z | sessionId=abc123 stored=true latency=22ms\n```\n\n**STEP 5: Plan Data Capture Strategy**\n\nWhat specific data values need to be captured:\n- **H1 requires**: Credential format, validation results, error messages\n- **H2 requires**: Backend type, connection timing, pool state\n- **H3 requires**: Cache keys, TTL values, hit/miss rates\n\n**STEP 6: Consider Edge Cases**\n\n- **High-frequency locations**: Plan aggregation (e.g., log every 10th iteration)\n- **Sensitive data**: Plan redaction (e.g., mask passwords, PII)\n- **Large data structures**: Plan summarization (e.g., object size, key count, not full dump)\n- **Error paths**: Ensure error cases are logged, not just happy path\n\n**STEP 7: Create Instrumentation Implementation Plan**\n\nProduce structured plan:\n```markdown\n# Instrumentation Implementation Plan\n\n## Summary\n- Total instrumentation points: [count]\n- Overlapping locations: [count]\n- Estimated log volume: [low/medium/high]\n- Sensitive data handling: [yes/no - describe]\n\n## H1 Instrumentation (Priority: High, Evidence Strength: 8/10)\n1. Location: auth/login.ts:45-67\n Function: validateCredentials\n Log: [H1] Input format and validation result\n Frequency: Per-call (not high-frequency)\n Data: {email format, hasPassword, validation result, error}\n\n2. Location: auth/session.ts:23-34 [SHARED with H2]\n Function: createSession \n Log: [H1] Session creation context\n Frequency: Per-call\n Data: {userContext, sessionType}\n\n## H2 Instrumentation (Priority: High, Evidence Strength: 7/10)\n[Similar detailed breakdown]\n\n## H3 Instrumentation (Priority: Medium, Evidence Strength: 6/10)\n[Similar detailed breakdown]\n\n## Implementation Order\n1. Shared locations first (avoid duplication)\n2. H1 specific locations\n3. H2 specific locations\n4. H3 specific locations\n\n## Validation Checklist\n- [ ] All hypotheses have instrumentation coverage\n- [ ] Overlaps identified and coordinated\n- [ ] Log format is consistent\n- [ ] Sensitive data is handled\n- [ ] High-frequency points have aggregation\n- [ ] Edge cases considered\n```\n\n**OUTPUT**:\n- Complete instrumentation implementation plan\n- Set `instrumentationPlanReady` = true\n- Create InstrumentationPlan.md file with detailed plan\n- Update INVESTIGATION_CONTEXT.md with plan summary",
+ "agentRole": "You are an instrumentation architect planning a comprehensive logging strategy. Your goal is to design efficient, coordinated instrumentation that captures all needed evidence in a single execution.",
+ "guidance": [
+ "Review ALL hypotheses together to identify synergies",
+ "Be specific about locations (file, function, line numbers)",
+ "Identify and optimize overlapping instrumentation needs",
+ "Plan log format for consistency and parseability",
+ "Consider practical concerns (volume, sensitivity, performance)",
+ "Create actionable implementation plan, not just theory",
+ "This plan will guide Phase 3 implementation"
+ ],
+ "requireConfirmation": false
+ },
+ {
+ "id": "phase-2h-cognitive-reset",
+ "title": "Phase 2h: Cognitive Reset & Plan Review",
+ "prompt": "**COGNITIVE RESET** - Take a mental step back before implementing instrumentation.\n\n**GOAL**: Review the investigation with fresh eyes and validate the plan before execution.\n\n**STEP 1: Progress Summary**\n- What have we learned so far? (3-5 key insights)\n- What are our top hypotheses? (brief recap)\n- What's our instrumentation strategy? (high-level summary)\n\n**STEP 2: Critical Questions**\n- Are we missing any obvious alternative explanations?\n- Are our hypotheses too similar or too narrow?\n- Is our instrumentation plan efficient and comprehensive?\n- Are we making any unwarranted assumptions?\n- Is there a simpler approach we haven't considered?\n\n**STEP 3: Bias Check**\n- First impression bias: Are we anchored to initial theories?\n- Confirmation bias: Are we seeking evidence that confirms our beliefs?\n- Complexity bias: Are we overcomplicating a simple issue?\n- Recency bias: Are we over-weighting recent findings?\n\n**STEP 4: Sanity Checks**\n- Does the timeline make sense? (When did bug appear vs when hypothesized causes were introduced)\n- Do the symptoms match our theories? (All symptoms explained, no contradictions)\n- Are we investigating the right level? (Too high-level or too low-level)\n- Have we consulted existing documentation/logs adequately?\n\n**STEP 5: Plan Validation**\n- Review the instrumentation plan from Phase 2g\n- Will it actually answer our questions?\n- Are there any gaps or redundancies?\n- Is it safe to execute? (no production impacts, no data corruption risks)\n\n**STEP 6: Proceed or Pivot Decision**\n- **PROCEED**: Plan is sound, move to implementation\n- **REFINE**: Minor adjustments needed (update plan)\n- **PIVOT**: Major issues found (return to earlier phase)\n\n**OUTPUT**:\n- Cognitive reset complete with decision (PROCEED/REFINE/PIVOT)\n- Any plan adjustments documented\n- Set `resetComplete` = true",
+ "agentRole": "You are a senior debugger reviewing the investigation plan with fresh, critical eyes before committing to implementation.",
+ "guidance": [
+ "Be honest about potential biases and blind spots",
+ "Look for simpler explanations we might have missed",
+ "Validate the plan will actually answer our questions",
+ "Don't skip this - catching issues now saves hours later",
+ "It's okay to pivot if major issues are found"
+ ],
+ "requireConfirmation": false
+ },
+ {
+ "id": "phase-3-comprehensive-instrumentation",
+ "title": "Phase 3: Comprehensive Debug Instrumentation",
+ "prompt": "**COMPREHENSIVE DEBUGGING INSTRUMENTATION** - Implement the instrumentation plan from Phase 2g.\n\n**FOLLOW THE PLAN**: Use the instrumentation plan created in Phase 2f as your implementation guide.\n\n**For each hypothesis in hypothesesToValidate, add targeted instrumentation:**\n\n**IMPLEMENTATION STRATEGY**:\n\n1. **Hypothesis-Specific Prefixes**: Each hypothesis gets unique logging prefix\n - H1: `[H1]` prefix for all H1-related logs\n - H2: `[H2]` prefix for all H2-related logs\n - H3: `[H3]` prefix for all H3-related logs\n\n2. **Standard Format for ALL hypotheses**:\n ```javascript\n className.methodName [HX] {timestamp}: Hypothesis-specific message\n ```\n\n3. **Smart Logging Implementation** (apply once, works for all hypotheses):\n ```javascript\n const debugState = { lastMsg: '', count: 0 };\n function smartLog(hypothesisId, msg) {\n const fullMsg = `[${hypothesisId}] ${msg}`;\n if (debugState.lastMsg === fullMsg) {\n debugState.count++;\n if (debugState.count % 10 === 0) {\n console.log(`${fullMsg} x${debugState.count}`);\n }\n } else {\n if (debugState.count > 1) {\n console.log(`Previous message x${debugState.count}`);\n }\n console.log(fullMsg);\n debugState.lastMsg = fullMsg;\n debugState.count = 1;\n }\n }\n ```\n\n4. **Instrumentation Points** for each hypothesis:\n - Add H1 logging at H1-relevant locations\n - Add H2 logging at H2-relevant locations\n - Add H3 logging at H3-relevant locations\n - Locations may overlap - that's fine, both will log\n\n5. **Operation Grouping** (for all hypotheses):\n ```javascript\n console.log(`=== [H1] Operation ${opName} Start ===`);\n // ... H1-relevant code ...\n console.log(`=== [H1] Operation ${opName} End ===`);\n ```\n\n**INSTRUMENTATION CHECKLIST**:\n- [ ] H1 instrumentation added at identified locations\n- [ ] H2 instrumentation added at identified locations \n- [ ] H3 instrumentation added at identified locations\n- [ ] Test instrumentation for hypothesis validation\n- [ ] Deduplication logic implemented\n- [ ] All logs use correct [HX] prefixes\n\n**OUTPUT**:\n- Comprehensive instrumented code with logging for ALL hypotheses\n- Set `allHypothesesInstrumented` = true\n- Document instrumentation locations in INVESTIGATION_CONTEXT.md",
+ "agentRole": "You are instrumenting code to validate ALL hypotheses simultaneously. Your goal is comprehensive, non-redundant logging that enables efficient evidence collection in a single execution.",
+ "guidance": [
+ "Add instrumentation for ALL hypotheses at once",
+ "Use unique [HX] prefixes to distinguish hypothesis-specific logs",
+ "Overlapping instrumentation is acceptable - multiple hypotheses can log at same location",
+ "Ensure non-intrusive implementation that doesn't change behavior",
+ "Single execution will produce logs for all hypotheses"
+ ],
+ "requireConfirmation": false
+ },
+ {
+ "id": "phase-4-unified-evidence-collection",
+ "title": "Phase 4: Unified Evidence Collection",
+ "prompt": "**UNIFIED EVIDENCE COLLECTION** - Run instrumented code ONCE and collect all evidence.\n\n**EXECUTION**:\n1. **Single Test/Reproduction Run**:\n - Execute the reproduction steps with ALL instrumentation active\n - All hypotheses are tested in the same execution\n - Capture complete log output\n\n2. **Log Collection**:\n - Collect ALL debug logs from the single run\n - Logs will contain [H1], [H2], [H3] prefixed messages\n - Save complete log output for analysis\n\n3. **Log Organization**:\n - Parse logs by hypothesis prefix:\n - Extract all [H1] logs → H1 evidence\n - Extract all [H2] logs → H2 evidence \n - Extract all [H3] logs → H3 evidence\n - Preserve chronological order within each hypothesis\n - Note any cross-hypothesis interactions\n\n4. **Test Execution Evidence**:\n - Run instrumented tests\n - Collect test debug output\n - Note any test failures or unexpected behavior\n - Compare with production bug behavior\n\n5. **Evidence Quality Assessment**:\n - Rate overall log quality (1-10)\n - Note if execution reproduced the bug\n - Document any execution issues\n - Identify if additional instrumentation needed\n\n**If log volume >500 lines:**\n- Use aggregateDebugLogs() to create summaries\n- Group by hypothesis and operation\n- Create structured sub-analysis\n\n**OUTPUT**:\n- Complete log output with all hypothesis evidence\n- Organized evidence by hypothesis (H1, H2, H3)\n- Set `evidenceCollected` = true\n- Overall execution quality score",
+ "agentRole": "You are collecting comprehensive evidence from a single instrumented execution. Your goal is to capture all hypothesis-relevant data in one efficient run.",
+ "guidance": [
+ "Single execution tests all hypotheses simultaneously",
+ "Organize evidence by [HX] prefix for analysis",
+ "Preserve complete chronological log for cross-hypothesis insights",
+ "Note any unexpected behaviors or patterns",
+ "If execution fails, document why and attempt to collect partial evidence"
+ ],
+ "requireConfirmation": false
+ },
+ {
+ "id": "phase-5-hypothesis-analysis-loop",
  "type": "loop",
- "title": "
+ "title": "Phase 5: Individual Hypothesis Analysis",
  "loop": {
  "type": "forEach",
  "items": "hypothesesToValidate",
@@ -456,48 +502,20 @@
|
|
|
456
502
|
},
|
|
457
503
|
"body": [
|
|
458
504
|
{
|
|
459
|
-
"id": "
|
|
460
|
-
"title": "
|
|
461
|
-
"prompt": "**
|
|
462
|
-
"agentRole": "You are
|
|
463
|
-
"guidance": [
|
|
464
|
-
"This is hypothesis {{hypothesisIndex + 1}} of 3",
|
|
465
|
-
"Tailor instrumentation to the specific hypothesis",
|
|
466
|
-
"Ensure non-intrusive implementation"
|
|
467
|
-
],
|
|
468
|
-
"requireConfirmation": false
|
|
469
|
-
},
|
|
470
|
-
{
|
|
471
|
-
"id": "loop-phase-4-evidence",
|
|
472
|
-
"title": "Phase 4: Evidence Collection for {{currentHypothesis.id}}",
|
|
473
|
-
"prompt": "**EVIDENCE COLLECTION for {{currentHypothesis.id}}**\n\n**Execute instrumented code and collect evidence:**\n1. Run the instrumented test/reproduction\n2. Collect all {{currentHypothesis.id}}_DEBUG logs\n3. Analyze results against validation criteria\n4. Document evidence quality and relevance\n\n**TEST EXECUTION EVIDENCE**:\n- Run instrumented tests for {{currentHypothesis.id}}\n- Collect test debug output\n- Note any test failures or unexpected behavior\n- Compare with production bug behavior\n\n**EVIDENCE ASSESSMENT:**\n- Does evidence support {{currentHypothesis.id}}? (Yes/No/Partial)\n- Evidence quality score (1-10)\n- Contradicting evidence found?\n- Additional evidence needed?\n\n**If log volume >500 lines, use aggregateDebugLogs() and create sub-analysis prompt.**\n\n**OUTPUT**: Evidence assessment for {{currentHypothesis.id}} with quality scoring",
|
|
474
|
-
"agentRole": "You are collecting and analyzing evidence specifically for hypothesis {{currentHypothesis.id}}.",
|
|
475
|
-
"guidance": [
|
|
476
|
-
"Focus on evidence directly related to this hypothesis",
|
|
477
|
-
"Be objective in assessment - negative evidence is valuable",
|
|
478
|
-
"Track evidence quality quantitatively"
|
|
479
|
-
],
|
|
480
|
-
"requireConfirmation": false
|
|
481
|
-
},
|
|
482
|
-
{
|
|
483
|
-
"id": "loop-phase-5-synthesis",
|
|
484
|
-
"title": "Phase 5: Evidence Synthesis for {{currentHypothesis.id}}",
|
|
485
|
-
"prompt": "**EVIDENCE SYNTHESIS for {{currentHypothesis.id}}**\n\n**Synthesize findings:**\n1. **Evidence Summary**: What did we learn about {{currentHypothesis.id}}?\n2. **Confidence Update**: Based on evidence, rate confidence this is the root cause (0-10)\n3. **Status Update**: Mark hypothesis as Confirmed/Refuted/Needs-More-Evidence\n\n**If {{currentHypothesis.id}} is confirmed with high confidence (>8.0):**\n- Set `rootCauseFound` = true\n- Set `rootCauseHypothesis` = {{currentHypothesis.id}}\n- Update `currentConfidence` with confidence score\n\n**If all hypotheses validated but confidence <9.0:**\n- Consider additional investigation needs\n- Document what evidence is still missing\n\n**Context Update**:\n- Use updateInvestigationContext('Evidence Log', evidence summary for {{currentHypothesis.id}})\n- Every 3 iterations: Use trackInvestigation('Validation Progress', '{{hypothesisIndex + 1}}/3 hypotheses validated')",
|
|
486
|
-
"agentRole": "You are synthesizing evidence to determine if {{currentHypothesis.id}} is the root cause.",
|
|
505
|
+
"id": "analyze-hypothesis-evidence",
|
|
506
|
+
"title": "Analyze Evidence for {{currentHypothesis.id}}",
|
|
507
|
+
"prompt": "**EVIDENCE ANALYSIS for {{currentHypothesis.id}}**\n\n**Hypothesis**: {{currentHypothesis.description}}\n\n**ANALYZE {{currentHypothesis.id}} LOGS**:\n\n1. **Extract Relevant Logs**:\n - Review all [{{currentHypothesis.id}}] prefixed logs from Phase 4\n - Examine log sequence and timing\n - Look for patterns supporting or refuting the hypothesis\n\n2. **Evidence Assessment**:\n - Does evidence support {{currentHypothesis.id}}? (Yes/No/Partial)\n - Evidence quality score (1-10)\n - Contradicting evidence found?\n - Unexpected behaviors observed?\n\n3. **Cross-Hypothesis Insights**:\n - Did other hypothesis logs reveal relevant information?\n - Are there interactions between suspected components?\n - Does timeline analysis suggest different root cause?\n\n4. **Confidence Update**:\n - Based on evidence, rate confidence this is root cause (0-10)\n - What additional evidence would increase confidence?\n - Are there alternative explanations for the observed evidence?\n\n5. **Status Determination**:\n - Mark hypothesis as: Confirmed / Refuted / Needs-More-Evidence / Partially-Confirmed\n - If Confirmed with high confidence (>8.0):\n - Set `rootCauseFound` = true\n - Set `rootCauseHypothesis` = {{currentHypothesis.id}}\n - Set `currentConfidence` = confidence score\n\n**CONTEXT UPDATE**:\n- Use updateInvestigationContext('Evidence Log', evidence summary for {{currentHypothesis.id}})\n- Use trackInvestigation('Validation Progress', '{{hypothesisIndex + 1}}/3 hypotheses analyzed')\n\n**OUTPUT**: Complete evidence analysis and status for {{currentHypothesis.id}}",
|
|
508
|
+
"agentRole": "You are analyzing evidence collected from the unified execution to determine if {{currentHypothesis.id}} is the root cause.",
|
|
487
509
|
"guidance": [
|
|
488
|
-
"
|
|
489
|
-
"
|
|
490
|
-
"Be
|
|
510
|
+
"Analyze logs specific to this hypothesis ({{hypothesisIndex + 1}} of 3)",
|
|
511
|
+
"Consider evidence from all hypotheses - may reveal interactions",
|
|
512
|
+
"Be objective - negative evidence is valuable",
|
|
513
|
+
"Update hypothesis status based on concrete evidence",
|
|
514
|
+
"If high confidence root cause found, document thoroughly"
|
|
491
515
|
],
|
|
492
516
|
"requireConfirmation": false
|
|
493
517
|
}
|
|
494
518
|
],
|
|
495
|
-
"runCondition": {
|
|
496
|
-
"and": [
|
|
497
|
-
{ "var": "rootCauseFound", "not_equals": true },
|
|
498
|
-
{ "var": "validationIterations", "lt": 3 }
|
|
499
|
-
]
|
|
500
|
-
},
|
|
501
519
|
"requireConfirmation": false
|
|
502
520
|
},
|
|
503
521
|
{
|
|
@@ -507,7 +525,7 @@
|
|
|
507
525
|
"var": "currentConfidence",
|
|
508
526
|
"lt": 8.0
|
|
509
527
|
},
|
|
510
|
-
"prompt": "**CONTROLLED EXPERIMENTATION** - When observation isn't enough, experiment!\n\n**Current Investigation Status**: Leading hypothesis (Confidence: {{currentConfidence}}/10)\n\n**EXPERIMENT TYPES** (use controlledModification()):\n\n1. **Guard Additions (Non-Breaking)**:\n ```javascript\n // Add defensive check that logs but doesn't change behavior\n if (unexpectedCondition) {\n console.error('[H1_GUARD] Unexpected state detected:', state);\n // Continue normal execution\n }\n ```\n\n2. **Assertion Injections**:\n ```javascript\n // Add assertion that would fail if hypothesis is correct\n console.assert(expectedCondition, '[H1_ASSERT] Hypothesis H1 violated!');\n ```\n\n3. **Minimal Fix Test**:\n ```javascript\n // Apply minimal fix for hypothesis, see if bug disappears\n if (process.env.DEBUG_FIX_H1 === 'true') {\n // Apply hypothesized fix\n return fixedBehavior();\n }\n ```\n\n4. **Controlled Breaking**:\n ```javascript\n // Temporarily break suspected component to verify involvement\n if (process.env.DEBUG_BREAK_H1 === 'true') {\n throw new Error('[H1_BREAK] Intentionally breaking to test hypothesis');\n }\n ```\n\n**PROTOCOL**:\n1. Choose experiment type based on confidence and risk\n2. Implement modification with clear DEBUG markers\n3. Use createInvestigationBranch() if not already on investigation branch\n4. Commit: `git commit -m \"DEBUG: {{experiment_type}} for hypothesis investigation\"`\n5. Run reproduction steps\n6. Use collectEvidence() to gather results\n7. Revert changes: `git revert HEAD`\n8. Document results in ExperimentResults/hypothesis-experiment.md\n\n**SAFETY LIMITS**:\n- Max 3 experiments per hypothesis\n- Each experiment in separate commit\n- Always revert after evidence collection\n- Document everything in INVESTIGATION_CONTEXT.md\n\n**UPDATE**:\n- Hypothesis confidence based on experimental results\n- Use updateInvestigationContext('Experiment Results', experiment details and outcomes)\n- Track failed experiments in 'Dead Ends & Lessons' section",
|
|
528
|
+
"prompt": "**CONTROLLED EXPERIMENTATION** - When observation isn't enough, experiment!\n\n**Current Investigation Status**: Leading hypothesis (Confidence: {{currentConfidence}}/10)\n\n**⚠️ SAFETY PROTOCOLS (MANDATORY)**:\n\n1. **Git Branch Required**:\n - MUST be on investigation branch (use createInvestigationBranch() if not)\n - Verify with `git branch --show-current`\n - NEVER experiment directly on main/master\n\n2. **Pre-Experiment Baseline**:\n - Commit clean state: `git commit -m \"PRE-EXPERIMENT: baseline for {{hypothesis.id}}\"`\n - Record current test results\n - Document baseline behavior\n\n3. **Environment Restriction**:\n - ONLY run in test/dev environment\n - NEVER in production or staging\n - Set environment check: `if (process.env.NODE_ENV !== 'development') { throw new Error('Experiments only in dev'); }`\n\n4. **Automatic Revert**:\n - After evidence collection: `git revert HEAD --no-edit`\n - Verify code returned to baseline\n - Run tests to confirm clean state\n\n5. **Approval Gates**:\n - Low automation: Require approval for ALL experiments\n - Medium automation: Require approval for breaking/minimal-fix experiments\n - High automation: Auto-approve guards/logs only\n\n6. **Documentation**:\n - Create ExperimentLog.md entry with:\n - Timestamp, experiment type, hypothesis ID\n - Rationale and expected outcome\n - Actual outcome and evidence\n - Revert status (confirmed/failed)\n\n7. **Hard Limits**:\n - Max 3 experiments total (prevent endless experimentation)\n - Track with `experimentCount` context variable\n - Exit if limit reached, recommend different approach\n\n8. **Rollback Verification**:\n - After revert, run full test suite\n - Verify no unintended changes remain\n - Check git status is clean\n\n**EXPERIMENT TYPES** (use controlledModification()):\n\n1. **Guard Additions (Non-Breaking)**:\n ```javascript\n // Add defensive check that logs but doesn't change behavior\n if (unexpectedCondition) {\n console.error('[H1_GUARD] Unexpected state detected:', state);\n // Continue normal execution\n }\n ```\n\n2. **Assertion Injections**:\n ```javascript\n // Add assertion that would fail if hypothesis is correct\n console.assert(expectedCondition, '[H1_ASSERT] Hypothesis H1 violated!');\n ```\n\n3. **Minimal Fix Test**:\n ```javascript\n // Apply minimal fix for hypothesis, see if bug disappears\n if (process.env.DEBUG_FIX_H1 === 'true') {\n // Apply hypothesized fix\n return fixedBehavior();\n }\n ```\n\n4. **Controlled Breaking**:\n ```javascript\n // Temporarily break suspected component to verify involvement\n if (process.env.DEBUG_BREAK_H1 === 'true') {\n throw new Error('[H1_BREAK] Intentionally breaking to test hypothesis');\n }\n ```\n\n**PROTOCOL**:\n1. Choose experiment type based on confidence and risk\n2. Implement modification with clear DEBUG markers\n3. Use createInvestigationBranch() if not already on investigation branch\n4. Commit: `git commit -m \"DEBUG: {{experiment_type}} for hypothesis investigation\"`\n5. Run reproduction steps\n6. Use collectEvidence() to gather results\n7. Revert changes: `git revert HEAD`\n8. 
Document results in ExperimentResults/hypothesis-experiment.md\n\n**SAFETY LIMITS**:\n- Max 3 experiments per hypothesis\n- Each experiment in separate commit\n- Always revert after evidence collection\n- Document everything in INVESTIGATION_CONTEXT.md\n\n**UPDATE**:\n- Hypothesis confidence based on experimental results\n- Use updateInvestigationContext('Experiment Results', experiment details and outcomes)\n- Track failed experiments in 'Dead Ends & Lessons' section",
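To make the "minimal fix test" and the dev-only environment guard concrete, here is a minimal sketch under stated assumptions: DEBUG_FIX_H1, resolveValue, and fixedBehavior are hypothetical names used only for illustration, and the default path leaves behavior unchanged when the experiment flag is off.

```javascript
// Hypothetical experiment toggle combining the dev-only guard (safety protocol 3)
// with the minimal-fix experiment type.
function fixedBehavior(value) {
  // Placeholder for the hypothesized minimal fix.
  return value;
}

function resolveValue(original) {
  if (process.env.DEBUG_FIX_H1 === 'true') {
    if (process.env.NODE_ENV !== 'development') {
      throw new Error('Experiments only in dev');
    }
    console.log('[H1_FIX] applying hypothesized fix');
    return fixedBehavior(original);
  }
  return original; // normal execution when the experiment flag is off
}

console.log(resolveValue(42));
```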
|
|
511
529
|
"agentRole": "You are a careful experimenter using controlled code modifications to validate hypotheses. Safety and reversibility are paramount.",
|
|
512
530
|
"guidance": [
|
|
513
531
|
"Start with non-breaking experiments (guards, logs)",
|
|
@@ -535,8 +553,8 @@
|
|
|
535
553
|
]
|
|
536
554
|
},
|
|
537
555
|
{
|
|
538
|
-
"id": "phase-
|
|
539
|
-
"title": "Phase
|
|
556
|
+
"id": "phase-3b-observability-setup",
|
|
557
|
+
"title": "Phase 3b: Distributed System Observability",
|
|
540
558
|
"runCondition": {
|
|
541
559
|
"var": "isDistributed",
|
|
542
560
|
"equals": true
|
|
@@ -552,8 +570,8 @@
|
|
|
552
570
|
]
|
|
553
571
|
},
|
|
554
572
|
{
|
|
555
|
-
"id": "phase-
|
|
556
|
-
"title": "Phase
|
|
573
|
+
"id": "phase-4c-distributed-evidence",
|
|
574
|
+
"title": "Phase 4c: Multi-Service Evidence Collection",
|
|
557
575
|
"runCondition": {
|
|
558
576
|
"var": "isDistributed",
|
|
559
577
|
"equals": true
|
|
@@ -604,31 +622,10 @@
|
|
|
604
622
|
],
|
|
605
623
|
"hasValidation": true
|
|
606
624
|
},
|
|
607
|
-
{
|
|
608
|
-
"id": "phase-2c-hypothesis-assumptions",
|
|
609
|
-
"title": "Phase 2c: Hypothesis Assumption Audit",
|
|
610
|
-
"prompt": "**AUDIT** each hypothesis for hidden assumptions:\n\n**FOR EACH HYPOTHESIS**:\n- List implicit assumptions\n- Rate assumption confidence (1-10)\n- Identify verification approach\n\n**REJECT** hypotheses built on unverified assumptions.",
|
|
611
|
-
"agentRole": "You are a rigorous scientist who rejects any hypothesis not grounded in verified facts.",
|
|
612
|
-
"guidance": [
|
|
613
|
-
"EXPLICIT LISTING: Write out every assumption, no matter how obvious it seems",
|
|
614
|
-
"CONFIDENCE SCORING: Rate 1-10 based on evidence quality, not intuition",
|
|
615
|
-
"VERIFICATION PLAN: For each assumption, specify how it can be tested",
|
|
616
|
-
"REJECTION CRITERIA: Any assumption with confidence <7 requires verification",
|
|
617
|
-
"DOCUMENT RATIONALE: Explain why each assumption is accepted or needs testing"
|
|
618
|
-
],
|
|
619
|
-
"validationCriteria": [
|
|
620
|
-
{
|
|
621
|
-
"type": "contains",
|
|
622
|
-
"value": "Assumption confidence",
|
|
623
|
-
"message": "Must rate assumption confidence for each hypothesis"
|
|
624
|
-
}
|
|
625
|
-
],
|
|
626
|
-
"hasValidation": true
|
|
627
|
-
},
|
|
628
625
|
{
|
|
629
626
|
"id": "phase-6-diagnostic-writeup",
|
|
630
627
|
"title": "Phase 6: Comprehensive Diagnostic Writeup",
|
|
631
|
-
"prompt": "**DIAGNOSTIC
|
|
628
|
+
"prompt": "**FINAL DIAGNOSTIC DOCUMENTATION** - I will create comprehensive writeup enabling effective bug fixing and knowledge transfer.\n\n**STEP 1: Executive Summary**\n- **Bug Summary**: Concise description of issue and impact\n- **Root Cause**: Clear, non-technical explanation of what is happening\n- **Confidence Level**: Final confidence assessment with calculation methodology\n- **Scope**: What systems, users, or scenarios are affected\n\n**STEP 2: Technical Deep Dive**\n- **Root Cause Analysis**: Detailed technical explanation of failure mechanism\n- **Code Component Analysis**: Specific files, functions, and lines with exact locations\n- **Execution Flow**: Step-by-step sequence of events leading to bug\n- **State Analysis**: How system state contributes to failure\n\n**STEP 3: Investigation Methodology**\n- **Investigation Timeline**: Chronological summary with phase time investments\n- **Hypothesis Evolution**: Complete record of hypotheses (H1-H5) with status changes\n- **Evidence Assessment**: Rating and reliability of evidence sources with key citations\n\n**STEP 4: Historical Context & Patterns**\n- **Similar Bugs**: Reference findings from findSimilarBugs() and SimilarPatterns.md\n- **Previous Fixes**: How similar issues were resolved\n- **Recurring Patterns**: Identify if this is part of a larger pattern\n- **Lessons Learned**: What can be applied from past experiences\n\n**STEP 5: Knowledge Transfer & Action Plan**\n- **Skill Requirements**: Technical expertise needed for understanding and fixing\n- **Prevention & Review**: Specific measures and code review checklist items\n- **Action Items**: Immediate mitigation steps and permanent fix areas with timelines\n- **Testing Strategy**: Comprehensive verification approach for fixes\n- **Recommended Next Investigations** (if confidence < 9.0):\n - Additional instrumentation locations and data points not yet captured\n - Alternative hypotheses to explore (theories that were deprioritized)\n - External expertise to consult (domain experts, similar bugs)\n - Environmental factors to test (load, concurrency, timing, config variations)\n - Expanded scope (related components, upstream/downstream systems)\n - Prioritized next steps based on evidence gaps\n\n**STEP 6: Context Finalization**\n- **Final Update**: Use updateInvestigationContext('Final Report', link to diagnostic report)\n- **Archive Context**: Ensure INVESTIGATION_CONTEXT.md is complete for future reference\n- **Knowledge Base**: Consider key findings for team knowledge base\n\n**DELIVERABLE**: Enterprise-grade diagnostic report enabling confident bug fixing, knowledge transfer, and organizational learning.",
|
|
632
629
|
"agentRole": "You are a senior technical writer and diagnostic documentation specialist with expertise in creating comprehensive, actionable bug reports for enterprise environments. Your strength lies in translating complex technical investigations into clear, structured documentation that enables effective problem resolution, knowledge transfer, and organizational learning. You excel at creating reports that serve immediate fixing needs, long-term system improvement, and team collaboration.",
|
|
633
630
|
"guidance": [
|
|
634
631
|
"ENTERPRISE FOCUS: Write for multiple stakeholders including developers, managers, and future team members",
|