@exaudeus/workrail 0.6.1-beta.8 → 0.6.1-beta.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exaudeus/workrail",
3
- "version": "0.6.1-beta.8",
3
+ "version": "0.6.1-beta.9",
4
4
  "description": "MCP server for structured workflow orchestration and step-by-step task guidance",
5
5
  "license": "MIT",
6
6
  "bin": {
@@ -0,0 +1,27 @@
1
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2
+ <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
3
+ <svg width="100%" height="100%" viewBox="0 0 2021 1081" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;">
4
+ <g id="Artboard1" transform="matrix(1,0,0,0.637382,-465,-607.425118)">
5
+ <rect x="465" y="953" width="2021" height="1696" style="fill:none;"/>
6
+ <g transform="matrix(1,0,0,1.568918,1474.921541,2274.181449)">
7
+ <g transform="matrix(1,0,0,1,-996.300469,-814.404769)">
8
+ <clipPath id="_clip1">
9
+ <rect x="0" y="0" width="1992.601" height="1628.81"/>
10
+ </clipPath>
11
+ <g clip-path="url(#_clip1)">
12
+ <g transform="matrix(0.416667,0,0,-0.416667,-1136.565729,2841.542938)">
13
+ <path d="M7154,6805C7032,6778 6928,6704 6847,6586C6782,6490 6708,6322 6430,5645C6286,5293 6164,5002 6160,4997C6157,4993 6040,5175 5901,5402C5761,5629 5600,5893 5541,5987L5435,6160L5362,6160C5290,6160 5289,6160 5301,6138C5332,6081 6161,4736 6174,4722C6184,4712 6192,4724 6215,4782C6231,4822 6313,5024 6398,5230C6483,5436 6592,5702 6640,5820C6872,6385 6923,6491 7009,6583C7084,6664 7113,6674 7275,6678L7415,6682L7463,6745C7489,6779 7510,6810 7510,6814C7510,6826 7216,6819 7154,6805Z" style="fill-rule:nonzero;"/>
14
+ <path d="M2732,6782C2739,6772 2763,6741 2785,6712L2825,6661L2950,6660C3232,6658 3269,6611 3570,5885C3643,5709 3781,5374 3876,5141C3971,4908 4051,4715 4054,4712C4057,4709 4062,4709 4063,4711C4068,4716 4477,5385 4639,5652C4708,5766 4769,5859 4773,5857C4778,5856 4949,5524 5154,5120L5526,4385L5672,4379L5475,4767C4986,5730 4785,6120 4779,6120C4775,6120 4746,6076 4715,6023C4683,5969 4552,5754 4425,5545C4297,5336 4169,5126 4141,5078C4112,5029 4086,4990 4082,4990C4075,4990 4027,5104 3824,5600C3531,6319 3476,6443 3402,6556C3322,6680 3209,6765 3092,6789C3063,6795 2967,6800 2879,6800C2726,6800 2719,6799 2732,6782Z" style="fill-rule:nonzero;"/>
15
+ <path d="M6650,6779C6450,6750 6333,6657 6178,6405C6105,6285 5807,5739 5807,5725C5807,5706 5874,5600 5881,5608C5884,5612 5952,5734 6030,5880C6308,6396 6388,6521 6518,6641C6558,6678 6624,6726 6667,6748C6747,6790 6745,6793 6650,6779Z" style="fill-rule:nonzero;"/>
16
+ <path d="M3566,6737C3786,6616 3854,6525 4217,5860C4291,5725 4354,5612 4358,5608C4362,5604 4380,5629 4399,5664L4433,5728L4390,5806C4067,6396 3996,6513 3901,6607C3811,6697 3698,6749 3560,6765L3505,6771L3566,6737Z" style="fill-rule:nonzero;"/>
17
+ <path d="M6880,6741C6846,6722 6751,6629 6717,6581C6703,6559 6690,6556 6598,6547L6495,6537L6450,6472C6425,6436 6403,6402 6401,6397C6399,6391 6435,6389 6504,6392C6583,6396 6610,6395 6607,6386C6604,6380 6579,6322 6550,6257L6497,6139L6255,6143L6213,6065C6189,6022 6171,5985 6173,5984C6175,5982 6229,5975 6293,5969C6357,5962 6410,5953 6410,5948C6410,5937 6299,5678 6289,5667C6283,5660 6101,5688 6040,5705C6019,5711 5933,5551 5951,5540C5957,5535 6015,5517 6079,5499C6143,5481 6197,5465 6199,5464C6201,5462 6181,5411 6155,5351C6111,5249 6108,5238 6120,5210C6127,5194 6135,5180 6139,5180C6142,5180 6163,5224 6184,5278C6268,5486 6639,6349 6680,6430C6739,6547 6813,6652 6879,6711C6937,6763 6937,6771 6880,6741Z" style="fill-rule:nonzero;"/>
18
+ <path d="M3372,6681C3455,6604 3514,6516 3585,6368C3633,6265 3931,5568 4045,5290C4071,5227 4094,5173 4095,5172C4097,5170 4105,5181 4114,5198C4128,5226 4127,5232 4085,5333C4061,5392 4044,5441 4048,5443C4052,5446 4109,5464 4175,5485C4241,5505 4297,5524 4299,5526C4307,5534 4226,5690 4214,5690C4208,5690 4153,5679 4092,5665C4031,5651 3975,5640 3967,5640C3959,5640 3929,5696 3892,5784C3858,5864 3830,5931 3830,5933C3830,5936 3884,5944 3950,5950C4024,5958 4070,5967 4070,5974C4069,5980 4052,6016 4031,6054L3992,6122L3869,6121L3745,6120L3692,6237C3664,6302 3640,6361 3640,6368C3640,6377 3663,6379 3740,6374C3795,6370 3840,6371 3840,6375C3840,6379 3819,6414 3794,6451L3747,6519L3549,6533L3504,6592C3461,6647 3369,6730 3350,6730C3345,6730 3333,6733 3323,6737C3313,6741 3335,6716 3372,6681Z" style="fill-rule:nonzero;"/>
19
+ <path d="M4850,6155C4850,6153 4917,6024 4999,5868C5080,5712 5193,5495 5250,5385C5306,5275 5432,5030 5530,4840C5628,4650 5721,4469 5736,4438C5760,4388 5768,4380 5792,4380C5807,4380 5820,4382 5820,4385C5820,4388 5809,4413 5795,4439L5771,4488L5895,4574C5963,4622 6025,4663 6032,4665C6040,4668 6057,4649 6074,4617L6103,4565L6116,4595C6126,4621 6126,4629 6110,4655C5992,4852 5286,6024 5251,6082C5215,6143 5200,6160 5181,6160C5164,6160 5160,6156 5164,6145C5167,6137 5173,6123 5176,6115C5181,6102 5164,6100 5052,6100L4922,6100L4910,6130C4901,6151 4891,6160 4874,6160C4861,6160 4850,6158 4850,6155ZM5347,5843C5384,5778 5416,5723 5418,5719C5419,5716 5381,5710 5333,5706C5285,5702 5222,5696 5193,5693L5141,5687L5078,5811C5044,5879 5014,5938 5012,5942C5010,5946 5053,5951 5107,5953C5161,5954 5221,5957 5242,5958L5278,5960L5347,5843ZM5597,5420C5638,5352 5672,5292 5673,5287C5675,5277 5409,5199 5397,5207C5393,5209 5356,5279 5315,5360L5240,5509L5273,5515C5290,5518 5339,5527 5380,5535C5519,5560 5512,5563 5597,5420ZM5845,5003C5877,4949 5911,4891 5922,4873L5940,4842L5804,4766C5728,4724 5666,4691 5665,4692C5652,4711 5510,4991 5510,4998C5510,5008 5771,5110 5781,5104C5784,5102 5813,5057 5845,5003Z" style="fill-rule:nonzero;"/>
20
+ <path d="M4438,5173C4261,4877 4113,4632 4111,4627C4110,4623 4114,4606 4120,4589L4133,4559L4163,4612C4179,4641 4194,4666 4195,4668C4198,4672 4448,4501 4455,4490C4458,4485 4449,4458 4435,4430L4410,4380L4475,4380L4644,4713C4737,4895 4847,5108 4888,5184C4961,5321 4962,5324 4947,5354C4933,5383 4931,5383 4924,5365C4920,5354 4899,5313 4878,5274L4839,5202L4712,5236C4642,5255 4580,5272 4573,5274C4565,5278 4575,5305 4605,5357C4630,5400 4665,5461 4683,5493L4715,5552L4783,5546L4850,5540L4809,5622C4786,5668 4766,5706 4765,5708C4763,5710 4616,5469 4438,5173ZM4595,5054C4661,5029 4719,5005 4723,5001C4729,4995 4598,4723 4574,4692C4570,4687 4312,4829 4306,4839C4299,4850 4450,5100 4464,5100C4470,5100 4529,5079 4595,5054Z" style="fill-rule:nonzero;"/>
21
+ <path d="M5004,5241L4568,4379L4642,4382L4715,4385L5075,5099L5040,5170L5004,5241Z" style="fill-rule:nonzero;"/>
22
+ </g>
23
+ </g>
24
+ </g>
25
+ </g>
26
+ </g>
27
+ </svg>
@@ -0,0 +1,27 @@
1
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2
+ <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
3
+ <svg width="100%" height="100%" viewBox="0 0 2021 1081" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;">
4
+ <g id="Artboard1" transform="matrix(1,0,0,0.637382,-465,-607.425118)">
5
+ <rect x="465" y="953" width="2021" height="1696" style="fill:none;"/>
6
+ <g transform="matrix(1,0,0,1.568918,1474.921541,2274.181449)">
7
+ <g transform="matrix(1,0,0,1,-996.300469,-814.404769)">
8
+ <clipPath id="_clip1">
9
+ <rect x="0" y="0" width="1992.601" height="1628.81"/>
10
+ </clipPath>
11
+ <g clip-path="url(#_clip1)">
12
+ <g transform="matrix(0.416667,0,0,-0.416667,-1136.565729,2841.542938)">
13
+ <path d="M7154,6805C7032,6778 6928,6704 6847,6586C6782,6490 6708,6322 6430,5645C6286,5293 6164,5002 6160,4997C6157,4993 6040,5175 5901,5402C5761,5629 5600,5893 5541,5987L5435,6160L5362,6160C5290,6160 5289,6160 5301,6138C5332,6081 6161,4736 6174,4722C6184,4712 6192,4724 6215,4782C6231,4822 6313,5024 6398,5230C6483,5436 6592,5702 6640,5820C6872,6385 6923,6491 7009,6583C7084,6664 7113,6674 7275,6678L7415,6682L7463,6745C7489,6779 7510,6810 7510,6814C7510,6826 7216,6819 7154,6805Z" style="fill-rule:nonzero;"/>
14
+ <path d="M2732,6782C2739,6772 2763,6741 2785,6712L2825,6661L2950,6660C3232,6658 3269,6611 3570,5885C3643,5709 3781,5374 3876,5141C3971,4908 4051,4715 4054,4712C4057,4709 4062,4709 4063,4711C4068,4716 4477,5385 4639,5652C4708,5766 4769,5859 4773,5857C4778,5856 4949,5524 5154,5120L5526,4385L5672,4379L5475,4767C4986,5730 4785,6120 4779,6120C4775,6120 4746,6076 4715,6023C4683,5969 4552,5754 4425,5545C4297,5336 4169,5126 4141,5078C4112,5029 4086,4990 4082,4990C4075,4990 4027,5104 3824,5600C3531,6319 3476,6443 3402,6556C3322,6680 3209,6765 3092,6789C3063,6795 2967,6800 2879,6800C2726,6800 2719,6799 2732,6782Z" style="fill-rule:nonzero;"/>
15
+ <path d="M6650,6779C6450,6750 6333,6657 6178,6405C6105,6285 5807,5739 5807,5725C5807,5706 5874,5600 5881,5608C5884,5612 5952,5734 6030,5880C6308,6396 6388,6521 6518,6641C6558,6678 6624,6726 6667,6748C6747,6790 6745,6793 6650,6779Z" style="fill-rule:nonzero;"/>
16
+ <path d="M3566,6737C3786,6616 3854,6525 4217,5860C4291,5725 4354,5612 4358,5608C4362,5604 4380,5629 4399,5664L4433,5728L4390,5806C4067,6396 3996,6513 3901,6607C3811,6697 3698,6749 3560,6765L3505,6771L3566,6737Z" style="fill-rule:nonzero;"/>
17
+ <path d="M6880,6741C6846,6722 6751,6629 6717,6581C6703,6559 6690,6556 6598,6547L6495,6537L6450,6472C6425,6436 6403,6402 6401,6397C6399,6391 6435,6389 6504,6392C6583,6396 6610,6395 6607,6386C6604,6380 6579,6322 6550,6257L6497,6139L6255,6143L6213,6065C6189,6022 6171,5985 6173,5984C6175,5982 6229,5975 6293,5969C6357,5962 6410,5953 6410,5948C6410,5937 6299,5678 6289,5667C6283,5660 6101,5688 6040,5705C6019,5711 5933,5551 5951,5540C5957,5535 6015,5517 6079,5499C6143,5481 6197,5465 6199,5464C6201,5462 6181,5411 6155,5351C6111,5249 6108,5238 6120,5210C6127,5194 6135,5180 6139,5180C6142,5180 6163,5224 6184,5278C6268,5486 6639,6349 6680,6430C6739,6547 6813,6652 6879,6711C6937,6763 6937,6771 6880,6741Z" style="fill-rule:nonzero;"/>
18
+ <path d="M3372,6681C3455,6604 3514,6516 3585,6368C3633,6265 3931,5568 4045,5290C4071,5227 4094,5173 4095,5172C4097,5170 4105,5181 4114,5198C4128,5226 4127,5232 4085,5333C4061,5392 4044,5441 4048,5443C4052,5446 4109,5464 4175,5485C4241,5505 4297,5524 4299,5526C4307,5534 4226,5690 4214,5690C4208,5690 4153,5679 4092,5665C4031,5651 3975,5640 3967,5640C3959,5640 3929,5696 3892,5784C3858,5864 3830,5931 3830,5933C3830,5936 3884,5944 3950,5950C4024,5958 4070,5967 4070,5974C4069,5980 4052,6016 4031,6054L3992,6122L3869,6121L3745,6120L3692,6237C3664,6302 3640,6361 3640,6368C3640,6377 3663,6379 3740,6374C3795,6370 3840,6371 3840,6375C3840,6379 3819,6414 3794,6451L3747,6519L3549,6533L3504,6592C3461,6647 3369,6730 3350,6730C3345,6730 3333,6733 3323,6737C3313,6741 3335,6716 3372,6681Z" style="fill-rule:nonzero;"/>
19
+ <path d="M4850,6155C4850,6153 4917,6024 4999,5868C5080,5712 5193,5495 5250,5385C5306,5275 5432,5030 5530,4840C5628,4650 5721,4469 5736,4438C5760,4388 5768,4380 5792,4380C5807,4380 5820,4382 5820,4385C5820,4388 5809,4413 5795,4439L5771,4488L5895,4574C5963,4622 6025,4663 6032,4665C6040,4668 6057,4649 6074,4617L6103,4565L6116,4595C6126,4621 6126,4629 6110,4655C5992,4852 5286,6024 5251,6082C5215,6143 5200,6160 5181,6160C5164,6160 5160,6156 5164,6145C5167,6137 5173,6123 5176,6115C5181,6102 5164,6100 5052,6100L4922,6100L4910,6130C4901,6151 4891,6160 4874,6160C4861,6160 4850,6158 4850,6155ZM5347,5843C5384,5778 5416,5723 5418,5719C5419,5716 5381,5710 5333,5706C5285,5702 5222,5696 5193,5693L5141,5687L5078,5811C5044,5879 5014,5938 5012,5942C5010,5946 5053,5951 5107,5953C5161,5954 5221,5957 5242,5958L5278,5960L5347,5843ZM5597,5420C5638,5352 5672,5292 5673,5287C5675,5277 5409,5199 5397,5207C5393,5209 5356,5279 5315,5360L5240,5509L5273,5515C5290,5518 5339,5527 5380,5535C5519,5560 5512,5563 5597,5420ZM5845,5003C5877,4949 5911,4891 5922,4873L5940,4842L5804,4766C5728,4724 5666,4691 5665,4692C5652,4711 5510,4991 5510,4998C5510,5008 5771,5110 5781,5104C5784,5102 5813,5057 5845,5003Z" style="fill-rule:nonzero;"/>
20
+ <path d="M4438,5173C4261,4877 4113,4632 4111,4627C4110,4623 4114,4606 4120,4589L4133,4559L4163,4612C4179,4641 4194,4666 4195,4668C4198,4672 4448,4501 4455,4490C4458,4485 4449,4458 4435,4430L4410,4380L4475,4380L4644,4713C4737,4895 4847,5108 4888,5184C4961,5321 4962,5324 4947,5354C4933,5383 4931,5383 4924,5365C4920,5354 4899,5313 4878,5274L4839,5202L4712,5236C4642,5255 4580,5272 4573,5274C4565,5278 4575,5305 4605,5357C4630,5400 4665,5461 4683,5493L4715,5552L4783,5546L4850,5540L4809,5622C4786,5668 4766,5706 4765,5708C4763,5710 4616,5469 4438,5173ZM4595,5054C4661,5029 4719,5005 4723,5001C4729,4995 4598,4723 4574,4692C4570,4687 4312,4829 4306,4839C4299,4850 4450,5100 4464,5100C4470,5100 4529,5079 4595,5054Z" style="fill-rule:nonzero;"/>
21
+ <path d="M5004,5241L4568,4379L4642,4382L4715,4385L5075,5099L5040,5170L5004,5241Z" style="fill-rule:nonzero;"/>
22
+ </g>
23
+ </g>
24
+ </g>
25
+ </g>
26
+ </g>
27
+ </svg>
package/web/manifest.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "start_url": "/",
6
6
  "display": "standalone",
7
7
  "background_color": "#ffffff",
8
- "theme_color": "#2563eb",
8
+ "theme_color": "#F59E0B",
9
9
  "icons": [
10
10
  {
11
11
  "src": "/assets/images/icon-192.png",
@@ -1,5 +1,181 @@
1
1
  # Changelog - Systematic Bug Investigation Workflow
2
2
 
3
+ ## [1.1.0-beta.4] - 2025-11-06
4
+
5
+ ### 🎯 Major Enhancement: Sophisticated Code Analysis (Adapted from MR Review Workflow)
6
+
7
+ **Problem**: The codebase analysis in Phase 1 was weaker than it should be. It lacked explicit structural mapping, contracts & invariants discovery, and sophisticated call graph visualization that are essential for understanding bugs in complex codebases.
8
+
9
+ **Solution**: Added new **Phase 1a: Neighborhood, Call Graph & Contracts** analysis step, bringing total Phase 1 sub-phases from 4 to 5, and total workflow steps from 27 to 28.
10
+
11
+ ### 📊 New Phase 1a: Neighborhood, Call Graph & Contracts
12
+
13
+ This new first analysis step builds the structural foundation before diving into details:
14
+
15
+ #### **1. Module Root Computation**
16
+ - Find nearest common ancestor of error stack trace files
17
+ - Clamp to package/src boundary to define investigation scope
18
+ - Prevents unbounded analysis across entire codebase
19
+
20
+ #### **2. Neighborhood Map**
21
+ - Immediate neighbors (same directory, max 8)
22
+ - Imports/exports directly used (max 10)
23
+ - Co-located tests
24
+ - Closest entry points (routes, endpoints, CLI commands, max 5)
25
+ - Provides context for what's near the failing code
26
+
27
+ #### **3. Bounded Call Graph with Small Multiples**
28
+ - Build call graph ≤2 hops deep per failing symbol
29
+ - Cap total nodes at ≤15 per symbol
30
+ - **HOT Path Ranking** scoring system:
31
+ * Error location in path: +3
32
+ * Entry point to path: +2
33
+ * Test coverage exists: +1
34
+ * Mentioned in ticket/error: +1
35
+ * Tag as HOT if score ≥3
36
+ - **Small Multiples ASCII visualization**:
37
+ * Width ≤100 chars per path
38
+ * Format: `EntryPoint -> Caller -> [*FailingSymbol*] -> Callee`
39
+ * ≤8 total paths, prioritize HOT paths
40
+ * Alias Legend for repeated subpaths (A1, A2...)
41
+ - **Adjacency Summary** fallback if caps exceeded
42
+
43
+ #### **4. Flow Anchors**
44
+ - Map how users/systems trigger the bug
45
+ - HTTP routes → handlers → failing code
46
+ - CLI commands → execution → failing code
47
+ - Scheduled jobs, event handlers → failing code
48
+ - Cap at ≤5 most relevant anchors
49
+ - **Critical**: Shows HOW the bug is reached for reproduction
50
+
51
+ #### **5. Contracts & Invariants** (NEW - Most Critical Addition)
52
+ - Public API symbols (exported functions/classes)
53
+ - API endpoints (REST/GraphQL/RPC)
54
+ - Database tables/collections touched
55
+ - Message queue topics/events
56
+ - **Extract stated invariants** from:
57
+ * JSDoc/docstrings with @invariant
58
+ * Assertions in code
59
+ * Validation logic patterns
60
+ * Comments describing guarantees
61
+ - **Why this matters**: Contracts tell us what guarantees the code MUST maintain - bugs are often broken contracts
62
+
63
+ ### 📈 Benefits
64
+
65
+ 1. **Structural Scaffolding**: Phase 1a provides the map before exploring terrain
66
+ 2. **Contract-Driven Analysis**: Understanding what code promises helps identify where it breaks promises
67
+ 3. **HOT Path Prioritization**: Focus investigation on high-impact code paths first
68
+ 4. **Bounded Analysis**: Strict caps prevent 2-hour rabbit holes
69
+ 5. **Entry Point Clarity**: Flow anchors show how to reproduce bugs
70
+ 6. **Visual Call Graphs**: ASCII Small Multiples make relationships scannable
71
+
72
+ ### 🏗️ Updated Phase Structure
73
+
74
+ Phase 1 now has 5 sub-phases (up from 4):
75
+ - **1a**: Neighborhood, Call Graph & Contracts (NEW)
76
+ - **1b**: Breadth Scan & Pattern Discovery (was 1a)
77
+ - **1c**: Component Deep Dive (was 1b)
78
+ - **1d**: Dependencies & Flow (was 1c)
79
+ - **1e**: Test Coverage (was 1d)
80
+
81
+ ### 🎓 Adapted From MR Review Workflow
82
+
83
+ This enhancement adapts proven patterns from the `mr-review-workflow.json` Phase 1b:
84
+ - Bounded call graph with caps
85
+ - Small Multiples visualization
86
+ - HOT path ranking
87
+ - Alias Legend for repeated paths
88
+ - Adjacency Summary fallback
89
+ - Contracts & Invariants discovery
90
+
91
+ ## [1.1.0-beta.3] - 2025-11-06
92
+
93
+ ### 🚨 CRITICAL FIX: Prevent ALL Phase Skipping (Not Just Documentation)
94
+
95
+ **Problem Identified**: Agents were skipping not just the final documentation phase, but ALL investigation phases including:
96
+ - Hypothesis generation (Phase 2)
97
+ - Code analysis (Phase 1)
98
+ - Hypothesis verification (Phase 2b-2h)
99
+ - Instrumentation (Phase 3)
100
+ - Evidence gathering (Phase 4)
101
+
102
+ They were essentially "guessing" the bug and stopping immediately without any systematic investigation.
103
+
104
+ **Root Cause**: Agents didn't understand they are **executing a workflow** that requires repeatedly calling `workflow_next` until `isComplete=true`. They thought they could freestyle debug and stop whenever they felt confident.
105
+
106
+ ### 🎯 Comprehensive Solution
107
+
108
+ #### 1. **Mandatory Workflow Execution Instructions (metaGuidance)**
109
+ Added prominent `🚨 MANDATORY WORKFLOW EXECUTION` section that establishes:
110
+ - "YOU ARE EXECUTING A STRUCTURED WORKFLOW, NOT FREESTYLE DEBUGGING"
111
+ - "You CANNOT 'figure out the bug' and stop"
112
+ - "You MUST execute all 26 workflow steps by repeatedly calling workflow_next"
113
+ - "DO NOT STOP CALLING WORKFLOW_NEXT: Even if you think you know the bug"
114
+ - Clear explanation of workflow mechanics and why this structure exists
115
+
116
+ #### 2. **Early Commitment Checkpoint (Phase 0e)**
117
+ Added **Phase 0e: Workflow Execution Commitment** immediately after triage:
118
+ - Forces agent to explicitly acknowledge they understand workflow execution requirements
119
+ - Lists all remaining phases they MUST complete
120
+ - Requires stating: "I acknowledge I am executing a structured 26-step workflow..."
121
+ - Requires user confirmation before proceeding to investigation phases
122
+ - Acts as psychological commitment device to prevent freestyle debugging
123
+
124
+ #### 3. **Evidence-Based Persuasion**
125
+ Reinforced the **90% error rate statistic** throughout:
126
+ - metaGuidance: "agents who skip systematic investigation steps are wrong ~90% of the time"
127
+ - Phase 0e: "stopping early leads to incorrect conclusions ~90% of the time"
128
+ - Phase 5b: "agents who skip final documentation are wrong ~90% of the time"
129
+
130
+ ### 📊 Behavioral Impact
131
+
132
+ - **Before beta.3**: Agents could guess at bugs and stop immediately without executing any investigation phases
133
+ - **After beta.3**:
134
+ - Agents see prominent "MANDATORY WORKFLOW EXECUTION" instructions first
135
+ - Must acknowledge workflow commitment at Phase 0e before starting investigation
136
+ - User confirms agent's commitment before investigation proceeds
137
+ - Agent is psychologically committed to completing all phases
138
+
139
+ ### 🧪 Testing Scenarios
140
+
141
+ - **Scenario 1: Agent tries to conclude after Phase 0**: Should be blocked by Phase 0e checkpoint requiring workflow commitment
142
+ - **Scenario 2: Agent tries to skip Phase 1-4**: metaGuidance and Phase 0e commitment should prevent this
143
+ - **Scenario 3: Agent tries to skip Phase 6**: Phase 5b checkpoint should catch this
144
+
145
+ ### 🎭 Multi-Layered Defense
146
+
147
+ This release implements a comprehensive multi-layered defense against premature completion:
148
+
149
+ 1. **Layer 1 (Prevention)**: Strong metaGuidance establishing mandatory workflow execution
150
+ 2. **Layer 2 (Early Gate)**: Phase 0e commitment checkpoint with user confirmation
151
+ 3. **Layer 3 (Late Gate)**: Phase 5b completion checkpoint before documentation
152
+ 4. **Layer 4 (Evidence)**: 90% error rate statistic cited throughout
153
+ 5. **Layer 5 (Mechanical)**: Clear explanation of workflow_next mechanics
154
+
155
+ ## [1.1.0-beta.2] - 2025-11-06
156
+
157
+ ### 🎯 Major Enhancements
158
+ - **Mandatory Completion Checkpoint with User Confirmation**: Added Phase 5b checkpoint that requires explicit user confirmation before proceeding to Phase 6 or terminating early.
159
+ - **Evidence-Based Persuasion**: Introduced research-backed statistic that agents who skip final documentation are wrong ~90% of the time, even with high confidence.
160
+ - **Forced Decision Point**: Agents must explicitly choose between completing Phase 6 (recommended) or requesting early termination.
161
+ - **User Gate**: Early termination requires user approval regardless of automation level, making agents less likely to ignore completion requirements.
162
+ - **Professional Standard Reinforcement**: Checkpoint emphasizes that proceeding to Phase 6 is the professional standard backed by 20+ years of software engineering research.
163
+
164
+ ### 📚 metaGuidance Updates
165
+ - Added **EVIDENCE-BASED WARNING** section citing 20+ years of professional research on premature conclusions.
166
+ - Added **COMPLETION CHECKPOINT** section explaining the Phase 5b mandatory user confirmation requirement.
167
+ - Enhanced workflow discipline with research-backed rationale for completing all phases.
168
+
169
+ ### 🔬 Behavioral Impact
170
+ - **Before**: Agents could silently skip phases based on confidence assessment alone.
171
+ - **After**: Agents must acknowledge the 90% error rate and get explicit user approval to skip Phase 6, creating a strong psychological and procedural barrier to premature completion.
172
+ - **Expected Outcome**: Dramatically reduced premature completions as agents face both research evidence and user accountability at the decision point.
173
+
174
+ ### 🧪 Testing Scenarios
175
+ - **Scenario 1: Agent chooses to proceed**: Should state recommendation to continue, user approves, Phase 6 executes normally.
176
+ - **Scenario 2: Agent requests early termination**: Should acknowledge 90% error rate, request user approval, and only terminate if the user explicitly approves with a "terminate" response.
177
+ - **Scenario 3: High confidence with low patience**: User can now explicitly override agent's recommendation at the checkpoint, reinforcing their control while seeing the research-based warning.
178
+
3
179
  ## [1.1.0-beta.1] - 2025-11-06
4
180
 
5
181
  ### 🎯 Major Improvements
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "id": "systematic-bug-investigation-with-loops",
3
3
  "name": "Systematic Bug Investigation Workflow",
4
- "version": "1.1.0-beta.1",
4
+ "version": "1.1.0-beta.4",
5
5
  "description": "A comprehensive workflow for systematic bug and failing test investigation that prevents LLMs from jumping to conclusions. Enforces thorough evidence gathering, hypothesis formation, debugging instrumentation, and validation to achieve near 100% certainty about root causes. This workflow does NOT fix bugs - it produces detailed diagnostic writeups that enable effective fixing by providing complete understanding of what is happening, why it's happening, and supporting evidence.",
6
6
  "clarificationPrompts": [
7
7
  "What type of system is this? (web app, mobile app, backend service, desktop app, etc.)",
@@ -21,12 +21,24 @@
21
21
  "Bug is reproducible with specific steps or a minimal test case"
22
22
  ],
23
23
  "metaGuidance": [
24
+ "**🚨 MANDATORY WORKFLOW EXECUTION - READ THIS FIRST:**",
25
+ "YOU ARE EXECUTING A STRUCTURED WORKFLOW, NOT FREESTYLE DEBUGGING.",
26
+ "You CANNOT \"figure out the bug\" and stop. You MUST execute all 28 workflow steps by repeatedly calling workflow_next until the MCP returns isComplete=true.",
27
+ "WORKFLOW MECHANICS: Each call to workflow_next returns the next required step. You MUST execute that step, then call workflow_next again. Repeat until isComplete=true.",
28
+ "DO NOT STOP CALLING WORKFLOW_NEXT: Even if you think you know the bug, even if you have high confidence, even if it seems obvious - you MUST continue calling workflow_next.",
29
+ "STEP COUNTER: Every prompt shows \"Step X of 28\" - you are NOT done until you reach Step 28/28 and isComplete=true.",
30
+ "",
31
+ "**🎯 WHY THIS STRUCTURE EXISTS (Evidence-Based):**",
32
+ "Professional research spanning 20+ years shows agents who skip systematic investigation steps are wrong ~90% of the time, even with 9-10/10 self-reported confidence.",
33
+ "Quick conclusions miss: edge cases, alternative explanations, environment factors, interaction effects, and data corruption paths.",
34
+ "This workflow FORCES thoroughness through: code analysis, hypothesis formation, instrumentation, evidence gathering, adversarial review, and comprehensive documentation.",
35
+ "",
24
36
  "**CRITICAL WORKFLOW DISCIPLINE:**",
25
37
  "HIGH CONFIDENCE ≠ INVESTIGATION COMPLETE: Achieving 8-10/10 confidence in a hypothesis is excellent progress but does NOT mean the workflow is done.",
26
38
  "COMPLETE ALL PHASES: You MUST complete ALL phases (0 through 6) regardless of confidence level. Each phase builds critical evidence and documentation.",
27
39
  "WORKFLOW COMPLETION FLAG: Only set isWorkflowComplete=true when you complete Phase 6 (Comprehensive Diagnostic Writeup) AND produce the full deliverable.",
28
- "DO NOT SKIP PHASES: Even with high confidence, you must complete instrumentation (Phase 3), evidence collection (Phase 4), analysis (Phase 5), and writeup (Phase 6).",
29
- "PHASE PROGRESSION: An investigation that stops at hypothesis formation (Phase 2) or evidence collection (Phase 4) is INCOMPLETE - the diagnostic writeup is the required deliverable.",
40
+ "DO NOT SKIP PHASES: Even with high confidence, you must complete hypothesis generation (Phase 2), instrumentation (Phase 3), evidence collection (Phase 4), analysis (Phase 5), and writeup (Phase 6).",
41
+ "PHASE PROGRESSION: An investigation that stops at triage (Phase 0), hypothesis formation (Phase 2), or evidence collection (Phase 4) is INCOMPLETE - the diagnostic writeup is the required deliverable.",
30
42
  "**FUNCTION DEFINITIONS:**",
31
43
  "fun instrumentCode(location, hypothesis) = 'Add debug logs at {location} for {hypothesis}. Format: ClassName.method [{hypothesis}]: message. Include timestamp, thread ID if concurrent.'",
32
44
  "fun collectEvidence(hypothesis) = 'Run instrumented code, collect logs, analyze results. Score evidence quality 1-10. Document in Evidence/{hypothesis}.md.'",
@@ -144,70 +156,104 @@
144
156
  ],
145
157
  "requireConfirmation": false
146
158
  },
159
+ {
160
+ "id": "phase-0e-workflow-commitment",
161
+ "title": "Phase 0e: Workflow Execution Commitment",
162
+ "prompt": "**⚠️ WORKFLOW EXECUTION COMMITMENT CHECKPOINT ⚠️**\n\nYou have completed Phase 0 (Triage & Setup). Before proceeding to the investigation phases, you MUST acknowledge your understanding of workflow execution requirements.\n\n**CRITICAL UNDERSTANDING:**\n\n1. **This is a 26-step structured workflow, not freestyle debugging**\n - You MUST call workflow_next repeatedly until isComplete=true\n - You CANNOT stop early, even if you think you know the bug\n - You CANNOT \"figure it out\" and skip steps\n\n2. **Professional research shows 90% error rate for premature conclusions**\n - Even with 9-10/10 confidence, skipping systematic steps leads to wrong conclusions\n - Edge cases, alternative explanations, and interaction effects are missed\n - The workflow FORCES thoroughness for a reason\n\n3. **Remaining phases you MUST complete (regardless of confidence):**\n - ✅ Phase 0: Triage & Setup (COMPLETED)\n - ⏳ Phase 1: Multi-Dimensional Codebase Analysis (5 sub-phases)\n - ⏳ Phase 2: Hypothesis Formation & Validation (8 sub-phases)\n - ⏳ Phase 3: Debugging Instrumentation\n - ⏳ Phase 4: Evidence Collection & Experimentation\n - ⏳ Phase 5: Final Analysis & Confidence Assessment\n - ⏳ Phase 6: Comprehensive Diagnostic Writeup (REQUIRED DELIVERABLE)\n\n4. **What \"completion\" means:**\n - isComplete=true from workflow_next (this happens ONLY after Phase 6)\n - Comprehensive diagnostic writeup produced\n - All evidence documented\n - Investigation context finalized\n\n**REQUIRED ACKNOWLEDGMENT:**\n\nState clearly:\n\"I acknowledge that I am executing a structured 26-step workflow and I MUST call workflow_next for every step until isComplete=true. I understand that stopping early leads to incorrect conclusions ~90% of the time. I will complete all phases (0-6) regardless of my confidence level.\"\n\nThen state: \"I am ready to proceed to Phase 1: Multi-Dimensional Codebase Analysis.\"\n\n**USER**: Do you confirm the agent will follow all workflow phases?",
163
+ "agentRole": "You are a workflow governance specialist ensuring agents understand they are bound to execute all workflow steps systematically.",
164
+ "guidance": [
165
+ "This checkpoint prevents premature termination",
166
+ "Agents must explicitly acknowledge they understand the workflow structure",
167
+ "This is a psychological commitment device to prevent freestyle debugging",
168
+ "Users must confirm the agent's commitment before proceeding"
169
+ ],
170
+ "requireConfirmation": true,
171
+ "confirmationPrompt": "The agent has acknowledged they will follow all 26 workflow steps through Phase 6. Do you approve proceeding with the systematic investigation?\n\nType 'yes' to continue, or 'no' to discuss modifications to the investigation approach."
172
+ },
147
173
  {
148
174
  "id": "phase-1-iterative-analysis",
149
175
  "type": "loop",
150
176
  "title": "Phase 1: Multi-Dimensional Codebase Analysis",
151
177
  "loop": {
152
178
  "type": "for",
153
- "count": 4,
154
- "maxIterations": 4,
179
+ "count": 5,
180
+ "maxIterations": 5,
155
181
  "iterationVar": "analysisPhase"
156
182
  },
157
183
  "body": [
184
+ {
185
+ "id": "analysis-neighborhood-contracts",
186
+ "title": "Analysis 1/5: Neighborhood, Call Graph & Contracts",
187
+ "prompt": "**NEIGHBORHOOD & CONTRACTS DISCOVERY - Build Structural Foundation**\n\nGoal: Build lightweight understanding of code structure, relationships, and contracts BEFORE diving into details. This provides the scaffolding for all subsequent analysis.\n\n**STEP 1: Compute Module Root**\n- Find nearest common ancestor of error stack trace files\n- Clamp to package boundary or src/ directory\n- This defines your investigation scope\n- Set `moduleRoot` context variable\n\n**STEP 2: Neighborhood Map** (cap per file to prevent analysis paralysis)\n- For each file in error stack trace:\n - List immediate neighbors (same directory, max 8)\n - Find imports/exports directly used (max 10)\n - Locate co-located tests (same name pattern)\n - Identify closest entry points: routes, endpoints, CLI commands (max 5)\n- Produce table: File | Neighbors | Tests | Entry Points\n\n**STEP 3: Bounded Call Graph** (Small Multiples with HOT Path Ranking)\n- For each failing function/class in stack trace:\n - Build call graph ≤2 hops deep (inbound and outbound)\n - Cap total nodes at ≤15 per failing symbol\n - Score edges for HOT path ranking:\n * Error location in path: +3\n * Entry point to path: +2 \n * Test coverage exists: +1\n * Mentioned in ticket/error message: +1\n - Tag paths as HOT if score ≥3\n - Use Small Multiples ASCII visualization:\n * Width ≤100 chars per path\n * Format: `EntryPoint -> Caller -> [*FailingSymbol*] -> Callee`\n * Mark changed/failing code as `[*name*]`\n * Add HOT tag for high-impact paths\n * ≤8 total paths, prioritize HOT paths first\n - If graph exceeds caps, use Adjacency Summary instead:\n * Table: Node | Inbound | Outbound | Notes\n * Top-K by degree/frequency\n- Create Alias Legend for repeated subpaths:\n * A1 = common.validation.validateInput\n * A2 = database.connection.getPool\n * Reuse aliases across all paths\n\n**STEP 4: Flow Anchors** (Entry Points to Bug)\n- Map how users/systems trigger the bug:\n - HTTP routes → handlers → failing 
code\n - CLI commands → execution → failing code \n - Scheduled jobs → workers → failing code\n - Event handlers → callbacks → failing code\n- Produce table: Anchor Type | Entry Point | Target Symbol | User Action\n- Cap at ≤5 most relevant anchors\n- Note: This tells us HOW the bug is reached\n\n**STEP 5: Contracts & Invariants**\n- Within `moduleRoot` and immediate neighbors:\n - List public API symbols (exported functions/classes)\n - Document API endpoints (REST/GraphQL/RPC)\n - Identify database tables/collections touched\n - Note message queue topics/events\n - Extract stated invariants from:\n * JSDoc/docstrings with @invariant\n * Assertions in code\n * Validation logic patterns\n * Comments describing guarantees\n- Produce table: Symbol/API | Contract | Invariant | Location\n- Focus on contracts related to failing code\n\n**OUTPUT: Create StructuralAnalysis.md with:**\n- Module Root declaration\n- Neighborhood Map table\n- Bounded Call Graph (Small Multiples ASCII or Adjacency Summary)\n- Alias Legend (for call graph subpaths)\n- Flow Anchors table\n- Contracts & Invariants table\n- Self-Critique: 1-2 areas of uncertainty\n\n**CAPS (strictly enforce to prevent analysis paralysis):**\n- ≤8 neighbors per file\n- ≤10 imports per file\n- ≤5 entry points total\n- ≤15 call graph nodes per failing symbol\n- ≤8 total call graph paths\n- ≤5 flow anchors\n- ≤100 chars width for ASCII paths",
188
+ "agentRole": "You are a codebase navigator building structural understanding. Your focus is mapping relationships, entry points, and contracts WITHOUT diving into implementation details yet.",
189
+ "guidance": [
190
+ "This is analysis phase 1 of 5 total phases",
191
+ "Phase 1a = Structure - Build the map before exploring terrain",
192
+ "Initialize majorIssuesFound = false",
193
+ "STRICTLY ENFORCE CAPS - this prevents 2-hour rabbit holes",
194
+ "Small Multiples: Render mini ASCII path diagrams (≤6 nodes per path)",
195
+ "HOT Path Ranking: Score and prioritize high-impact paths",
196
+ "Alias Legend: Collapse repeated subpaths with deterministic aliases (A1, A2...)",
197
+ "Adjacency Summary: If caps exceeded, use tabular summary instead of full graph",
198
+ "Contracts are CRITICAL: They tell us what guarantees the code must maintain",
199
+ "Flow Anchors show HOW users trigger the bug - essential for reproduction",
200
+ "Create StructuralAnalysis.md in investigation directory",
201
+ "Update INVESTIGATION_CONTEXT.md with module root and structural summary",
202
+ "This phase provides the scaffolding for all subsequent analysis"
203
+ ],
204
+ "runCondition": {"var": "analysisPhase", "equals": 1},
205
+ "requireConfirmation": false
206
+ },
158
207
  {
159
208
  "id": "analysis-breadth-scan",
160
- "title": "Analysis 1/4: Breadth Scan",
161
- "prompt": "**BREADTH SCAN - Cast Wide Net**\n\nGoal: Understand full system impact and identify all potentially involved components.\n\nPerform: Error propagation mapping, Component discovery, Data flow mapping, Recent changes analysis, and Historical pattern search.\n\n**Output**: Complete BreadthAnalysis.md with component interaction map, data flow diagram, suspicious areas ranked by likelihood, and list of all potentially related files and functions.",
162
- "agentRole": "You are performing systematic analysis phase 1 of 4. Your focus is casting a wide net to find all potentially related components.",
209
+ "title": "Analysis 2/5: Breadth Scan & Pattern Discovery",
210
+ "prompt": "**BREADTH SCAN - Cast Wide Net + Learn Expected Behavior**\n\nGoal: Understand full system impact, identify all potentially involved components, and discover existing code patterns to understand expected behavior.\n\n**PART A: Pattern Discovery (Learn How Code SHOULD Work)**\n1. **Compute Module Root**: Find nearest common ancestor of error stack trace files, clamped to package/src\n2. **Discover Patterns** (scan only moduleRoot, exclude failing files from pattern definition):\n - Naming conventions (classes, methods, variables)\n - Error handling patterns (try/catch, error propagation, logging)\n - Logging patterns (format, verbosity, error vs info vs debug)\n - Data validation patterns (where/how data is checked)\n - Test patterns (structure, naming, assertion style)\n - Require ≥2 occurrences across distinct files to qualify as pattern\n3. **Capture Pattern Catalog**: Document validated patterns with 1-3 exemplar locations (file:line)\n4. **Identify Pattern Deviations in Failing Code**: Compare failing code against pattern catalog\n\n**PART B: Error Propagation & Component Discovery**\n1. **ERROR PROPAGATION MAPPING**: Use grep_search for all error occurrences, trace error messages across log files, map stack traces to identify call chains, document every point where error appears/handled\n2. **COMPONENT DISCOVERY**: Find components interacting with failing area, use codebase_search \"How is [component] used?\", identify callers/callees, cap to top 10 most suspicious, rank by likelihood (1-10)\n3. **BOUNDED CALL GRAPH**: For failing function, build call graph ≤2 hops deep, cap at ≤15 total nodes, identify HOT paths (paths through error location), prioritize HOT paths in analysis\n4. **FLOW ANCHORS**: Map entry points (routes/endpoints/CLI commands) to failing code, cap at ≤5 anchors, note which user actions trigger the bug\n\n**PART C: Data Flow & Changes**\n1. 
**DATA FLOW MAPPING**: Trace data through bug area, identify transformations, persistence points, corruption opportunities - but CAP scope to moduleRoot and 2-hop neighborhood\n2. **RECENT CHANGES ANALYSIS**: Git history for identified components (last 10 commits), identify when bug appeared, related PRs/issues, config/dependency changes\n3. **HISTORICAL PATTERN SEARCH**: Use findSimilarBugs() for similar error patterns, previous fixes, related test failures\n\n**Output**: Create BreadthAnalysis.md with:\n- Pattern Catalog (validated patterns + exemplars)\n- Pattern Deviations (how failing code differs from expected patterns)\n- Bounded Call Graph (≤15 nodes, HOT paths highlighted)\n- Flow Anchors Table (entry point → failing symbol)\n- Suspicious Components (top 10, ranked 1-10)\n- Data Flow Map (scoped to moduleRoot + 2 hops)\n- Recent Changes Timeline\n- Historical Similar Bugs\n\n**Self-Critique**: List 1-2 areas where you have low confidence or missing information.",
211
+ "agentRole": "You are performing systematic analysis phase 2 of 5. Your focus is understanding both what IS happening (error propagation) and what SHOULD happen (pattern discovery) to identify deviations.",
163
212
  "guidance": [
164
- "This is analysis phase 1 of 4 total phases",
165
- "Phase 1 = Breadth Scan - Cast wide net for all related components",
213
+ "This is analysis phase 2 of 5 total phases",
214
+ "Phase 1b = Breadth + Patterns - Learn expected behavior AND map error propagation",
166
215
  "Create BreadthAnalysis.md with structured findings",
167
- "ERROR PROPAGATION MAPPING: Use grep_search for all error occurrences, trace error messages across all log files, map all stack traces to identify call chains, document every point where error appears or is handled",
168
- "COMPONENT DISCOVERY: Find ALL components that interact with failing area, use codebase_search \"How is [failing component] used?\", identify all callers and callees, build component interaction map, note both direct and indirect relationships",
169
- "DATA FLOW MAPPING: Trace data that flows through bug area, identify all transformations applied to data, find all persistence points (database, cache, files), document complete data journey, note where data could be corrupted or lost",
170
- "RECENT CHANGES ANALYSIS: Git history for all identified components, check last 10 commits affecting these areas, identify when bug likely appeared, look for related PRs or issues, note any configuration or dependency changes",
171
- "HISTORICAL PATTERN SEARCH: Use findSimilarBugs() to search for similar error patterns in codebase, previous fixes to related components, related test failures in history",
172
- "Use findSimilarBugs() to search for historical patterns",
173
- "Use the function definitions for standardized operations",
216
+ "CRITICAL: Discover patterns FIRST from working code, THEN compare failing code to patterns",
217
+ "Pattern deviations often reveal the bug (e.g., missing validation, different error handling)",
218
+ "Apply CAPS to prevent analysis paralysis: ≤10 components, ≤15 call graph nodes, ≤5 flow anchors, ≤2 hops",
219
+ "HOT PATH RANKING: Score paths by (error in path=3, entry point=2, test coverage=1); tag HOT if score≥3",
220
+ "BOUNDED CALL GRAPH: Use codebase_search to find callers/callees, stop at 2 hops, cap nodes, dedupe",
221
+ "PATTERN DISCOVERY: Require ≥2 occurrences to qualify as pattern; singletons are 'candidate conventions' only",
222
+ "SELF-CRITIQUE: Explicitly note 1-2 areas of uncertainty or missing information",
174
223
  "Update INVESTIGATION_CONTEXT.md after completion",
175
- "Be thorough - it's better to include too much than miss something critical",
176
- "Document your reasoning for why each component is potentially involved"
224
+ "Use the function definitions for standardized operations"
177
225
  ],
178
- "runCondition": {"var": "analysisPhase", "equals": 1},
226
+ "runCondition": {"var": "analysisPhase", "equals": 2},
179
227
  "requireConfirmation": false
180
228
  },
181
229
  {
182
230
  "id": "analysis-deep-dive",
183
- "title": "Analysis 2/4: Component Deep Dive",
184
- "prompt": "**COMPONENT DEEP DIVE - Understand Internals**\n\nGoal: Deep understanding of top 5 suspicious components from breadth scan.\n\nFor each component, use recursiveAnalysis(component, 3) to perform 3-level analysis: Direct Implementation (Level 1), Direct Dependencies (Level 2), and Integration Points (Level 3). Document likelihood scores, suspicious code sections, failure modes, and red flags.\n\n**Output**: ComponentAnalysis.md with deep insights for top 5 components, ranked list of most likely root cause locations, detailed notes on internals, and dependency graph showing relationships.",
185
- "agentRole": "You are performing systematic analysis phase 2 of 4. Your focus is deep diving into the most suspicious components to understand their internals.",
231
+ "title": "Analysis 3/5: Component Deep Dive with Hot-Path Focus",
232
+ "prompt": "**COMPONENT DEEP DIVE - Prioritized Investigation**\n\nGoal: Deep understanding of top 5 suspicious components from breadth scan, prioritizing HOT paths and pattern deviations.\n\n**PRIORITIZATION (from Phase 1):**\n1. Focus on components on HOT paths (score ≥3)\n2. Prioritize components with pattern deviations\n3. Rank by likelihood score from Phase 1\n4. Cap analysis to top 5 components\n\n**FOR EACH COMPONENT (recursive 3-level analysis):**\n\n**LEVEL 1 - DIRECT IMPLEMENTATION** (prioritize HOT paths and deviation areas):\n- Read complete file (or HOT path sections if file >500 lines)\n- Compare error handling against pattern catalog from Phase 1\n- Identify pattern deviations with file:line locations\n- Check state management, initialization, cleanup\n- Document invariants and assumptions\n- Note TODO/FIXME/HACK/BUG comments\n- Red flags: complex logic, missing validation, race conditions\n\n**LEVEL 2 - DIRECT DEPENDENCIES** (cap at ≤10 deps per component):\n- Follow imports on HOT paths first\n- Check dependency contracts and interfaces\n- Analyze coupling and data exchange\n- Look for shared mutable state\n- Identify circular dependencies\n- Document failure propagation paths\n\n**LEVEL 3 - INTEGRATION POINTS** (cap at ≤8 integration points):\n- External calls (DB, API, file system) - cap at ≤5\n- Concurrency/threading concerns\n- Resource management issues\n- Caching and state sync\n- Event handling and callbacks\n- Configuration dependencies\n\n**FOR EACH COMPONENT, PRODUCE:**\n- **Likelihood Score** (1-10): Weight HOT paths +3, pattern deviations +2, recent changes +1\n- **Suspicious Sections**: Specific file:line with rationale (≤5 per component)\n- **Failure Modes**: How this component could cause the observed bug (≤3 scenarios)\n- **Pattern Violations**: How it deviates from expected patterns (from Phase 1)\n- **Critical Dependencies**: Top 3 dependencies that could be sources\n\n**Output**: Create ComponentAnalysis.md with:\n- Component 
Rankings (1-5, sorted by likelihood score)\n- Per-Component Analysis (following structure above)\n- Pattern Violation Summary\n- Critical Path Map (which components are on HOT paths)\n- **Self-Critique**: 1-2 components you're uncertain about and why\n\n**CAPS TO PREVENT ANALYSIS PARALYSIS:**\n- Top 5 components only\n- ≤10 dependencies per component\n- ≤8 integration points per component\n- ≤5 suspicious sections per component\n- ≤3 failure modes per component",
233
+ "agentRole": "You are performing systematic analysis phase 3 of 5. Your focus is deep-diving into the most suspicious components, prioritizing HOT paths and pattern deviations.",
186
234
  "guidance": [
187
- "This is analysis phase 2 of 4 total phases",
188
- "Phase 2 = Deep Dive - Analyze suspicious components 3 levels deep",
189
- "Build on findings from Phase 1 Breadth Scan",
235
+ "This is analysis phase 3 of 5 total phases",
236
+ "Phase 1c = Deep Dive - Focus on HOT paths and pattern violations",
237
+ "Build on findings from Phases 1a and 1b (patterns, HOT paths, flow anchors)",
190
238
  "Create ComponentAnalysis.md with structured findings",
191
239
  "Use recursiveAnalysis() for systematic exploration",
192
- "LEVEL 1 - DIRECT IMPLEMENTATION: Read COMPLETE file including private methods, understand state management and data structures, analyze error handling patterns, check initialization and cleanup logic, document all public/private APIs, identify assumptions or invariants, note TODO/FIXME comments",
193
- "LEVEL 2 - DIRECT DEPENDENCIES: Follow all imports and their usage, understand dependency contracts and interfaces, check version compatibility and breaking changes, analyze coupling points and data exchange, look for shared mutable state, identify circular dependencies, document how failures could propagate",
194
- "LEVEL 3 - INTEGRATION POINTS: How component fits in larger system architecture, side effects and external calls (DB, API, file system), concurrency and threading concerns, resource management (memory, connections, handles), caching and state synchronization, event handling and callbacks, configuration and environment dependencies",
195
- "FOR EACH COMPONENT DOCUMENT: Likelihood score (1-10) of being root cause, specific suspicious code sections with line numbers, potential failure modes and their symptoms, dependencies that could be sources of issues, red flags (complex logic, error handling gaps, race conditions)",
196
- "Update INVESTIGATION_CONTEXT.md after completion",
197
- "Go deep - read entire files, not just the obvious parts",
198
- "Look for subtle issues like race conditions, edge cases, and assumptions"
240
+ "PRIORITIZE HOT PATHS: Analyze code on HOT paths before other code",
241
+ "PATTERN-DRIVEN: Compare actual code against pattern catalog from Phase 1",
242
+ "APPLY CAPS STRICTLY: Prevents spending 2 hours reading every file",
243
+ "SELF-CRITIQUE: Note where you're uncertain or making assumptions",
244
+ "Update INVESTIGATION_CONTEXT.md after completion"
199
245
  ],
200
- "runCondition": {"var": "analysisPhase", "equals": 2},
246
+ "runCondition": {"var": "analysisPhase", "equals": 3},
201
247
  "requireConfirmation": false
202
248
  },
203
249
  {
204
250
  "id": "analysis-dependencies",
205
- "title": "Analysis 3/4: Dependencies & Flow",
251
+ "title": "Analysis 4/5: Dependencies & Flow",
206
252
  "prompt": "**DEPENDENCY & FLOW ANALYSIS - Trace Connections**\n\nGoal: Understand how components interact and data flows between them.\n\nPerform: Static dependency graph analysis, Runtime flow analysis, Data transformation pipeline tracing, and Integration analysis.\n\n**Output**: FlowAnalysis.md with sequence diagrams showing execution flow, data flow maps with transformation points, complete dependency graph, list of all integration points and failure modes, and timeline showing order of operations.",
207
- "agentRole": "You are performing systematic analysis phase 3 of 4. Your focus is tracing how components connect and data flows between them.",
253
+ "agentRole": "You are performing systematic analysis phase 4 of 5. Your focus is tracing how components connect and data flows between them.",
208
254
  "guidance": [
209
- "This is analysis phase 3 of 4 total phases",
210
- "Phase 3 = Dependencies - Trace connections and data flows",
255
+ "This is analysis phase 4 of 5 total phases",
256
+ "Phase 1d = Dependencies - Trace connections and data flows",
211
257
  "Build on component understanding from the Component Deep Dive (Phase 1c)",
212
258
  "Create FlowAnalysis.md with diagrams and flow charts",
213
259
  "STATIC DEPENDENCY GRAPH: Build complete import/dependency tree, identify circular dependencies, find hidden dependencies (reflection, dynamic loading, DI), map version constraints and compatibility, document shared libraries and utilities, note tight coupling or fragile dependencies",
@@ -219,17 +265,17 @@
219
265
  "Pay special attention to async boundaries and error propagation",
220
266
  "Look for implicit dependencies that aren't obvious from imports"
221
267
  ],
222
- "runCondition": {"var": "analysisPhase", "equals": 3},
268
+ "runCondition": {"var": "analysisPhase", "equals": 4},
223
269
  "requireConfirmation": false
224
270
  },
225
271
  {
226
272
  "id": "analysis-test-coverage",
227
- "title": "Analysis 4/4: Test Coverage",
273
+ "title": "Analysis 5/5: Test Coverage",
228
274
  "prompt": "**TEST COVERAGE ANALYSIS - Leverage Existing Knowledge**\n\nGoal: Use existing tests as source of truth about system behavior.\n\nFor each suspicious component, use analyzeTests(component) to perform: Direct test coverage analysis, Integration test analysis, Test history investigation, Test execution with debugging, and Coverage gap analysis.\n\n**Output**: TestAnalysis.md with coverage gaps matrix, suspicious test patterns, test evidence for hypotheses, recommendations for tests to add, and complete test inventory for affected components.",
229
- "agentRole": "You are performing systematic analysis phase 4 of 4. Your focus is leveraging existing tests to understand expected behavior and find coverage gaps.",
275
+ "agentRole": "You are performing systematic analysis phase 5 of 5. Your focus is leveraging existing tests to understand expected behavior and find coverage gaps.",
230
276
  "guidance": [
231
- "This is analysis phase 4 of 4 total phases",
232
- "Phase 4 = Tests - Analyze test coverage and quality",
277
+ "This is analysis phase 5 of 5 total phases",
278
+ "Phase 1e = Tests - Analyze test coverage and quality",
233
279
  "Build on all previous analysis phases",
234
280
  "Create TestAnalysis.md with coverage gap matrix",
235
281
  "DIRECT TEST COVERAGE: Find all tests using grep/test discovery, analyze what's tested (happy/edge/error cases), identify what's NOT tested, check test quality and assertion strength, note mocking/stubbing that might hide issues, review test names and docs",
@@ -242,7 +288,7 @@
242
288
  "Tests often reveal the 'expected' behavior - compare with actual behavior",
243
289
  "Missing tests often indicate areas where bugs hide"
244
290
  ],
245
- "runCondition": {"var": "analysisPhase", "equals": 4},
291
+ "runCondition": {"var": "analysisPhase", "equals": 5},
246
292
  "requireConfirmation": false
247
293
  }
248
294
  ],
@@ -628,6 +674,21 @@
628
674
  ],
629
675
  "hasValidation": true
630
676
  },
677
+ {
678
+ "id": "phase-5b-completion-checkpoint",
679
+ "title": "Phase 5b: Workflow Completion Checkpoint",
680
+ "prompt": "**WORKFLOW COMPLETION DECISION CHECKPOINT**\n\nYou have completed Phase 5a and assessed your confidence in the root cause. Before proceeding to Phase 6 (the comprehensive diagnostic writeup), you must make an explicit decision:\n\n**OPTION 1: Complete Full Investigation (STRONGLY RECOMMENDED)**\n- Proceed to Phase 6 to create the comprehensive diagnostic writeup\n- This is the professional standard for thorough bug investigations\n- Takes ~15-30 minutes but provides complete documentation\n- **Research shows:** Investigations that include full writeups have 90% fix success rate\n\n**OPTION 2: Request Early Termination (DISCOURAGED)**\n- Skip Phase 6 and end the investigation now\n- Mark the investigation as \"Incomplete - User Terminated\"\n- ⚠️ **CRITICAL WARNING - EVIDENCE-BASED:**\n - **Professional research spanning 20+ years of software engineering practice shows that agents who skip final documentation phases are wrong about the root cause ~90% of the time**\n - Even with 9-10/10 confidence, missing the comprehensive writeup leads to:\n - Incomplete context transfer to fixing developers\n - Missed edge cases and alternative explanations\n - Poor reproducibility of findings\n - Inability to prevent similar bugs in the future\n - The writeup phase often reveals gaps in reasoning that change the conclusion\n\n**DECISION REQUIRED:**\n\nIf you believe you have sufficient information and want to request early termination:\n1. State your current confidence level\n2. Acknowledge the 90% error rate for skipping documentation\n3. Request user approval to terminate early\n4. **This will require EXPLICIT USER CONFIRMATION regardless of automation level**\n\nIf you want to complete the investigation professionally:\n1. Simply state you will proceed to Phase 6\n2. No user confirmation needed\n3. Continue to the comprehensive diagnostic writeup\n\n**RECOMMENDED ACTION:** Proceed to Phase 6. 
The writeup takes minimal additional time but dramatically improves fix success rates and organizational learning.",
681
+ "agentRole": "You are a workflow governance specialist ensuring agents make informed decisions about investigation completeness with full awareness of professional research on premature conclusions.",
682
+ "guidance": [
683
+ "Present both options clearly but emphasize the professional standard",
684
+ "Be transparent about the 90% error rate for skipping documentation",
685
+ "If requesting early termination, you MUST acknowledge the risks",
686
+ "User confirmation is REQUIRED for early termination, even in high automation mode",
687
+ "Proceeding to Phase 6 is the default professional behavior"
688
+ ],
689
+ "requireConfirmation": true,
690
+ "confirmationPrompt": "WORKFLOW COMPLETION CHECKPOINT:\n\nThe agent has assessed their confidence and is now deciding whether to:\n\nA) Proceed to Phase 6 (Comprehensive Diagnostic Writeup) - RECOMMENDED\nB) Request early termination (skip Phase 6)\n\n⚠️ EVIDENCE-BASED WARNING: Professional research shows agents who skip final documentation are wrong about the root cause ~90% of the time, even with high confidence.\n\nAgent's recommendation: {check their response above}\n\nDo you want to:\n- Type 'continue' to proceed to Phase 6 (recommended)\n- Type 'terminate' to end investigation now (discouraged)\n\nYour choice:"
691
+ },
631
692
  {
632
693
  "id": "phase-6-diagnostic-writeup",
633
694
  "title": "Phase 6: Comprehensive Diagnostic Writeup",
Binary file