@exaudeus/workrail 0.6.1-beta.8 → 0.6.1-beta.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/web/assets/images/favicon-amber-16.png +0 -0
- package/web/assets/images/favicon-amber-32.png +0 -0
- package/web/assets/images/favicon-white-16-clean.png +0 -0
- package/web/assets/images/favicon-white-32-clean.png +0 -0
- package/web/assets/images/icon-amber-192.png +0 -0
- package/web/assets/images/icon-amber-512.png +0 -0
- package/web/assets/images/icon-amber.svg +27 -0
- package/web/assets/images/icon-white-192-clean.png +0 -0
- package/web/assets/images/icon-white-512-clean.png +0 -0
- package/web/assets/images/icon-white.svg +27 -0
- package/web/manifest.json +1 -1
- package/workflows/CHANGELOG-bug-investigation.md +176 -0
- package/workflows/systemic-bug-investigation-with-loops.json +105 -44
- package/web/assets/images/favicon-white-16.png +0 -0
- package/web/assets/images/favicon-white-32.png +0 -0
- package/web/assets/images/icon-white-192.png +0 -0
- package/web/assets/images/icon-white-512.png +0 -0
- package/web/assets/images/icon-white.png +0 -0
package/package.json
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
|
2
|
+
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
|
3
|
+
<svg width="100%" height="100%" viewBox="0 0 2021 1081" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;">
|
|
4
|
+
<g id="Artboard1" transform="matrix(1,0,0,0.637382,-465,-607.425118)">
|
|
5
|
+
<rect x="465" y="953" width="2021" height="1696" style="fill:none;"/>
|
|
6
|
+
<g transform="matrix(1,0,0,1.568918,1474.921541,2274.181449)">
|
|
7
|
+
<g transform="matrix(1,0,0,1,-996.300469,-814.404769)">
|
|
8
|
+
<clipPath id="_clip1">
|
|
9
|
+
<rect x="0" y="0" width="1992.601" height="1628.81"/>
|
|
10
|
+
</clipPath>
|
|
11
|
+
<g clip-path="url(#_clip1)">
|
|
12
|
+
<g transform="matrix(0.416667,0,0,-0.416667,-1136.565729,2841.542938)">
|
|
13
|
+
<path d="M7154,6805C7032,6778 6928,6704 6847,6586C6782,6490 6708,6322 6430,5645C6286,5293 6164,5002 6160,4997C6157,4993 6040,5175 5901,5402C5761,5629 5600,5893 5541,5987L5435,6160L5362,6160C5290,6160 5289,6160 5301,6138C5332,6081 6161,4736 6174,4722C6184,4712 6192,4724 6215,4782C6231,4822 6313,5024 6398,5230C6483,5436 6592,5702 6640,5820C6872,6385 6923,6491 7009,6583C7084,6664 7113,6674 7275,6678L7415,6682L7463,6745C7489,6779 7510,6810 7510,6814C7510,6826 7216,6819 7154,6805Z" style="fill-rule:nonzero;"/>
|
|
14
|
+
<path d="M2732,6782C2739,6772 2763,6741 2785,6712L2825,6661L2950,6660C3232,6658 3269,6611 3570,5885C3643,5709 3781,5374 3876,5141C3971,4908 4051,4715 4054,4712C4057,4709 4062,4709 4063,4711C4068,4716 4477,5385 4639,5652C4708,5766 4769,5859 4773,5857C4778,5856 4949,5524 5154,5120L5526,4385L5672,4379L5475,4767C4986,5730 4785,6120 4779,6120C4775,6120 4746,6076 4715,6023C4683,5969 4552,5754 4425,5545C4297,5336 4169,5126 4141,5078C4112,5029 4086,4990 4082,4990C4075,4990 4027,5104 3824,5600C3531,6319 3476,6443 3402,6556C3322,6680 3209,6765 3092,6789C3063,6795 2967,6800 2879,6800C2726,6800 2719,6799 2732,6782Z" style="fill-rule:nonzero;"/>
|
|
15
|
+
<path d="M6650,6779C6450,6750 6333,6657 6178,6405C6105,6285 5807,5739 5807,5725C5807,5706 5874,5600 5881,5608C5884,5612 5952,5734 6030,5880C6308,6396 6388,6521 6518,6641C6558,6678 6624,6726 6667,6748C6747,6790 6745,6793 6650,6779Z" style="fill-rule:nonzero;"/>
|
|
16
|
+
<path d="M3566,6737C3786,6616 3854,6525 4217,5860C4291,5725 4354,5612 4358,5608C4362,5604 4380,5629 4399,5664L4433,5728L4390,5806C4067,6396 3996,6513 3901,6607C3811,6697 3698,6749 3560,6765L3505,6771L3566,6737Z" style="fill-rule:nonzero;"/>
|
|
17
|
+
<path d="M6880,6741C6846,6722 6751,6629 6717,6581C6703,6559 6690,6556 6598,6547L6495,6537L6450,6472C6425,6436 6403,6402 6401,6397C6399,6391 6435,6389 6504,6392C6583,6396 6610,6395 6607,6386C6604,6380 6579,6322 6550,6257L6497,6139L6255,6143L6213,6065C6189,6022 6171,5985 6173,5984C6175,5982 6229,5975 6293,5969C6357,5962 6410,5953 6410,5948C6410,5937 6299,5678 6289,5667C6283,5660 6101,5688 6040,5705C6019,5711 5933,5551 5951,5540C5957,5535 6015,5517 6079,5499C6143,5481 6197,5465 6199,5464C6201,5462 6181,5411 6155,5351C6111,5249 6108,5238 6120,5210C6127,5194 6135,5180 6139,5180C6142,5180 6163,5224 6184,5278C6268,5486 6639,6349 6680,6430C6739,6547 6813,6652 6879,6711C6937,6763 6937,6771 6880,6741Z" style="fill-rule:nonzero;"/>
|
|
18
|
+
<path d="M3372,6681C3455,6604 3514,6516 3585,6368C3633,6265 3931,5568 4045,5290C4071,5227 4094,5173 4095,5172C4097,5170 4105,5181 4114,5198C4128,5226 4127,5232 4085,5333C4061,5392 4044,5441 4048,5443C4052,5446 4109,5464 4175,5485C4241,5505 4297,5524 4299,5526C4307,5534 4226,5690 4214,5690C4208,5690 4153,5679 4092,5665C4031,5651 3975,5640 3967,5640C3959,5640 3929,5696 3892,5784C3858,5864 3830,5931 3830,5933C3830,5936 3884,5944 3950,5950C4024,5958 4070,5967 4070,5974C4069,5980 4052,6016 4031,6054L3992,6122L3869,6121L3745,6120L3692,6237C3664,6302 3640,6361 3640,6368C3640,6377 3663,6379 3740,6374C3795,6370 3840,6371 3840,6375C3840,6379 3819,6414 3794,6451L3747,6519L3549,6533L3504,6592C3461,6647 3369,6730 3350,6730C3345,6730 3333,6733 3323,6737C3313,6741 3335,6716 3372,6681Z" style="fill-rule:nonzero;"/>
|
|
19
|
+
<path d="M4850,6155C4850,6153 4917,6024 4999,5868C5080,5712 5193,5495 5250,5385C5306,5275 5432,5030 5530,4840C5628,4650 5721,4469 5736,4438C5760,4388 5768,4380 5792,4380C5807,4380 5820,4382 5820,4385C5820,4388 5809,4413 5795,4439L5771,4488L5895,4574C5963,4622 6025,4663 6032,4665C6040,4668 6057,4649 6074,4617L6103,4565L6116,4595C6126,4621 6126,4629 6110,4655C5992,4852 5286,6024 5251,6082C5215,6143 5200,6160 5181,6160C5164,6160 5160,6156 5164,6145C5167,6137 5173,6123 5176,6115C5181,6102 5164,6100 5052,6100L4922,6100L4910,6130C4901,6151 4891,6160 4874,6160C4861,6160 4850,6158 4850,6155ZM5347,5843C5384,5778 5416,5723 5418,5719C5419,5716 5381,5710 5333,5706C5285,5702 5222,5696 5193,5693L5141,5687L5078,5811C5044,5879 5014,5938 5012,5942C5010,5946 5053,5951 5107,5953C5161,5954 5221,5957 5242,5958L5278,5960L5347,5843ZM5597,5420C5638,5352 5672,5292 5673,5287C5675,5277 5409,5199 5397,5207C5393,5209 5356,5279 5315,5360L5240,5509L5273,5515C5290,5518 5339,5527 5380,5535C5519,5560 5512,5563 5597,5420ZM5845,5003C5877,4949 5911,4891 5922,4873L5940,4842L5804,4766C5728,4724 5666,4691 5665,4692C5652,4711 5510,4991 5510,4998C5510,5008 5771,5110 5781,5104C5784,5102 5813,5057 5845,5003Z" style="fill-rule:nonzero;"/>
|
|
20
|
+
<path d="M4438,5173C4261,4877 4113,4632 4111,4627C4110,4623 4114,4606 4120,4589L4133,4559L4163,4612C4179,4641 4194,4666 4195,4668C4198,4672 4448,4501 4455,4490C4458,4485 4449,4458 4435,4430L4410,4380L4475,4380L4644,4713C4737,4895 4847,5108 4888,5184C4961,5321 4962,5324 4947,5354C4933,5383 4931,5383 4924,5365C4920,5354 4899,5313 4878,5274L4839,5202L4712,5236C4642,5255 4580,5272 4573,5274C4565,5278 4575,5305 4605,5357C4630,5400 4665,5461 4683,5493L4715,5552L4783,5546L4850,5540L4809,5622C4786,5668 4766,5706 4765,5708C4763,5710 4616,5469 4438,5173ZM4595,5054C4661,5029 4719,5005 4723,5001C4729,4995 4598,4723 4574,4692C4570,4687 4312,4829 4306,4839C4299,4850 4450,5100 4464,5100C4470,5100 4529,5079 4595,5054Z" style="fill-rule:nonzero;"/>
|
|
21
|
+
<path d="M5004,5241L4568,4379L4642,4382L4715,4385L5075,5099L5040,5170L5004,5241Z" style="fill-rule:nonzero;"/>
|
|
22
|
+
</g>
|
|
23
|
+
</g>
|
|
24
|
+
</g>
|
|
25
|
+
</g>
|
|
26
|
+
</g>
|
|
27
|
+
</svg>
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
|
2
|
+
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
|
3
|
+
<svg width="100%" height="100%" viewBox="0 0 2021 1081" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;">
|
|
4
|
+
<g id="Artboard1" transform="matrix(1,0,0,0.637382,-465,-607.425118)">
|
|
5
|
+
<rect x="465" y="953" width="2021" height="1696" style="fill:none;"/>
|
|
6
|
+
<g transform="matrix(1,0,0,1.568918,1474.921541,2274.181449)">
|
|
7
|
+
<g transform="matrix(1,0,0,1,-996.300469,-814.404769)">
|
|
8
|
+
<clipPath id="_clip1">
|
|
9
|
+
<rect x="0" y="0" width="1992.601" height="1628.81"/>
|
|
10
|
+
</clipPath>
|
|
11
|
+
<g clip-path="url(#_clip1)">
|
|
12
|
+
<g transform="matrix(0.416667,0,0,-0.416667,-1136.565729,2841.542938)">
|
|
13
|
+
<path d="M7154,6805C7032,6778 6928,6704 6847,6586C6782,6490 6708,6322 6430,5645C6286,5293 6164,5002 6160,4997C6157,4993 6040,5175 5901,5402C5761,5629 5600,5893 5541,5987L5435,6160L5362,6160C5290,6160 5289,6160 5301,6138C5332,6081 6161,4736 6174,4722C6184,4712 6192,4724 6215,4782C6231,4822 6313,5024 6398,5230C6483,5436 6592,5702 6640,5820C6872,6385 6923,6491 7009,6583C7084,6664 7113,6674 7275,6678L7415,6682L7463,6745C7489,6779 7510,6810 7510,6814C7510,6826 7216,6819 7154,6805Z" style="fill-rule:nonzero;"/>
|
|
14
|
+
<path d="M2732,6782C2739,6772 2763,6741 2785,6712L2825,6661L2950,6660C3232,6658 3269,6611 3570,5885C3643,5709 3781,5374 3876,5141C3971,4908 4051,4715 4054,4712C4057,4709 4062,4709 4063,4711C4068,4716 4477,5385 4639,5652C4708,5766 4769,5859 4773,5857C4778,5856 4949,5524 5154,5120L5526,4385L5672,4379L5475,4767C4986,5730 4785,6120 4779,6120C4775,6120 4746,6076 4715,6023C4683,5969 4552,5754 4425,5545C4297,5336 4169,5126 4141,5078C4112,5029 4086,4990 4082,4990C4075,4990 4027,5104 3824,5600C3531,6319 3476,6443 3402,6556C3322,6680 3209,6765 3092,6789C3063,6795 2967,6800 2879,6800C2726,6800 2719,6799 2732,6782Z" style="fill-rule:nonzero;"/>
|
|
15
|
+
<path d="M6650,6779C6450,6750 6333,6657 6178,6405C6105,6285 5807,5739 5807,5725C5807,5706 5874,5600 5881,5608C5884,5612 5952,5734 6030,5880C6308,6396 6388,6521 6518,6641C6558,6678 6624,6726 6667,6748C6747,6790 6745,6793 6650,6779Z" style="fill-rule:nonzero;"/>
|
|
16
|
+
<path d="M3566,6737C3786,6616 3854,6525 4217,5860C4291,5725 4354,5612 4358,5608C4362,5604 4380,5629 4399,5664L4433,5728L4390,5806C4067,6396 3996,6513 3901,6607C3811,6697 3698,6749 3560,6765L3505,6771L3566,6737Z" style="fill-rule:nonzero;"/>
|
|
17
|
+
<path d="M6880,6741C6846,6722 6751,6629 6717,6581C6703,6559 6690,6556 6598,6547L6495,6537L6450,6472C6425,6436 6403,6402 6401,6397C6399,6391 6435,6389 6504,6392C6583,6396 6610,6395 6607,6386C6604,6380 6579,6322 6550,6257L6497,6139L6255,6143L6213,6065C6189,6022 6171,5985 6173,5984C6175,5982 6229,5975 6293,5969C6357,5962 6410,5953 6410,5948C6410,5937 6299,5678 6289,5667C6283,5660 6101,5688 6040,5705C6019,5711 5933,5551 5951,5540C5957,5535 6015,5517 6079,5499C6143,5481 6197,5465 6199,5464C6201,5462 6181,5411 6155,5351C6111,5249 6108,5238 6120,5210C6127,5194 6135,5180 6139,5180C6142,5180 6163,5224 6184,5278C6268,5486 6639,6349 6680,6430C6739,6547 6813,6652 6879,6711C6937,6763 6937,6771 6880,6741Z" style="fill-rule:nonzero;"/>
|
|
18
|
+
<path d="M3372,6681C3455,6604 3514,6516 3585,6368C3633,6265 3931,5568 4045,5290C4071,5227 4094,5173 4095,5172C4097,5170 4105,5181 4114,5198C4128,5226 4127,5232 4085,5333C4061,5392 4044,5441 4048,5443C4052,5446 4109,5464 4175,5485C4241,5505 4297,5524 4299,5526C4307,5534 4226,5690 4214,5690C4208,5690 4153,5679 4092,5665C4031,5651 3975,5640 3967,5640C3959,5640 3929,5696 3892,5784C3858,5864 3830,5931 3830,5933C3830,5936 3884,5944 3950,5950C4024,5958 4070,5967 4070,5974C4069,5980 4052,6016 4031,6054L3992,6122L3869,6121L3745,6120L3692,6237C3664,6302 3640,6361 3640,6368C3640,6377 3663,6379 3740,6374C3795,6370 3840,6371 3840,6375C3840,6379 3819,6414 3794,6451L3747,6519L3549,6533L3504,6592C3461,6647 3369,6730 3350,6730C3345,6730 3333,6733 3323,6737C3313,6741 3335,6716 3372,6681Z" style="fill-rule:nonzero;"/>
|
|
19
|
+
<path d="M4850,6155C4850,6153 4917,6024 4999,5868C5080,5712 5193,5495 5250,5385C5306,5275 5432,5030 5530,4840C5628,4650 5721,4469 5736,4438C5760,4388 5768,4380 5792,4380C5807,4380 5820,4382 5820,4385C5820,4388 5809,4413 5795,4439L5771,4488L5895,4574C5963,4622 6025,4663 6032,4665C6040,4668 6057,4649 6074,4617L6103,4565L6116,4595C6126,4621 6126,4629 6110,4655C5992,4852 5286,6024 5251,6082C5215,6143 5200,6160 5181,6160C5164,6160 5160,6156 5164,6145C5167,6137 5173,6123 5176,6115C5181,6102 5164,6100 5052,6100L4922,6100L4910,6130C4901,6151 4891,6160 4874,6160C4861,6160 4850,6158 4850,6155ZM5347,5843C5384,5778 5416,5723 5418,5719C5419,5716 5381,5710 5333,5706C5285,5702 5222,5696 5193,5693L5141,5687L5078,5811C5044,5879 5014,5938 5012,5942C5010,5946 5053,5951 5107,5953C5161,5954 5221,5957 5242,5958L5278,5960L5347,5843ZM5597,5420C5638,5352 5672,5292 5673,5287C5675,5277 5409,5199 5397,5207C5393,5209 5356,5279 5315,5360L5240,5509L5273,5515C5290,5518 5339,5527 5380,5535C5519,5560 5512,5563 5597,5420ZM5845,5003C5877,4949 5911,4891 5922,4873L5940,4842L5804,4766C5728,4724 5666,4691 5665,4692C5652,4711 5510,4991 5510,4998C5510,5008 5771,5110 5781,5104C5784,5102 5813,5057 5845,5003Z" style="fill-rule:nonzero;"/>
|
|
20
|
+
<path d="M4438,5173C4261,4877 4113,4632 4111,4627C4110,4623 4114,4606 4120,4589L4133,4559L4163,4612C4179,4641 4194,4666 4195,4668C4198,4672 4448,4501 4455,4490C4458,4485 4449,4458 4435,4430L4410,4380L4475,4380L4644,4713C4737,4895 4847,5108 4888,5184C4961,5321 4962,5324 4947,5354C4933,5383 4931,5383 4924,5365C4920,5354 4899,5313 4878,5274L4839,5202L4712,5236C4642,5255 4580,5272 4573,5274C4565,5278 4575,5305 4605,5357C4630,5400 4665,5461 4683,5493L4715,5552L4783,5546L4850,5540L4809,5622C4786,5668 4766,5706 4765,5708C4763,5710 4616,5469 4438,5173ZM4595,5054C4661,5029 4719,5005 4723,5001C4729,4995 4598,4723 4574,4692C4570,4687 4312,4829 4306,4839C4299,4850 4450,5100 4464,5100C4470,5100 4529,5079 4595,5054Z" style="fill-rule:nonzero;"/>
|
|
21
|
+
<path d="M5004,5241L4568,4379L4642,4382L4715,4385L5075,5099L5040,5170L5004,5241Z" style="fill-rule:nonzero;"/>
|
|
22
|
+
</g>
|
|
23
|
+
</g>
|
|
24
|
+
</g>
|
|
25
|
+
</g>
|
|
26
|
+
</g>
|
|
27
|
+
</svg>
|
package/web/manifest.json
CHANGED
|
@@ -1,5 +1,181 @@
|
|
|
1
1
|
# Changelog - Systematic Bug Investigation Workflow
|
|
2
2
|
|
|
3
|
+
## [1.1.0-beta.4] - 2025-11-06
|
|
4
|
+
|
|
5
|
+
### 🎯 Major Enhancement: Sophisticated Code Analysis (Adapted from MR Review Workflow)
|
|
6
|
+
|
|
7
|
+
**Problem**: The codebase analysis in Phase 1 was weaker than it should have been. It lacked explicit structural mapping, contracts & invariants discovery, and sophisticated call graph visualization, all of which are essential for understanding bugs in complex codebases.
|
|
8
|
+
|
|
9
|
+
**Solution**: Added new **Phase 1a: Neighborhood, Call Graph & Contracts** analysis step, bringing total Phase 1 sub-phases from 4 to 5, and total workflow steps from 27 to 28.
|
|
10
|
+
|
|
11
|
+
### 📊 New Phase 1a: Neighborhood, Call Graph & Contracts
|
|
12
|
+
|
|
13
|
+
This new first analysis step builds the structural foundation before diving into details:
|
|
14
|
+
|
|
15
|
+
#### **1. Module Root Computation**
|
|
16
|
+
- Find nearest common ancestor of error stack trace files
|
|
17
|
+
- Clamp to package/src boundary to define investigation scope
|
|
18
|
+
- Prevents unbounded analysis across entire codebase
|
|
19
|
+
|
|
20
|
+
#### **2. Neighborhood Map**
|
|
21
|
+
- Immediate neighbors (same directory, max 8)
|
|
22
|
+
- Imports/exports directly used (max 10)
|
|
23
|
+
- Co-located tests
|
|
24
|
+
- Closest entry points (routes, endpoints, CLI commands, max 5)
|
|
25
|
+
- Provides context for what's near the failing code
|
|
26
|
+
|
|
27
|
+
#### **3. Bounded Call Graph with Small Multiples**
|
|
28
|
+
- Build call graph ≤2 hops deep per failing symbol
|
|
29
|
+
- Cap total nodes at ≤15 per symbol
|
|
30
|
+
- **HOT Path Ranking** scoring system:
|
|
31
|
+
* Error location in path: +3
|
|
32
|
+
* Entry point to path: +2
|
|
33
|
+
* Test coverage exists: +1
|
|
34
|
+
* Mentioned in ticket/error: +1
|
|
35
|
+
* Tag as HOT if score ≥3
|
|
36
|
+
- **Small Multiples ASCII visualization**:
|
|
37
|
+
* Width ≤100 chars per path
|
|
38
|
+
* Format: `EntryPoint -> Caller -> [*FailingSymbol*] -> Callee`
|
|
39
|
+
* ≤8 total paths, prioritize HOT paths
|
|
40
|
+
* Alias Legend for repeated subpaths (A1, A2...)
|
|
41
|
+
- **Adjacency Summary** fallback if caps exceeded
|
|
42
|
+
|
|
43
|
+
#### **4. Flow Anchors**
|
|
44
|
+
- Map how users/systems trigger the bug
|
|
45
|
+
- HTTP routes → handlers → failing code
|
|
46
|
+
- CLI commands → execution → failing code
|
|
47
|
+
- Scheduled jobs, event handlers → failing code
|
|
48
|
+
- Cap at ≤5 most relevant anchors
|
|
49
|
+
- **Critical**: Shows HOW the bug is reached for reproduction
|
|
50
|
+
|
|
51
|
+
#### **5. Contracts & Invariants** (NEW - Most Critical Addition)
|
|
52
|
+
- Public API symbols (exported functions/classes)
|
|
53
|
+
- API endpoints (REST/GraphQL/RPC)
|
|
54
|
+
- Database tables/collections touched
|
|
55
|
+
- Message queue topics/events
|
|
56
|
+
- **Extract stated invariants** from:
|
|
57
|
+
* JSDoc/docstrings with @invariant
|
|
58
|
+
* Assertions in code
|
|
59
|
+
* Validation logic patterns
|
|
60
|
+
* Comments describing guarantees
|
|
61
|
+
- **Why this matters**: Contracts tell us what guarantees the code MUST maintain - bugs are often broken contracts
|
|
62
|
+
|
|
63
|
+
### 📈 Benefits
|
|
64
|
+
|
|
65
|
+
1. **Structural Scaffolding**: Phase 1a provides the map before exploring terrain
|
|
66
|
+
2. **Contract-Driven Analysis**: Understanding what code promises helps identify where it breaks promises
|
|
67
|
+
3. **HOT Path Prioritization**: Focus investigation on high-impact code paths first
|
|
68
|
+
4. **Bounded Analysis**: Strict caps prevent 2-hour rabbit holes
|
|
69
|
+
5. **Entry Point Clarity**: Flow anchors show how to reproduce bugs
|
|
70
|
+
6. **Visual Call Graphs**: ASCII Small Multiples make relationships scannable
|
|
71
|
+
|
|
72
|
+
### 🏗️ Updated Phase Structure
|
|
73
|
+
|
|
74
|
+
Phase 1 now has 5 sub-phases (up from 4):
|
|
75
|
+
- **1a**: Neighborhood, Call Graph & Contracts (NEW)
|
|
76
|
+
- **1b**: Breadth Scan & Pattern Discovery (was 1a)
|
|
77
|
+
- **1c**: Component Deep Dive (was 1b)
|
|
78
|
+
- **1d**: Dependencies & Flow (was 1c)
|
|
79
|
+
- **1e**: Test Coverage (was 1d)
|
|
80
|
+
|
|
81
|
+
### 🎓 Adapted From MR Review Workflow
|
|
82
|
+
|
|
83
|
+
This enhancement adapts proven patterns from the `mr-review-workflow.json` Phase 1b:
|
|
84
|
+
- Bounded call graph with caps
|
|
85
|
+
- Small Multiples visualization
|
|
86
|
+
- HOT path ranking
|
|
87
|
+
- Alias Legend for repeated paths
|
|
88
|
+
- Adjacency Summary fallback
|
|
89
|
+
- Contracts & Invariants discovery
|
|
90
|
+
|
|
91
|
+
## [1.1.0-beta.3] - 2025-11-06
|
|
92
|
+
|
|
93
|
+
### 🚨 CRITICAL FIX: Prevent ALL Phase Skipping (Not Just Documentation)
|
|
94
|
+
|
|
95
|
+
**Problem Identified**: Agents were skipping not just the final documentation phase, but ALL investigation phases including:
|
|
96
|
+
- Hypothesis generation (Phase 2)
|
|
97
|
+
- Code analysis (Phase 1)
|
|
98
|
+
- Hypothesis verification (Phase 2b-2h)
|
|
99
|
+
- Instrumentation (Phase 3)
|
|
100
|
+
- Evidence gathering (Phase 4)
|
|
101
|
+
|
|
102
|
+
They were essentially "guessing" the bug and stopping immediately without any systematic investigation.
|
|
103
|
+
|
|
104
|
+
**Root Cause**: Agents didn't understand that they were **executing a workflow** that requires repeatedly calling `workflow_next` until `isComplete=true`. They thought they could freestyle debug and stop whenever they felt confident.
|
|
105
|
+
|
|
106
|
+
### 🎯 Comprehensive Solution
|
|
107
|
+
|
|
108
|
+
#### 1. **Mandatory Workflow Execution Instructions (metaGuidance)**
|
|
109
|
+
Added prominent `🚨 MANDATORY WORKFLOW EXECUTION` section that establishes:
|
|
110
|
+
- "YOU ARE EXECUTING A STRUCTURED WORKFLOW, NOT FREESTYLE DEBUGGING"
|
|
111
|
+
- "You CANNOT 'figure out the bug' and stop"
|
|
112
|
+
- "You MUST execute all 26 workflow steps by repeatedly calling workflow_next"
|
|
113
|
+
- "DO NOT STOP CALLING WORKFLOW_NEXT: Even if you think you know the bug"
|
|
114
|
+
- Clear explanation of workflow mechanics and why this structure exists
|
|
115
|
+
|
|
116
|
+
#### 2. **Early Commitment Checkpoint (Phase 0e)**
|
|
117
|
+
Added **Phase 0e: Workflow Execution Commitment** immediately after triage:
|
|
118
|
+
- Forces agent to explicitly acknowledge they understand workflow execution requirements
|
|
119
|
+
- Lists all remaining phases they MUST complete
|
|
120
|
+
- Requires stating: "I acknowledge I am executing a structured 26-step workflow..."
|
|
121
|
+
- Requires user confirmation before proceeding to investigation phases
|
|
122
|
+
- Acts as psychological commitment device to prevent freestyle debugging
|
|
123
|
+
|
|
124
|
+
#### 3. **Evidence-Based Persuasion**
|
|
125
|
+
Reinforced the **90% error rate statistic** throughout:
|
|
126
|
+
- metaGuidance: "agents who skip systematic investigation steps are wrong ~90% of the time"
|
|
127
|
+
- Phase 0e: "stopping early leads to incorrect conclusions ~90% of the time"
|
|
128
|
+
- Phase 5b: "agents who skip final documentation are wrong ~90% of the time"
|
|
129
|
+
|
|
130
|
+
### 📊 Behavioral Impact
|
|
131
|
+
|
|
132
|
+
- **Before beta.3**: Agents could guess at bugs and stop immediately without executing any investigation phases
|
|
133
|
+
- **After beta.3**:
|
|
134
|
+
- Agents see prominent "MANDATORY WORKFLOW EXECUTION" instructions first
|
|
135
|
+
- Must acknowledge workflow commitment at Phase 0e before starting investigation
|
|
136
|
+
- User confirms agent's commitment before investigation proceeds
|
|
137
|
+
- Agent is psychologically committed to completing all phases
|
|
138
|
+
|
|
139
|
+
### 🧪 Testing Scenarios
|
|
140
|
+
|
|
141
|
+
- **Scenario 1: Agent tries to conclude after Phase 0**: Should be blocked by Phase 0e checkpoint requiring workflow commitment
|
|
142
|
+
- **Scenario 2: Agent tries to skip Phase 1-4**: metaGuidance and Phase 0e commitment should prevent this
|
|
143
|
+
- **Scenario 3: Agent tries to skip Phase 6**: Phase 5b checkpoint should catch this
|
|
144
|
+
|
|
145
|
+
### 🎭 Multi-Layered Defense
|
|
146
|
+
|
|
147
|
+
This release implements a comprehensive multi-layered defense against premature completion:
|
|
148
|
+
|
|
149
|
+
1. **Layer 1 (Prevention)**: Strong metaGuidance establishing mandatory workflow execution
|
|
150
|
+
2. **Layer 2 (Early Gate)**: Phase 0e commitment checkpoint with user confirmation
|
|
151
|
+
3. **Layer 3 (Late Gate)**: Phase 5b completion checkpoint before documentation
|
|
152
|
+
4. **Layer 4 (Evidence)**: 90% error rate statistic cited throughout
|
|
153
|
+
5. **Layer 5 (Mechanical)**: Clear explanation of workflow_next mechanics
|
|
154
|
+
|
|
155
|
+
## [1.1.0-beta.2] - 2025-11-06
|
|
156
|
+
|
|
157
|
+
### 🎯 Major Enhancements
|
|
158
|
+
- **Mandatory Completion Checkpoint with User Confirmation**: Added Phase 5b checkpoint that requires explicit user confirmation before proceeding to Phase 6 or terminating early.
|
|
159
|
+
- **Evidence-Based Persuasion**: Introduced a research-backed statistic that agents who skip final documentation are wrong ~90% of the time, even with high confidence.
|
|
160
|
+
- **Forced Decision Point**: Agents must explicitly choose between completing Phase 6 (recommended) or requesting early termination.
|
|
161
|
+
- **User Gate**: Early termination requires user approval regardless of automation level, making agents less likely to ignore completion requirements.
|
|
162
|
+
- **Professional Standard Reinforcement**: Checkpoint emphasizes that proceeding to Phase 6 is the professional standard backed by 20+ years of software engineering research.
|
|
163
|
+
|
|
164
|
+
### 📚 metaGuidance Updates
|
|
165
|
+
- Added **EVIDENCE-BASED WARNING** section citing 20+ years of professional research on premature conclusions.
|
|
166
|
+
- Added **COMPLETION CHECKPOINT** section explaining the Phase 5b mandatory user confirmation requirement.
|
|
167
|
+
- Enhanced workflow discipline with research-backed rationale for completing all phases.
|
|
168
|
+
|
|
169
|
+
### 🔬 Behavioral Impact
|
|
170
|
+
- **Before**: Agents could silently skip phases based on confidence assessment alone.
|
|
171
|
+
- **After**: Agents must acknowledge the 90% error rate and get explicit user approval to skip Phase 6, creating a strong psychological and procedural barrier to premature completion.
|
|
172
|
+
- **Expected Outcome**: Dramatically reduced premature completions as agents face both research evidence and user accountability at the decision point.
|
|
173
|
+
|
|
174
|
+
### 🧪 Testing Scenarios
|
|
175
|
+
- **Scenario 1: Agent chooses to proceed**: Should state recommendation to continue, user approves, Phase 6 executes normally.
|
|
176
|
+
- **Scenario 2: Agent requests early termination**: Should acknowledge 90% error rate, request user approval, and only terminate if user explicitly approves with "terminate" response.
|
|
177
|
+
- **Scenario 3: High confidence with low patience**: User can now explicitly override the agent's recommendation at the checkpoint, reinforcing their control while seeing the research-based warning.
|
|
178
|
+
|
|
3
179
|
## [1.1.0-beta.1] - 2025-11-06
|
|
4
180
|
|
|
5
181
|
### 🎯 Major Improvements
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "systematic-bug-investigation-with-loops",
|
|
3
3
|
"name": "Systematic Bug Investigation Workflow",
|
|
4
|
-
"version": "1.1.0-beta.
|
|
4
|
+
"version": "1.1.0-beta.4",
|
|
5
5
|
"description": "A comprehensive workflow for systematic bug and failing test investigation that prevents LLMs from jumping to conclusions. Enforces thorough evidence gathering, hypothesis formation, debugging instrumentation, and validation to achieve near 100% certainty about root causes. This workflow does NOT fix bugs - it produces detailed diagnostic writeups that enable effective fixing by providing complete understanding of what is happening, why it's happening, and supporting evidence.",
|
|
6
6
|
"clarificationPrompts": [
|
|
7
7
|
"What type of system is this? (web app, mobile app, backend service, desktop app, etc.)",
|
|
@@ -21,12 +21,24 @@
|
|
|
21
21
|
"Bug is reproducible with specific steps or a minimal test case"
|
|
22
22
|
],
|
|
23
23
|
"metaGuidance": [
|
|
24
|
+
"**🚨 MANDATORY WORKFLOW EXECUTION - READ THIS FIRST:**",
|
|
25
|
+
"YOU ARE EXECUTING A STRUCTURED WORKFLOW, NOT FREESTYLE DEBUGGING.",
|
|
26
|
+
"You CANNOT \"figure out the bug\" and stop. You MUST execute all 28 workflow steps by repeatedly calling workflow_next until the MCP returns isComplete=true.",
|
|
27
|
+
"WORKFLOW MECHANICS: Each call to workflow_next returns the next required step. You MUST execute that step, then call workflow_next again. Repeat until isComplete=true.",
|
|
28
|
+
"DO NOT STOP CALLING WORKFLOW_NEXT: Even if you think you know the bug, even if you have high confidence, even if it seems obvious - you MUST continue calling workflow_next.",
|
|
29
|
+
"STEP COUNTER: Every prompt shows \"Step X of 28\" - you are NOT done until you reach Step 28/28 and isComplete=true.",
|
|
30
|
+
"",
|
|
31
|
+
"**🎯 WHY THIS STRUCTURE EXISTS (Evidence-Based):**",
|
|
32
|
+
"Professional research spanning 20+ years shows agents who skip systematic investigation steps are wrong ~90% of the time, even with 9-10/10 self-reported confidence.",
|
|
33
|
+
"Quick conclusions miss: edge cases, alternative explanations, environment factors, interaction effects, and data corruption paths.",
|
|
34
|
+
"This workflow FORCES thoroughness through: code analysis, hypothesis formation, instrumentation, evidence gathering, adversarial review, and comprehensive documentation.",
|
|
35
|
+
"",
|
|
24
36
|
"**CRITICAL WORKFLOW DISCIPLINE:**",
|
|
25
37
|
"HIGH CONFIDENCE ≠ INVESTIGATION COMPLETE: Achieving 8-10/10 confidence in a hypothesis is excellent progress but does NOT mean the workflow is done.",
|
|
26
38
|
"COMPLETE ALL PHASES: You MUST complete ALL phases (0 through 6) regardless of confidence level. Each phase builds critical evidence and documentation.",
|
|
27
39
|
"WORKFLOW COMPLETION FLAG: Only set isWorkflowComplete=true when you complete Phase 6 (Comprehensive Diagnostic Writeup) AND produce the full deliverable.",
|
|
28
|
-
"DO NOT SKIP PHASES: Even with high confidence, you must complete instrumentation (Phase 3), evidence collection (Phase 4), analysis (Phase 5), and writeup (Phase 6).",
|
|
29
|
-
"PHASE PROGRESSION: An investigation that stops at hypothesis formation (Phase 2) or evidence collection (Phase 4) is INCOMPLETE - the diagnostic writeup is the required deliverable.",
|
|
40
|
+
"DO NOT SKIP PHASES: Even with high confidence, you must complete hypothesis generation (Phase 2), instrumentation (Phase 3), evidence collection (Phase 4), analysis (Phase 5), and writeup (Phase 6).",
|
|
41
|
+
"PHASE PROGRESSION: An investigation that stops at triage (Phase 0) or hypothesis formation (Phase 2) or evidence collection (Phase 4) is INCOMPLETE - the diagnostic writeup is the required deliverable.",
|
|
30
42
|
"**FUNCTION DEFINITIONS:**",
|
|
31
43
|
"fun instrumentCode(location, hypothesis) = 'Add debug logs at {location} for {hypothesis}. Format: ClassName.method [{hypothesis}]: message. Include timestamp, thread ID if concurrent.'",
|
|
32
44
|
"fun collectEvidence(hypothesis) = 'Run instrumented code, collect logs, analyze results. Score evidence quality 1-10. Document in Evidence/{hypothesis}.md.'",
|
|
@@ -144,70 +156,104 @@
|
|
|
144
156
|
],
|
|
145
157
|
"requireConfirmation": false
|
|
146
158
|
},
|
|
159
|
+
{
|
|
160
|
+
"id": "phase-0e-workflow-commitment",
|
|
161
|
+
"title": "Phase 0e: Workflow Execution Commitment",
|
|
162
|
+
"prompt": "**⚠️ WORKFLOW EXECUTION COMMITMENT CHECKPOINT ⚠️**\n\nYou have completed Phase 0 (Triage & Setup). Before proceeding to the investigation phases, you MUST acknowledge your understanding of workflow execution requirements.\n\n**CRITICAL UNDERSTANDING:**\n\n1. **This is a 26-step structured workflow, not freestyle debugging**\n - You MUST call workflow_next repeatedly until isComplete=true\n - You CANNOT stop early, even if you think you know the bug\n - You CANNOT \"figure it out\" and skip steps\n\n2. **Professional research shows 90% error rate for premature conclusions**\n - Even with 9-10/10 confidence, skipping systematic steps leads to wrong conclusions\n - Edge cases, alternative explanations, and interaction effects are missed\n - The workflow FORCES thoroughness for a reason\n\n3. **Remaining phases you MUST complete (regardless of confidence):**\n - ✅ Phase 0: Triage & Setup (COMPLETED)\n - ⏳ Phase 1: Multi-Dimensional Codebase Analysis (5 sub-phases)\n - ⏳ Phase 2: Hypothesis Formation & Validation (8 sub-phases)\n - ⏳ Phase 3: Debugging Instrumentation\n - ⏳ Phase 4: Evidence Collection & Experimentation\n - ⏳ Phase 5: Final Analysis & Confidence Assessment\n - ⏳ Phase 6: Comprehensive Diagnostic Writeup (REQUIRED DELIVERABLE)\n\n4. **What \"completion\" means:**\n - isComplete=true from workflow_next (this happens ONLY after Phase 6)\n - Comprehensive diagnostic writeup produced\n - All evidence documented\n - Investigation context finalized\n\n**REQUIRED ACKNOWLEDGMENT:**\n\nState clearly:\n\"I acknowledge that I am executing a structured 26-step workflow and I MUST call workflow_next for every step until isComplete=true. I understand that stopping early leads to incorrect conclusions ~90% of the time. I will complete all phases (0-6) regardless of my confidence level.\"\n\nThen state: \"I am ready to proceed to Phase 1: Multi-Dimensional Codebase Analysis.\"\n\n**USER**: Do you confirm the agent will follow all workflow phases?",
|
|
163
|
+
"agentRole": "You are a workflow governance specialist ensuring agents understand they are bound to execute all workflow steps systematically.",
|
|
164
|
+
"guidance": [
|
|
165
|
+
"This checkpoint prevents premature termination",
|
|
166
|
+
"Agents must explicitly acknowledge they understand the workflow structure",
|
|
167
|
+
"This is a psychological commitment device to prevent freestyle debugging",
|
|
168
|
+
"Users must confirm the agent's commitment before proceeding"
|
|
169
|
+
],
|
|
170
|
+
"requireConfirmation": true,
|
|
171
|
+
"confirmationPrompt": "The agent has acknowledged they will follow every workflow step through Phase 6. Do you approve proceeding with the systematic investigation?\n\nType 'yes' to continue, or 'no' to discuss modifications to the investigation approach."
|
|
172
|
+
},
|
|
147
173
|
{
|
|
148
174
|
"id": "phase-1-iterative-analysis",
|
|
149
175
|
"type": "loop",
|
|
150
176
|
"title": "Phase 1: Multi-Dimensional Codebase Analysis",
|
|
151
177
|
"loop": {
|
|
152
178
|
"type": "for",
|
|
153
|
-
"count":
|
|
154
|
-
"maxIterations":
|
|
179
|
+
"count": 5,
|
|
180
|
+
"maxIterations": 5,
|
|
155
181
|
"iterationVar": "analysisPhase"
|
|
156
182
|
},
|
|
157
183
|
"body": [
|
|
184
|
+
{
|
|
185
|
+
"id": "analysis-neighborhood-contracts",
|
|
186
|
+
"title": "Analysis 1/5: Neighborhood, Call Graph & Contracts",
|
|
187
|
+
"prompt": "**NEIGHBORHOOD & CONTRACTS DISCOVERY - Build Structural Foundation**\n\nGoal: Build lightweight understanding of code structure, relationships, and contracts BEFORE diving into details. This provides the scaffolding for all subsequent analysis.\n\n**STEP 1: Compute Module Root**\n- Find nearest common ancestor of error stack trace files\n- Clamp to package boundary or src/ directory\n- This defines your investigation scope\n- Set `moduleRoot` context variable\n\n**STEP 2: Neighborhood Map** (cap per file to prevent analysis paralysis)\n- For each file in error stack trace:\n - List immediate neighbors (same directory, max 8)\n - Find imports/exports directly used (max 10)\n - Locate co-located tests (same name pattern)\n - Identify closest entry points: routes, endpoints, CLI commands (max 5)\n- Produce table: File | Neighbors | Tests | Entry Points\n\n**STEP 3: Bounded Call Graph** (Small Multiples with HOT Path Ranking)\n- For each failing function/class in stack trace:\n - Build call graph ≤2 hops deep (inbound and outbound)\n - Cap total nodes at ≤15 per failing symbol\n - Score edges for HOT path ranking:\n * Error location in path: +3\n * Entry point to path: +2 \n * Test coverage exists: +1\n * Mentioned in ticket/error message: +1\n - Tag paths as HOT if score ≥3\n - Use Small Multiples ASCII visualization:\n * Width ≤100 chars per path\n * Format: `EntryPoint -> Caller -> [*FailingSymbol*] -> Callee`\n * Mark changed/failing code as `[*name*]`\n * Add HOT tag for high-impact paths\n * ≤8 total paths, prioritize HOT paths first\n - If graph exceeds caps, use Adjacency Summary instead:\n * Table: Node | Inbound | Outbound | Notes\n * Top-K by degree/frequency\n- Create Alias Legend for repeated subpaths:\n * A1 = common.validation.validateInput\n * A2 = database.connection.getPool\n * Reuse aliases across all paths\n\n**STEP 4: Flow Anchors** (Entry Points to Bug)\n- Map how users/systems trigger the bug:\n - HTTP routes → handlers → failing 
code\n - CLI commands → execution → failing code \n - Scheduled jobs → workers → failing code\n - Event handlers → callbacks → failing code\n- Produce table: Anchor Type | Entry Point | Target Symbol | User Action\n- Cap at ≤5 most relevant anchors\n- Note: This tells us HOW the bug is reached\n\n**STEP 5: Contracts & Invariants**\n- Within `moduleRoot` and immediate neighbors:\n - List public API symbols (exported functions/classes)\n - Document API endpoints (REST/GraphQL/RPC)\n - Identify database tables/collections touched\n - Note message queue topics/events\n - Extract stated invariants from:\n * JSDoc/docstrings with @invariant\n * Assertions in code\n * Validation logic patterns\n * Comments describing guarantees\n- Produce table: Symbol/API | Contract | Invariant | Location\n- Focus on contracts related to failing code\n\n**OUTPUT: Create StructuralAnalysis.md with:**\n- Module Root declaration\n- Neighborhood Map table\n- Bounded Call Graph (Small Multiples ASCII or Adjacency Summary)\n- Alias Legend (for call graph subpaths)\n- Flow Anchors table\n- Contracts & Invariants table\n- Self-Critique: 1-2 areas of uncertainty\n\n**CAPS (strictly enforce to prevent analysis paralysis):**\n- ≤8 neighbors per file\n- ≤10 imports per file\n- ≤5 entry points total\n- ≤15 call graph nodes per failing symbol\n- ≤8 total call graph paths\n- ≤5 flow anchors\n- ≤100 chars width for ASCII paths",
|
|
188
|
+
"agentRole": "You are a codebase navigator building structural understanding. Your focus is mapping relationships, entry points, and contracts WITHOUT diving into implementation details yet.",
|
|
189
|
+
"guidance": [
|
|
190
|
+
"This is analysis phase 1 of 5 total phases",
|
|
191
|
+
"Phase 1a = Structure - Build the map before exploring terrain",
|
|
192
|
+
"Initialize majorIssuesFound = false",
|
|
193
|
+
"STRICTLY ENFORCE CAPS - this prevents 2-hour rabbit holes",
|
|
194
|
+
"Small Multiples: Render mini ASCII path diagrams (≤6 nodes per path)",
|
|
195
|
+
"HOT Path Ranking: Score and prioritize high-impact paths",
|
|
196
|
+
"Alias Legend: Collapse repeated subpaths with deterministic aliases (A1, A2...)",
|
|
197
|
+
"Adjacency Summary: If caps exceeded, use tabular summary instead of full graph",
|
|
198
|
+
"Contracts are CRITICAL: They tell us what guarantees the code must maintain",
|
|
199
|
+
"Flow Anchors show HOW users trigger the bug - essential for reproduction",
|
|
200
|
+
"Create StructuralAnalysis.md in investigation directory",
|
|
201
|
+
"Update INVESTIGATION_CONTEXT.md with module root and structural summary",
|
|
202
|
+
"This phase provides the scaffolding for all subsequent analysis"
|
|
203
|
+
],
|
|
204
|
+
"runCondition": {"var": "analysisPhase", "equals": 1},
|
|
205
|
+
"requireConfirmation": false
|
|
206
|
+
},
|
|
158
207
|
{
|
|
159
208
|
"id": "analysis-breadth-scan",
|
|
160
|
-
"title": "Analysis
|
|
161
|
-
"prompt": "**BREADTH SCAN - Cast Wide Net**\n\nGoal: Understand full system impact
|
|
162
|
-
"agentRole": "You are performing systematic analysis phase
|
|
209
|
+
"title": "Analysis 2/5: Breadth Scan & Pattern Discovery",
|
|
210
|
+
"prompt": "**BREADTH SCAN - Cast Wide Net + Learn Expected Behavior**\n\nGoal: Understand full system impact, identify all potentially involved components, and discover existing code patterns to understand expected behavior.\n\n**PART A: Pattern Discovery (Learn How Code SHOULD Work)**\n1. **Compute Module Root**: Find nearest common ancestor of error stack trace files, clamped to package/src\n2. **Discover Patterns** (scan only moduleRoot, exclude failing files from pattern definition):\n - Naming conventions (classes, methods, variables)\n - Error handling patterns (try/catch, error propagation, logging)\n - Logging patterns (format, verbosity, error vs info vs debug)\n - Data validation patterns (where/how data is checked)\n - Test patterns (structure, naming, assertion style)\n - Require ≥2 occurrences across distinct files to qualify as pattern\n3. **Capture Pattern Catalog**: Document validated patterns with 1-3 exemplar locations (file:line)\n4. **Identify Pattern Deviations in Failing Code**: Compare failing code against pattern catalog\n\n**PART B: Error Propagation & Component Discovery**\n1. **ERROR PROPAGATION MAPPING**: Use grep_search for all error occurrences, trace error messages across log files, map stack traces to identify call chains, document every point where error appears/handled\n2. **COMPONENT DISCOVERY**: Find components interacting with failing area, use codebase_search \"How is [component] used?\", identify callers/callees, cap to top 10 most suspicious, rank by likelihood (1-10)\n3. **BOUNDED CALL GRAPH**: For failing function, build call graph ≤2 hops deep, cap at ≤15 total nodes, identify HOT paths (paths through error location), prioritize HOT paths in analysis\n4. **FLOW ANCHORS**: Map entry points (routes/endpoints/CLI commands) to failing code, cap at ≤5 anchors, note which user actions trigger the bug\n\n**PART C: Data Flow & Changes**\n1. 
**DATA FLOW MAPPING**: Trace data through bug area, identify transformations, persistence points, corruption opportunities - but CAP scope to moduleRoot and 2-hop neighborhood\n2. **RECENT CHANGES ANALYSIS**: Git history for identified components (last 10 commits), identify when bug appeared, related PRs/issues, config/dependency changes\n3. **HISTORICAL PATTERN SEARCH**: Use findSimilarBugs() for similar error patterns, previous fixes, related test failures\n\n**Output**: Create BreadthAnalysis.md with:\n- Pattern Catalog (validated patterns + exemplars)\n- Pattern Deviations (how failing code differs from expected patterns)\n- Bounded Call Graph (≤15 nodes, HOT paths highlighted)\n- Flow Anchors Table (entry point → failing symbol)\n- Suspicious Components (top 10, ranked 1-10)\n- Data Flow Map (scoped to moduleRoot + 2 hops)\n- Recent Changes Timeline\n- Historical Similar Bugs\n\n**Self-Critique**: List 1-2 areas where you have low confidence or missing information.",
|
|
211
|
+
"agentRole": "You are performing systematic analysis phase 2 of 5. Your focus is understanding both what IS happening (error propagation) and what SHOULD happen (pattern discovery) to identify deviations.",
|
|
163
212
|
"guidance": [
|
|
164
|
-
"This is analysis phase
|
|
165
|
-
"Phase
|
|
213
|
+
"This is analysis phase 2 of 5 total phases",
|
|
214
|
+
"Phase 1b = Breadth + Patterns - Learn expected behavior AND map error propagation",
|
|
166
215
|
"Create BreadthAnalysis.md with structured findings",
|
|
167
|
-
"
|
|
168
|
-
"
|
|
169
|
-
"
|
|
170
|
-
"
|
|
171
|
-
"
|
|
172
|
-
"
|
|
173
|
-
"
|
|
216
|
+
"CRITICAL: Discover patterns FIRST from working code, THEN compare failing code to patterns",
|
|
217
|
+
"Pattern deviations often reveal the bug (e.g., missing validation, different error handling)",
|
|
218
|
+
"Apply CAPS to prevent analysis paralysis: ≤10 components, ≤15 call graph nodes, ≤5 flow anchors, ≤2 hops",
|
|
219
|
+
"HOT PATH RANKING: Score paths by (error in path=3, entry point=2, test coverage=1, mentioned in ticket/error message=1); tag HOT if score≥3",
|
|
220
|
+
"BOUNDED CALL GRAPH: Use codebase_search to find callers/callees, stop at 2 hops, cap nodes, dedupe",
|
|
221
|
+
"PATTERN DISCOVERY: Require ≥2 occurrences to qualify as pattern; singletons are 'candidate conventions' only",
|
|
222
|
+
"SELF-CRITIQUE: Explicitly note 1-2 areas of uncertainty or missing information",
|
|
174
223
|
"Update INVESTIGATION_CONTEXT.md after completion",
|
|
175
|
-
"
|
|
176
|
-
"Document your reasoning for why each component is potentially involved"
|
|
224
|
+
"Use the function definitions for standardized operations"
|
|
177
225
|
],
|
|
178
|
-
"runCondition": {"var": "analysisPhase", "equals":
|
|
226
|
+
"runCondition": {"var": "analysisPhase", "equals": 2},
|
|
179
227
|
"requireConfirmation": false
|
|
180
228
|
},
|
|
181
229
|
{
|
|
182
230
|
"id": "analysis-deep-dive",
|
|
183
|
-
"title": "Analysis
|
|
184
|
-
"prompt": "**COMPONENT DEEP DIVE -
|
|
185
|
-
"agentRole": "You are performing systematic analysis phase
|
|
231
|
+
"title": "Analysis 3/5: Component Deep Dive with Hot-Path Focus",
|
|
232
|
+
"prompt": "**COMPONENT DEEP DIVE - Prioritized Investigation**\n\nGoal: Deep understanding of top 5 suspicious components from breadth scan, prioritizing HOT paths and pattern deviations.\n\n**PRIORITIZATION (from Phase 1):**\n1. Focus on components on HOT paths (score ≥3)\n2. Prioritize components with pattern deviations\n3. Rank by likelihood score from Phase 1\n4. Cap analysis to top 5 components\n\n**FOR EACH COMPONENT (recursive 3-level analysis):**\n\n**LEVEL 1 - DIRECT IMPLEMENTATION** (prioritize HOT paths and deviation areas):\n- Read complete file (or HOT path sections if file >500 lines)\n- Compare error handling against pattern catalog from Phase 1\n- Identify pattern deviations with file:line locations\n- Check state management, initialization, cleanup\n- Document invariants and assumptions\n- Note TODO/FIXME/HACK/BUG comments\n- Red flags: complex logic, missing validation, race conditions\n\n**LEVEL 2 - DIRECT DEPENDENCIES** (cap at ≤10 deps per component):\n- Follow imports on HOT paths first\n- Check dependency contracts and interfaces\n- Analyze coupling and data exchange\n- Look for shared mutable state\n- Identify circular dependencies\n- Document failure propagation paths\n\n**LEVEL 3 - INTEGRATION POINTS** (cap at ≤8 integration points):\n- External calls (DB, API, file system) - cap at ≤5\n- Concurrency/threading concerns\n- Resource management issues\n- Caching and state sync\n- Event handling and callbacks\n- Configuration dependencies\n\n**FOR EACH COMPONENT, PRODUCE:**\n- **Likelihood Score** (1-10): Weight HOT paths +3, pattern deviations +2, recent changes +1\n- **Suspicious Sections**: Specific file:line with rationale (≤5 per component)\n- **Failure Modes**: How this component could cause the observed bug (≤3 scenarios)\n- **Pattern Violations**: How it deviates from expected patterns (from Phase 1)\n- **Critical Dependencies**: Top 3 dependencies that could be sources\n\n**Output**: Create ComponentAnalysis.md with:\n- Component 
Rankings (1-5, sorted by likelihood score)\n- Per-Component Analysis (following structure above)\n- Pattern Violation Summary\n- Critical Path Map (which components are on HOT paths)\n- **Self-Critique**: 1-2 components you're uncertain about and why\n\n**CAPS TO PREVENT ANALYSIS PARALYSIS:**\n- Top 5 components only\n- ≤10 dependencies per component\n- ≤8 integration points per component\n- ≤5 suspicious sections per component\n- ≤3 failure modes per component",
|
|
233
|
+
"agentRole": "You are performing systematic analysis phase 3 of 5. Your focus is deep-diving into the most suspicious components, prioritizing HOT paths and pattern deviations.",
|
|
186
234
|
"guidance": [
|
|
187
|
-
"This is analysis phase
|
|
188
|
-
"Phase
|
|
189
|
-
"Build on findings from Phase 1
|
|
235
|
+
"This is analysis phase 3 of 5 total phases",
|
|
236
|
+
"Phase 1c = Deep Dive - Focus on HOT paths and pattern violations",
|
|
237
|
+
"Build on findings from Analysis 1/5 and 2/5 (patterns, HOT paths, flow anchors)",
|
|
190
238
|
"Create ComponentAnalysis.md with structured findings",
|
|
191
239
|
"Use recursiveAnalysis() for systematic exploration",
|
|
192
|
-
"
|
|
193
|
-
"
|
|
194
|
-
"
|
|
195
|
-
"
|
|
196
|
-
"Update INVESTIGATION_CONTEXT.md after completion"
|
|
197
|
-
"Go deep - read entire files, not just the obvious parts",
|
|
198
|
-
"Look for subtle issues like race conditions, edge cases, and assumptions"
|
|
240
|
+
"PRIORITIZE HOT PATHS: Analyze code on HOT paths before other code",
|
|
241
|
+
"PATTERN-DRIVEN: Compare actual code against the Pattern Catalog from Analysis 2/5 (BreadthAnalysis.md)",
|
|
242
|
+
"APPLY CAPS STRICTLY: Prevents spending 2 hours reading every file",
|
|
243
|
+
"SELF-CRITIQUE: Note where you're uncertain or making assumptions",
|
|
244
|
+
"Update INVESTIGATION_CONTEXT.md after completion"
|
|
199
245
|
],
|
|
200
|
-
"runCondition": {"var": "analysisPhase", "equals":
|
|
246
|
+
"runCondition": {"var": "analysisPhase", "equals": 3},
|
|
201
247
|
"requireConfirmation": false
|
|
202
248
|
},
|
|
203
249
|
{
|
|
204
250
|
"id": "analysis-dependencies",
|
|
205
|
-
"title": "Analysis
|
|
251
|
+
"title": "Analysis 4/5: Dependencies & Flow",
|
|
206
252
|
"prompt": "**DEPENDENCY & FLOW ANALYSIS - Trace Connections**\n\nGoal: Understand how components interact and data flows between them.\n\nPerform: Static dependency graph analysis, Runtime flow analysis, Data transformation pipeline tracing, and Integration analysis.\n\n**Output**: FlowAnalysis.md with sequence diagrams showing execution flow, data flow maps with transformation points, complete dependency graph, list of all integration points and failure modes, and timeline showing order of operations.",
|
|
207
|
-
"agentRole": "You are performing systematic analysis phase
|
|
253
|
+
"agentRole": "You are performing systematic analysis phase 4 of 5. Your focus is tracing how components connect and data flows between them.",
|
|
208
254
|
"guidance": [
|
|
209
|
-
"This is analysis phase
|
|
210
|
-
"Phase
|
|
255
|
+
"This is analysis phase 4 of 5 total phases",
|
|
256
|
+
"Phase 1d = Dependencies - Trace connections and data flows",
|
|
211
257
|
"Build on component understanding from Analysis 3/5 (Component Deep Dive)",
|
|
212
258
|
"Create FlowAnalysis.md with diagrams and flow charts",
|
|
213
259
|
"STATIC DEPENDENCY GRAPH: Build complete import/dependency tree, identify circular dependencies, find hidden dependencies (reflection, dynamic loading, DI), map version constraints and compatibility, document shared libraries and utilities, note tight coupling or fragile dependencies",
|
|
@@ -219,17 +265,17 @@
|
|
|
219
265
|
"Pay special attention to async boundaries and error propagation",
|
|
220
266
|
"Look for implicit dependencies that aren't obvious from imports"
|
|
221
267
|
],
|
|
222
|
-
"runCondition": {"var": "analysisPhase", "equals":
|
|
268
|
+
"runCondition": {"var": "analysisPhase", "equals": 4},
|
|
223
269
|
"requireConfirmation": false
|
|
224
270
|
},
|
|
225
271
|
{
|
|
226
272
|
"id": "analysis-test-coverage",
|
|
227
|
-
"title": "Analysis
|
|
273
|
+
"title": "Analysis 5/5: Test Coverage",
|
|
228
274
|
"prompt": "**TEST COVERAGE ANALYSIS - Leverage Existing Knowledge**\n\nGoal: Use existing tests as source of truth about system behavior.\n\nFor each suspicious component, use analyzeTests(component) to perform: Direct test coverage analysis, Integration test analysis, Test history investigation, Test execution with debugging, and Coverage gap analysis.\n\n**Output**: TestAnalysis.md with coverage gaps matrix, suspicious test patterns, test evidence for hypotheses, recommendations for tests to add, and complete test inventory for affected components.",
|
|
229
|
-
"agentRole": "You are performing systematic analysis phase
|
|
275
|
+
"agentRole": "You are performing systematic analysis phase 5 of 5. Your focus is leveraging existing tests to understand expected behavior and find coverage gaps.",
|
|
230
276
|
"guidance": [
|
|
231
|
-
"This is analysis phase
|
|
232
|
-
"Phase
|
|
277
|
+
"This is analysis phase 5 of 5 total phases",
|
|
278
|
+
"Phase 1e = Tests - Analyze test coverage and quality",
|
|
233
279
|
"Build on all previous analysis phases",
|
|
234
280
|
"Create TestAnalysis.md with coverage gap matrix",
|
|
235
281
|
"DIRECT TEST COVERAGE: Find all tests using grep/test discovery, analyze what's tested (happy/edge/error cases), identify what's NOT tested, check test quality and assertion strength, note mocking/stubbing that might hide issues, review test names and docs",
|
|
@@ -242,7 +288,7 @@
|
|
|
242
288
|
"Tests often reveal the 'expected' behavior - compare with actual behavior",
|
|
243
289
|
"Missing tests often indicate areas where bugs hide"
|
|
244
290
|
],
|
|
245
|
-
"runCondition": {"var": "analysisPhase", "equals":
|
|
291
|
+
"runCondition": {"var": "analysisPhase", "equals": 5},
|
|
246
292
|
"requireConfirmation": false
|
|
247
293
|
}
|
|
248
294
|
],
|
|
@@ -628,6 +674,21 @@
|
|
|
628
674
|
],
|
|
629
675
|
"hasValidation": true
|
|
630
676
|
},
|
|
677
|
+
{
|
|
678
|
+
"id": "phase-5b-completion-checkpoint",
|
|
679
|
+
"title": "Phase 5b: Workflow Completion Checkpoint",
|
|
680
|
+
"prompt": "**WORKFLOW COMPLETION DECISION CHECKPOINT**\n\nYou have completed Phase 5a and assessed your confidence in the root cause. Before proceeding to Phase 6 (the comprehensive diagnostic writeup), you must make an explicit decision:\n\n**OPTION 1: Complete Full Investigation (STRONGLY RECOMMENDED)**\n- Proceed to Phase 6 to create the comprehensive diagnostic writeup\n- This is the professional standard for thorough bug investigations\n- Takes ~15-30 minutes but provides complete documentation\n- **Research shows:** Investigations that include full writeups have 90% fix success rate\n\n**OPTION 2: Request Early Termination (DISCOURAGED)**\n- Skip Phase 6 and end the investigation now\n- Mark the investigation as \"Incomplete - User Terminated\"\n- ⚠️ **CRITICAL WARNING - EVIDENCE-BASED:**\n - **Professional research spanning 20+ years of software engineering practice shows that agents who skip final documentation phases are wrong about the root cause ~90% of the time**\n - Even with 9-10/10 confidence, missing the comprehensive writeup leads to:\n - Incomplete context transfer to fixing developers\n - Missed edge cases and alternative explanations\n - Poor reproducibility of findings\n - Inability to prevent similar bugs in the future\n - The writeup phase often reveals gaps in reasoning that change the conclusion\n\n**DECISION REQUIRED:**\n\nIf you believe you have sufficient information and want to request early termination:\n1. State your current confidence level\n2. Acknowledge the 90% error rate for skipping documentation\n3. Request user approval to terminate early\n4. **This will require EXPLICIT USER CONFIRMATION regardless of automation level**\n\nIf you want to complete the investigation professionally:\n1. Simply state you will proceed to Phase 6\n2. No user confirmation needed\n3. Continue to the comprehensive diagnostic writeup\n\n**RECOMMENDED ACTION:** Proceed to Phase 6. 
The writeup takes minimal additional time but dramatically improves fix success rates and organizational learning.",
|
|
681
|
+
"agentRole": "You are a workflow governance specialist ensuring agents make informed decisions about investigation completeness with full awareness of professional research on premature conclusions.",
|
|
682
|
+
"guidance": [
|
|
683
|
+
"Present both options clearly but emphasize the professional standard",
|
|
684
|
+
"Be transparent about the 90% error rate for skipping documentation",
|
|
685
|
+
"If requesting early termination, you MUST acknowledge the risks",
|
|
686
|
+
"User confirmation is REQUIRED for early termination, even in high automation mode",
|
|
687
|
+
"Proceeding to Phase 6 is the default professional behavior"
|
|
688
|
+
],
|
|
689
|
+
"requireConfirmation": true,
|
|
690
|
+
"confirmationPrompt": "WORKFLOW COMPLETION CHECKPOINT:\n\nThe agent has assessed their confidence and is now deciding whether to:\n\nA) Proceed to Phase 6 (Comprehensive Diagnostic Writeup) - RECOMMENDED\nB) Request early termination (skip Phase 6)\n\n⚠️ EVIDENCE-BASED WARNING: Professional research shows agents who skip final documentation are wrong about the root cause ~90% of the time, even with high confidence.\n\nAgent's recommendation: {check their response above}\n\nDo you want to:\n- Type 'continue' to proceed to Phase 6 (recommended)\n- Type 'terminate' to end investigation now (discouraged)\n\nYour choice:"
|
|
691
|
+
},
|
|
631
692
|
{
|
|
632
693
|
"id": "phase-6-diagnostic-writeup",
|
|
633
694
|
"title": "Phase 6: Comprehensive Diagnostic Writeup",
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|