@exaudeus/workrail 0.8.1 → 0.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/application/app.d.ts +0 -1
- package/dist/application/app.js +0 -6
- package/dist/application/services/workflow-service.js +56 -4
- package/dist/config/feature-flags.js +8 -0
- package/dist/infrastructure/storage/file-workflow-storage.d.ts +4 -0
- package/dist/infrastructure/storage/file-workflow-storage.js +67 -26
- package/dist/mcp-server.js +0 -35
- package/package.json +1 -1
- package/workflows/bug-investigation.agentic.json +112 -0
- package/workflows/routines/context-gathering.json +149 -0
- package/workflows/routines/execution-simulation.json +84 -0
- package/workflows/routines/feature-implementation.json +119 -0
- package/workflows/routines/hypothesis-challenge.json +113 -0
- package/workflows/routines/plan-analysis.json +100 -0
- package/workflows/workflow-diagnose-environment.json +31 -0
- package/spec/mcp-compliance-summary.md +0 -211
- package/spec/mcp-protocol-handshake.md +0 -604
- package/web/DESIGN_SYSTEM_INTEGRATION.md +0 -305
- package/web/assets/images/favicon-amber-16.png +0 -0
- package/web/assets/images/favicon-amber-32.png +0 -0
- package/web/assets/images/favicon-white-16-clean.png +0 -0
- package/web/assets/images/favicon-white-32-clean.png +0 -0
- package/web/assets/images/icon-amber-192.png +0 -0
- package/web/assets/images/icon-amber-512.png +0 -0
- package/web/assets/images/icon-amber.svg +0 -27
- package/web/assets/images/icon-white-192-clean.png +0 -0
- package/web/assets/images/icon-white-512-clean.png +0 -0
- package/web/assets/images/icon-white.svg +0 -27
- package/web/examples/BEFORE_AFTER.md +0 -691
- package/workflows/IMPROVEMENTS-simplified.md +0 -124
- package/workflows/systematic-bug-investigation-simplified.backup-20251106-155300.json +0 -117
- package/workflows/systematic-bug-investigation-with-loops.backup-20251106-125543.json +0 -751
- package/workflows/systematic-bug-investigation-with-loops.backup-20251106-162241.json +0 -731
|
@@ -16,7 +16,6 @@ export declare const METHOD_NAMES: {
|
|
|
16
16
|
readonly WORKFLOW_LIST: "workflow_list";
|
|
17
17
|
readonly WORKFLOW_GET: "workflow_get";
|
|
18
18
|
readonly WORKFLOW_NEXT: "workflow_next";
|
|
19
|
-
readonly WORKFLOW_VALIDATE: "workflow_validate";
|
|
20
19
|
readonly INITIALIZE: "initialize";
|
|
21
20
|
readonly TOOLS_LIST: "tools/list";
|
|
22
21
|
readonly SHUTDOWN: "shutdown";
|
package/dist/application/app.js
CHANGED
|
@@ -70,13 +70,11 @@ const response_validator_1 = require("../validation/response-validator");
|
|
|
70
70
|
const list_workflows_1 = require("./use-cases/list-workflows");
|
|
71
71
|
const get_workflow_1 = require("./use-cases/get-workflow");
|
|
72
72
|
const get_next_step_1 = require("./use-cases/get-next-step");
|
|
73
|
-
const validate_step_output_1 = require("./use-cases/validate-step-output");
|
|
74
73
|
const simple_output_decorator_1 = require("./decorators/simple-output-decorator");
|
|
75
74
|
exports.METHOD_NAMES = {
|
|
76
75
|
WORKFLOW_LIST: 'workflow_list',
|
|
77
76
|
WORKFLOW_GET: 'workflow_get',
|
|
78
77
|
WORKFLOW_NEXT: 'workflow_next',
|
|
79
|
-
WORKFLOW_VALIDATE: 'workflow_validate',
|
|
80
78
|
INITIALIZE: 'initialize',
|
|
81
79
|
TOOLS_LIST: 'tools/list',
|
|
82
80
|
SHUTDOWN: 'shutdown'
|
|
@@ -87,7 +85,6 @@ function buildWorkflowApplication(workflowService, validator = request_validator
|
|
|
87
85
|
const listWorkflowsUseCase = (0, list_workflows_1.createListWorkflows)(workflowService);
|
|
88
86
|
const getWorkflowUseCase = (0, get_workflow_1.createGetWorkflow)(workflowService);
|
|
89
87
|
const getNextStepUseCase = (0, get_next_step_1.createGetNextStep)(workflowService);
|
|
90
|
-
const validateStepOutputUseCase = (0, validate_step_output_1.createValidateStepOutput)(workflowService);
|
|
91
88
|
app.register(exports.METHOD_NAMES.WORKFLOW_LIST, async (_params) => {
|
|
92
89
|
const workflows = await listWorkflowsUseCase();
|
|
93
90
|
return { workflows };
|
|
@@ -98,9 +95,6 @@ function buildWorkflowApplication(workflowService, validator = request_validator
|
|
|
98
95
|
app.register(exports.METHOD_NAMES.WORKFLOW_NEXT, async (params) => {
|
|
99
96
|
return getNextStepUseCase(params.workflowId, params.completedSteps || [], params.context);
|
|
100
97
|
});
|
|
101
|
-
app.register(exports.METHOD_NAMES.WORKFLOW_VALIDATE, async (params) => {
|
|
102
|
-
return validateStepOutputUseCase(params.workflowId, params.stepId, params.output);
|
|
103
|
-
});
|
|
104
98
|
app.register(exports.METHOD_NAMES.INITIALIZE, async (params) => {
|
|
105
99
|
const { initializeHandler } = await Promise.resolve().then(() => __importStar(require('../tools/mcp_initialize')));
|
|
106
100
|
return (await initializeHandler({ id: 0, params, method: 'initialize', jsonrpc: '2.0' })).result;
|
|
@@ -40,14 +40,55 @@ class DefaultWorkflowService {
|
|
|
40
40
|
const completed = [...(completedSteps || [])];
|
|
41
41
|
const enhancedContext = checkedContext;
|
|
42
42
|
const loopBodySteps = new Set();
|
|
43
|
+
const bodyStepToLoop = new Map();
|
|
43
44
|
for (const step of workflow.steps) {
|
|
44
45
|
if ((0, workflow_types_1.isLoopStep)(step)) {
|
|
45
46
|
const loopStep = step;
|
|
46
47
|
if (typeof loopStep.body === 'string') {
|
|
47
48
|
loopBodySteps.add(loopStep.body);
|
|
49
|
+
bodyStepToLoop.set(loopStep.body, loopStep);
|
|
48
50
|
}
|
|
49
51
|
else if (Array.isArray(loopStep.body)) {
|
|
50
|
-
loopStep.body.forEach(bodyStep => loopBodySteps.add(bodyStep.id));
|
|
52
|
+
loopStep.body.forEach(bodyStep => {
|
|
53
|
+
loopBodySteps.add(bodyStep.id);
|
|
54
|
+
bodyStepToLoop.set(bodyStep.id, loopStep);
|
|
55
|
+
});
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
if (!enhancedContext._currentLoop) {
|
|
60
|
+
const completedLoopBodySteps = completed.filter(stepId => loopBodySteps.has(stepId));
|
|
61
|
+
if (completedLoopBodySteps.length > 0) {
|
|
62
|
+
const loopStep = bodyStepToLoop.get(completedLoopBodySteps[0]);
|
|
63
|
+
if (loopStep && !completed.includes(loopStep.id)) {
|
|
64
|
+
enhancedContext._currentLoop = {
|
|
65
|
+
loopId: loopStep.id,
|
|
66
|
+
loopStep: loopStep
|
|
67
|
+
};
|
|
68
|
+
if (!enhancedContext._loopState || !enhancedContext._loopState[loopStep.id]) {
|
|
69
|
+
const resolvedBody = this.loopStepResolver.resolveLoopBody(workflow, loopStep.body, loopStep.id);
|
|
70
|
+
let completedIterations = 0;
|
|
71
|
+
if (Array.isArray(resolvedBody)) {
|
|
72
|
+
const hasConditionalSteps = resolvedBody.some(step => step.runCondition);
|
|
73
|
+
if (hasConditionalSteps) {
|
|
74
|
+
completedIterations = completedLoopBodySteps.length;
|
|
75
|
+
}
|
|
76
|
+
else {
|
|
77
|
+
completedIterations = 0;
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
else {
|
|
81
|
+
completedIterations = completedLoopBodySteps.length;
|
|
82
|
+
}
|
|
83
|
+
if (!enhancedContext._loopState) {
|
|
84
|
+
enhancedContext._loopState = {};
|
|
85
|
+
}
|
|
86
|
+
enhancedContext._loopState[loopStep.id] = {
|
|
87
|
+
iteration: completedIterations,
|
|
88
|
+
started: Date.now(),
|
|
89
|
+
warnings: []
|
|
90
|
+
};
|
|
91
|
+
}
|
|
51
92
|
}
|
|
52
93
|
}
|
|
53
94
|
}
|
|
@@ -55,9 +96,20 @@ class DefaultWorkflowService {
|
|
|
55
96
|
const { loopId, loopStep } = enhancedContext._currentLoop;
|
|
56
97
|
const loopContext = new loop_execution_context_1.LoopExecutionContext(loopId, loopStep.loop, enhancedContext._loopState?.[loopId]);
|
|
57
98
|
const bodyStep = this.loopStepResolver.resolveLoopBody(workflow, loopStep.body, loopStep.id);
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
99
|
+
let bodyIsCompleted;
|
|
100
|
+
if (Array.isArray(bodyStep)) {
|
|
101
|
+
const loopEnhancedContext = loopContext.injectVariables(enhancedContext, false);
|
|
102
|
+
const eligibleSteps = bodyStep.filter(step => {
|
|
103
|
+
if (!step.runCondition) {
|
|
104
|
+
return true;
|
|
105
|
+
}
|
|
106
|
+
return (0, condition_evaluator_1.evaluateCondition)(step.runCondition, loopEnhancedContext);
|
|
107
|
+
});
|
|
108
|
+
bodyIsCompleted = eligibleSteps.length === 0 || eligibleSteps.every(step => completed.includes(step.id));
|
|
109
|
+
}
|
|
110
|
+
else {
|
|
111
|
+
bodyIsCompleted = completed.includes(bodyStep.id);
|
|
112
|
+
}
|
|
61
113
|
if (bodyIsCompleted) {
|
|
62
114
|
loopContext.incrementIteration();
|
|
63
115
|
if (!enhancedContext._loopState) {
|
|
@@ -27,6 +27,14 @@ exports.FEATURE_FLAG_DEFINITIONS = [
|
|
|
27
27
|
since: '0.6.0',
|
|
28
28
|
stable: true,
|
|
29
29
|
},
|
|
30
|
+
{
|
|
31
|
+
key: 'agenticRoutines',
|
|
32
|
+
envVar: 'WORKRAIL_ENABLE_AGENTIC_ROUTINES',
|
|
33
|
+
defaultValue: false,
|
|
34
|
+
description: 'Enable Agentic Orchestration features (subagent delegation, .agentic.json overrides, routines)',
|
|
35
|
+
since: '0.8.3',
|
|
36
|
+
stable: false,
|
|
37
|
+
},
|
|
30
38
|
];
|
|
31
39
|
function parseBoolean(value, defaultValue) {
|
|
32
40
|
if (value === undefined) {
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import { IWorkflowStorage } from '../../types/storage';
|
|
2
2
|
import { Workflow, WorkflowSummary } from '../../types/mcp-types';
|
|
3
|
+
import { IFeatureFlagProvider } from '../../config/feature-flags';
|
|
3
4
|
interface FileWorkflowStorageOptions {
|
|
4
5
|
maxFileSizeBytes?: number;
|
|
5
6
|
cacheTTLms?: number;
|
|
6
7
|
cacheSize?: number;
|
|
7
8
|
indexCacheTTLms?: number;
|
|
9
|
+
featureFlagProvider?: IFeatureFlagProvider;
|
|
8
10
|
}
|
|
9
11
|
export declare class FileWorkflowStorage implements IWorkflowStorage {
|
|
10
12
|
private readonly baseDirReal;
|
|
@@ -13,9 +15,11 @@ export declare class FileWorkflowStorage implements IWorkflowStorage {
|
|
|
13
15
|
private readonly cacheLimit;
|
|
14
16
|
private readonly indexCacheTTL;
|
|
15
17
|
private readonly cache;
|
|
18
|
+
private readonly featureFlags;
|
|
16
19
|
private workflowIndex;
|
|
17
20
|
private indexExpires;
|
|
18
21
|
constructor(directory: string, options?: FileWorkflowStorageOptions);
|
|
22
|
+
private findJsonFiles;
|
|
19
23
|
private buildWorkflowIndex;
|
|
20
24
|
private getWorkflowIndex;
|
|
21
25
|
private loadWorkflowFromFile;
|
|
@@ -9,6 +9,7 @@ const promises_1 = __importDefault(require("fs/promises"));
|
|
|
9
9
|
const fs_1 = require("fs");
|
|
10
10
|
const path_1 = __importDefault(require("path"));
|
|
11
11
|
const error_handler_1 = require("../../core/error-handler");
|
|
12
|
+
const feature_flags_1 = require("../../config/feature-flags");
|
|
12
13
|
function sanitizeId(id) {
|
|
13
14
|
if (id.includes('\u0000')) {
|
|
14
15
|
throw new error_handler_1.SecurityError('Null byte detected in identifier', 'sanitizeId');
|
|
@@ -35,45 +36,85 @@ class FileWorkflowStorage {
|
|
|
35
36
|
this.cacheTTL = options.cacheTTLms ?? 5000;
|
|
36
37
|
this.cacheLimit = options.cacheSize ?? 100;
|
|
37
38
|
this.indexCacheTTL = options.indexCacheTTLms ?? 30000;
|
|
39
|
+
this.featureFlags = options.featureFlagProvider ?? (0, feature_flags_1.createFeatureFlagProvider)();
|
|
40
|
+
}
|
|
41
|
+
async findJsonFiles(dir) {
|
|
42
|
+
const files = [];
|
|
43
|
+
async function scan(currentDir) {
|
|
44
|
+
const entries = await promises_1.default.readdir(currentDir, { withFileTypes: true });
|
|
45
|
+
for (const entry of entries) {
|
|
46
|
+
const fullPath = path_1.default.join(currentDir, entry.name);
|
|
47
|
+
if (entry.isDirectory()) {
|
|
48
|
+
await scan(fullPath);
|
|
49
|
+
}
|
|
50
|
+
else if (entry.isFile() && entry.name.endsWith('.json')) {
|
|
51
|
+
files.push(fullPath);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
await scan(dir);
|
|
56
|
+
return files;
|
|
38
57
|
}
|
|
39
58
|
async buildWorkflowIndex() {
|
|
40
|
-
const
|
|
41
|
-
const
|
|
59
|
+
const allJsonFiles = await this.findJsonFiles(this.baseDirReal);
|
|
60
|
+
const relativeFiles = allJsonFiles.map(f => path_1.default.relative(this.baseDirReal, f));
|
|
42
61
|
const index = new Map();
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
assertWithinBase(filePathRaw, this.baseDirReal);
|
|
62
|
+
const idToFiles = new Map();
|
|
63
|
+
for (const file of relativeFiles) {
|
|
46
64
|
try {
|
|
65
|
+
if (!this.featureFlags.isEnabled('agenticRoutines')) {
|
|
66
|
+
if (file.includes('routines/') || path_1.default.basename(file).startsWith('routine-')) {
|
|
67
|
+
continue;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
const filePathRaw = path_1.default.resolve(this.baseDirReal, file);
|
|
71
|
+
assertWithinBase(filePathRaw, this.baseDirReal);
|
|
47
72
|
const stats = (0, fs_1.statSync)(filePathRaw);
|
|
48
|
-
if (stats.size > this.maxFileSize) {
|
|
49
|
-
console.warn(`[FileWorkflowStorage] Skipping oversized file: ${file}`);
|
|
73
|
+
if (stats.size > this.maxFileSize)
|
|
50
74
|
continue;
|
|
51
|
-
}
|
|
52
75
|
const raw = await promises_1.default.readFile(filePathRaw, 'utf-8');
|
|
53
76
|
const data = JSON.parse(raw);
|
|
54
|
-
if (!data.id) {
|
|
55
|
-
console.warn(`[FileWorkflowStorage] Skipping file without id: ${file}`);
|
|
77
|
+
if (!data.id)
|
|
56
78
|
continue;
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
filename: file,
|
|
61
|
-
lastModified: stats.mtimeMs,
|
|
62
|
-
summary: {
|
|
63
|
-
id: data.id,
|
|
64
|
-
name: data.name,
|
|
65
|
-
description: data.description,
|
|
66
|
-
category: 'default',
|
|
67
|
-
version: data.version
|
|
68
|
-
}
|
|
69
|
-
};
|
|
70
|
-
index.set(data.id, entry);
|
|
79
|
+
const files = idToFiles.get(data.id) || [];
|
|
80
|
+
files.push(file);
|
|
81
|
+
idToFiles.set(data.id, files);
|
|
71
82
|
}
|
|
72
|
-
catch (err) {
|
|
73
|
-
console.warn(`[FileWorkflowStorage] Skipping invalid file: ${file}`, err);
|
|
83
|
+
catch (e) {
|
|
74
84
|
continue;
|
|
75
85
|
}
|
|
76
86
|
}
|
|
87
|
+
for (const [id, files] of idToFiles) {
|
|
88
|
+
let selectedFile = files[0];
|
|
89
|
+
if (this.featureFlags.isEnabled('agenticRoutines')) {
|
|
90
|
+
const agenticFile = files.find(f => f.includes('.agentic.'));
|
|
91
|
+
if (agenticFile) {
|
|
92
|
+
selectedFile = agenticFile;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
else {
|
|
96
|
+
const standardFile = files.find(f => !f.includes('.agentic.'));
|
|
97
|
+
if (standardFile) {
|
|
98
|
+
selectedFile = standardFile;
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
const filePath = path_1.default.resolve(this.baseDirReal, selectedFile);
|
|
102
|
+
const stats = (0, fs_1.statSync)(filePath);
|
|
103
|
+
const raw = await promises_1.default.readFile(filePath, 'utf-8');
|
|
104
|
+
const data = JSON.parse(raw);
|
|
105
|
+
index.set(id, {
|
|
106
|
+
id: data.id,
|
|
107
|
+
filename: selectedFile,
|
|
108
|
+
lastModified: stats.mtimeMs,
|
|
109
|
+
summary: {
|
|
110
|
+
id: data.id,
|
|
111
|
+
name: data.name,
|
|
112
|
+
description: data.description,
|
|
113
|
+
category: 'default',
|
|
114
|
+
version: data.version
|
|
115
|
+
}
|
|
116
|
+
});
|
|
117
|
+
}
|
|
77
118
|
return index;
|
|
78
119
|
}
|
|
79
120
|
async getWorkflowIndex() {
|
package/dist/mcp-server.js
CHANGED
|
@@ -285,32 +285,6 @@ const WORKFLOW_NEXT_TOOL = {
|
|
|
285
285
|
additionalProperties: false
|
|
286
286
|
}
|
|
287
287
|
};
|
|
288
|
-
const WORKFLOW_VALIDATE_TOOL = {
|
|
289
|
-
name: "workflow_validate",
|
|
290
|
-
description: `(Optional but Recommended) Verifies the output of a step before proceeding. Use this after completing a step to check if your work is valid to prevent errors.`,
|
|
291
|
-
inputSchema: {
|
|
292
|
-
type: "object",
|
|
293
|
-
properties: {
|
|
294
|
-
workflowId: {
|
|
295
|
-
type: "string",
|
|
296
|
-
description: "The unique identifier of the workflow",
|
|
297
|
-
pattern: "^[A-Za-z0-9_-]+$"
|
|
298
|
-
},
|
|
299
|
-
stepId: {
|
|
300
|
-
type: "string",
|
|
301
|
-
description: "The unique identifier of the step to validate",
|
|
302
|
-
pattern: "^[A-Za-z0-9_-]+$"
|
|
303
|
-
},
|
|
304
|
-
output: {
|
|
305
|
-
type: "string",
|
|
306
|
-
description: "The output or result produced for this step",
|
|
307
|
-
maxLength: 10000
|
|
308
|
-
}
|
|
309
|
-
},
|
|
310
|
-
required: ["workflowId", "stepId", "output"],
|
|
311
|
-
additionalProperties: false
|
|
312
|
-
}
|
|
313
|
-
};
|
|
314
288
|
const WORKFLOW_VALIDATE_JSON_TOOL = {
|
|
315
289
|
name: "workflow_validate_json",
|
|
316
290
|
description: `Validates workflow JSON content directly without external tools. Use this tool when you need to verify that a workflow JSON file is syntactically correct and follows the proper schema.
|
|
@@ -386,7 +360,6 @@ async function runServer() {
|
|
|
386
360
|
WORKFLOW_LIST_TOOL,
|
|
387
361
|
WORKFLOW_GET_TOOL,
|
|
388
362
|
WORKFLOW_NEXT_TOOL,
|
|
389
|
-
WORKFLOW_VALIDATE_TOOL,
|
|
390
363
|
WORKFLOW_VALIDATE_JSON_TOOL,
|
|
391
364
|
WORKFLOW_GET_SCHEMA_TOOL,
|
|
392
365
|
...workflowServer.getSessionTools()
|
|
@@ -429,14 +402,6 @@ async function runServer() {
|
|
|
429
402
|
};
|
|
430
403
|
}
|
|
431
404
|
return await workflowServer.getNextStep(args['workflowId'], args['completedSteps'] || [], args['context']);
|
|
432
|
-
case "workflow_validate":
|
|
433
|
-
if (!args?.['workflowId'] || !args?.['stepId'] || !args?.['output']) {
|
|
434
|
-
return {
|
|
435
|
-
content: [{ type: "text", text: "Error: workflowId, stepId, and output parameters are required" }],
|
|
436
|
-
isError: true
|
|
437
|
-
};
|
|
438
|
-
}
|
|
439
|
-
return await workflowServer.validateStep(args['workflowId'], args['stepId'], args['output']);
|
|
440
405
|
case "workflow_validate_json":
|
|
441
406
|
if (!args?.['workflowJson']) {
|
|
442
407
|
return {
|
package/package.json
CHANGED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "bug-investigation",
|
|
3
|
+
"name": "Bug Investigation (Agentic)",
|
|
4
|
+
"version": "2.0.0",
|
|
5
|
+
"description": "Agentic-enhanced bug investigation workflow with strategic delegation to specialized subagents. Delegates context gathering, hypothesis challenge, execution simulation, and adversarial validation to optimize investigation quality and efficiency.",
|
|
6
|
+
"clarificationPrompts": [
|
|
7
|
+
"What type of system is this? (web app, backend service, CLI tool, etc.)",
|
|
8
|
+
"How reproducible is this bug? (always, sometimes, rarely)",
|
|
9
|
+
"What access do you have? (full codebase, logs, tests, etc.)"
|
|
10
|
+
],
|
|
11
|
+
"preconditions": [
|
|
12
|
+
"User has a specific bug or failing test to investigate",
|
|
13
|
+
"Agent has codebase access and can run tests/build",
|
|
14
|
+
"Bug is reproducible with specific steps"
|
|
15
|
+
],
|
|
16
|
+
"metaGuidance": [
|
|
17
|
+
"WHO YOU ARE: You are a special investigator - one of the few who has the patience, determination, and skill to find the TRUE source of bugs.",
|
|
18
|
+
"Most investigators stop at the obvious explanation. You don't. You look past red herrings, challenge assumptions, and dig until you have certainty.",
|
|
19
|
+
"YOUR MISSION: Find the REAL cause of this bug. Not the apparent cause, not the first explanation, but the actual source with evidence to prove it.",
|
|
20
|
+
"WHY THIS WORKFLOW EXISTS: It gives you a systematic process to avoid the traps that catch other investigators - jumping to conclusions, confirmation bias, surface-level analysis.",
|
|
21
|
+
"HOW IT WORKS: Each phase has two steps: First you PLAN your approach (think strategically), then you EXECUTE it (do the work).",
|
|
22
|
+
"This planning step is critical - it forces you to think about HOW you'll investigate before diving in. Better plans lead to better investigations.",
|
|
23
|
+
"THE PHASES:",
|
|
24
|
+
"Phase 0: Understand what you're investigating and set up your workspace",
|
|
25
|
+
"Phase 1: Trace how execution flows from entry point to error (follow the code path)",
|
|
26
|
+
"Phase 2: Form multiple hypotheses about what could be causing this (stay open-minded)",
|
|
27
|
+
"Phase 3: Design and add instrumentation to gather evidence (set up your surveillance)",
|
|
28
|
+
"Phase 4: Run instrumented code and collect evidence (gather proof, not assumptions)",
|
|
29
|
+
"Phase 5: Validate your conclusion rigorously (be your harshest critic)",
|
|
30
|
+
"Phase 6: Document your findings so others can understand and fix it (prove your case)",
|
|
31
|
+
"CRITICAL DISTINCTION - THEORY VS PROOF:",
|
|
32
|
+
"When you read code and think 'I found it!', you have a THEORY. Theories feel certain but are often wrong.",
|
|
33
|
+
"PROOF comes from running instrumented code, collecting evidence, ruling out alternatives, and validating rigorously.",
|
|
34
|
+
"You must complete all phases to get from theory to proof. No shortcuts, even with high confidence.",
|
|
35
|
+
"YOUR DELIVERABLE: A diagnostic writeup that proves you found the true source - complete with evidence, alternative explanations ruled out, and reproduction steps.",
|
|
36
|
+
"SUCCESS MEANS: Someone reading your writeup can fix the bug confidently because you've proven what's actually happening and why.",
|
|
37
|
+
"WORKFLOW MECHANICS: Call workflow_next to get each phase. Complete the phase (both plan and execute). Call workflow_next again. Repeat until isComplete=true."
|
|
38
|
+
],
|
|
39
|
+
"steps": [
|
|
40
|
+
{
|
|
41
|
+
"id": "phase-0-setup",
|
|
42
|
+
"title": "Phase 0: Investigation Setup (Agentic)",
|
|
43
|
+
"prompt": "**UNDERSTAND THE PROBLEM & SET UP YOUR WORKSPACE**\n\nBefore you start investigating, you need to understand what you're looking for and prepare your workspace.\n\n**DELEGATION OPPORTUNITY: Context Gathering**\n\nThis phase benefits from systematic codebase exploration. Consider delegating to your Context Researcher subagent.\n\n**DELEGATION WORK PACKAGE:**\n```\nMISSION: Gather comprehensive context about this bug\n\nTARGETS:\n- Bug description: [Insert bug description]\n- Error messages/stack traces: [Insert if available]\n- Reproduction steps: [Insert if known]\n- Affected areas: [Insert suspected files/features]\n\nROUTINE: routine-context-gathering\nDEPTH: 2 (Explore - balance breadth and detail)\n\nDELIVERABLE:\n- File structure map of affected areas\n- Key files identified (entry points, error locations, dependencies)\n- Patterns observed (architecture, data flow, error handling)\n- Initial suspects (files/functions that could be involved)\n- Gaps (missing information or unclear areas)\n```\n\n**AFTER DELEGATION:**\nReview the Context Researcher's deliverable and use it to:\n\n**Set Up Your Investigation**:\n- Create INVESTIGATION_CONTEXT.md to track your investigation\n- Document the bug description and reproduction steps\n- Note the key files and areas identified by the researcher\n- List initial assumptions you'll need to verify\n- Set up a workspace (branch or directory) if appropriate\n- Clarify any user preferences\n\n**OUTPUT**: INVESTIGATION_CONTEXT.md with:\n- Clear description of the bug\n- Reproduction steps\n- Initial information (stack traces, logs, errors)\n- Key files and areas to investigate (from context gathering)\n- Your workspace location\n- Any early assumptions to verify later\n\n**Before Proceeding**: Can you clearly explain this bug to someone else? Do you know how to reproduce it? Do you know where to start looking?",
|
|
44
|
+
"agentRole": "You are an investigative coordinator, leveraging specialized researchers to build a comprehensive understanding before diving into analysis.",
|
|
45
|
+
"requireConfirmation": false
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"id": "phase-1a-plan",
|
|
49
|
+
"title": "Phase 1A: Plan Your Investigation Approach",
|
|
50
|
+
"prompt": "**PLAN HOW YOU'LL TRACK DOWN THIS BUG**\n\nYou're about to analyze the codebase. But first, think strategically about HOW you'll investigate.\n\n**Think Through**:\n\n1. **Where does execution start?**\n - What triggers this bug? (API call, user action, test, scheduled job?)\n - Where in the code does execution begin?\n\n2. **What's your investigation strategy?**\n - Will you trace execution flow from entry to error?\n - Will you start at the error and work backwards?\n - Will you examine recent changes first?\n - How will you identify the key points to investigate?\n\n3. **What could cause you to miss the real issue?**\n - Focusing too narrowly on one area?\n - Missing indirect causes or side effects?\n - Assuming things work as documented?\n - Not checking alternative execution paths?\n\n4. **What's your analysis plan?**\n - List the sequence of investigations you'll do\n - What will you look for at each step?\n - How will you know when you understand enough?\n\n**OUTPUT**: Update INVESTIGATION_CONTEXT.md with \"Phase 1 Investigation Plan\" section:\n- Your investigation strategy\n- Sequence of steps you'll take\n- Key questions you need to answer\n- Risks you're watching out for\n\n**Self-Check**: Is your plan specific enough to follow? Does it account for the ways you might miss the real cause?",
|
|
51
|
+
"agentRole": "You are a strategic investigator planning your approach. Think before you dive in.",
|
|
52
|
+
"requireConfirmation": false
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"id": "phase-1b-execute",
|
|
56
|
+
"title": "Phase 1B: Execute Your Investigation",
|
|
57
|
+
"prompt": "**CARRY OUT YOUR INVESTIGATION PLAN**\n\nNow execute the investigation strategy you designed.\n\n**Execute Your Plan**:\n- Follow the sequence of investigations you planned\n- Trace execution flow from entry point to error\n- Track how data flows and state changes\n- Read the actual code at key points\n- Note anything suspicious or unexpected\n- Adapt your plan if you discover new information\n\n**Document As You Go**:\nCreate ExecutionFlow.md with:\n- **Entry Point**: Where execution begins\n- **Call Chain**: Step-by-step path from entry to error (with file:line)\n- **Data Flow**: How data transforms along the way\n- **State Changes**: What gets modified\n- **Suspicious Points**: Code that could be problematic\n- **Patterns**: How things normally work vs how they work in failing case\n\n**Self-Critique**:\n- Did you follow your plan or skip steps?\n- Did you actually trace the execution flow, or just read code?\n- What did you learn that surprised you?\n- What are you still uncertain about?\n- Did your plan work, or should you investigate differently?\n\n**Critical Reminder**: You're building understanding of what the code DOES. You don't have a diagnosis yet - that comes later after you form and test hypotheses.",
|
|
58
|
+
"agentRole": "You are executing your investigation plan. Stay systematic and document what you find.",
|
|
59
|
+
"requireConfirmation": false
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
"id": "phase-2a-plan",
|
|
63
|
+
"title": "Phase 2A: Plan Your Hypothesis Development",
|
|
64
|
+
"prompt": "**PLAN HOW YOU'LL FORM HYPOTHESES**\n\nBased on your investigation, you'll now develop hypotheses about what's causing the bug.\n\n**Think Through**:\n\n1. **What patterns did you notice?**\n - From your execution flow tracing, what stood out?\n - What code seemed suspicious?\n - What assumptions are baked into the code?\n\n2. **What types of causes should you consider?**\n - Logic errors in the code?\n - Data issues (wrong format, corruption, missing)?\n - Timing or race conditions?\n - Environment or configuration issues?\n - Integration problems with dependencies?\n\n3. **How will you avoid anchoring on your first idea?**\n - How many alternative hypotheses will you generate?\n - How will you challenge your initial impressions?\n - What evidence would contradict your leading theory?\n\n4. **What makes a good hypothesis?**\n - Specific enough to test\n - Explains all the symptoms\n - Has clear evidence for/against\n - Can be proven or disproven\n\n**OUTPUT**: Update INVESTIGATION_CONTEXT.md with \"Phase 2 Hypothesis Strategy\":\n- How you'll generate multiple hypotheses\n- What types of causes you'll consider\n- How you'll avoid confirmation bias\n- How you'll test your hypotheses\n\n**Self-Check**: Are you committed to generating multiple hypotheses, or are you already attached to one idea?",
|
|
65
|
+
"agentRole": "You are strategizing about hypothesis formation. Commit to staying open-minded.",
|
|
66
|
+
"requireConfirmation": false
|
|
67
|
+
},
|
|
68
|
+
{
|
|
69
|
+
"id": "phase-2b-execute",
|
|
70
|
+
"title": "Phase 2B: Develop and Prioritize Hypotheses",
|
|
71
|
+
"prompt": "**FORM MULTIPLE HYPOTHESES ABOUT THE BUG**\n\nNow generate your hypotheses following your strategy.\n\n**Generate Hypotheses**:\n\nFor each possible cause, create a hypothesis:\n\n**Hypothesis Template**:\n- **ID**: H1, H2, H3, etc.\n- **Statement**: \"The bug occurs because [specific cause]\"\n- **Evidence For**: What from your investigation supports this?\n- **Evidence Against**: What contradicts this or makes it unlikely?\n- **How to Test**: What evidence would prove/disprove this?\n- **Likelihood** (1-10): Based on current evidence\n\n**Generate 3-7 hypotheses**. Force yourself to consider alternatives even if one seems obvious.\n\n**DELEGATION OPPORTUNITY: Adversarial Challenge**\n\nOnce you have initial hypotheses, consider delegating to your Hypothesis Challenger subagent for rigorous critique.\n\n**DELEGATION WORK PACKAGE:**\n```\nMISSION: Challenge and stress-test my bug hypotheses\n\nHYPOTHESES: [Paste your 3-7 hypotheses from above]\n\nEVIDENCE:\n- ExecutionFlow.md (file reference)\n- INVESTIGATION_CONTEXT.md (file reference)\n- [Any other evidence files]\n\nROUTINE: routine-hypothesis-challenge\nRIGOR: 3 (Deep - thorough adversarial review)\n\nDELIVERABLE:\nFor each hypothesis:\n- Strengths (what evidence supports it)\n- Weaknesses (gaps, contradictions, assumptions)\n- Alternative explanations (what else could cause this)\n- Verdict (Keep/Revise/Reject/Insufficient Evidence)\n- Recommendations (how to strengthen or what to test)\n```\n\n**AFTER DELEGATION:**\nReview the Hypothesis Challenger's critique and refine your hypotheses.\n\n**Prioritize**:\nRank by:\n1. Likelihood (evidence strength after challenge)\n2. Testability (can you validate it?)\n3. Completeness (explains all symptoms?)\n\n**Plan Validation**:\nFor top 3-5 hypotheses:\n- What instrumentation would prove/disprove each?\n- What tests should you run?\n- What experiments could distinguish between them?\n\n**OUTPUT**: Create Hypotheses.md with all hypotheses, challenger feedback, rankings, and validation strategy.\n\n**🚨 CRITICAL - YOU ARE NOT DONE:**\n\nYou now have theories. You do NOT have proof.\n\nEven if H1 has 10/10 likelihood, it's based on reading code, not evidence from running code.\n\nYou MUST continue to Phase 3 (design instrumentation) and Phase 4 (collect evidence).\n\nThis is not optional. High confidence without evidence = educated guess, not diagnosis.\n\nCall workflow_next to continue.",
|
|
72
|
+
"agentRole": "You are forming competing hypotheses and subjecting them to rigorous challenge. Stay open to alternatives even if one seems obvious.",
|
|
73
|
+
"requireConfirmation": false
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
"id": "phase-3a-plan",
|
|
77
|
+
"title": "Phase 3A: Design Your Instrumentation Strategy",
|
|
78
|
+
"prompt": "**PLAN HOW YOU'LL GATHER EVIDENCE**\n\nYou have hypotheses. Now design how you'll gather evidence to test them.\n\n**Think Through**:\n\n1. **What evidence would prove each hypothesis?**\n - For H1, what specific data points would confirm it?\n - For H2, what would you observe if it's correct?\n - How can you distinguish between competing hypotheses?\n\n2. **Where should you add instrumentation?**\n - What points in the execution flow are critical?\n - Where could you observe the data/state you need?\n - What's already being logged vs what do you need to add?\n\n3. **What's the right level of detail?**\n - Too much logging = noise and hard to analyze\n - Too little = gaps and missing evidence\n - How will you balance this?\n\n4. **Can you use existing tests?**\n - Are there tests you can enhance instead of adding new logging?\n - Can you modify tests to expose the state you need?\n - Should you write new targeted tests?\n\n**OUTPUT**: Update INVESTIGATION_CONTEXT.md with \"Phase 3 Instrumentation Plan\":\n- What evidence you need for each hypothesis\n- Where you'll add instrumentation (file:line)\n- What you'll log/observe at each point\n- Test scenarios you'll prepare\n- How you'll organize output to distinguish hypotheses\n\n**Self-Check**: Will this instrumentation actually give you the evidence you need? What might you miss?",
|
|
79
|
+
"agentRole": "You are designing your evidence collection strategy. Think carefully about what you need to prove.",
|
|
80
|
+
"requireConfirmation": false
|
|
81
|
+
},
|
|
82
|
+
{
|
|
83
|
+
"id": "phase-3b-execute",
|
|
84
|
+
"title": "Phase 3B: Implement Your Instrumentation",
|
|
85
|
+
"prompt": "**ADD INSTRUMENTATION AND PREPARE TEST SCENARIOS**\n\nNow implement the instrumentation strategy you designed.\n\n**DELEGATION OPPORTUNITY: Execution Simulation**\n\nBefore adding instrumentation, consider simulating execution to predict outcomes and refine your strategy.\n\n**DELEGATION WORK PACKAGE:**\n```\nMISSION: Simulate execution paths for my top hypotheses\n\nHYPOTHESES: [Paste top 3-5 hypotheses]\n\nCONTEXT:\n- ExecutionFlow.md (file reference)\n- Hypotheses.md (file reference)\n- Key files: [List critical files from investigation]\n\nROUTINE: routine-execution-simulation\nMODE: trace (Detailed execution path analysis)\n\nDELIVERABLE:\nFor each hypothesis:\n- Predicted execution path (step-by-step)\n- State changes at each step\n- Where instrumentation would be most revealing\n- Expected outputs if hypothesis is correct\n- Distinguishing characteristics between hypotheses\n```\n\n**AFTER DELEGATION:**\nUse the simulation results to refine your instrumentation plan.\n\n**Implement**:\n- Add debug logging at the points identified by simulation\n- Enhance or create tests to expose necessary state\n- Add assertions to catch violations\n- Set up controlled experiments if needed\n- Label everything clearly ([H1], [H2], etc.)\n\n**Prepare Test Scenarios**:\n- Minimal reproduction case\n- Edge cases that might behave differently\n- Working scenarios for comparison\n- Variations that test specific hypotheses\n\n**OUTPUT**: Update INVESTIGATION_CONTEXT.md with:\n- List of instrumentation added (what/where/why)\n- Test scenarios prepared\n- Expected outcomes for each hypothesis (from simulation)\n- How you'll analyze results\n\n**Self-Critique**:\n- Did you add the instrumentation you planned?\n- Did you skip any because it seemed unnecessary?\n- Is your instrumentation labeled clearly?\n- Are your test scenarios sufficient?\n\n**Readiness Check**: If you run these tests, will you get the evidence you need to prove/disprove your hypotheses?",
|
|
86
|
+
"agentRole": "You are implementing your evidence collection plan with precision, informed by execution simulation. Good instrumentation is the foundation of proof.",
|
|
87
|
+
"requireConfirmation": false
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
"id": "phase-4-execute",
|
|
91
|
+
"title": "Phase 4: Collect Evidence",
|
|
92
|
+
"prompt": "**RUN INSTRUMENTED CODE AND COLLECT EVIDENCE**\n\nNow run your test scenarios and collect the evidence.\n\n**Execute**:\n- Run minimal reproduction case\n- Run edge cases and variations\n- Run working scenarios for comparison\n- Capture all output (logs, errors, test results)\n\n**Organize Evidence**:\nFor each hypothesis, create Evidence_H1.md, Evidence_H2.md, etc.:\n- What did the instrumentation reveal?\n- Does behavior match predictions?\n- What unexpected findings emerged?\n- Quality rating (1-10): How strong is this evidence?\n\n**Analyze Patterns**:\n- Which hypotheses are supported by evidence?\n- Which are contradicted?\n- Are there patterns you didn't predict?\n- Do you need different instrumentation?\n- Should you form new hypotheses?\n\n**Update Hypotheses**:\nUpdate Hypotheses.md with:\n- Evidence collected for each\n- New likelihood scores based on evidence\n- Evidence quality ratings\n- New insights or remaining questions\n\n**Decision Point**:\n- Strong evidence (8+/10) for one hypothesis? → Proceed to validation\n- Need more instrumentation? → Go back and add it\n- Need to revise hypotheses? → Update them\n\nBut you're not done until you have strong evidence. Keep investigating.",
|
|
93
|
+
"agentRole": "You are collecting evidence systematically. Let the data guide you, not your assumptions.",
|
|
94
|
+
"requireConfirmation": false
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
"id": "phase-5-validate",
|
|
98
|
+
"title": "Phase 5: Validate Your Conclusion",
|
|
99
|
+
"prompt": "**RIGOROUSLY VALIDATE YOUR FINDING**\n\nYou have a leading hypothesis with evidence. Now be your harshest critic.\n\n**State Your Conclusion**:\n- What hypothesis has the strongest evidence?\n- What's your confidence (1-10)?\n- What evidence supports it?\n\n**DELEGATION OPPORTUNITY: Adversarial Validation**\n\nYour conclusion needs rigorous challenge. Delegate to your Hypothesis Challenger for maximum-rigor adversarial review.\n\n**DELEGATION WORK PACKAGE:**\n```\nMISSION: Rigorously validate my bug diagnosis\n\nHYPOTHESES:\n- Leading hypothesis: [Your conclusion]\n- Alternatives considered: [List other hypotheses you ruled out]\n\nEVIDENCE:\n- Evidence_H*.md files (file references)\n- Hypotheses.md (file reference)\n- INVESTIGATION_CONTEXT.md (file reference)\n- Instrumentation output/logs (file references or inline)\n\nROUTINE: routine-hypothesis-challenge\nRIGOR: 5 (Maximum - exhaustive adversarial review)\n\nDELIVERABLE:\n- Strengths of leading hypothesis\n- Weaknesses and gaps\n- Alternative explanations not yet ruled out\n- Contradicting evidence\n- Edge cases that might break the explanation\n- Verdict with confidence assessment\n- Recommendations for additional validation\n```\n\n**AFTER DELEGATION:**\nReview the Hypothesis Challenger's adversarial critique.\n\n**If confidence < 9/10**:\n- What specific test would raise confidence?\n- What alternative should you rule out?\n- What additional evidence do you need?\n- Go collect that evidence\n\n**Final Assessment**:\nAnswer these YES/NO:\n- Does this explain all observed symptoms?\n- Have you ruled out major alternatives?\n- Can you reproduce the bug based on this understanding?\n- Would you stake your reputation on this diagnosis?\n- Is there any contradicting evidence?\n- Did the adversarial review strengthen or weaken your confidence?\n\n**OUTPUT**: ValidationReport.md with:\n- Leading hypothesis and evidence\n- Alternatives considered and ruled out\n- Adversarial review findings (from Hypothesis Challenger)\n- Final confidence score\n- Remaining uncertainties\n\n**Threshold**: 9+/10 confidence with strong evidence to proceed. If not, keep investigating.",
|
|
100
|
+
"agentRole": "You are validating your conclusion with maximum rigor, leveraging adversarial challenge to ensure you haven't missed anything.",
|
|
101
|
+
"requireConfirmation": false
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
"id": "phase-6-writeup",
|
|
105
|
+
"title": "Phase 6: Prove Your Case",
|
|
106
|
+
"prompt": "**DOCUMENT YOUR INVESTIGATION - PROVE YOU FOUND THE TRUE SOURCE**\n\nYou've found the true source of the bug. Now prove it to others.\n\n**Your Task**: Create a diagnostic writeup that proves your case.\n\n**Structure**:\n\n**1. EXECUTIVE SUMMARY** (3-5 sentences)\n- What's the bug?\n- What's the true cause?\n- How confident are you? (should be 9-10/10)\n- What's the impact?\n\n**2. THE TRUE SOURCE** (detailed)\n- Explain the root cause\n- Why this causes the observed symptoms\n- Code locations (file:line)\n- Relevant code snippets\n\n**3. THE PROOF** (your evidence)\n- Key evidence that proves this diagnosis\n- How you collected it (instrumentation, tests)\n- Evidence quality and sources\n- Why alternative explanations don't fit\n\n**4. HOW TO REPRODUCE**\n- Minimal steps to reproduce\n- What to observe that confirms the diagnosis\n- Conditions required\n\n**5. YOUR INVESTIGATION**\n- What you analyzed\n- Hypotheses you tested\n- How you arrived at the conclusion\n- Key turning points\n\n**6. FIXING IT**\n- Suggested approach (conceptual)\n- Risks to consider\n- How to verify the fix\n- Tests that should be added\n\n**7. UNCERTAINTIES** (if any)\n- What you're still unsure about\n- Edge cases needing more investigation\n\n**OUTPUT**: DIAGNOSTIC_WRITEUP.md\n\n**Quality Check**:\n- Could someone fix this bug confidently from your writeup?\n- Have you proven your case with evidence?\n- Is it clear WHY this is the true source, not just a symptom?\n\n**Mission Complete**: You've tracked down the true source and proven it. Well done.",
|
|
107
|
+
"agentRole": "You are documenting your successful investigation. You found the truth - now prove it to others.",
|
|
108
|
+
"requireConfirmation": false
|
|
109
|
+
}
|
|
110
|
+
]
|
|
111
|
+
}
|
|
112
|
+
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "routine-context-gathering",
|
|
3
|
+
"name": "Context Gathering Routine",
|
|
4
|
+
"version": "2.1.0",
|
|
5
|
+
"description": "Systematic codebase exploration using an Ideate -> Plan -> Execute strategy. Configurable depth levels (0-4) allow for progressively deeper understanding.",
|
|
6
|
+
"clarificationPrompts": [
|
|
7
|
+
"What specific area or files should I investigate?",
|
|
8
|
+
"What depth level do you need? (0=Survey, 1=Scan, 2=Explore, 3=Analyze, 4=Dissect)",
|
|
9
|
+
"What are you trying to understand? (bug investigation, feature planning, refactoring, etc.)"
|
|
10
|
+
],
|
|
11
|
+
"preconditions": [
|
|
12
|
+
"Target areas are broadly identified",
|
|
13
|
+
"Depth level (0-4) is specified",
|
|
14
|
+
"Mission/goal is clear",
|
|
15
|
+
"Agent has read access to codebase"
|
|
16
|
+
],
|
|
17
|
+
"metaGuidance": [
|
|
18
|
+
"**ROUTINE PURPOSE:**",
|
|
19
|
+
"This routine performs systematic codebase investigation. It separates strategy (Ideate/Plan) from execution (Survey/Scan/Explore/Analyze/Dissect).",
|
|
20
|
+
"**PHASES:**",
|
|
21
|
+
"1. IDEATE: Brainstorm relevant areas and potential connections",
|
|
22
|
+
"2. PLAN: Define specific targets and search strategy",
|
|
23
|
+
"3. EXECUTE: Run the investigation at the requested depth",
|
|
24
|
+
"4. SYNTHESIZE: Combine findings into actionable insights",
|
|
25
|
+
"**CORE PRINCIPLES:**",
|
|
26
|
+
"- STRATEGY FIRST: Don't start reading files until you know WHY and WHERE",
|
|
27
|
+
"- SYSTEMATIC: Follow the plan, don't rabbit-hole",
|
|
28
|
+
"- HONEST: Note gaps and limitations",
|
|
29
|
+
"- CITED: Support findings with file:line references"
|
|
30
|
+
],
|
|
31
|
+
"steps": [
|
|
32
|
+
{
|
|
33
|
+
"id": "step-0-ideate-targets",
|
|
34
|
+
"title": "Step 0: Ideate Research Targets",
|
|
35
|
+
"prompt": "**EXPLORE POTENTIAL TARGETS**\n\nBefore diving in, brainstorm what areas of the codebase might be relevant to your mission.\n\n**YOUR MISSION:** Identify all potential areas of interest and why they might matter.\n\n**EXECUTE:**\n1. Review the Mission/Goal\n2. List all potential subsystems, modules, or directories that could be involved\n3. Consider indirect connections (shared utils, config, infrastructure)\n4. Brainstorm keywords or patterns to look for\n\n**REFLECT:**\n- Am I missing any obvious areas?\n- Are there dependencies I should check?\n- Is my scope too narrow or too broad?\n\n**WORKING NOTES:**\n- Mission Summary\n- Potential Targets (List)\n- Keywords/Patterns to search\n- Why each area matters",
|
|
36
|
+
"agentRole": "You are a scout surveying the landscape. Look for signals and potential paths.",
|
|
37
|
+
"requireConfirmation": false,
|
|
38
|
+
"guidance": [
|
|
39
|
+
"BRAINSTORM: List widely before narrowing down",
|
|
40
|
+
"KEYWORDS: Think of specific terms to grep for later",
|
|
41
|
+
"CONNECTIONS: Think about how components interact"
|
|
42
|
+
]
|
|
43
|
+
},
|
|
44
|
+
{
|
|
45
|
+
"id": "step-1-plan-strategy",
|
|
46
|
+
"title": "Step 1: Plan Research Strategy",
|
|
47
|
+
"prompt": "**DEFINE RESEARCH PLAN**\n\nNow select the specific targets and define your strategy for the requested depth.\n\n**YOUR MISSION:** Create a concrete plan for your investigation.\n\n**EXECUTE:**\n1. Select the primary targets from Step 0\n2. Define the **Search Scope** (specific directories/files)\n3. Define the **Exclusions** (what to ignore, e.g., tests, legacy)\n4. Define the **Focus** for the requested depth (e.g., \"Map structure\" for D0, \"Trace logic\" for D2)\n\n**DELIVERABLE:**\nCreate `research-plan.md` with:\n- Primary Targets\n- Search Scope\n- Key Questions to Answer\n- Depth Strategy",
|
|
48
|
+
"agentRole": "You are a research lead defining the scope and method. Be specific.",
|
|
49
|
+
"requireConfirmation": false,
|
|
50
|
+
"guidance": [
|
|
51
|
+
"SCOPE: Be specific about directories (e.g., src/auth, not just 'auth')",
|
|
52
|
+
"FOCUS: Align the strategy with the requested depth level",
|
|
53
|
+
"QUESTIONS: List the specific questions you want to answer"
|
|
54
|
+
]
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
"id": "step-execute-depth-0",
|
|
58
|
+
"title": "Execution: Depth 0 (Survey)",
|
|
59
|
+
"runCondition": {
|
|
60
|
+
"var": "depth",
|
|
61
|
+
"gte": 0
|
|
62
|
+
},
|
|
63
|
+
"prompt": "**EXECUTE DEPTH 0: SURVEY**\n\nExecute your research plan at Depth 0 (Map the territory).\n\n**MISSION:** Understand what exists in the target area without reading contents.\n\n**EXECUTE:**\n1. Follow `research-plan.md`\n2. Use `list_dir` on target scopes\n3. Map file structure and purpose\n4. Identify architectural patterns\n\n**WORKING NOTES:**\n- File Tree & Purpose\n- Observed Patterns\n- Suspicious Areas",
|
|
64
|
+
"agentRole": "You are a systematic cartographer mapping the territory.",
|
|
65
|
+
"requireConfirmation": false,
|
|
66
|
+
"guidance": [
|
|
67
|
+
"TOOL: Use list_dir only",
|
|
68
|
+
"CONSTRAINT: Do not read file contents yet",
|
|
69
|
+
"OUTPUT: Structural map and high-level observations"
|
|
70
|
+
]
|
|
71
|
+
},
|
|
72
|
+
{
|
|
73
|
+
"id": "step-execute-depth-1",
|
|
74
|
+
"title": "Execution: Depth 1 (Scan)",
|
|
75
|
+
"runCondition": {
|
|
76
|
+
"var": "depth",
|
|
77
|
+
"gte": 1
|
|
78
|
+
},
|
|
79
|
+
"prompt": "**EXECUTE DEPTH 1: SCAN**\n\nExecute your research plan at Depth 1 (Identify components).\n\n**MISSION:** Understand components and relationships without reading implementations.\n\n**EXECUTE:**\n1. Follow `research-plan.md`\n2. Read file headers (top 50 lines)\n3. Map imports/exports and dependencies\n4. Identify architectural layers\n\n**WORKING NOTES:**\n- Component Map\n- Dependency Graph\n- Key Interfaces",
|
|
80
|
+
"agentRole": "You are an architect mapping component relationships.",
|
|
81
|
+
"requireConfirmation": false,
|
|
82
|
+
"guidance": [
|
|
83
|
+
"TOOL: read_file --limit 50 for headers",
|
|
84
|
+
"TOOL: grep for cross-references",
|
|
85
|
+
"CONSTRAINT: Don't read full implementations"
|
|
86
|
+
]
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
"id": "step-execute-depth-2",
|
|
90
|
+
"title": "Execution: Depth 2 (Explore)",
|
|
91
|
+
"runCondition": {
|
|
92
|
+
"var": "depth",
|
|
93
|
+
"gte": 2
|
|
94
|
+
},
|
|
95
|
+
"prompt": "**EXECUTE DEPTH 2: EXPLORE**\n\nExecute your research plan at Depth 2 (Understand functionality).\n\n**MISSION:** Trace execution flows and understand behavior.\n\n**EXECUTE:**\n1. Follow `research-plan.md`\n2. Read function signatures and docstrings\n3. Trace key execution paths\n4. Map data flow and transformations\n\n**WORKING NOTES:**\n- Execution Flow Diagrams\n- Functional Summaries\n- Data Flow Maps",
|
|
96
|
+
"agentRole": "You are a systems analyst tracing flows and data.",
|
|
97
|
+
"requireConfirmation": false,
|
|
98
|
+
"guidance": [
|
|
99
|
+
"TOOL: Read full signatures and docstrings",
|
|
100
|
+
"TRACE: Follow the call chain",
|
|
101
|
+
"DATA: Track how data transforms"
|
|
102
|
+
]
|
|
103
|
+
},
|
|
104
|
+
{
|
|
105
|
+
"id": "step-execute-depth-3",
|
|
106
|
+
"title": "Execution: Depth 3 (Analyze)",
|
|
107
|
+
"runCondition": {
|
|
108
|
+
"var": "depth",
|
|
109
|
+
"gte": 3
|
|
110
|
+
},
|
|
111
|
+
"prompt": "**EXECUTE DEPTH 3: ANALYZE**\n\nExecute your research plan at Depth 3 (Deep implementation analysis).\n\n**MISSION:** Understand exact behavior, edge cases, and potential issues.\n\n**EXECUTE:**\n1. Follow `research-plan.md`\n2. Read full implementations\n3. Analyze conditionals and loops\n4. Check for race conditions and edge cases\n\n**WORKING NOTES:**\n- Detailed Implementation Analysis\n- Edge Case Analysis\n- Potential Issues",
|
|
112
|
+
"agentRole": "You are a code reviewer looking for logic errors and edge cases.",
|
|
113
|
+
"requireConfirmation": false,
|
|
114
|
+
"guidance": [
|
|
115
|
+
"TOOL: Read full files",
|
|
116
|
+
"EDGE CASES: Check nulls, boundaries, empty states",
|
|
117
|
+
"LOGIC: Verify all conditional branches"
|
|
118
|
+
]
|
|
119
|
+
},
|
|
120
|
+
{
|
|
121
|
+
"id": "step-execute-depth-4",
|
|
122
|
+
"title": "Execution: Depth 4 (Dissect)",
|
|
123
|
+
"runCondition": {
|
|
124
|
+
"var": "depth",
|
|
125
|
+
"gte": 4
|
|
126
|
+
},
|
|
127
|
+
"prompt": "**EXECUTE DEPTH 4: DISSECT**\n\nExecute your research plan at Depth 4 (Forensic line-by-line).\n\n**MISSION:** Find every bug, vulnerability, and subtle issue.\n\n**EXECUTE:**\n1. Follow `research-plan.md`\n2. Analyze critical sections line-by-line\n3. Verify every assumption and state\n4. Check security and performance\n\n**WORKING NOTES:**\n- Line-by-Line Analysis\n- Security Vulnerabilities\n- Performance Bottlenecks",
|
|
128
|
+
"agentRole": "You are a forensic analyst performing exhaustive checks.",
|
|
129
|
+
"requireConfirmation": false,
|
|
130
|
+
"guidance": [
|
|
131
|
+
"LINE-BY-LINE: Analyze every single line of the critical sections",
|
|
132
|
+
"SECURITY: Check for injection, auth bypass, data leaks",
|
|
133
|
+
"PERFORMANCE: Check for N+1 queries, inefficient loops, memory leaks"
|
|
134
|
+
]
|
|
135
|
+
},
|
|
136
|
+
{
|
|
137
|
+
"id": "step-synthesize",
|
|
138
|
+
"title": "Step 5: Synthesize Findings",
|
|
139
|
+
"prompt": "**SYNTHESIZE FINDINGS**\n\nSynthesize all your findings from the execution steps.\n\n**MISSION:** Create a clear, actionable report.\n\n**EXECUTE:**\n1. Review all Working Notes\n2. Identify key patterns and insights\n3. Structure the final deliverable\n\n**DELIVERABLE:**\nCreate `{deliverableName}`:\n- Summary\n- Detailed Findings (by depth)\n- Suspicious Points\n- Gaps & Limitations\n- Recommendations",
|
|
140
|
+
"agentRole": "You are a senior consultant synthesizing insights.",
|
|
141
|
+
"requireConfirmation": false,
|
|
142
|
+
"guidance": [
|
|
143
|
+
"SYNTHESIS: Connect the dots, don't just list facts",
|
|
144
|
+
"ACTIONABLE: Give clear next steps",
|
|
145
|
+
"CITED: Support everything with file:line refs"
|
|
146
|
+
]
|
|
147
|
+
}
|
|
148
|
+
]
|
|
149
|
+
}
|