npm - @exaudeus/workrail - Versions diffs - 0.8.0 → 0.8.2 - Mend

@exaudeus/workrail 0.8.0 → 0.8.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (28)

package/dist/application/app.d.ts +0 -1
package/dist/application/app.js +0 -6
package/dist/application/services/workflow-service.js +56 -4
package/dist/mcp-server.js +0 -35
package/package.json +1 -1
package/workflows/bug-investigation.agentic.json +112 -0
package/workflows/document-creation-workflow.json +1 -1
package/workflows/documentation-update-workflow.json +1 -1
package/workflows/routines/plan-analysis.json +139 -0
package/workflows/scoped-documentation-workflow.json +252 -0
package/workflows/workflow-diagnose-environment.json +24 -0
package/spec/mcp-compliance-summary.md +0 -211
package/spec/mcp-protocol-handshake.md +0 -604
package/web/DESIGN_SYSTEM_INTEGRATION.md +0 -305
package/web/assets/images/favicon-amber-16.png +0 -0
package/web/assets/images/favicon-amber-32.png +0 -0
package/web/assets/images/favicon-white-16-clean.png +0 -0
package/web/assets/images/favicon-white-32-clean.png +0 -0
package/web/assets/images/icon-amber-192.png +0 -0
package/web/assets/images/icon-amber-512.png +0 -0
package/web/assets/images/icon-amber.svg +0 -27
package/web/assets/images/icon-white-192-clean.png +0 -0
package/web/assets/images/icon-white-512-clean.png +0 -0
package/web/assets/images/icon-white.svg +0 -27
package/web/examples/BEFORE_AFTER.md +0 -691
package/workflows/IMPROVEMENTS-simplified.md +0 -122
package/workflows/systematic-bug-investigation-simplified.backup-20251106-155300.json +0 -117
package/workflows/systematic-bug-investigation-with-loops.backup-20251106-162241.json +0 -731

package/dist/application/app.d.ts CHANGED

@@ -16,7 +16,6 @@ export declare const METHOD_NAMES: {
     readonly WORKFLOW_LIST: "workflow_list";
     readonly WORKFLOW_GET: "workflow_get";
     readonly WORKFLOW_NEXT: "workflow_next";
-    readonly WORKFLOW_VALIDATE: "workflow_validate";
     readonly INITIALIZE: "initialize";
     readonly TOOLS_LIST: "tools/list";
     readonly SHUTDOWN: "shutdown";

package/dist/application/app.js CHANGED

@@ -70,13 +70,11 @@ const response_validator_1 = require("../validation/response-validator");
 const list_workflows_1 = require("./use-cases/list-workflows");
 const get_workflow_1 = require("./use-cases/get-workflow");
 const get_next_step_1 = require("./use-cases/get-next-step");
-const validate_step_output_1 = require("./use-cases/validate-step-output");
 const simple_output_decorator_1 = require("./decorators/simple-output-decorator");
 exports.METHOD_NAMES = {
     WORKFLOW_LIST: 'workflow_list',
     WORKFLOW_GET: 'workflow_get',
     WORKFLOW_NEXT: 'workflow_next',
-    WORKFLOW_VALIDATE: 'workflow_validate',
     INITIALIZE: 'initialize',
     TOOLS_LIST: 'tools/list',
     SHUTDOWN: 'shutdown'
@@ -87,7 +85,6 @@ function buildWorkflowApplication(workflowService, validator = request_validator
     const listWorkflowsUseCase = (0, list_workflows_1.createListWorkflows)(workflowService);
     const getWorkflowUseCase = (0, get_workflow_1.createGetWorkflow)(workflowService);
     const getNextStepUseCase = (0, get_next_step_1.createGetNextStep)(workflowService);
-    const validateStepOutputUseCase = (0, validate_step_output_1.createValidateStepOutput)(workflowService);
     app.register(exports.METHOD_NAMES.WORKFLOW_LIST, async (_params) => {
         const workflows = await listWorkflowsUseCase();
         return { workflows };
@@ -98,9 +95,6 @@ function buildWorkflowApplication(workflowService, validator = request_validator
     app.register(exports.METHOD_NAMES.WORKFLOW_NEXT, async (params) => {
         return getNextStepUseCase(params.workflowId, params.completedSteps || [], params.context);
     });
-    app.register(exports.METHOD_NAMES.WORKFLOW_VALIDATE, async (params) => {
-        return validateStepOutputUseCase(params.workflowId, params.stepId, params.output);
-    });
     app.register(exports.METHOD_NAMES.INITIALIZE, async (params) => {
         const { initializeHandler } = await Promise.resolve().then(() => __importStar(require('../tools/mcp_initialize')));
         return (await initializeHandler({ id: 0, params, method: 'initialize', jsonrpc: '2.0' })).result;

package/dist/application/services/workflow-service.js CHANGED

@@ -40,14 +40,55 @@ class DefaultWorkflowService {
         const completed = [...(completedSteps || [])];
         const enhancedContext = checkedContext;
         const loopBodySteps = new Set();
+        const bodyStepToLoop = new Map();
         for (const step of workflow.steps) {
             if ((0, workflow_types_1.isLoopStep)(step)) {
                 const loopStep = step;
                 if (typeof loopStep.body === 'string') {
                     loopBodySteps.add(loopStep.body);
+                    bodyStepToLoop.set(loopStep.body, loopStep);
                 }
                 else if (Array.isArray(loopStep.body)) {
-                    loopStep.body.forEach(bodyStep => loopBodySteps.add(bodyStep.id));
+                    loopStep.body.forEach(bodyStep => {
+                        loopBodySteps.add(bodyStep.id);
+                        bodyStepToLoop.set(bodyStep.id, loopStep);
+                    });
+                }
+            }
+        }
+        if (!enhancedContext._currentLoop) {
+            const completedLoopBodySteps = completed.filter(stepId => loopBodySteps.has(stepId));
+            if (completedLoopBodySteps.length > 0) {
+                const loopStep = bodyStepToLoop.get(completedLoopBodySteps[0]);
+                if (loopStep && !completed.includes(loopStep.id)) {
+                    enhancedContext._currentLoop = {
+                        loopId: loopStep.id,
+                        loopStep: loopStep
+                    };
+                    if (!enhancedContext._loopState || !enhancedContext._loopState[loopStep.id]) {
+                        const resolvedBody = this.loopStepResolver.resolveLoopBody(workflow, loopStep.body, loopStep.id);
+                        let completedIterations = 0;
+                        if (Array.isArray(resolvedBody)) {
+                            const hasConditionalSteps = resolvedBody.some(step => step.runCondition);
+                            if (hasConditionalSteps) {
+                                completedIterations = completedLoopBodySteps.length;
+                            }
+                            else {
+                                completedIterations = 0;
+                            }
+                        }
+                        else {
+                            completedIterations = completedLoopBodySteps.length;
+                        }
+                        if (!enhancedContext._loopState) {
+                            enhancedContext._loopState = {};
+                        }
+                        enhancedContext._loopState[loopStep.id] = {
+                            iteration: completedIterations,
+                            started: Date.now(),
+                            warnings: []
+                        };
+                    }
                 }
             }
         }
@@ -55,9 +96,20 @@ class DefaultWorkflowService {
             const { loopId, loopStep } = enhancedContext._currentLoop;
             const loopContext = new loop_execution_context_1.LoopExecutionContext(loopId, loopStep.loop, enhancedContext._loopState?.[loopId]);
             const bodyStep = this.loopStepResolver.resolveLoopBody(workflow, loopStep.body, loopStep.id);
-            const bodyIsCompleted = Array.isArray(bodyStep)
-                ? bodyStep.every(step => completed.includes(step.id))
-                : completed.includes(bodyStep.id);
+            let bodyIsCompleted;
+            if (Array.isArray(bodyStep)) {
+                const loopEnhancedContext = loopContext.injectVariables(enhancedContext, false);
+                const eligibleSteps = bodyStep.filter(step => {
+                    if (!step.runCondition) {
+                        return true;
+                    }
+                    return (0, condition_evaluator_1.evaluateCondition)(step.runCondition, loopEnhancedContext);
+                });
+                bodyIsCompleted = eligibleSteps.length === 0 || eligibleSteps.every(step => completed.includes(step.id));
+            }
+            else {
+                bodyIsCompleted = completed.includes(bodyStep.id);
+            }
             if (bodyIsCompleted) {
                 loopContext.incrementIteration();
                 if (!enhancedContext._loopState) {

package/dist/mcp-server.js CHANGED

@@ -285,32 +285,6 @@ const WORKFLOW_NEXT_TOOL = {
         additionalProperties: false
     }
 };
-const WORKFLOW_VALIDATE_TOOL = {
-    name: "workflow_validate",
-    description: `(Optional but Recommended) Verifies the output of a step before proceeding. Use this after completing a step to check if your work is valid to prevent errors.`,
-    inputSchema: {
-        type: "object",
-        properties: {
-            workflowId: {
-                type: "string",
-                description: "The unique identifier of the workflow",
-                pattern: "^[A-Za-z0-9_-]+$"
-            },
-            stepId: {
-                type: "string",
-                description: "The unique identifier of the step to validate",
-                pattern: "^[A-Za-z0-9_-]+$"
-            },
-            output: {
-                type: "string",
-                description: "The output or result produced for this step",
-                maxLength: 10000
-            }
-        },
-        required: ["workflowId", "stepId", "output"],
-        additionalProperties: false
-    }
-};
 const WORKFLOW_VALIDATE_JSON_TOOL = {
     name: "workflow_validate_json",
     description: `Validates workflow JSON content directly without external tools. Use this tool when you need to verify that a workflow JSON file is syntactically correct and follows the proper schema.
@@ -386,7 +360,6 @@ async function runServer() {
             WORKFLOW_LIST_TOOL,
             WORKFLOW_GET_TOOL,
             WORKFLOW_NEXT_TOOL,
-            WORKFLOW_VALIDATE_TOOL,
             WORKFLOW_VALIDATE_JSON_TOOL,
             WORKFLOW_GET_SCHEMA_TOOL,
             ...workflowServer.getSessionTools()
@@ -429,14 +402,6 @@ async function runServer() {
                     };
                 }
                 return await workflowServer.getNextStep(args['workflowId'], args['completedSteps'] || [], args['context']);
-            case "workflow_validate":
-                if (!args?.['workflowId'] || !args?.['stepId'] || !args?.['output']) {
-                    return {
-                        content: [{ type: "text", text: "Error: workflowId, stepId, and output parameters are required" }],
-                        isError: true
-                    };
-                }
-                return await workflowServer.validateStep(args['workflowId'], args['stepId'], args['output']);
             case "workflow_validate_json":
                 if (!args?.['workflowJson']) {
                     return {

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@exaudeus/workrail",
-  "version": "0.8.0",
+  "version": "0.8.2",
   "description": "MCP server for structured workflow orchestration and step-by-step task guidance",
   "license": "MIT",
   "bin": {

package/workflows/bug-investigation.agentic.json ADDED

@@ -0,0 +1,112 @@
+{
+  "id": "bug-investigation",
+  "name": "Bug Investigation (Agentic)",
+  "version": "2.0.0-beta",
+  "description": "An Agentic-enhanced bug investigation workflow. Supports Subagent Delegation for research and hypothesis generation.",
+  "clarificationPrompts": [
+    "What type of system is this? (web app, backend service, CLI tool, etc.)",
+    "How reproducible is this bug? (always, sometimes, rarely)",
+    "What access do you have? (full codebase, logs, tests, etc.)"
+  ],
+  "preconditions": [
+    "User has a specific bug or failing test to investigate",
+    "Agent has codebase access and can run tests/build",
+    "Bug is reproducible with specific steps"
+  ],
+  "metaGuidance": [
+    "WHO YOU ARE: You are a special investigator - one of the few who has the patience, determination, and skill to find the TRUE source of bugs.",
+    "Most investigators stop at the obvious explanation. You don't. You look past red herrings, challenge assumptions, and dig until you have certainty.",
+    "YOUR MISSION: Find the REAL cause of this bug. Not the apparent cause, not the first explanation, but the actual source with evidence to prove it.",
+    "WHY THIS WORKFLOW EXISTS: It gives you a systematic process to avoid the traps that catch other investigators - jumping to conclusions, confirmation bias, surface-level analysis.",
+    "HOW IT WORKS: Each phase has two steps: First you PLAN your approach (think strategically), then you EXECUTE it (do the work).",
+    "This planning step is critical - it forces you to think about HOW you'll investigate before diving in. Better plans lead to better investigations.",
+    "THE PHASES:",
+    "Phase 0: Understand what you're investigating and set up your workspace",
+    "Phase 1: Trace how execution flows from entry point to error (follow the code path)",
+    "Phase 2: Form multiple hypotheses about what could be causing this (stay open-minded)",
+    "Phase 3: Design and add instrumentation to gather evidence (set up your surveillance)",
+    "Phase 4: Run instrumented code and collect evidence (gather proof, not assumptions)",
+    "Phase 5: Validate your conclusion rigorously (be your harshest critic)",
+    "Phase 6: Document your findings so others can understand and fix it (prove your case)",
+    "CRITICAL DISTINCTION - THEORY VS PROOF:",
+    "When you read code and think 'I found it!', you have a THEORY. Theories feel certain but are often wrong.",
+    "PROOF comes from running instrumented code, collecting evidence, ruling out alternatives, and validating rigorously.",
+    "You must complete all phases to get from theory to proof. No shortcuts, even with high confidence.",
+    "YOUR DELIVERABLE: A diagnostic writeup that proves you found the true source - complete with evidence, alternative explanations ruled out, and reproduction steps.",
+    "SUCCESS MEANS: Someone reading your writeup can fix the bug confidently because you've proven what's actually happening and why.",
+    "WORKFLOW MECHANICS: Call workflow_next to get each phase. Complete the phase (both plan and execute). Call workflow_next again. Repeat until isComplete=true."
+  ],
+  "steps": [
+    {
+      "id": "phase-0-setup",
+      "title": "Phase 0: Investigation Setup (Agentic)",
+      "prompt": "**UNDERSTAND THE PROBLEM & SET UP YOUR WORKSPACE**\n\nBefore you start investigating, you need to understand what you're looking for and prepare your workspace.\n\n**CHOOSE YOUR EXECUTION PATH:**\n\n**PATH A: DELEGATE (If you have a Researcher Subagent)**\nInstruct your Researcher Subagent to execute the `routine-context-gathering` workflow. Tell them:\n> \"Run the context gathering routine. Focus on identifying the files related to this bug description: [Insert Bug Description].\"\n\n**PATH B: PROXY (If your Subagent cannot use tools)**\nAct as a proxy for your Researcher. Read the steps from `routine-context-gathering` yourself, and instruct your subagent via natural language to perform the scans.\n\n**PATH C: SOLO (If you have no Subagent)**\nPerform the setup yourself.\n\n**Your Task**: Set up everything you need for a systematic investigation.\n\n**Questions to Answer**:\n- What exactly is the reported problem?\n- What's the expected vs actual behavior?\n- How is it reproduced?\n- What error messages or symptoms exist?\n- What information do you have (logs, stack traces, etc.)?\n- What tools and access do you have?\n- What workspace do you need (branch, investigation directory)?\n\n**Set Up**:\n- Create INVESTIGATION_CONTEXT.md to track your investigation\n- Document the bug description and reproduction steps\n- Note any initial assumptions you'll need to verify\n- Set up a workspace (branch or directory) if appropriate\n- Clarify any user preferences\n\n**OUTPUT**: INVESTIGATION_CONTEXT.md with:\n- Clear description of the bug\n- Reproduction steps\n- Initial information (stack traces, logs, errors)\n- Your workspace location\n- Any early assumptions to verify later\n\n**Before Proceeding**: Can you clearly explain this bug to someone else? Do you know how to reproduce it?",
+      "agentRole": "You are beginning your investigation. Take time to understand what you're looking for before you start looking.",
+      "requireConfirmation": false
+    },
+    {
+      "id": "phase-1a-plan",
+      "title": "Phase 1A: Plan Your Investigation Approach",
+      "prompt": "**PLAN HOW YOU'LL TRACK DOWN THIS BUG**\n\nYou're about to analyze the codebase. But first, think strategically about HOW you'll investigate.\n\n**Think Through**:\n\n1. **Where does execution start?**\n   - What triggers this bug? (API call, user action, test, scheduled job?)\n   - Where in the code does execution begin?\n\n2. **What's your investigation strategy?**\n   - Will you trace execution flow from entry to error?\n   - Will you start at the error and work backwards?\n   - Will you examine recent changes first?\n   - How will you identify the key points to investigate?\n\n3. **What could cause you to miss the real issue?**\n   - Focusing too narrowly on one area?\n   - Missing indirect causes or side effects?\n   - Assuming things work as documented?\n   - Not checking alternative execution paths?\n\n4. **What's your analysis plan?**\n   - List the sequence of investigations you'll do\n   - What will you look for at each step?\n   - How will you know when you understand enough?\n\n**OUTPUT**: Update INVESTIGATION_CONTEXT.md with \"Phase 1 Investigation Plan\" section:\n- Your investigation strategy\n- Sequence of steps you'll take\n- Key questions you need to answer\n- Risks you're watching out for\n\n**Self-Check**: Is your plan specific enough to follow? Does it account for the ways you might miss the real cause?",
+      "agentRole": "You are a strategic investigator planning your approach. Think before you dive in.",
+      "requireConfirmation": false
+    },
+    {
+      "id": "phase-1b-execute",
+      "title": "Phase 1B: Execute Your Investigation",
+      "prompt": "**CARRY OUT YOUR INVESTIGATION PLAN**\n\nNow execute the investigation strategy you designed.\n\n**Execute Your Plan**:\n- Follow the sequence of investigations you planned\n- Trace execution flow from entry point to error\n- Track how data flows and state changes\n- Read the actual code at key points\n- Note anything suspicious or unexpected\n- Adapt your plan if you discover new information\n\n**Document As You Go**:\nCreate ExecutionFlow.md with:\n- **Entry Point**: Where execution begins\n- **Call Chain**: Step-by-step path from entry to error (with file:line)\n- **Data Flow**: How data transforms along the way\n- **State Changes**: What gets modified\n- **Suspicious Points**: Code that could be problematic\n- **Patterns**: How things normally work vs how they work in failing case\n\n**Self-Critique**:\n- Did you follow your plan or skip steps?\n- Did you actually trace the execution flow, or just read code?\n- What did you learn that surprised you?\n- What are you still uncertain about?\n- Did your plan work, or should you investigate differently?\n\n**Critical Reminder**: You're building understanding of what the code DOES. You don't have a diagnosis yet - that comes later after you form and test hypotheses.",
+      "agentRole": "You are executing your investigation plan. Stay systematic and document what you find.",
+      "requireConfirmation": false
+    },
+    {
+      "id": "phase-2a-plan",
+      "title": "Phase 2A: Plan Your Hypothesis Development",
+      "prompt": "**PLAN HOW YOU'LL FORM HYPOTHESES**\n\nBased on your investigation, you'll now develop hypotheses about what's causing the bug.\n\n**Think Through**:\n\n1. **What patterns did you notice?**\n   - From your execution flow tracing, what stood out?\n   - What code seemed suspicious?\n   - What assumptions are baked into the code?\n\n2. **What types of causes should you consider?**\n   - Logic errors in the code?\n   - Data issues (wrong format, corruption, missing)?\n   - Timing or race conditions?\n   - Environment or configuration issues?\n   - Integration problems with dependencies?\n\n3. **How will you avoid anchoring on your first idea?**\n   - How many alternative hypotheses will you generate?\n   - How will you challenge your initial impressions?\n   - What evidence would contradict your leading theory?\n\n4. **What makes a good hypothesis?**\n   - Specific enough to test\n   - Explains all the symptoms\n   - Has clear evidence for/against\n   - Can be proven or disproven\n\n**OUTPUT**: Update INVESTIGATION_CONTEXT.md with \"Phase 2 Hypothesis Strategy\":\n- How you'll generate multiple hypotheses\n- What types of causes you'll consider\n- How you'll avoid confirmation bias\n- How you'll test your hypotheses\n\n**Self-Check**: Are you committed to generating multiple hypotheses, or are you already attached to one idea?",
+      "agentRole": "You are strategizing about hypothesis formation. Commit to staying open-minded.",
+      "requireConfirmation": false
+    },
+    {
+      "id": "phase-2b-execute",
+      "title": "Phase 2B: Develop and Prioritize Hypotheses",
+      "prompt": "**FORM MULTIPLE HYPOTHESES ABOUT THE BUG**\n\nNow generate your hypotheses following your strategy.\n\n**Generate Hypotheses**:\n\nFor each possible cause, create a hypothesis:\n\n**Hypothesis Template**:\n- **ID**: H1, H2, H3, etc.\n- **Statement**: \"The bug occurs because [specific cause]\"\n- **Evidence For**: What from your investigation supports this?\n- **Evidence Against**: What contradicts this or makes it unlikely?\n- **How to Test**: What evidence would prove/disprove this?\n- **Likelihood** (1-10): Based on current evidence\n\n**Generate 3-7 hypotheses**. Force yourself to consider alternatives even if one seems obvious.\n\n**Prioritize**:\nRank by:\n1. Likelihood (evidence strength)\n2. Testability (can you validate it?)\n3. Completeness (explains all symptoms?)\n\n**Plan Validation**:\nFor top 3-5 hypotheses:\n- What instrumentation would prove/disprove each?\n- What tests should you run?\n- What experiments could distinguish between them?\n\n**OUTPUT**: Create Hypotheses.md with all hypotheses, rankings, and validation strategy.\n\n**🚨 CRITICAL - YOU ARE NOT DONE:**\n\nYou now have theories. You do NOT have proof.\n\nEven if H1 has 10/10 likelihood, it's based on reading code, not evidence from running code.\n\nYou MUST continue to Phase 3 (design instrumentation) and Phase 4 (collect evidence).\n\nThis is not optional. High confidence without evidence = educated guess, not diagnosis.\n\nCall workflow_next to continue.",
+      "agentRole": "You are forming competing hypotheses. Stay open to alternatives even if one seems obvious.",
+      "requireConfirmation": false
+    },
+    {
+      "id": "phase-3a-plan",
+      "title": "Phase 3A: Design Your Instrumentation Strategy",
+      "prompt": "**PLAN HOW YOU'LL GATHER EVIDENCE**\n\nYou have hypotheses. Now design how you'll gather evidence to test them.\n\n**Think Through**:\n\n1. **What evidence would prove each hypothesis?**\n   - For H1, what specific data points would confirm it?\n   - For H2, what would you observe if it's correct?\n   - How can you distinguish between competing hypotheses?\n\n2. **Where should you add instrumentation?**\n   - What points in the execution flow are critical?\n   - Where could you observe the data/state you need?\n   - What's already being logged vs what do you need to add?\n\n3. **What's the right level of detail?**\n   - Too much logging = noise and hard to analyze\n   - Too little = gaps and missing evidence\n   - How will you balance this?\n\n4. **Can you use existing tests?**\n   - Are there tests you can enhance instead of adding new logging?\n   - Can you modify tests to expose the state you need?\n   - Should you write new targeted tests?\n\n**OUTPUT**: Update INVESTIGATION_CONTEXT.md with \"Phase 3 Instrumentation Plan\":\n- What evidence you need for each hypothesis\n- Where you'll add instrumentation (file:line)\n- What you'll log/observe at each point\n- Test scenarios you'll prepare\n- How you'll organize output to distinguish hypotheses\n\n**Self-Check**: Will this instrumentation actually give you the evidence you need? What might you miss?",
+      "agentRole": "You are designing your evidence collection strategy. Think carefully about what you need to prove.",
+      "requireConfirmation": false
+    },
+    {
+      "id": "phase-3b-execute",
+      "title": "Phase 3B: Implement Your Instrumentation",
+      "prompt": "**ADD INSTRUMENTATION AND PREPARE TEST SCENARIOS**\n\nNow implement the instrumentation strategy you designed.\n\n**Implement**:\n- Add debug logging at the points you identified\n- Enhance or create tests to expose necessary state\n- Add assertions to catch violations\n- Set up controlled experiments if needed\n- Label everything clearly ([H1], [H2], etc.)\n\n**Prepare Test Scenarios**:\n- Minimal reproduction case\n- Edge cases that might behave differently\n- Working scenarios for comparison\n- Variations that test specific hypotheses\n\n**OUTPUT**: Update INVESTIGATION_CONTEXT.md with:\n- List of instrumentation added (what/where/why)\n- Test scenarios prepared\n- Expected outcomes for each hypothesis\n- How you'll analyze results\n\n**Self-Critique**:\n- Did you add the instrumentation you planned?\n- Did you skip any because it seemed unnecessary?\n- Is your instrumentation labeled clearly?\n- Are your test scenarios sufficient?\n\n**Readiness Check**: If you run these tests, will you get the evidence you need to prove/disprove your hypotheses?",
+      "agentRole": "You are implementing your evidence collection plan. Good instrumentation is the foundation of proof.",
+      "requireConfirmation": false
+    },
+    {
+      "id": "phase-4-execute",
+      "title": "Phase 4: Collect Evidence",
+      "prompt": "**RUN INSTRUMENTED CODE AND COLLECT EVIDENCE**\n\nNow run your test scenarios and collect the evidence.\n\n**Execute**:\n- Run minimal reproduction case\n- Run edge cases and variations\n- Run working scenarios for comparison\n- Capture all output (logs, errors, test results)\n\n**Organize Evidence**:\nFor each hypothesis, create Evidence_H1.md, Evidence_H2.md, etc.:\n- What did the instrumentation reveal?\n- Does behavior match predictions?\n- What unexpected findings emerged?\n- Quality rating (1-10): How strong is this evidence?\n\n**Analyze Patterns**:\n- Which hypotheses are supported by evidence?\n- Which are contradicted?\n- Are there patterns you didn't predict?\n- Do you need different instrumentation?\n- Should you form new hypotheses?\n\n**Update Hypotheses**:\nUpdate Hypotheses.md with:\n- Evidence collected for each\n- New likelihood scores based on evidence\n- Evidence quality ratings\n- New insights or remaining questions\n\n**Decision Point**:\n- Strong evidence (8+/10) for one hypothesis? → Proceed to validation\n- Need more instrumentation? → Go back and add it\n- Need to revise hypotheses? → Update them\n\nBut you're not done until you have strong evidence. Keep investigating.",
+      "agentRole": "You are collecting evidence systematically. Let the data guide you, not your assumptions.",
+      "requireConfirmation": false
+    },
+    {
+      "id": "phase-5-validate",
+      "title": "Phase 5: Validate Your Conclusion",
+      "prompt": "**RIGOROUSLY VALIDATE YOUR FINDING**\n\nYou have a leading hypothesis with evidence. Now be your harshest critic.\n\n**State Your Conclusion**:\n- What hypothesis has the strongest evidence?\n- What's your confidence (1-10)?\n- What evidence supports it?\n\n**Challenge Yourself (Adversarial Review)**:\n\n1. **Alternative Explanations**: What else could explain the evidence you collected?\n2. **Contradicting Evidence**: What evidence doesn't fit your conclusion?\n3. **Bias Check**: Are you seeing what you expect to see?\n4. **Completeness**: Does this explain ALL symptoms, or just some?\n5. **Edge Cases**: Does your explanation hold for all scenarios?\n6. **Reproducibility**: Can you reliably reproduce the bug based on your understanding?\n\n**If confidence < 9/10**:\n- What specific test would raise confidence?\n- What alternative should you rule out?\n- What additional evidence do you need?\n- Go collect that evidence\n\n**Final Assessment**:\nAnswer these YES/NO:\n- Does this explain all observed symptoms?\n- Have you ruled out major alternatives?\n- Can you reproduce the bug based on this understanding?\n- Would you stake your reputation on this diagnosis?\n- Is there any contradicting evidence?\n\n**OUTPUT**: ValidationReport.md with:\n- Leading hypothesis and evidence\n- Alternatives considered and ruled out\n- Adversarial review findings\n- Final confidence score\n- Remaining uncertainties\n\n**Threshold**: 9+/10 confidence with strong evidence to proceed. If not, keep investigating.",
+      "agentRole": "You are validating your conclusion rigorously. Be skeptical of your own findings.",
+      "requireConfirmation": false
+    },
+    {
+      "id": "phase-6-writeup",
+      "title": "Phase 6: Prove Your Case",
+      "prompt": "**DOCUMENT YOUR INVESTIGATION - PROVE YOU FOUND THE TRUE SOURCE**\n\nYou've found the true source of the bug. Now prove it to others.\n\n**Your Task**: Create a diagnostic writeup that proves your case.\n\n**Structure**:\n\n**1. EXECUTIVE SUMMARY** (3-5 sentences)\n- What's the bug?\n- What's the true cause?\n- How confident are you? (should be 9-10/10)\n- What's the impact?\n\n**2. THE TRUE SOURCE** (detailed)\n- Explain the root cause\n- Why this causes the observed symptoms\n- Code locations (file:line)\n- Relevant code snippets\n\n**3. THE PROOF** (your evidence)\n- Key evidence that proves this diagnosis\n- How you collected it (instrumentation, tests)\n- Evidence quality and sources\n- Why alternative explanations don't fit\n\n**4. HOW TO REPRODUCE**\n- Minimal steps to reproduce\n- What to observe that confirms the diagnosis\n- Conditions required\n\n**5. YOUR INVESTIGATION**\n- What you analyzed\n- Hypotheses you tested\n- How you arrived at the conclusion\n- Key turning points\n\n**6. FIXING IT**\n- Suggested approach (conceptual)\n- Risks to consider\n- How to verify the fix\n- Tests that should be added\n\n**7. UNCERTAINTIES** (if any)\n- What you're still unsure about\n- Edge cases needing more investigation\n\n**OUTPUT**: DIAGNOSTIC_WRITEUP.md\n\n**Quality Check**:\n- Could someone fix this bug confidently from your writeup?\n- Have you proven your case with evidence?\n- Is it clear WHY this is the true source, not just a symptom?\n\n**Mission Complete**: You've tracked down the true source and proven it. Well done.",
+      "agentRole": "You are documenting your successful investigation. You found the truth - now prove it to others.",
+      "requireConfirmation": false
+    }
+  ]
+}

package/workflows/document-creation-workflow.json CHANGED

@@ -2,7 +2,7 @@
   "id": "document-creation-workflow",
   "name": "Document Creation Workflow",
   "version": "0.0.1",
-  "description": "A comprehensive workflow for creating high-quality documentation with intelligent complexity triage, automation levels, validation criteria, context management, and systematic quality assurance. Supports README, API docs, user guides, technical specs, and custom documentation with adaptive rigor based on complexity and risk.",
+  "description": "Create BROAD or COMPREHENSIVE documentation spanning multiple components/systems. Perfect for: project READMEs, complete API documentation, user guides covering multiple features, technical specifications for systems. Uses complexity triage (Simple/Standard/Complex) to adapt rigor. For SINGLE, BOUNDED subjects (one class, one integration), use scoped-documentation-workflow instead for better scope discipline.",
   "clarificationPrompts": [
     "What type of document do you need? (README, API documentation, user guide, technical specification, or custom)",
     "What's the document's purpose and target audience?",

package/workflows/documentation-update-workflow.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "id": "documentation-update-workflow",
   "name": "Documentation Update & Maintenance Workflow",
   "version": "1.0.0",
-  "description": "A focused workflow for updating and maintaining existing codebase documentation. Analyzes changes since last update, identifies outdated sections, and systematically refreshes documentation while preserving valuable existing content. Designed to complement the deep-documentation-workflow for ongoing documentation maintenance.",
+  "description": "UPDATE and MAINTAIN existing documentation. Analyzes Git history to detect staleness, identifies outdated sections, and systematically refreshes docs while preserving valuable content. Perfect for: refreshing docs after code changes, scheduled maintenance, addressing feedback. NOT for creating new docs - use scoped-documentation-workflow or document-creation-workflow for new documentation.",
   "clarificationPrompts": [
     "Which existing documentation needs to be updated? (provide file paths or scope description)",
     "When was this documentation last updated? (approximate date or 'unknown')",

package/workflows/routines/plan-analysis.json ADDED Viewed

@@ -0,0 +1,139 @@
+{
+  "id": "routine-plan-analysis",
+  "name": "Plan Analysis Routine",
+  "version": "1.0.0",
+  "description": "Validates implementation plans for completeness, pattern adherence, and risk. Checks plans against requirements, constraints, and codebase patterns. Designed for delegation to a Plan Analyzer subagent or for manual execution by the main agent.",
+  "clarificationPrompts": [
+    "What plan should I analyze? (file reference or inline)",
+    "What requirements must the plan address?",
+    "What constraints must it follow? (patterns, rules, standards)",
+    "What context should I consider? (codebase patterns, user rules, background)"
+  ],
+  "preconditions": [
+    "Plan is available (as file or inline text)",
+    "Requirements are clearly stated",
+    "Constraints are specified (patterns, rules, standards)",
+    "Agent has read access to codebase for pattern verification"
+  ],
+  "metaGuidance": [
+    "**ROUTINE PURPOSE:**",
+    "This routine validates implementation plans against requirements, constraints, and established patterns. It identifies gaps, risks, and deviations before execution begins.",
+    "",
+    "**CORE PRINCIPLES:**",
+    "- COMPLETENESS: Verify all requirements are addressed",
+    "- COMPLIANCE: Check adherence to patterns and rules",
+    "- RISK-AWARE: Identify potential issues before they occur",
+    "- CONSTRUCTIVE: Suggest improvements; don't just criticize",
+    "",
+    "**EXPECTED INPUT FORMAT:**",
+    "Plan: Can be a file reference (e.g., 'implementation-plan.md') or inline text. A plan typically includes: objectives, approach/strategy, steps/phases, dependencies, testing strategy, and success criteria.",
+    "Requirements: List of what must be accomplished",
+    "Constraints: Patterns to follow, rules to obey, standards to meet",
+    "Context: Background information, codebase patterns, user rules",
+    "",
+    "**PLAN HANDLING:**",
+    "If plan is a file reference, use read_file to load it. If plan is provided inline as text, work with it directly.",
+    "",
+    "**EXECUTION MODEL:**",
+    "This routine is designed for autonomous execution. You will receive all necessary context upfront. Your role is to validate thoroughly and identify issues."
+  ],
+  "steps": [
+    {
+      "id": "step-0-understand-plan",
+      "title": "Step 0: Understand the Plan",
+      "prompt": "**UNDERSTAND THE PLAN** (5-10 min) - Read and comprehend\n\n**YOUR MISSION:** Thoroughly understand what the plan proposes to do.\n\n**PLAN YOUR APPROACH:**\nBefore analyzing, think:\n- What is the plan trying to accomplish?\n- What are the major components or phases?\n- What's the overall strategy?\n\n**EXECUTE:**\n1. Read the plan (use read_file if it's a file reference)\n2. Identify the main objectives\n3. Break down into major components or phases\n4. Note the proposed approach/strategy\n5. Identify any dependencies or prerequisites\n\n**REFLECT:**\nAs you read, ask yourself:\n- What is this plan really trying to achieve?\n- What's the high-level strategy?\n- What are the major steps or phases?\n- Are there dependencies between components?\n- Is the plan clear and well-structured?\n\n**WORKING NOTES:**\nCapture your understanding:\n- Main objectives (what's being accomplished)\n- Major components/phases (how it's structured)\n- Proposed strategy (the approach)\n- Dependencies (what depends on what)\n- Initial impressions (clarity, structure, completeness)",
+      "agentRole": "You are a meticulous plan analyst who ensures complete understanding before evaluation.",
+      "guidance": [
+        "READING STRATEGY: If plan is a file reference, use read_file. If inline, parse the text carefully",
+        "OBJECTIVE EXTRACTION: What is the plan trying to accomplish? Be specific - not 'fix bug' but 'fix token validation bug in AuthService'",
+        "COMPONENT IDENTIFICATION: Break plan into logical pieces - phases, modules, steps, layers",
+        "STRATEGY ANALYSIS: What's the approach? Incremental? Big bang? Refactor then fix? Fix then refactor?",
+        "DEPENDENCY MAPPING: What must happen before what? Are dependencies explicit or implicit?",
+        "QUALITY INDICATORS - Clear plan: Explicit objectives, logical structure, clear dependencies, well-organized",
+        "QUALITY INDICATORS - Unclear plan: Vague objectives, jumbled structure, hidden dependencies, hard to follow",
+        "CONSTRAINT: Just understand - don't evaluate yet",
+        "OUTPUT LIMIT: Summary of plan understanding (aim for 300-400 words)"
+      ],
+      "requireConfirmation": false
+    },
+    {
+      "id": "step-1-completeness-check",
+      "title": "Step 1: Completeness Check",
+      "prompt": "**COMPLETENESS CHECK** (10-15 min) - Verify all requirements addressed\n\n**YOUR MISSION:** Ensure every requirement is addressed by the plan.\n\n**PLAN YOUR APPROACH:**\nBased on your understanding, decide:\n- How will I map requirements to plan elements?\n- What might be missing?\n- What's implied vs explicit?\n\n**EXECUTE:**\n1. List all stated requirements\n2. For each requirement, find where it's addressed in the plan\n3. Identify requirements that are missing or partially addressed\n4. Check for implicit requirements (not stated but necessary)\n5. Verify success criteria are defined\n\n**REFLECT:**\nAs you check, ask yourself:\n- Is every requirement explicitly addressed?\n- Are any requirements only partially covered?\n- What implicit requirements exist? (testing, docs, migration, rollback)\n- How will success be measured?\n- What's missing that should be there?\n\n**WORKING NOTES:**\nCapture your analysis:\n- Requirement coverage matrix (requirement → plan element)\n- Missing requirements (stated but not addressed)\n- Partial coverage (addressed but incompletely)\n- Implicit requirements (testing, docs, migration, monitoring, rollback)\n- Success criteria (how will we know it worked?)\n- Completeness score (X/Y requirements fully addressed)",
+      "agentRole": "You are a thorough requirements analyst who ensures nothing is overlooked.",
+      "guidance": [
+        "MAPPING STRATEGY: Create explicit mapping - Requirement 1 → Plan Section 2.3, Requirement 2 → Plan Section 1.1 + 3.2",
+        "MISSING DETECTION: Look for requirements with no corresponding plan element. These are gaps.",
+        "PARTIAL COVERAGE: Requirement is mentioned but not fully addressed. Example: 'Add tests' mentioned but no test strategy",
+        "IMPLICIT REQUIREMENTS: Always check for - Testing strategy, Documentation updates, Migration/upgrade path, Rollback plan, Monitoring/observability, Performance impact, Security considerations",
+        "SUCCESS CRITERIA: How will we know the plan succeeded? Metrics? Tests? User validation?",
+        "QUALITY INDICATORS - Complete plan: All requirements mapped, implicit requirements addressed, success criteria defined",
+        "QUALITY INDICATORS - Incomplete plan: Missing requirements, implicit requirements not addressed, no success criteria",
+        "CONSTRAINT: Focus on what's missing, not how it's done (that's the next step)",
+        "OUTPUT LIMIT: Completeness analysis with requirement mapping and gaps (aim for 400-500 words)"
+      ],
+      "requireConfirmation": false
+    },
+    {
+      "id": "step-2-pattern-compliance",
+      "title": "Step 2: Pattern Compliance Check",
+      "prompt": "**PATTERN COMPLIANCE** (15-20 min) - Verify adherence to patterns and rules\n\n**YOUR MISSION:** Check if the plan follows established patterns, rules, and standards.\n\n**PLAN YOUR APPROACH:**\nBased on constraints provided, decide:\n- What patterns should I verify?\n- Where in the codebase are these patterns?\n- How will I check compliance?\n\n**EXECUTE:**\n1. Review all stated constraints (patterns, rules, standards)\n2. For each constraint, check if plan follows it\n3. Use codebase_search/grep to find existing patterns\n4. Compare plan approach to established patterns\n5. Identify deviations (intentional or accidental)\n6. Check against user rules (if provided)\n\n**REFLECT:**\nAs you check, ask yourself:\n- Does the plan follow established patterns?\n- Are deviations justified or accidental?\n- Does it match how similar things are done?\n- Does it follow user rules and standards?\n- Are there better patterns it should use?\n\n**WORKING NOTES:**\nCapture your analysis:\n- Pattern compliance matrix (constraint → compliance status)\n- Deviations identified (where plan differs from patterns)\n- Justification check (are deviations explained?)\n- Similar implementations (how others did this)\n- User rule compliance (follows stated rules?)\n- Recommended patterns (better approaches available?)",
+      "agentRole": "You are a pattern expert who ensures consistency with established practices.",
+      "guidance": [
+        "PATTERN DISCOVERY: Use codebase_search to find existing patterns - 'How is authentication implemented?' or grep for similar code",
+        "COMPLIANCE CHECKING: For each constraint, verify plan follows it. Example: Constraint 'use dependency injection' → Check if plan shows DI",
+        "DEVIATION ANALYSIS: Where does plan differ from patterns? Is it justified (with explanation) or accidental (oversight)?",
+        "SIMILARITY SEARCH: Find similar implementations - grep 'class.*Service', codebase_search 'How are services structured?'",
+        "USER RULE VERIFICATION: Check against provided user rules - naming conventions, architectural patterns, testing requirements",
+        "BETTER ALTERNATIVES: Are there superior patterns the plan should use? More modern? More maintainable?",
+        "QUALITY INDICATORS - Compliant plan: Follows patterns, deviations justified, matches similar code, follows user rules",
+        "QUALITY INDICATORS - Non-compliant plan: Ignores patterns, unexplained deviations, inconsistent with codebase, violates user rules",
+        "CONSTRAINT: Focus on pattern adherence, not implementation details",
+        "OUTPUT LIMIT: Pattern compliance analysis with deviations and recommendations (aim for 500-600 words)"
+      ],
+      "requireConfirmation": false
+    },
+    {
+      "id": "step-3-risk-assessment",
+      "title": "Step 3: Risk Assessment",
+      "prompt": "**RISK ASSESSMENT** (10-15 min) - Identify potential issues\n\n**YOUR MISSION:** Identify risks, potential problems, and failure modes in the plan.\n\n**PLAN YOUR APPROACH:**\nBased on your analysis so far, decide:\n- What could go wrong?\n- What's risky about this approach?\n- What failure modes exist?\n\n**EXECUTE:**\n1. Identify technical risks (complexity, dependencies, unknowns)\n2. Identify execution risks (ordering, timing, coordination)\n3. Identify business risks (downtime, data loss, user impact)\n4. Check for single points of failure\n5. Assess rollback/recovery capability\n6. Consider edge cases and error scenarios\n\n**REFLECT:**\nAs you assess, ask yourself:\n- What's the riskiest part of this plan?\n- What could cause this to fail?\n- What happens if something goes wrong?\n- Are there single points of failure?\n- Can we rollback if needed?\n- What edge cases could break this?\n\n**WORKING NOTES:**\nCapture your assessment:\n- Technical risks (complexity, unknowns, dependencies)\n- Execution risks (ordering, timing, coordination)\n- Business risks (downtime, data loss, user impact)\n- Single points of failure (no redundancy or fallback)\n- Rollback capability (can we undo this?)\n- Edge cases (unusual scenarios that could break it)\n- Risk mitigation suggestions (how to reduce risks)",
+      "agentRole": "You are a risk analyst who identifies potential problems before they occur.",
+      "guidance": [
+        "SEVERITY LEVELS: HIGH (blocks execution, causes outages, data loss), MEDIUM (significant impact, workarounds exist), LOW (minor impact, easily mitigated)",
+        "TECHNICAL RISK CATEGORIES: High complexity (too many moving parts), Unknown dependencies (what else does this affect?), Untested approach (never done this before), Performance impact (could slow things down), Security implications (could introduce vulnerabilities)",
+        "EXECUTION RISK CATEGORIES: Ordering dependencies (must do A before B), Timing issues (race conditions, deadlocks), Coordination needs (multiple teams/systems), Data migration risks (could corrupt data), Deployment complexity (hard to deploy safely)",
+        "BUSINESS RISK CATEGORIES: Downtime required (service interruption), Data loss potential (could lose data), User impact (breaks workflows), Backward compatibility (breaks existing code), Production incident risk (could cause outage)",
+        "SINGLE POINT OF FAILURE: Where is there no redundancy? No fallback? No alternative path?",
+        "ROLLBACK ANALYSIS: Can we undo this? How? What if rollback fails? Is there a recovery plan?",
+        "EDGE CASE THINKING: What unusual scenarios could break this? Concurrent access? Partial failures? Network issues? Resource exhaustion?",
+        "QUALITY INDICATORS - Low risk plan: Risks identified and mitigated, rollback possible, no single points of failure, edge cases considered",
+        "QUALITY INDICATORS - High risk plan: Risks not addressed, no rollback plan, single points of failure, edge cases ignored",
+        "CONSTRAINT: Identify risks and briefly note possible mitigations, but don't design full solutions (that's for recommendations)",
+        "OUTPUT LIMIT: Risk assessment with categories and severity (aim for 400-500 words)"
+      ],
+      "requireConfirmation": false
+    },
+    {
+      "id": "step-4-synthesize-analysis",
+      "title": "Step 4: Synthesize & Deliver Analysis",
+      "prompt": "**SYNTHESIZE YOUR ANALYSIS**\n\nYou've completed your plan analysis. Now synthesize and structure your findings.\n\n**REFLECT ON YOUR ANALYSIS:**\n- What's the overall quality of this plan?\n- What are the biggest gaps or issues?\n- What should be addressed before execution?\n- Is this plan ready to execute?\n- What's your confidence level in this plan?\n\n**CREATE STRUCTURED DELIVERABLE:**\n\nProduce `{deliverableName}` with your structured analysis.\n\n**Key sections to include:**\n- **Executive Summary**: Overall assessment, critical issues, major strengths, next steps\n- **Completeness Analysis**: Requirement coverage, gaps, implicit requirements, success criteria\n- **Pattern Compliance**: Adherence to constraints, deviations, similar implementations, user rule compliance\n- **Risk Assessment**: Technical/execution/business risks with severity, single points of failure, rollback capability\n- **Recommendations**: Prioritized by importance (Critical / Important / Nice to have)\n- **Verdict**: Ready to execute? Confidence level (1-10)? Recommended action?\n\nOrganize these in the way that best communicates your findings. Use the structure that makes your analysis clearest.\n\n**SELF-VALIDATE:**\n- All requirements checked?\n- All constraints verified?\n- All risks identified?\n- Recommendations actionable?\n- Verdict justified?\n\nIf YES to all, deliver. If NO, revise first.",
+      "agentRole": "You are a senior plan analyst who synthesizes findings into clear, actionable recommendations.",
+      "guidance": [
+        "SYNTHESIS APPROACH: Don't just list findings - assess overall quality. Is this a good plan? Ready to execute?",
+        "STRUCTURE REFERENCE: Suggested format - Executive Summary (3-5 bullets), Completeness (requirement matrix, gaps, score), Compliance (constraint checks, deviations, similar code), Risks (categorized by severity), Recommendations (prioritized), Verdict (ready? confidence? action?). Adapt as needed.",
+        "PRIORITIZATION: Order issues by impact - Critical (blocks execution), Important (should fix), Nice to have (consider)",
+        "VERDICT FRAMEWORK: Ready (minor issues only), Needs Revision (significant gaps but salvageable), Not Ready (major redesign needed)",
+        "CONFIDENCE SCORING: 1-3 (many issues), 4-6 (some concerns), 7-8 (mostly good), 9-10 (excellent plan). This adds nuance beyond the categorical verdict.",
+        "ACTIONABLE RECOMMENDATIONS: Be specific - Not 'add tests' but 'Add unit tests for AuthService.validateToken covering edge cases: null token, expired token, malformed token'",
+        "CONSTRUCTIVE TONE: Identify issues to improve the plan, not to criticize. Suggest solutions, not just problems",
+        "EVIDENCE-BASED: Support assessments with specific examples from your analysis",
+        "QUALITY INDICATORS - Strong analysis: Clear verdict, specific recommendations, evidence-based, actionable, constructive",
+        "QUALITY INDICATORS - Weak analysis: Vague verdict, generic recommendations, no evidence, not actionable, just criticism",
+        "SELF-VALIDATION CHECKLIST: All requirements checked? Constraints verified? Risks identified? Recommendations actionable? Verdict justified?",
+        "DELIVERABLE CREATION: Use write tool to create {deliverableName} with your structured analysis. Don't just output in chat - create the actual file"
+      ],
+      "requireConfirmation": false
+    }
+  ]
+}