npm - @exaudeus/workrail - Versions diffs - 0.1.4 → 0.1.6 - Mend

@exaudeus/workrail 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/dist/application/services/enhanced-loop-validator.d.ts +21 -0
package/dist/application/services/enhanced-loop-validator.js +137 -0
package/dist/application/services/validation-engine.d.ts +3 -0
package/dist/application/services/validation-engine.js +20 -2
package/dist/cli.js +33 -6
package/package.json +1 -1
package/workflows/coding-task-workflow-with-loops.json +59 -10
package/workflows/exploration-workflow.json +198 -17
package/workflows/systemic-bug-investigation-with-loops.json +61 -15

package/dist/application/services/enhanced-loop-validator.d.ts ADDED

@@ -0,0 +1,21 @@
+import { LoopStep } from '../../types/workflow-types';
+export interface EnhancedValidationResult {
+    warnings: string[];
+    suggestions: string[];
+    info: string[];
+}
+export declare class EnhancedLoopValidator {
+    private readonly PROMPT_WARNING_THRESHOLD;
+    private readonly PROMPT_ERROR_THRESHOLD;
+    private readonly TEMPLATE_VAR_PATTERN;
+    private readonly TERNARY_PATTERN;
+    private readonly NESTED_TERNARY_PATTERN;
+    validateLoopStep(step: LoopStep): EnhancedValidationResult;
+    private getLoopBodySteps;
+    private validateConditionalLogic;
+    private validatePromptLength;
+    private validateTemplateVariables;
+    private getKnownLoopVariables;
+    private detectLoopPatterns;
+    private validateLoopStructure;
+}

package/dist/application/services/enhanced-loop-validator.js ADDED

@@ -0,0 +1,137 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.EnhancedLoopValidator = void 0;
+class EnhancedLoopValidator {
+    constructor() {
+        this.PROMPT_WARNING_THRESHOLD = 1500;
+        this.PROMPT_ERROR_THRESHOLD = 2000;
+        this.TEMPLATE_VAR_PATTERN = /\{\{([^}]+)\}\}/g;
+        this.TERNARY_PATTERN = /\{\{[^}]*\?[^}]*:[^}]*\}\}/;
+        this.NESTED_TERNARY_PATTERN = /\{\{[^}]*\?[^}]*\?[^}]*:[^}]*:[^}]*\}\}/;
+    }
+    validateLoopStep(step) {
+        const warnings = [];
+        const suggestions = [];
+        const info = [];
+        const stepsToValidate = this.getLoopBodySteps(step);
+        for (const bodyStep of stepsToValidate) {
+            this.validateConditionalLogic(bodyStep, warnings, suggestions);
+            this.validatePromptLength(bodyStep, warnings, suggestions);
+            this.validateTemplateVariables(bodyStep, step, warnings, suggestions);
+        }
+        this.detectLoopPatterns(step, info, suggestions);
+        this.validateLoopStructure(step, warnings, suggestions);
+        return { warnings, suggestions, info };
+    }
+    getLoopBodySteps(step) {
+        if (Array.isArray(step.body)) {
+            return step.body;
+        }
+        return [];
+    }
+    validateConditionalLogic(step, warnings, suggestions) {
+        const fieldsToCheck = ['prompt', 'title', 'agentRole'];
+        for (const field of fieldsToCheck) {
+            const value = step[field];
+            if (!value || typeof value !== 'string')
+                continue;
+            if (this.NESTED_TERNARY_PATTERN.test(value)) {
+                warnings.push(`Step '${step.id}' contains nested ternary operators in ${field}. This can be hard to read and maintain.`);
+                suggestions.push(`Consider refactoring nested conditionals into separate steps with runCondition.`);
+            }
+            else if (this.TERNARY_PATTERN.test(value)) {
+                const ternaryCount = (value.match(/\?/g) || []).length;
+                if (ternaryCount >= 2) {
+                    warnings.push(`Step '${step.id}' contains complex conditional logic (${ternaryCount} conditions) in ${field}.`);
+                    suggestions.push(`For loops with ${ternaryCount} or more conditional paths, consider using separate steps with runCondition instead of inline conditionals.`);
+                }
+            }
+        }
+    }
+    validatePromptLength(step, warnings, suggestions) {
+        if (!step.prompt)
+            return;
+        const promptLength = step.prompt.length;
+        if (promptLength > this.PROMPT_ERROR_THRESHOLD) {
+            warnings.push(`Step '${step.id}' has a very long prompt (${promptLength} characters). This may cause issues.`);
+            suggestions.push(`Consider splitting this into multiple steps or moving content to the guidance section.`);
+        }
+        else if (promptLength > this.PROMPT_WARNING_THRESHOLD) {
+            warnings.push(`Step '${step.id}' has a long prompt (${promptLength} characters).`);
+            suggestions.push(`For better maintainability, consider breaking this into smaller, focused steps.`);
+        }
+        const conditionalMatches = step.prompt.match(/\{\{[^}]*\?[^}]*\}\}/g);
+        if (conditionalMatches) {
+            let totalConditionalContent = 0;
+            for (const match of conditionalMatches) {
+                const literals = match.match(/'[^']*'|"[^"]*"/g) || [];
+                for (const literal of literals) {
+                    totalConditionalContent += literal.length - 2;
+                }
+            }
+            if (totalConditionalContent > this.PROMPT_ERROR_THRESHOLD) {
+                warnings.push(`Step '${step.id}' has conditional content totaling ~${totalConditionalContent} characters when expanded.`);
+                suggestions.push(`This exceeds safe limits. Use separate steps with runCondition instead of inline conditionals.`);
+            }
+        }
+    }
+    validateTemplateVariables(step, loopStep, warnings, suggestions) {
+        const knownVars = this.getKnownLoopVariables(loopStep);
+        const fieldsToCheck = ['prompt', 'title', 'agentRole'];
+        for (const field of fieldsToCheck) {
+            const value = step[field];
+            if (!value || typeof value !== 'string')
+                continue;
+            const matches = value.matchAll(this.TEMPLATE_VAR_PATTERN);
+            for (const match of matches) {
+                const expression = match[1].trim();
+                const varName = expression.split(/[^a-zA-Z0-9_$]/, 1)[0];
+                if (varName && !knownVars.has(varName)) {
+                    warnings.push(`Step '${step.id}' references potentially undefined variable '${varName}' in ${field}.`);
+                    suggestions.push(`Ensure '${varName}' is defined in the context or use a known loop variable like: ${Array.from(knownVars).join(', ')}`);
+                }
+            }
+        }
+    }
+    getKnownLoopVariables(loopStep) {
+        const vars = new Set();
+        vars.add(loopStep.loop.iterationVar || 'iteration');
+        if (loopStep.loop.type === 'forEach') {
+            vars.add(loopStep.loop.itemVar || 'item');
+            vars.add(loopStep.loop.indexVar || 'index');
+        }
+        vars.add('context');
+        vars.add('workflowId');
+        return vars;
+    }
+    detectLoopPatterns(step, info, suggestions) {
+        const bodySteps = this.getLoopBodySteps(step);
+        if (step.loop.type === 'for' && bodySteps.length > 0) {
+            const firstStep = bodySteps[0];
+            if (firstStep.prompt?.includes('analysis') ||
+                firstStep.title?.toLowerCase().includes('analysis') ||
+                firstStep.prompt?.includes('Step 1') ||
+                firstStep.prompt?.includes('Structure')) {
+                info.push('Progressive analysis pattern detected.');
+                suggestions.push('Consider using the multi-step pattern with separate steps and runCondition for clearer structure.');
+            }
+        }
+        if (bodySteps.some(s => s.prompt?.includes('===') && s.prompt?.includes('?'))) {
+            info.push('Multi-conditional loop pattern detected.');
+            suggestions.push('For loops with multiple conditional paths, the separate steps pattern is more maintainable than inline conditionals.');
+        }
+    }
+    validateLoopStructure(step, warnings, suggestions) {
+        if (typeof step.body === 'string' && step.loop.type === 'for' &&
+            typeof step.loop.count === 'number' && step.loop.count > 3) {
+            suggestions.push(`For loops with ${step.loop.count} iterations, consider if each iteration truly needs different logic. ` +
+                `If so, use separate steps with runCondition for better clarity.`);
+        }
+        if (step.loop.maxIterations > 100) {
+            warnings.push(`Loop '${step.id}' has a very high maxIterations limit (${step.loop.maxIterations}). ` +
+                `This could cause performance issues.`);
+            suggestions.push(`Consider if you really need ${step.loop.maxIterations} iterations, or implement pagination/chunking instead.`);
+        }
+    }
+}
+exports.EnhancedLoopValidator = EnhancedLoopValidator;

package/dist/application/services/validation-engine.d.ts CHANGED

@@ -21,10 +21,13 @@ export interface ValidationResult {
     valid: boolean;
     issues: string[];
     suggestions: string[];
+    warnings?: string[];
+    info?: string[];
 }
 export declare class ValidationEngine {
     private ajv;
     private schemaCache;
+    private enhancedLoopValidator;
     constructor();
     private compileSchema;
     private evaluateCriteria;

package/dist/application/services/validation-engine.js CHANGED

@@ -8,10 +8,12 @@ const error_handler_1 = require("../../core/error-handler");
 const condition_evaluator_1 = require("../../utils/condition-evaluator");
 const ajv_1 = __importDefault(require("ajv"));
 const workflow_types_1 = require("../../types/workflow-types");
+const enhanced_loop_validator_1 = require("./enhanced-loop-validator");
 class ValidationEngine {
     constructor() {
         this.schemaCache = new Map();
         this.ajv = new ajv_1.default({ allErrors: true });
+        this.enhancedLoopValidator = new enhanced_loop_validator_1.EnhancedLoopValidator();
     }
     compileSchema(schema) {
         const schemaKey = JSON.stringify(schema);
@@ -205,6 +207,7 @@ class ValidationEngine {
         throw new error_handler_1.ValidationError('Invalid validationCriteria format.');
     }
     validateLoopStep(step, workflow) {
+        const enhancedResult = this.enhancedLoopValidator.validateLoopStep(step);
         const issues = [];
         const suggestions = [];
         const validTypes = ['while', 'until', 'for', 'forEach'];
@@ -309,15 +312,22 @@ class ValidationEngine {
             issues.push(`Invalid index variable name '${step.loop.indexVar}'`);
             suggestions.push('Use a valid JavaScript variable name');
         }
+        const allWarnings = [...(enhancedResult.warnings || [])];
+        const allSuggestions = [...suggestions, ...(enhancedResult.suggestions || [])];
+        const allInfo = [...(enhancedResult.info || [])];
         return {
             valid: issues.length === 0,
             issues,
-            suggestions
+            suggestions: allSuggestions,
+            warnings: allWarnings.length > 0 ? allWarnings : undefined,
+            info: allInfo.length > 0 ? allInfo : undefined
         };
     }
     validateWorkflow(workflow) {
         const issues = [];
         const suggestions = [];
+        const warnings = [];
+        const info = [];
         const stepIds = new Set();
         for (const step of workflow.steps) {
             if (stepIds.has(step.id)) {
@@ -331,6 +341,12 @@ class ValidationEngine {
                 const loopResult = this.validateLoopStep(step, workflow);
                 issues.push(...loopResult.issues.map(issue => `Step '${step.id}': ${issue}`));
                 suggestions.push(...loopResult.suggestions);
+                if (loopResult.warnings) {
+                    warnings.push(...loopResult.warnings.map(warning => `Step '${step.id}': ${warning}`));
+                }
+                if (loopResult.info) {
+                    info.push(...loopResult.info.map(i => `Step '${step.id}': ${i}`));
+                }
             }
             else {
                 if (!step.id) {
@@ -368,7 +384,9 @@ class ValidationEngine {
         return {
             valid: issues.length === 0,
             issues,
-            suggestions
+            suggestions,
+            warnings: warnings.length > 0 ? warnings : undefined,
+            info: info.length > 0 ? info : undefined
         };
     }
     isValidVariableName(name) {

package/dist/cli.js CHANGED

@@ -11,7 +11,7 @@ const chalk_1 = __importDefault(require("chalk"));
 const os_1 = __importDefault(require("os"));
 const server_1 = require("./infrastructure/rpc/server");
 const workflow_service_1 = require("./application/services/workflow-service");
-const validation_1 = require("./application/validation");
+const validation_engine_1 = require("./application/services/validation-engine");
 const multi_directory_workflow_storage_1 = require("./infrastructure/storage/multi-directory-workflow-storage");
 const migrate_workflow_1 = require("./cli/migrate-workflow");
 const program = new commander_1.Command();
@@ -177,18 +177,45 @@ async function validateWorkflowFile(filePath) {
             console.error(chalk_1.default.yellow('\nPlease check the JSON syntax and try again.'));
             process.exit(1);
         }
-        const result = (0, validation_1.validateWorkflow)(workflow);
-        if (result.valid) {
+        const validationEngine = new validation_engine_1.ValidationEngine();
+        const result = validationEngine.validateWorkflow(workflow);
+        if (result.valid && !result.warnings?.length && !result.info?.length) {
             console.log(chalk_1.default.green('✅ Workflow is valid:'), filePath);
-            process.exit(0);
+        }
+        else if (result.valid) {
+            console.log(chalk_1.default.green('✅ Workflow is valid with warnings:'), filePath);
+            if (result.warnings && result.warnings.length > 0) {
+                console.log(chalk_1.default.yellow('\n⚠️  Warnings:'));
+                result.warnings.forEach(warning => {
+                    console.log(chalk_1.default.yellow('  •'), warning);
+                });
+            }
+            if (result.info && result.info.length > 0) {
+                console.log(chalk_1.default.blue('\nℹ️  Information:'));
+                result.info.forEach(info => {
+                    console.log(chalk_1.default.blue('  •'), info);
+                });
+            }
+            if (result.suggestions && result.suggestions.length > 0) {
+                console.log(chalk_1.default.gray('\n💡 Suggestions:'));
+                result.suggestions.forEach(suggestion => {
+                    console.log(chalk_1.default.gray('  •'), suggestion);
+                });
+            }
         }
         else {
             console.error(chalk_1.default.red('❌ Workflow validation failed:'), filePath);
             console.error(chalk_1.default.yellow('\nValidation errors:'));
-            result.errors.forEach(error => {
+            result.issues.forEach(error => {
                 console.error(chalk_1.default.red('  •'), error);
             });
-            console.error(chalk_1.default.yellow(`\nFound ${result.errors.length} validation error${result.errors.length === 1 ? '' : 's'}.`));
+            console.error(chalk_1.default.yellow(`\nFound ${result.issues.length} validation error${result.issues.length === 1 ? '' : 's'}.`));
+            if (result.suggestions && result.suggestions.length > 0) {
+                console.log(chalk_1.default.gray('\n💡 Suggestions:'));
+                result.suggestions.forEach(suggestion => {
+                    console.log(chalk_1.default.gray('  •'), suggestion);
+                });
+            }
             process.exit(1);
         }
     }

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@exaudeus/workrail",
-  "version": "0.1.4",
+  "version": "0.1.6",
   "description": "MCP server for structured workflow orchestration and step-by-step task guidance",
   "license": "MIT",
   "bin": {

package/workflows/coding-task-workflow-with-loops.json CHANGED

@@ -152,23 +152,72 @@
             },
             "body": [
                 {
-                    "id": "phase-1-sub-analysis",
-                    "title": "Analysis Step {{analysisStep}}/4",
-                    "prompt": "{{analysisStep === 1 ? '**STEP 1: STRUCTURAL MAPPING**\\n\\nBuild on phase-0c overview, dive deeper into structure:\\n\\n1. Module organization (packages/services)\\n2. Core components (controllers/services/models)\\n3. Architectural patterns from overview\\n4. File naming conventions\\n5. Code organization\\n\\n**Actions:** useTools() with list_dir, grep_search (class/interface/export), read 2-3 files\\n\\n**Output (400 words):**\\n- Structure summary\\n- User rules alignment\\n- Areas for next steps\\n\\nupdateDecisionLog() with 3-5 key files' : analysisStep === 2 ? '**STEP 2: TASK-RELEVANT MODULES**\\n\\nFocus on task-specific modules:\\n\\n1. Target areas from mapping\\n2. Core business logic\\n3. Data models (interfaces/types/schemas)\\n4. API contracts\\n5. Pattern implementation\\n\\n**Actions:** useTools() and matchPatterns() with codebase_search, read complete files (with imports), trace flows\\n\\n**Output (400 words):**\\n- Module responsibilities\\n- Patterns to match\\n- Integration points\\n\\nupdateDecisionLog() with core logic files' : analysisStep === 3 ? '**STEP 3: DEPENDENCIES & FLOWS**\\n\\nTrace dependencies and execution:\\n\\n1. Import mapping\\n2. Data flow tracing\\n3. Integration points\\n4. Side effects\\n5. Testing patterns\\n\\n**Actions:** useTools() to follow imports, find test files, trace error handling\\n\\n**Output (400 words):**\\n- Dependency map\\n- Integration challenges\\n- Testing strategies\\n- Risk indicators\\n\\nupdateDecisionLog() with dependencies and test approaches' : '**STEP 4: PATTERN DISCOVERY**\\n\\nIdentify established patterns relevant to the task type. Analyze if this is a pattern-heavy task (tests, telemetry, logging, APIs, UI components).\\n\\n**Actions:** useTools() extensively - codebase_search for concepts, grep_search for pattern markers, read_file for full examples\\n\\n**Output (400 words):**\\n- Pattern types discovered\\n- Reference implementations (with file paths)\\n- Key conventions to follow\\n- Pattern alignment recommendations\\n\\nupdateDecisionLog() with pattern templates and reference files'}}",
-                    "agentRole": "You are conducting focused analysis step {{analysisStep}} of 4. Your expertise lies in {{analysisStep === 1 ? 'understanding code structure and organization' : analysisStep === 2 ? 'identifying and analyzing task-specific components' : analysisStep === 3 ? 'tracing dependencies and system flows' : 'discovering and documenting established implementation patterns for the specific task type'}}. Use tools extensively and never make assumptions.",
+                    "id": "phase-1-step-structure",
+                    "title": "Analysis Step 1/4: Structure",
+                    "prompt": "**STEP 1: STRUCTURAL MAPPING**\\n\\nBuild on phase-0c overview, dive deeper into structure:\\n\\n1. Module organization (packages/services)\\n2. Core components (controllers/services/models)\\n3. Architectural patterns from overview\\n4. File naming conventions\\n5. Code organization\\n\\n**Actions:** useTools() with list_dir, grep_search (class/interface/export), read 2-3 files\\n\\n**Output (400 words):**\\n- Structure summary\\n- User rules alignment\\n- Areas for next steps\\n\\nupdateDecisionLog() with 3-5 key files",
+                    "agentRole": "You are conducting focused analysis step 1 of 4. Your expertise lies in understanding code structure and organization. Use tools extensively and never make assumptions.",
                     "guidance": [
-                        "This is step {{analysisStep}} of a 4-step analysis process",
+                        "This is step 1 of a 4-step analysis process",
+                        "Each step builds on the previous findings",
+                        "Use tools liberally - verify everything",
+                        "Update the Decision Log with key discoveries",
+                        "Respect word limits to prevent context bloat",
+                        "Note alignment/conflicts with user rules"
+                    ],
+                    "runCondition": {"var": "analysisStep", "equals": 1},
+                    "requireConfirmation": false
+                },
+                {
+                    "id": "phase-1-step-modules",
+                    "title": "Analysis Step 2/4: Modules",
+                    "prompt": "**STEP 2: TASK-RELEVANT MODULES**\\n\\nFocus on task-specific modules:\\n\\n1. Target areas from mapping\\n2. Core business logic\\n3. Data models (interfaces/types/schemas)\\n4. API contracts\\n5. Pattern implementation\\n\\n**Actions:** useTools() and matchPatterns() with codebase_search, read complete files (with imports), trace flows\\n\\n**Output (400 words):**\\n- Module responsibilities\\n- Patterns to match\\n- Integration points\\n\\nupdateDecisionLog() with core logic files",
+                    "agentRole": "You are conducting focused analysis step 2 of 4. Your expertise lies in identifying and analyzing task-specific components. Use tools extensively and never make assumptions.",
+                    "guidance": [
+                        "This is step 2 of a 4-step analysis process",
+                        "Each step builds on the previous findings",
+                        "Use tools liberally - verify everything",
+                        "Update the Decision Log with key discoveries",
+                        "Respect word limits to prevent context bloat",
+                        "Note alignment/conflicts with user rules"
+                    ],
+                    "runCondition": {"var": "analysisStep", "equals": 2},
+                    "requireConfirmation": false
+                },
+                {
+                    "id": "phase-1-step-dependencies",
+                    "title": "Analysis Step 3/4: Dependencies",
+                    "prompt": "**STEP 3: DEPENDENCIES & FLOWS**\\n\\nTrace dependencies and execution:\\n\\n1. Import mapping\\n2. Data flow tracing\\n3. Integration points\\n4. Side effects\\n5. Testing patterns\\n\\n**Actions:** useTools() to follow imports, find test files, trace error handling\\n\\n**Output (400 words):**\\n- Dependency map\\n- Integration challenges\\n- Testing strategies\\n- Risk indicators\\n\\nupdateDecisionLog() with dependencies and test approaches",
+                    "agentRole": "You are conducting focused analysis step 3 of 4. Your expertise lies in tracing dependencies and system flows. Use tools extensively and never make assumptions.",
+                    "guidance": [
+                        "This is step 3 of a 4-step analysis process",
+                        "Each step builds on the previous findings",
+                        "Use tools liberally - verify everything",
+                        "Update the Decision Log with key discoveries",
+                        "Respect word limits to prevent context bloat",
+                        "Note alignment/conflicts with user rules"
+                    ],
+                    "runCondition": {"var": "analysisStep", "equals": 3},
+                    "requireConfirmation": false
+                },
+                {
+                    "id": "phase-1-step-patterns",
+                    "title": "Analysis Step 4/4: Patterns",
+                    "prompt": "**STEP 4: PATTERN DISCOVERY**\\n\\nIdentify established patterns relevant to the task type. Analyze if this is a pattern-heavy task (tests, telemetry, logging, APIs, UI components).\\n\\n**Actions:** useTools() extensively - codebase_search for concepts, grep_search for pattern markers, read_file for full examples\\n\\n**Output (400 words):**\\n- Pattern types discovered\\n- Reference implementations (with file paths)\\n- Key conventions to follow\\n- Pattern alignment recommendations\\n\\nupdateDecisionLog() with pattern templates and reference files",
+                    "agentRole": "You are conducting focused analysis step 4 of 4. Your expertise lies in discovering and documenting established implementation patterns for the specific task type. Use tools extensively and never make assumptions.",
+                    "guidance": [
+                        "This is step 4 of a 4-step analysis process",
                         "Each step builds on the previous findings",
                         "Use tools liberally - verify everything",
                         "Update the Decision Log with key discoveries",
                         "Respect word limits to prevent context bloat",
                         "Note alignment/conflicts with user rules",
-                        "{{analysisStep === 4 ? 'TASK TYPE ANALYSIS: Determine if this is adding tests (find test utilities, mocking patterns), telemetry/analytics (find tracking implementations, event schemas), logging/monitoring (find log formats, monitoring integrations), API endpoints (find route patterns, validation, error handling), UI components (find component patterns, styling conventions), or other pattern-heavy work' : ''}}",
-                        "{{analysisStep === 4 ? 'PATTERN SEARCH STRATEGY: Use codebase_search with queries like \"How is [telemetry|testing|logging|API] implemented?\". Use grep_search for pattern markers (e.g., track, analytics, test, describe, log). Find 3-5 reference implementations similar to your task. Look for shared utilities, helpers, or base classes. Check for pattern documentation in README or docs' : ''}}",
-                        "{{analysisStep === 4 ? 'PATTERN EXTRACTION: Document file structure and naming conventions, common imports and dependencies, implementation approach (functional vs class-based), error handling patterns, configuration patterns, and testing approach for this type of feature' : ''}}",
-                        "{{analysisStep === 4 ? 'PATTERN VALIDATION: List discovered patterns with file references. Note any conflicting patterns or multiple approaches. Ask user: \"Found these pattern examples: [list files]. Should I follow these patterns, or are there other preferred examples?\" Set establishedPatterns context variable' : ''}}",
-                        "{{analysisStep === 4 ? 'FOCUS ON REUSABILITY: Patterns help avoid reinventing the wheel. Look especially for utility functions, base classes, shared components, or established conventions that should be followed' : ''}}"
+                        "TASK TYPE ANALYSIS: Determine if this is adding tests (find test utilities, mocking patterns), telemetry/analytics (find tracking implementations, event schemas), logging/monitoring (find log formats, monitoring integrations), API endpoints (find route patterns, validation, error handling), UI components (find component patterns, styling conventions), or other pattern-heavy work",
+                        "PATTERN SEARCH STRATEGY: Use codebase_search with queries like 'How is [telemetry|testing|logging|API] implemented?'. Use grep_search for pattern markers (e.g., track, analytics, test, describe, log). Find 3-5 reference implementations similar to your task. Look for shared utilities, helpers, or base classes. Check for pattern documentation in README or docs",
+                        "PATTERN EXTRACTION: Document file structure and naming conventions, common imports and dependencies, implementation approach (functional vs class-based), error handling patterns, configuration patterns, and testing approach for this type of feature",
+                        "PATTERN VALIDATION: List discovered patterns with file references. Note any conflicting patterns or multiple approaches. Ask user: 'Found these pattern examples: [list files]. Should I follow these patterns, or are there other preferred examples?' Set establishedPatterns context variable",
+                        "FOCUS ON REUSABILITY: Patterns help avoid reinventing the wheel. Look especially for utility functions, base classes, shared components, or established conventions that should be followed"
                     ],
+                    "runCondition": {"var": "analysisStep", "equals": 4},
                     "requireConfirmation": false
                 }
             ],

package/workflows/exploration-workflow.json CHANGED

@@ -1,8 +1,8 @@
 {
     "id": "exploration-workflow",
     "name": "Comprehensive Adaptive Exploration Workflow",
-    "version": "0.0.1",
-    "description": "A sophisticated workflow for systematically exploring and determining optimal approaches to accomplish tasks or solve problems. Features adaptive complexity paths, intelligent clarification, devil's advocate analysis, automation levels, failure bounds, and comprehensive documentation for production-ready research.",
+    "version": "0.1.0",
+    "description": "An enterprise-grade exploration workflow featuring multi-phase research loops with saturation detection, evidence-based validation, diverse solution generation, and adversarial challenge patterns. Adapts methodology based on domain type (technical/business/creative) while ensuring depth through triangulation, confidence scoring, and systematic quality gates.",
     "clarificationPrompts": [
         "What specific task, problem, or question do you need to explore?",
         "What constraints or requirements should guide the exploration? (time, budget, technical, etc.)",
@@ -13,22 +13,26 @@
     ],
     "preconditions": [
         "User has a clear task, problem, or question to explore",
-        "Agent has access to research tools (web search, codebase search, etc.)",
         "User can provide initial context, constraints, or requirements",
         "Agent can maintain context variables throughout the workflow"
     ],
     "metaGuidance": [
-        "This workflow follows ANALYZE -> CLARIFY -> RESEARCH -> EVALUATE -> RECOMMEND pattern with dynamic re-triage capabilities.",
-        "Automation levels (Low/Medium/High) control confirmation requirements to balance thoroughness with efficiency.",
-        "Dynamic re-triage allows complexity upgrades and safe downgrades based on new insights from research.",
-        "Always base recommendations on evidence from multiple sources with quantified evaluations.",
-        "Context documentation is maintained throughout to enable seamless handoffs between chat sessions.",
-        "Failure bounds prevent analysis paralysis: word limits (2000), step tracking (>15), and escalation protocols.",
-        "Include trade-offs, pros, cons, and alternatives for transparency and informed decision-making.",
-        "Document all sources, methodology, and reasoning for reproducibility and validation.",
-        "Limit exploration depth based on complexity to prevent resource waste while ensuring thoroughness.",
-        "Human approval is required after Devil's Advocate review and before final recommendations.",
-        "Always provide actionable next steps and implementation guidance, not just theoretical analysis."
+        "FUNCTION DEFINITIONS: fun trackEvidence(source, grade) = 'Add to context.evidenceLog[] with {source, grade, timestamp}. Grade: High (peer-reviewed/official), Medium (expert/established), Low (anecdotal/emerging)'",
+        "fun checkSaturation() = 'Calculate novelty score: (new_insights / total_insights). If <0.1 for last 3 iterations, set context.saturationReached=true'",
+        "fun generateSolution(index, approach) = 'Create solution in context.solutions[index] with {approach, evidence, confidence, tradeoffs, risks}'",
+        "fun calculateConfidence() = '(0.4 × evidenceStrength) + (0.3 × triangulation) + (0.2 × sourceDiversity) + (0.1 × recency). Result in context.confidenceScores[]'",
+        "fun triggerDeepDive() = 'If confidence < 0.7 OR evidenceGaps.length > 0 OR contradictions found, set context.needsDeepDive=true'",
+        "CONTEXT ARCHITECTURE: Track explorationDomain (technical/business/creative), solutions[], evidenceLog[], confidenceScores[], researchPhases[], currentPhase, saturationMetrics, contradictions[], evidenceGaps[]",
+        "EVIDENCE STANDARDS: Minimum 3 sources per key claim (from available sources: web, agent knowledge, user environment), at least 1 contrasting perspective required, formal grading using adapted RAND scale (High/Medium/Limited)",
+        "SOLUTION DIVERSITY: Generate minimum 5 solutions: Quick/Simple, Thorough/Proven, Creative/Novel, Optimal/Balanced, Contrarian/Alternative",
+        "VALIDATION GATES: Phase transitions require validation; solutions need confidence ≥0.7; evidence must pass triangulation check",
+        "This workflow follows ANALYZE -> CLARIFY -> RESEARCH (loop) -> GENERATE (divergent) -> EVALUATE (convergent) -> CHALLENGE -> RECOMMEND pattern.",
+        "Automation levels (Low/Medium/High) control confirmation requirements. High: auto-proceed if confidence >0.8",
+        "Dynamic re-triage allows complexity upgrades and safe downgrades based on research insights and saturation metrics.",
+        "TOOL ADAPTATION: Workflow adapts to available tools. Check MCPs and adjust strategy based on what's available.",
+        "Context documentation updated at phase boundaries. Include function definitions for resumption.",
+        "Failure bounds: word limits (2000), max iterations (5 per loop), total steps (>20 triggers review).",
+        "Human approval required after adversarial challenge and before final recommendations."
     ],
     "steps": [
         {
@@ -46,6 +50,32 @@
             ],
             "requireConfirmation": true
         },
+        {
+            "id": "phase-0a-user-context",
+            "title": "Phase 0a: User Context & Preferences Check",
+            "prompt": "**GATHER USER CONTEXT**: Before proceeding, check for relevant user preferences, rules, and past decisions that should influence this exploration.\n\n**CHECK FOR:**\n1. **User Rules/Preferences**: Use memory tools to check for:\n   - Organizational standards or guidelines\n   - Preferred technologies or approaches\n   - Constraints or requirements from past decisions\n   - Specific methodologies or frameworks to follow/avoid\n\n2. **Environmental Context**:\n   - Current tech stack (if technical)\n   - Business constraints (budget, timeline, resources)\n   - Regulatory or compliance requirements\n   - Team capabilities and preferences\n\n3. **Historical Decisions**:\n   - Similar problems solved before\n   - Lessons learned from past explorations\n   - Established patterns to follow\n\n**ACTIONS:**\n1. Query memory/knowledge base for relevant rules\n2. Set context.userRules[] with applicable preferences\n3. Set context.constraints[] with hard requirements\n4. Note any past decisions that create precedent\n\nIf no specific rules found, note that and proceed with general best practices.",
+            "agentRole": "You are gathering user-specific context that will influence all subsequent exploration phases. Your role is to ensure the exploration aligns with the user's established preferences and constraints.",
+            "guidance": [
+                "This context check happens for all complexity levels",
+                "Rules and preferences should influence solution generation",
+                "Document which rules apply and why",
+                "If conflicts exist between rules and task requirements, flag for clarification"
+            ],
+            "requireConfirmation": false
+        },
+        {
+            "id": "phase-0b-domain-classification",
+            "title": "Phase 0b: Domain Classification & Tool Selection",
+            "prompt": "**CLASSIFY EXPLORATION DOMAIN**: Based on the task, classify the exploration into one of these domains:\n\n**Technical Domain:**\n- Code implementation, architecture design, debugging\n- Tool selection, framework comparison, performance optimization\n- Primary tools: codebase_search, grep_search (if available), technical documentation\n- Fallback: Agent's technical knowledge, architectural patterns from training\n\n**Business Domain:**\n- Strategy formulation, market analysis, process improvement\n- Cost-benefit analysis, resource allocation, risk assessment\n- Primary tools: web_search for market data (if available), case studies, industry reports\n- Fallback: Business frameworks and principles from agent knowledge\n\n**Creative Domain:**\n- Content creation, design systems, user experience\n- Innovation, brainstorming, conceptual development\n- Primary tools: web_search for inspiration (if available), trend analysis\n- Fallback: Creative methodologies and patterns from agent training\n\n**IMPLEMENT:**\n1. Analyze task characteristics\n2. Set context.explorationDomain = 'technical' | 'business' | 'creative'\n3. Set context.primaryTools[] based on domain\n4. Set context.evaluationCriteria[] appropriate for domain\n\n**DOMAIN-SPECIFIC SUCCESS METRICS:**\n- Technical: Feasibility, performance, maintainability, scalability\n- Business: ROI, time-to-value, risk mitigation, strategic alignment\n- Creative: Innovation, user satisfaction, aesthetics, differentiation",
+            "agentRole": "You are a domain classification specialist who identifies the nature of exploration tasks and configures appropriate methodologies, tools, and success criteria for each domain type.",
+            "guidance": [
+                "Some tasks may span domains - choose primary domain",
+                "This classification affects tool selection and evaluation criteria",
+                "Document reasoning for domain choice",
+                "Set domain-specific context variables for later steps"
+            ],
+            "requireConfirmation": false
+        },
         {
             "id": "phase-1-simple-lookup",
             "runCondition": {"var": "explorationComplexity", "equals": "Simple"},
@@ -152,6 +182,97 @@
                 ]
             }
         },
+        {
+            "id": "phase-2c-iterative-research-loop",
+            "type": "loop",
+            "title": "Phase 2c: Multi-Phase Deep Research with Saturation Detection",
+            "runCondition": {"var": "explorationComplexity", "not_equals": "Simple"},
+            "loop": {
+                "type": "for",
+                "count": 5,
+                "maxIterations": 5,
+                "iterationVar": "researchPhase"
+            },
+            "body": [
+                {
+                    "id": "research-phase-1-broad",
+                    "title": "Research Phase 1/5: Broad Scan",
+                    "runCondition": { "var": "researchPhase", "equals": 1 },
+                    "prompt": "**OBJECTIVE**: Cast a wide net to map the solution landscape, identify key themes, and find conflicting viewpoints.",
+                    "agentRole": "Systematic Researcher: Broad Scan Specialist",
+                    "guidance": [
+                        "Use multiple search strategies (e.g., 'how to [task]', 'alternatives to [tool]').",
+                        "Identify 3-5 high-level solution categories.",
+                        "Note sources that directly conflict with each other.",
+                        "ACTIONS: Update context.evidenceLog[], context.broadScanThemes[], context.contradictions[]"
+                    ]
+                },
+                {
+                    "id": "research-phase-2-deep-dive",
+                    "title": "Research Phase 2/5: Deep Dive",
+                    "runCondition": { "var": "researchPhase", "equals": 2 },
+                    "prompt": "**OBJECTIVE**: Focus on the most promising themes from the broad scan. Investigate technical details, find implementation examples, and assess feasibility.",
+                    "agentRole": "Systematic Researcher: Deep Dive Analyst",
+                    "guidance": [
+                        "Focus on the themes in context.broadScanThemes[].",
+                        "Find specific, real-world implementation examples or case studies.",
+                        "Assess complexity, dependencies, and requirements for each.",
+                        "ACTIONS: Update context.evidenceLog[], context.deepDiveFindings[]"
+                    ]
+                },
+                {
+                    "id": "research-phase-3-contrarian",
+                    "title": "Research Phase 3/5: Contrarian Research",
+                    "runCondition": { "var": "researchPhase", "equals": 3 },
+                    "prompt": "**OBJECTIVE**: Actively seek out opposing viewpoints, failure cases, and critiques of the promising solutions. The goal is to challenge assumptions.",
+                    "agentRole": "Systematic Researcher: Devil's Advocate",
+                    "guidance": [
+                        "Search for '[solution] problems', '[approach] failures', 'why not use [tool]'.",
+                        "Identify hidden assumptions in the mainstream approaches.",
+                        "Look for entirely different paradigms that were missed.",
+                        "ACTIONS: Update context.evidenceLog[], context.contrarianEvidence[]"
+                    ]
+                },
+                {
+                    "id": "research-phase-4-synthesis",
+                    "title": "Research Phase 4/5: Evidence Synthesis",
+                    "runCondition": { "var": "researchPhase", "equals": 4 },
+                    "prompt": "**OBJECTIVE**: Consolidate all findings. Resolve contradictions, identify patterns, and build a coherent narrative of the solution landscape.",
+                    "agentRole": "Systematic Researcher: Synthesizer",
+                    "guidance": [
+                        "Review evidence from all previous phases.",
+                        "Where sources conflict, try to understand the reason for the disagreement.",
+                        "Build a framework or matrix to compare the approaches.",
+                        "ACTIONS: Update context.synthesisFramework, context.evidenceGaps[]"
+                    ]
+                },
+                {
+                    "id": "research-phase-5-gap-filling",
+                    "title": "Research Phase 5/5: Gap Filling & Closure",
+                    "runCondition": { "var": "researchPhase", "equals": 5 },
+                    "prompt": "**OBJECTIVE**: Address the specific, critical unknowns identified during synthesis. Verify key assumptions and prepare for solution generation.",
+                    "agentRole": "Systematic Researcher: Finisher",
+                    "guidance": [
+                        "Focus only on the critical gaps listed in context.evidenceGaps[].",
+                        "Perform targeted searches to answer these specific questions.",
+                        "This is the final research step. The goal is to be 'done', not perfect.",
+                        "ACTIONS: Update context.evidenceLog[], set context.researchComplete = true"
+                    ]
+                },
+                {
+                    "id": "research-phase-validation",
+                    "title": "Validation: Research Quality Check",
+                    "prompt": "**OBJECTIVE**: After each research phase, perform a quick quality check.",
+                    "agentRole": "Quality Analyst",
+                    "guidance": [
+                        "EVIDENCE CHECK: Have we gathered at least 3 new sources in this phase (unless it was the gap-filling phase)?",
+                        "QUALITY CHECK: Is there at least one 'High' or 'Medium' grade source?",
+                        "SATURATION CHECK: Use checkSaturation() to assess if we are still gathering novel information. If not, we can consider exiting the loop early by setting context.researchComplete = true.",
+                        "ACTIONS: Update context.qualityMetrics[]"
+                    ]
+                }
+            ]
+        },
         {
             "id": "phase-3-context-documentation",
             "runCondition": {"var": "explorationComplexity", "not_equals": "Simple"},
@@ -168,11 +289,71 @@
             ],
             "requireConfirmation": false
         },
+        {
+            "id": "phase-3a-prepare-solutions",
+            "title": "Phase 3a: Prepare Solution Generation",
+            "runCondition": {"var": "explorationComplexity", "not_equals": "Simple"},
+            "prompt": "**PREPARE SOLUTION GENERATION**\n\nBased on your research findings, prepare for systematic solution generation:\n\n**SETUP TASKS:**\n1. Review research synthesis from Phase 2c\n2. Identify top solution categories/approaches\n3. Create solution generation framework\n\n**CREATE SOLUTION APPROACHES ARRAY:**\nSet context.solutionApproaches with these 5 types:\n```json\n[\n  {\"type\": \"Quick/Simple\", \"focus\": \"Minimal time, proven approaches, immediate value\"},\n  {\"type\": \"Thorough/Proven\", \"focus\": \"Best practices, comprehensive, long-term sustainability\"},\n  {\"type\": \"Creative/Novel\", \"focus\": \"Innovation, emerging tech, competitive advantage\"},\n  {\"type\": \"Optimal/Balanced\", \"focus\": \"Best trade-offs, practical yet forward-thinking\"},\n  {\"type\": \"Contrarian/Alternative\", \"focus\": \"Challenge assumptions, overlooked approaches\"}\n]\n```\n\n**Also set:**\n- context.solutionCriteria[] from research findings\n- context.evaluationFramework for comparing solutions\n- context.userConstraints from the context.userRules[] and context.constraints[] gathered in Phase 0a\n\n**This enables the next loop to generate each solution type systematically.**",
+            "agentRole": "You are preparing the solution generation phase by creating a structured framework based on research findings.",
+            "guidance": [
+                "This step makes the loop cleaner by preparing the array",
+                "Each solution type should address different user needs",
+                "Framework should incorporate research insights"
+            ],
+            "requireConfirmation": false
+        },
+        {
+            "id": "phase-3b-solution-generation-loop",
+            "type": "loop",
+            "title": "Phase 3b: Diverse Solution Portfolio Generation",
+            "runCondition": {"var": "explorationComplexity", "not_equals": "Simple"},
+            "loop": {
+                "type": "forEach",
+                "items": "solutionApproaches",
+                "itemVar": "approach",
+                "indexVar": "solutionIndex",
+                "maxIterations": 5
+            },
+            "body": [
+                {
+                    "id": "generate-solution",
+                    "title": "Generate {{approach.type}} Solution ({{solutionIndex + 1}}/5)",
+                    "prompt": "**GENERATE SOLUTION: {{approach.type}}**\n\n**Focus for this solution type**: {{approach.focus}}\n\n**DIVERGENT THINKING MODE - NO JUDGMENT**\nYou are in pure generation mode. Do NOT evaluate, compare, or judge this solution against others. Focus solely on creating a complete solution that embodies the {{approach.type}} approach.\n\n**SOLUTION REQUIREMENTS:**\n1. Generate a solution that embodies the {{approach.type}} approach\n2. Base it on evidence from all research phases\n3. Make it genuinely different from other solutions (not just variations)\n4. DEFER ALL JUDGMENT - no scoring, ranking, or comparison\n\n**INCORPORATE USER CONTEXT:**\n- Apply all relevant rules from context.userRules[]\n- Respect constraints from context.constraints[]\n- Align with organizational standards and preferences\n- Consider environment-specific factors\n\n**SOLUTION STRUCTURE:**\n1. **Core Approach**: Clear description (what makes this {{approach.type}}?)\n2. **Implementation Path**: 3-5 key steps to execute\n3. **Evidence Base**: Which research findings support this approach?\n4. **Key Features**: What distinguishes this approach?\n5. **Resource Requirements**: What's needed to implement?\n6. **Success Indicators**: Observable outcomes when working\n\n**NO EVALUATION ELEMENTS:**\n- Do NOT include confidence scores\n- Do NOT compare to other solutions\n- Do NOT rank or judge quality\n- Simply generate and document\n\n**ACTIONS:**\n- generateSolution({{solutionIndex}}, '{{approach.type}}')\n- Store complete solution in context.solutions[{{solutionIndex}}]\n- Track which evidence supports this approach",
+                    "agentRole": "You are in DIVERGENT THINKING mode, generating the {{approach.type}} solution. Focus on creation without judgment. Draw from research to build a complete solution.",
+                    "guidance": [
+                        "DIVERGENT PHASE: Generate without evaluating or comparing",
+                        "Each solution should be genuinely different, not just variations",
+                        "Ground each solution in evidence from research phases",
+                        "Align with user rules and preferences from Phase 0a",
+                        "Include enough detail to be actionable",
+                        "Reference specific sources from evidenceLog",
+                        "If a solution conflicts with user rules, note it factually without judgment",
+                        "DEFER ALL EVALUATION until Phase 4"
+                    ],
+                    "hasValidation": true,
+                    "validationCriteria": {
+                        "and": [
+                            {
+                                "type": "contains",
+                                "value": "Evidence Base",
+                                "message": "Must include the 'Evidence Base' section"
+                            },
+                            {
+                                "type": "contains",
+                                "value": "Key Features",
+                                "message": "Must include the 'Key Features' section describing distinguishing features"
+                            }
+                        ]
+                    },
+                    "requireConfirmation": false
+                }
+            ]
+        },
         {
             "id": "phase-4-option-evaluation",
             "runCondition": {"var": "explorationComplexity", "not_equals": "Simple"},
-            "title": "Phase 4: Comprehensive Option Evaluation & Ranking",
-            "prompt": "**PREP**: Define evaluation criteria based on clarified requirements, constraints, and priorities.\n\n**IMPLEMENT**: \n1. Create weighted scoring matrix with 4-6 evaluation criteria based on clarifications\n2. Score each option quantitatively (1-10 scale) with detailed rationale\n3. Calculate weighted scores and rank options\n4. Perform sensitivity analysis on key criteria weights\n5. Identify decision breakpoints and scenario dependencies\n6. Document evaluation methodology and assumptions\n\n**VERIFY**: Ensure evaluation is objective, comprehensive, and incorporates all clarified priorities.",
+            "title": "Phase 4: CONVERGENT THINKING - Option Evaluation & Ranking",
+            "prompt": "**TRANSITION TO CONVERGENT THINKING MODE**\n\nThe divergent generation phase is complete. Now shift to analytical, convergent thinking to systematically evaluate all solutions.\n\n**CONVERGENT THINKING PRINCIPLES:**\n- This is NOW the time for judgment and comparison\n- Apply critical analysis to all generated solutions\n- Use evidence-based evaluation criteria\n- Be rigorous and systematic\n\n**PREP**: Define evaluation criteria based on clarified requirements, constraints, and priorities.\n\n**IMPLEMENT**: \n1. Create weighted scoring matrix with 4-6 evaluation criteria based on clarifications\n2. Score each option quantitatively (1-10 scale) with detailed rationale\n3. Calculate weighted scores and rank options\n4. Perform sensitivity analysis on key criteria weights\n5. Identify decision breakpoints and scenario dependencies\n6. Document evaluation methodology and assumptions\n\n**VERIFY**: Ensure evaluation is objective, comprehensive, and incorporates all clarified priorities.",
             "agentRole": "You are an objective decision analyst expert in multi-criteria evaluation and quantitative assessment. Your expertise lies in translating qualitative factors into structured, defensible evaluations.",
             "guidance": [
                 "Use at least 4-6 evaluation criteria based on clarifications",
@@ -200,7 +381,7 @@
             "id": "phase-4b-devil-advocate-review",
             "runCondition": {"var": "explorationComplexity", "not_equals": "Simple"},
             "title": "Phase 4b: Devil's Advocate Evaluation Review",
-            "prompt": "Perform a 'devil's advocate' review of your option evaluation from Phase 4. The objective is to rigorously stress-test your analysis and strengthen the final recommendation. Your critique must be balanced and evidence-based.\n\nAnalyze the evaluation through these lenses, citing specific evidence:\n\n1. **Hidden Assumptions**: What assumptions does your evaluation make about user context, implementation reality, or future conditions that might be incorrect?\n2. **Evaluation Bias**: Are there systematic biases in your scoring? Do criteria weights reflect stated priorities? Are any important factors missing?\n3. **Option Blind Spots**: What alternatives or hybrid approaches might you have overlooked? Are there emerging options not fully considered?\n4. **Risk Assessment**: What are the biggest risks of the top-ranked option? What could go wrong that isn't reflected in the scoring?\n5. **Evaluation Strengths**: What aspects of your analysis are most robust and reliable? What gives you confidence in the methodology?\n\nConclude with balanced summary. If you found issues, provide concrete suggestions for improving the evaluation. **Set the confidenceScore variable to your 1-10 rating** for the evaluation quality *if* suggestions are implemented.",
+            "prompt": "Perform a rigorous 'devil's advocate' review of your solutions and evaluation. This is a mandatory adversarial self-challenge to prevent overconfidence and blind spots.\n\n**STRUCTURED ADVERSARIAL ANALYSIS:**\n\n1. **Evidence Challenge**: For each solution's top 3 claims:\n   - Is the evidence truly supporting this claim?\n   - Are there contradicting sources we dismissed?\n   - What evidence grade did we assign vs. what it deserves?\n\n2. **Hidden Failure Modes**: For the top-ranked solution:\n   - What could cause catastrophic failure?\n   - What assumptions could be completely wrong?\n   - What context changes would invalidate this approach?\n\n3. **Overlooked Alternatives**:\n   - What hybrid approaches could combine solution strengths?\n   - What completely different paradigm did we miss?\n   - Are we solving the right problem?\n\n4. **Bias Detection**:\n   - Did we favor familiar over novel?\n   - Did recent sources overshadow established wisdom?\n   - Did domain bias affect our evaluation?\n\n5. **Confidence Calibration**:\n   - Where are we overconfident?\n   - What unknowns are we treating as knowns?\n   - calculateConfidence() with penalty for identified weaknesses\n\n**OUTPUT REQUIREMENTS:**\n- Identify at least 3 significant concerns\n- Propose specific remedies for each\n- Re-calculate confidence scores\n- Set context.confidenceScore (1-10) for overall analysis quality\n- Set context.criticalIssues[] with must-address items\n\ntriggerDeepDive() if any re-calculated calculateConfidence() score (0-1 scale, context.confidenceScores[]) drops below 0.7; this threshold does not apply to the 1-10 context.confidenceScore rating",
             "agentRole": "You are a skeptical but fair senior research analyst with 15+ years of experience in strategic decision analysis. Your role is to identify potential blind spots, biases, and overlooked factors in evaluation methodologies. You excel at constructive criticism that strengthens analysis rather than destroys it.",
             "guidance": [
                 "This is a critical thinking step to find weaknesses in your own analysis",

package/workflows/systemic-bug-investigation-with-loops.json CHANGED Viewed

@@ -209,21 +209,67 @@
             },
             "body": [
                 {
-                    "id": "analysis-iteration",
-                    "title": "Analysis {{analysisPhase}}/4",
-                    "prompt": "{{analysisPhase === 1 ? '**BREADTH SCAN**\\n\\n1. **Error Mapping**: grep_search errors, trace logs, map stack traces\\n2. **Component Discovery**: Find all interacting components using codebase_search\\n3. **Data Flow**: Trace data through bug area, transformations, persistence\\n4. **Recent Changes**: Git history last 10 commits\\n\\n**Output**: BreadthAnalysis.md with interaction map' : analysisPhase === 2 ? '**COMPONENT DEEP DIVE**\\n\\nUse recursiveAnalysis(component, 3) on top 5 suspicious components:\\n\\n1. **L1 Direct**: Read complete file, state management, error handling\\n2. **L2 Dependencies**: Follow imports, contracts, version compatibility\\n3. **L3 Integration**: System fit, side effects, concurrency, resources\\n\\n**Output**: ComponentAnalysis.md with deep insights' : analysisPhase === 3 ? '**DEPENDENCY & FLOW ANALYSIS**\\n\\n1. **Static Graph**: Import tree, circular deps, hidden dependencies\\n2. **Runtime Flow**: Execution paths, async flows, state changes\\n3. **Data Pipeline**: Track transformations, validation, corruption points\\n4. **Integration**: External services, DB, queues, filesystem\\n\\n**Output**: FlowAnalysis.md with diagrams' : '**TEST COVERAGE ANALYSIS**\\n\\nUse analyzeTests(component) for each suspicious component:\\n\\n1. **Direct Coverage**: Find tests, analyze coverage gaps, quality\\n2. **Integration Tests**: Bug area tests, assumptions, flaky tests\\n3. **History**: When added/modified, correlation with bug\\n4. **Debug Execution**: Run with debug flags, instrument, compare\\n\\n**Output**: TestAnalysis.md with coverage gaps matrix'}}",
-                    "agentRole": "You are performing systematic analysis phase {{analysisPhase}} of 4. Your focus is {{analysisPhase === 1 ? 'casting a wide net to find all potentially related components' : analysisPhase === 2 ? 'deep diving into the most suspicious components to understand their internals' : analysisPhase === 3 ? 'tracing how components connect and data flows between them' : 'leveraging existing tests to understand expected behavior and find coverage gaps'}}.",
+                    "id": "analysis-breadth-scan",
+                    "title": "Analysis 1/4: Breadth Scan",
+                    "prompt": "**BREADTH SCAN**\n\n1. **Error Mapping**: grep_search errors, trace logs, map stack traces\n2. **Component Discovery**: Find all interacting components using codebase_search\n3. **Data Flow**: Trace data through bug area, transformations, persistence\n4. **Recent Changes**: Git history last 10 commits\n\n**Output**: BreadthAnalysis.md with interaction map",
+                    "agentRole": "You are performing systematic analysis phase 1 of 4. Your focus is casting a wide net to find all potentially related components.",
                     "guidance": [
-                        "This is analysis phase {{analysisPhase}} of 4 total phases",
-                        "Phase 1 = Breadth Scan, Phase 2 = Deep Dive, Phase 3 = Dependencies, Phase 4 = Tests",
-                        "Each phase builds on previous findings",
-                        "Create a structured markdown file for each phase output",
+                        "This is analysis phase 1 of 4 total phases",
+                        "Phase 1 = Breadth Scan - Cast wide net for all related components",
+                        "Create BreadthAnalysis.md with structured findings",
+                        "Use findSimilarBugs() to search for historical patterns",
                         "Use the function definitions for standardized operations",
-                        "If you discover the bug's root cause with high confidence, note it but complete all analysis phases for thoroughness",
-                        "Update INVESTIGATION_CONTEXT.md after each phase: use updateInvestigationContext('Analysis Findings', phase-specific findings)",
-                        "In Phase 1 (Breadth Scan): Use findSimilarBugs() to search for historical patterns",
-                        "After all 4 phases complete, use trackInvestigation('Phase 1 Complete', 'Moving to Hypothesis Development')"
+                        "Update INVESTIGATION_CONTEXT.md after completion"
                     ],
+                    "runCondition": {"var": "analysisPhase", "equals": 1},
+                    "requireConfirmation": false
+                },
+                {
+                    "id": "analysis-deep-dive",
+                    "title": "Analysis 2/4: Component Deep Dive",
+                    "prompt": "**COMPONENT DEEP DIVE**\n\nUse recursiveAnalysis(component, 3) on top 5 suspicious components:\n\n1. **L1 Direct**: Read complete file, state management, error handling\n2. **L2 Dependencies**: Follow imports, contracts, version compatibility\n3. **L3 Integration**: System fit, side effects, concurrency, resources\n\n**Output**: ComponentAnalysis.md with deep insights",
+                    "agentRole": "You are performing systematic analysis phase 2 of 4. Your focus is deep diving into the most suspicious components to understand their internals.",
+                    "guidance": [
+                        "This is analysis phase 2 of 4 total phases",
+                        "Phase 2 = Deep Dive - Analyze suspicious components 3 levels deep",
+                        "Build on findings from Phase 1 Breadth Scan",
+                        "Create ComponentAnalysis.md with structured findings",
+                        "Use recursiveAnalysis() for systematic exploration",
+                        "Update INVESTIGATION_CONTEXT.md after completion"
+                    ],
+                    "runCondition": {"var": "analysisPhase", "equals": 2},
+                    "requireConfirmation": false
+                },
+                {
+                    "id": "analysis-dependencies",
+                    "title": "Analysis 3/4: Dependencies & Flow",
+                    "prompt": "**DEPENDENCY & FLOW ANALYSIS**\n\n1. **Static Graph**: Import tree, circular deps, hidden dependencies\n2. **Runtime Flow**: Execution paths, async flows, state changes\n3. **Data Pipeline**: Track transformations, validation, corruption points\n4. **Integration**: External services, DB, queues, filesystem\n\n**Output**: FlowAnalysis.md with diagrams",
+                    "agentRole": "You are performing systematic analysis phase 3 of 4. Your focus is tracing how components connect and data flows between them.",
+                    "guidance": [
+                        "This is analysis phase 3 of 4 total phases",
+                        "Phase 3 = Dependencies - Trace connections and data flows",
+                        "Build on component understanding from Phase 2",
+                        "Create FlowAnalysis.md with diagrams and flow charts",
+                        "Focus on runtime behavior and integration points",
+                        "Update INVESTIGATION_CONTEXT.md after completion"
+                    ],
+                    "runCondition": {"var": "analysisPhase", "equals": 3},
+                    "requireConfirmation": false
+                },
+                {
+                    "id": "analysis-test-coverage",
+                    "title": "Analysis 4/4: Test Coverage",
+                    "prompt": "**TEST COVERAGE ANALYSIS**\n\nUse analyzeTests(component) for each suspicious component:\n\n1. **Direct Coverage**: Find tests, analyze coverage gaps, quality\n2. **Integration Tests**: Bug area tests, assumptions, flaky tests\n3. **History**: When added/modified, correlation with bug\n4. **Debug Execution**: Run with debug flags, instrument, compare\n\n**Output**: TestAnalysis.md with coverage gaps matrix",
+                    "agentRole": "You are performing systematic analysis phase 4 of 4. Your focus is leveraging existing tests to understand expected behavior and find coverage gaps.",
+                    "guidance": [
+                        "This is analysis phase 4 of 4 total phases",
+                        "Phase 4 = Tests - Analyze test coverage and quality",
+                        "Build on all previous analysis phases",
+                        "Create TestAnalysis.md with coverage gap matrix",
+                        "Run tests with debug flags for additional insights",
+                        "After completion, use trackInvestigation('Phase 1 Complete', 'Moving to Hypothesis Development')"
+                    ],
+                    "runCondition": {"var": "analysisPhase", "equals": 4},
                     "requireConfirmation": false
                 }
             ],
@@ -415,7 +461,7 @@
                     "prompt": "**DEBUGGING INSTRUMENTATION for {{currentHypothesis.id}}**\n\n**Hypothesis**: {{currentHypothesis.description}}\n\n**IMPLEMENT SMART LOGGING**:\n\n1. **Standard Format**: Use instrumentCode(location, '{{currentHypothesis.id}}')\n   ```\n   className.methodName [{{currentHypothesis.id}}] {timestamp}: Specific message\n   ```\n\n2. **Deduplication Implementation**:\n   ```javascript\n   // Add to each instrumentation point\n   const debugState = { lastMsg: '', count: 0 };\n   function smartLog(msg) {\n     if (debugState.lastMsg === msg) {\n       debugState.count++;\n       if (debugState.count % 10 === 0) {\n         console.log(`[{{currentHypothesis.id}}] ${msg} x${debugState.count}`);\n       }\n     } else {\n       if (debugState.count > 1) {\n         console.log(`[{{currentHypothesis.id}}] Previous message x${debugState.count}`);\n       }\n       console.log(`[{{currentHypothesis.id}}] ${msg}`);\n       debugState.lastMsg = msg;\n       debugState.count = 1;\n     }\n   }\n   ```\n\n3. **Operation Grouping**:\n   ```javascript\n   console.log(`=== {{currentHypothesis.id}}: Operation ${opName} Start ===`);\n   const startTime = Date.now();\n   // ... operation code with smartLog() calls ...\n   console.log(`=== {{currentHypothesis.id}}: Operation ${opName} End (${Date.now() - startTime}ms) ===`);\n   ```\n\n4. **Test Instrumentation**:\n   - Add debugging to relevant test files\n   - Instrument test setup/teardown\n   - Log test assumptions vs actual behavior\n\n5. **High-Frequency Aggregation**:\n   - For loops/iterations, log summary every 100 iterations\n   - For events, create time-window summaries\n   - Track unique values and their counts\n\n**OUTPUT**: Instrumented code ready to produce clean, manageable logs for {{currentHypothesis.id}}",
                     "agentRole": "You are instrumenting code specifically to validate hypothesis {{currentHypothesis.id}}. Focus on targeted evidence collection.",
                     "guidance": [
-                        "This is hypothesis {{hypothesisIndex + 1}} of {{hypothesesToValidate.length}}",
+                        "This is hypothesis {{hypothesisIndex + 1}} of 3",
                         "Tailor instrumentation to the specific hypothesis",
                         "Ensure non-intrusive implementation"
                     ],
@@ -436,7 +482,7 @@
                 {
                     "id": "loop-phase-5-synthesis",
                     "title": "Phase 5: Evidence Synthesis for {{currentHypothesis.id}}",
-                    "prompt": "**EVIDENCE SYNTHESIS for {{currentHypothesis.id}}**\n\n**Synthesize findings:**\n1. **Evidence Summary**: What did we learn about {{currentHypothesis.id}}?\n2. **Confidence Update**: Based on evidence, rate confidence this is the root cause (0-10)\n3. **Status Update**: Mark hypothesis as Confirmed/Refuted/Needs-More-Evidence\n\n**If {{currentHypothesis.id}} is confirmed with high confidence (>8.0):**\n- Set `rootCauseFound` = true\n- Set `rootCauseHypothesis` = {{currentHypothesis.id}}\n- Update `currentConfidence` with confidence score\n\n**If all hypotheses validated but confidence <9.0:**\n- Consider additional investigation needs\n- Document what evidence is still missing\n\n**Context Update**:\n- Use updateInvestigationContext('Evidence Log', evidence summary for {{currentHypothesis.id}})\n- Every 3 iterations: Use trackInvestigation('Validation Progress', '{{hypothesisIndex + 1}}/{{hypothesesToValidate.length}} hypotheses validated')",
+                    "prompt": "**EVIDENCE SYNTHESIS for {{currentHypothesis.id}}**\n\n**Synthesize findings:**\n1. **Evidence Summary**: What did we learn about {{currentHypothesis.id}}?\n2. **Confidence Update**: Based on evidence, rate confidence this is the root cause (0-10)\n3. **Status Update**: Mark hypothesis as Confirmed/Refuted/Needs-More-Evidence\n\n**If {{currentHypothesis.id}} is confirmed with high confidence (>8.0):**\n- Set `rootCauseFound` = true\n- Set `rootCauseHypothesis` = {{currentHypothesis.id}}\n- Update `currentConfidence` with confidence score\n\n**If all hypotheses validated but confidence <9.0:**\n- Consider additional investigation needs\n- Document what evidence is still missing\n\n**Context Update**:\n- Use updateInvestigationContext('Evidence Log', evidence summary for {{currentHypothesis.id}})\n- Every 3 iterations: Use trackInvestigation('Validation Progress', '{{hypothesisIndex + 1}}/3 hypotheses validated')",
                     "agentRole": "You are synthesizing evidence to determine if {{currentHypothesis.id}} is the root cause.",
                     "guidance": [
                         "Update hypothesis status based on evidence",
@@ -461,7 +507,7 @@
                 "var": "currentConfidence",
                 "lt": 8.0
             },
-            "prompt": "**CONTROLLED EXPERIMENTATION** - When observation isn't enough, experiment!\n\n**Current Top Hypothesis**: {{hypothesesToValidate[0].id}} (Confidence: {{currentConfidence}}/10)\n\n**EXPERIMENT TYPES** (use controlledModification()):\n\n1. **Guard Additions (Non-Breaking)**:\n   ```javascript\n   // Add defensive check that logs but doesn't change behavior\n   if (unexpectedCondition) {\n     console.error('[H1_GUARD] Unexpected state detected:', state);\n     // Continue normal execution\n   }\n   ```\n\n2. **Assertion Injections**:\n   ```javascript\n   // Add assertion that would fail if hypothesis is correct\n   console.assert(expectedCondition, '[H1_ASSERT] Hypothesis H1 violated!');\n   ```\n\n3. **Minimal Fix Test**:\n   ```javascript\n   // Apply minimal fix for hypothesis, see if bug disappears\n   if (process.env.DEBUG_FIX_H1 === 'true') {\n     // Apply hypothesized fix\n     return fixedBehavior();\n   }\n   ```\n\n4. **Controlled Breaking**:\n   ```javascript\n   // Temporarily break suspected component to verify involvement\n   if (process.env.DEBUG_BREAK_H1 === 'true') {\n     throw new Error('[H1_BREAK] Intentionally breaking to test hypothesis');\n   }\n   ```\n\n**PROTOCOL**:\n1. Choose experiment type based on confidence and risk\n2. Implement modification with clear DEBUG markers\n3. Use createInvestigationBranch() if not already on investigation branch\n4. Commit: `git commit -m \"DEBUG: {{experiment_type}} for {{currentHypothesis.id}}\"`\n5. Run reproduction steps\n6. Use collectEvidence() to gather results\n7. Revert changes: `git revert HEAD`\n8. Document results in ExperimentResults/{{currentHypothesis.id}}.md\n\n**SAFETY LIMITS**:\n- Max 3 experiments per hypothesis\n- Each experiment in separate commit\n- Always revert after evidence collection\n- Document everything in INVESTIGATION_CONTEXT.md\n\n**UPDATE**:\n- Hypothesis confidence based on experimental results\n- Use updateInvestigationContext('Experiment Results', experiment details and outcomes)\n- Track failed experiments in 'Dead Ends & Lessons' section",
+            "prompt": "**CONTROLLED EXPERIMENTATION** - When observation isn't enough, experiment!\n\n**Current Investigation Status**: Leading hypothesis (Confidence: {{currentConfidence}}/10)\n\n**EXPERIMENT TYPES** (use controlledModification()):\n\n1. **Guard Additions (Non-Breaking)**:\n   ```javascript\n   // Add defensive check that logs but doesn't change behavior\n   if (unexpectedCondition) {\n     console.error('[H1_GUARD] Unexpected state detected:', state);\n     // Continue normal execution\n   }\n   ```\n\n2. **Assertion Injections**:\n   ```javascript\n   // Add assertion that would fail if hypothesis is correct\n   console.assert(expectedCondition, '[H1_ASSERT] Hypothesis H1 violated!');\n   ```\n\n3. **Minimal Fix Test**:\n   ```javascript\n   // Apply minimal fix for hypothesis, see if bug disappears\n   if (process.env.DEBUG_FIX_H1 === 'true') {\n     // Apply hypothesized fix\n     return fixedBehavior();\n   }\n   ```\n\n4. **Controlled Breaking**:\n   ```javascript\n   // Temporarily break suspected component to verify involvement\n   if (process.env.DEBUG_BREAK_H1 === 'true') {\n     throw new Error('[H1_BREAK] Intentionally breaking to test hypothesis');\n   }\n   ```\n\n**PROTOCOL**:\n1. Choose experiment type based on confidence and risk\n2. Implement modification with clear DEBUG markers\n3. Use createInvestigationBranch() if not already on investigation branch\n4. Commit: `git commit -m \"DEBUG: {{experiment_type}} for hypothesis investigation\"`\n5. Run reproduction steps\n6. Use collectEvidence() to gather results\n7. Revert changes: `git revert HEAD`\n8. Document results in ExperimentResults/hypothesis-experiment.md\n\n**SAFETY LIMITS**:\n- Max 3 experiments per hypothesis\n- Each experiment in separate commit\n- Always revert after evidence collection\n- Document everything in INVESTIGATION_CONTEXT.md\n\n**UPDATE**:\n- Hypothesis confidence based on experimental results\n- Use updateInvestigationContext('Experiment Results', experiment details and outcomes)\n- Track failed experiments in 'Dead Ends & Lessons' section",
             "agentRole": "You are a careful experimenter using controlled code modifications to validate hypotheses. Safety and reversibility are paramount.",
             "guidance": [
                 "Start with non-breaking experiments (guards, logs)",