npm - testchimp-runner-core - Versions diffs - 0.0.33 → 0.0.35 - Mend

testchimp-runner-core 0.0.33 → 0.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (152)

package/dist/execution-service.d.ts +1 -4
package/dist/execution-service.d.ts.map +1 -1
package/dist/execution-service.js +155 -468
package/dist/execution-service.js.map +1 -1
package/dist/index.d.ts +3 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +11 -1
package/dist/index.js.map +1 -1
package/dist/llm-facade.d.ts.map +1 -1
package/dist/llm-facade.js +7 -7
package/dist/llm-facade.js.map +1 -1
package/dist/llm-provider.d.ts +9 -0
package/dist/llm-provider.d.ts.map +1 -1
package/dist/model-constants.d.ts +16 -5
package/dist/model-constants.d.ts.map +1 -1
package/dist/model-constants.js +17 -6
package/dist/model-constants.js.map +1 -1
package/dist/orchestrator/decision-parser.d.ts +18 -0
package/dist/orchestrator/decision-parser.d.ts.map +1 -0
package/dist/orchestrator/decision-parser.js +127 -0
package/dist/orchestrator/decision-parser.js.map +1 -0
package/dist/orchestrator/index.d.ts +4 -2
package/dist/orchestrator/index.d.ts.map +1 -1
package/dist/orchestrator/index.js +15 -2
package/dist/orchestrator/index.js.map +1 -1
package/dist/orchestrator/orchestrator-agent.d.ts +17 -22
package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
package/dist/orchestrator/orchestrator-agent.js +708 -577
package/dist/orchestrator/orchestrator-agent.js.map +1 -1
package/dist/orchestrator/orchestrator-prompts.d.ts +32 -0
package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -0
package/dist/orchestrator/orchestrator-prompts.js +737 -0
package/dist/orchestrator/orchestrator-prompts.js.map +1 -0
package/dist/orchestrator/page-som-handler.d.ts +106 -0
package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
package/dist/orchestrator/page-som-handler.js +1353 -0
package/dist/orchestrator/page-som-handler.js.map +1 -0
package/dist/orchestrator/som-types.d.ts +149 -0
package/dist/orchestrator/som-types.d.ts.map +1 -0
package/dist/orchestrator/som-types.js +87 -0
package/dist/orchestrator/som-types.js.map +1 -0
package/dist/orchestrator/tool-registry.d.ts +2 -0
package/dist/orchestrator/tool-registry.d.ts.map +1 -1
package/dist/orchestrator/tool-registry.js.map +1 -1
package/dist/orchestrator/tools/index.d.ts +5 -1
package/dist/orchestrator/tools/index.d.ts.map +1 -1
package/dist/orchestrator/tools/index.js +9 -2
package/dist/orchestrator/tools/index.js.map +1 -1
package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
package/dist/orchestrator/tools/verify-action-result.d.ts +17 -0
package/dist/orchestrator/tools/verify-action-result.d.ts.map +1 -0
package/dist/orchestrator/tools/verify-action-result.js +140 -0
package/dist/orchestrator/tools/verify-action-result.js.map +1 -0
package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
package/dist/orchestrator/types.d.ts +49 -1
package/dist/orchestrator/types.d.ts.map +1 -1
package/dist/orchestrator/types.js +11 -1
package/dist/orchestrator/types.js.map +1 -1
package/dist/prompts.d.ts.map +1 -1
package/dist/prompts.js +40 -34
package/dist/prompts.js.map +1 -1
package/dist/scenario-service.d.ts +5 -0
package/dist/scenario-service.d.ts.map +1 -1
package/dist/scenario-service.js +17 -0
package/dist/scenario-service.js.map +1 -1
package/dist/scenario-worker-class.d.ts +4 -0
package/dist/scenario-worker-class.d.ts.map +1 -1
package/dist/scenario-worker-class.js +21 -3
package/dist/scenario-worker-class.js.map +1 -1
package/dist/testing/agent-tester.d.ts +35 -0
package/dist/testing/agent-tester.d.ts.map +1 -0
package/dist/testing/agent-tester.js +84 -0
package/dist/testing/agent-tester.js.map +1 -0
package/dist/testing/ref-translator-tester.d.ts +44 -0
package/dist/testing/ref-translator-tester.d.ts.map +1 -0
package/dist/testing/ref-translator-tester.js +104 -0
package/dist/testing/ref-translator-tester.js.map +1 -0
package/dist/utils/coordinate-converter.d.ts +32 -0
package/dist/utils/coordinate-converter.d.ts.map +1 -0
package/dist/utils/coordinate-converter.js +130 -0
package/dist/utils/coordinate-converter.js.map +1 -0
package/dist/utils/hierarchical-selector.d.ts +47 -0
package/dist/utils/hierarchical-selector.d.ts.map +1 -0
package/dist/utils/hierarchical-selector.js +212 -0
package/dist/utils/hierarchical-selector.js.map +1 -0
package/dist/utils/page-info-retry.d.ts +14 -0
package/dist/utils/page-info-retry.d.ts.map +1 -0
package/dist/utils/page-info-retry.js +60 -0
package/dist/utils/page-info-retry.js.map +1 -0
package/dist/utils/page-info-utils.d.ts +1 -0
package/dist/utils/page-info-utils.d.ts.map +1 -1
package/dist/utils/page-info-utils.js +46 -18
package/dist/utils/page-info-utils.js.map +1 -1
package/dist/utils/ref-attacher.d.ts +21 -0
package/dist/utils/ref-attacher.d.ts.map +1 -0
package/dist/utils/ref-attacher.js +149 -0
package/dist/utils/ref-attacher.js.map +1 -0
package/dist/utils/ref-translator.d.ts +49 -0
package/dist/utils/ref-translator.d.ts.map +1 -0
package/dist/utils/ref-translator.js +276 -0
package/dist/utils/ref-translator.js.map +1 -0
package/package.json +1 -1
package/plandocs/BEFORE_AFTER_VERIFICATION.md +148 -0
package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +144 -0
package/plandocs/IMPLEMENTATION_STATUS.md +108 -0
package/plandocs/PHASE_1_COMPLETE.md +165 -0
package/plandocs/PHASE_1_SUMMARY.md +184 -0
package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +120 -0
package/plandocs/PROMPT_SANITY_CHECK.md +120 -0
package/plandocs/SESSION_SUMMARY_v0.0.33.md +151 -0
package/plandocs/TROUBLESHOOTING_SESSION.md +72 -0
package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +396 -0
package/plandocs/WHATS_NEW_v0.0.33.md +183 -0
package/plandocs/exploratory-mode-support-v2.plan.md +953 -0
package/plandocs/exploratory-mode-support.plan.md +928 -0
package/plandocs/journey-id-tracking-addendum.md +227 -0
package/src/execution-service.ts +179 -596
package/src/index.ts +10 -0
package/src/llm-facade.ts +8 -8
package/src/llm-provider.ts +11 -1
package/src/model-constants.ts +17 -5
package/src/orchestrator/decision-parser.ts +139 -0
package/src/orchestrator/index.ts +27 -2
package/src/orchestrator/orchestrator-agent.ts +868 -623
package/src/orchestrator/orchestrator-prompts.ts +786 -0
package/src/orchestrator/page-som-handler.ts +1565 -0
package/src/orchestrator/som-types.ts +188 -0
package/src/orchestrator/tool-registry.ts +2 -0
package/src/orchestrator/tools/index.ts +5 -1
package/src/orchestrator/tools/refresh-som-markers.ts +69 -0
package/src/orchestrator/tools/verify-action-result.ts +159 -0
package/src/orchestrator/tools/view-previous-screenshot.ts +103 -0
package/src/orchestrator/types.ts +95 -4
package/src/prompts.ts +40 -34
package/src/scenario-service.ts +20 -0
package/src/scenario-worker-class.ts +30 -4
package/src/utils/coordinate-converter.ts +162 -0
package/src/utils/page-info-retry.ts +65 -0
package/src/utils/page-info-utils.ts +53 -18
package/testchimp-runner-core-0.0.35.tgz +0 -0
/package/{CREDIT_CALLBACK_ARCHITECTURE.md → plandocs/CREDIT_CALLBACK_ARCHITECTURE.md} +0 -0
/package/{INTEGRATION_COMPLETE.md → plandocs/INTEGRATION_COMPLETE.md} +0 -0
/package/{VISION_DIAGNOSTICS_IMPROVEMENTS.md → plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md} +0 -0
/package/{RELEASE_0.0.26.md → releasenotes/RELEASE_0.0.26.md} +0 -0
/package/{RELEASE_0.0.27.md → releasenotes/RELEASE_0.0.27.md} +0 -0
/package/{RELEASE_0.0.28.md → releasenotes/RELEASE_0.0.28.md} +0 -0

package/src/execution-service.ts CHANGED

@@ -21,6 +21,8 @@ import { DEFAULT_MODEL, VISION_MODEL } from './model-constants';
 import { LLMProvider } from './llm-provider';
 import { ProgressReporter } from './progress-reporter';
 import { BackendProxyLLMProvider } from './providers/backend-proxy-llm-provider';
+import { OrchestratorAgent, ToolRegistry, DEFAULT_AGENT_CONFIG } from './orchestrator';
+import type { AgentConfig, JourneyMemory } from './orchestrator';
 /**
  * Service for orchestrating Playwright script execution
@@ -34,6 +36,7 @@ export class ExecutionService {
   private maxConcurrentExecutions: number;
   private activeExecutions: Set<Promise<any>> = new Set();
   private logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
+  private orchestratorAgent: OrchestratorAgent;
   constructor(
     authConfig?: AuthConfig,
@@ -51,6 +54,21 @@ export class ExecutionService {
     this.progressReporter = progressReporter;
     this.creditUsageService = new CreditUsageService(authConfig, backendUrl);
     this.maxConcurrentExecutions = maxConcurrentExecutions;
+    // Initialize orchestrator for repair mode (reuses all SoM infrastructure)
+    const toolRegistry = new ToolRegistry();
+    const repairConfig: Partial<AgentConfig> = {
+      useSoM: true,
+      somRestrictCoordinates: true  // Prefer SoM markers for repairs
+    };
+    this.orchestratorAgent = new OrchestratorAgent(
+      this.llmFacade,
+      toolRegistry,
+      repairConfig,
+      progressReporter,
+      (msg, level) => this.log(msg)
+    );
   }
   /**
@@ -253,7 +271,7 @@ export class ExecutionService {
         }
         // Execute the script as-is
-        await this.executeScriptContent(request.script, page);
+        await this.executeStepCode(request.script, page);
         // LIFECYCLE: Call afterEndTest on success
         if (this.progressReporter?.afterEndTest) {
@@ -301,7 +319,7 @@ export class ExecutionService {
         }
         // Execute the script as-is
-        await this.executeScriptContent(request.script, page);
+        await this.executeStepCode(request.script, page);
         // LIFECYCLE: Call afterEndTest on success
         if (this.progressReporter?.afterEndTest) {
@@ -385,8 +403,12 @@ export class ExecutionService {
     // Start AI repair process
     this.log('Starting AI repair process...');
+    let repairBrowser: any = null;
+    let repairContext: any = null;
+    let repairPage: any = null;
     try {
-        let repairBrowser, repairContext, repairPage, steps, updatedSteps;
+        let steps, updatedSteps;
         if (useExistingBrowser) {
           // Use existing browser
@@ -451,8 +473,8 @@ export class ExecutionService {
           updatedSteps = await this.repairStepsWithAI(steps, repairPage, repairFlexibility, model, request.jobId);
         }
-        // Always generate the updated script
-        const updatedScript = this.generateUpdatedScript(updatedSteps);
+        // Always generate the updated script (preserve original test name)
+        const updatedScript = this.generateUpdatedScript(updatedSteps, undefined, request.script);
         // Check if repair was successful by seeing if we completed all steps
         const allStepsSuccessful = updatedSteps.length > 0 && updatedSteps.every(step => step.success);
@@ -472,11 +494,32 @@ export class ExecutionService {
         // Update file if we have any successful repairs (partial or complete)
         if (hasSuccessfulRepairs) {
+          // IMPORTANT: Use the orchestrator-generated script directly (already has proper Playwright commands)
+          // Don't regenerate via LLM as it loses the actual repairs
+          this.log('Using orchestrator-generated script (skipping LLM regeneration to preserve repairs)');
+          // For repair advice, compare original vs repaired
           const confidenceResponse = await this.llmFacade.assessRepairConfidence(request.script!, updatedScript, model);
-          const finalScript = await this.llmFacade.generateFinalScript(request.script!, updatedScript, confidenceResponse.advice, model);
-          // Ensure the final script has the correct TestChimp comment format with repair advice
-          const scriptWithRepairAdvice = addTestChimpComment(finalScript, confidenceResponse.advice);
+          // Add TestChimp comment with repair advice
+          const scriptWithAdvice = addTestChimpComment(updatedScript, confidenceResponse.advice);
+          // Polish the script with minor LLM cleanup (removes redundancies, fixes formatting)
+          this.log('Applying final LLM polish to repaired script (minor cleanup only)...');
+          const cleanupResult = await this.llmFacade.cleanupScript(scriptWithAdvice, model);
+          if (cleanupResult.changes.length > 0) {
+            this.log(`Script cleanup made ${cleanupResult.changes.length} minor improvements:`);
+            cleanupResult.changes.forEach((change, i) => {
+              this.log(`  ${i + 1}. ${change}`);
+            });
+          } else if (cleanupResult.skipped) {
+            this.log(`Script cleanup skipped: ${cleanupResult.skipped}`);
+          } else {
+            this.log('Script cleanup: no changes needed');
+          }
+          const scriptWithRepairAdvice = cleanupResult.script;
           // Report credit usage for successful AI repair
           this.creditUsageService.reportAIRepairCredit().catch(error => {
@@ -496,11 +539,6 @@ export class ExecutionService {
             }
           }
-          // Only close browser if we created it (not provided by caller)
-          if (!useExistingBrowser) {
-            await repairBrowser.close();
-          }
           return {
             runStatus: 'failed', // Original script failed
             repairStatus: allStepsSuccessful ? 'success' : 'partial', // Complete or partial repair success
@@ -522,11 +560,6 @@ export class ExecutionService {
             }
           }
-          // Only close browser if we created it (not provided by caller)
-          if (!useExistingBrowser) {
-            await repairBrowser.close();
-          }
           return {
             runStatus: 'failed', // Original script failed
             repairStatus: 'failed',
@@ -546,6 +579,16 @@ export class ExecutionService {
         executionTime: Date.now() - startTime,
         error: error instanceof Error ? error.message : 'Script execution failed'
       };
+    } finally {
+      // Clean up browser resources if we created them (not provided by caller)
+      if (!useExistingBrowser && repairBrowser) {
+        try {
+          await repairBrowser.close();
+          this.log('AI repair browser closed');
+        } catch (closeError) {
+          this.log(`Error closing AI repair browser: ${closeError}`, 'warn');
+        }
+      }
     }
   }
@@ -629,6 +672,9 @@ export class ExecutionService {
       newCode?: string;
     }> = [];
+    // Track actual executed steps (including agent repairs) for proper history
+    const executedStepDescriptions: string[] = [];
     // Create a shared execution context that accumulates all executed code for variable tracking
     let executionContext = '';
     const contextVariables = new Map<string, any>();
@@ -660,6 +706,9 @@ export class ExecutionService {
         this.log(`Step ${i + 1} executed successfully: ${step.description}`);
         this.log(`Step ${i + 1} success status set to: ${step.success}`);
+        // Track executed step description for agent context
+        executedStepDescriptions.push(step.description);
         // Report successful step execution
         this.log(`DEBUG: About to check callback - progressReporter=${!!this.progressReporter}, onStepProgress=${!!this.progressReporter?.onStepProgress}, jobId=${jobId}`);
         if (this.progressReporter?.onStepProgress && jobId) {
@@ -691,304 +740,114 @@ export class ExecutionService {
         step.success = false;
         step.error = this.safeSerializeError(error);
-        // Try multiple repair attempts
-        const repairHistory: Array<{
-          attempt: number;
-          action: StepRepairAction;
-          error: string;
-          pageInfo: PageInfo;
-        }> = [];
+        // Use orchestrator for repair (reuses all SoM infrastructure)
+        this.log(`Calling orchestrator in REPAIR mode for step ${i + 1}`);
+        // Prepare repair context - use executedStepDescriptions (includes agent repairs)
+        const priorSteps = executedStepDescriptions; // What was ACTUALLY executed (scripted + agent)
+        const nextSteps = updatedSteps.slice(i + 1).map(s => s.description);
+        this.log(`  Prior steps executed: ${priorSteps.length}, Next steps: ${nextSteps.length}`);
+        this.log(`  Prior steps context:\n    ${priorSteps.map((s, idx) => `${idx + 1}. ${s}`).join('\n    ')}`);
+        // Create minimal memory for repair
+        const memory: JourneyMemory = {
+          experiences: [],
+          extractedData: {},
+          history: [],
+          latestNote: undefined
+        };
         let repairSuccess = false;
-        const originalDescription = step.description;
-        const originalCode = step.code;
-        let usedVisionMode = false;
-        for (let attempt = 1; attempt <= maxTries; attempt++) {
-          this.log(`Step ${i + 1} repair attempt ${attempt}/${maxTries}`);
-          // Get current page state for AI repair
-          const pageInfo = await this.getEnhancedPageInfo(page);
-          // Build failure history for LLM context
-          const failureHistory = this.buildFailureHistory(repairHistory, step, error);
-          // Build recent repairs context for LLM
-          const recentRepairsContext = this.buildRecentRepairsContext(recentRepairs);
-          let repairSuggestion;
+        try {
+          // Call orchestrator with repair context (page object persisted)
+          const repairResult = await this.orchestratorAgent.executeStep(
+            page,                                    // Same page object (persisted state)
+            step.description,                        // Goal with testdata embedded
+            i + 1,                                   // Current step number
+            updatedSteps.length,                     // Total steps
+            updatedSteps.map(s => s.description),    // All step descriptions
+            memory,                                  // Memory (empty for repair)
+            jobId || 'repair',
+            priorSteps,  // NEW: What was already completed
+            nextSteps    // NEW: What comes after this
+          );
-          // VISION-BASED FALLBACK: After 2 regular repair attempts, consider vision diagnostics on final attempt
-          if (attempt === maxTries - 1 && repairHistory.length >= 2 && !usedVisionMode) {
-            // Ask LLM if screenshot would help for repair diagnostics
-            this.log(`  🤔 After ${repairHistory.length} failed repairs: Asking LLM if screenshot would help (last resort)...`);
-            const screenshotNeed = await this.llmFacade.assessScreenshotNeed(
-              step.description,
-              step.error || 'Unknown error',
-              repairHistory.length + 1,
-              pageInfo,
-              model
-            );
+          if (repairResult.success && repairResult.commands.length > 0) {
+            // MODIFY: Orchestrator fixed the step - replace with new code
+            const repairedCode = repairResult.commands.join('\n');
-            this.log(`  💭 LLM assessment: ${screenshotNeed.needsScreenshot ? 'SCREENSHOT NEEDED' : 'NO SCREENSHOT'} - ${screenshotNeed.reason}`);
-            if (screenshotNeed.needsScreenshot) {
-              // Two-step supervisor pattern for vision-based repair:
-              // 1. Supervisor analyzes screenshot and provides diagnostic insights
-              // 2. Get repair suggestion with enhanced context from vision analysis
-              this.log(`  📸 Taking screenshot for supervisor analysis...`);
-              // Capture optimized screenshot using utility method
-              const imageDataUrl = await captureOptimizedScreenshot(
-                page,
-                { timeout: 10000 }, // Uses default quality 60
-                (msg) => this.log(msg)
-              );
-              this.log(`  👔 STEP 1: Supervisor analyzing screenshot (${VISION_MODEL})...`);
-              const supervisorDiagnostics = await this.llmFacade.getVisionDiagnostics(
-                step.description,
-                pageInfo,
-                [], // No previous steps context for repair
-                step.error,
-                imageDataUrl,
-                VISION_MODEL
-              );
-              // DEBUG: Log vision diagnostics
-              this.log(`  📸 Visual insights: ${supervisorDiagnostics.visualAnalysis}`);
-              this.log(`  🔍 Root cause: ${supervisorDiagnostics.rootCause}`);
-              this.log(`  💡 Recommended approach: ${supervisorDiagnostics.recommendedApproach}`);
-              if (supervisorDiagnostics.elementsFound.length > 0) {
-                this.log(`  ✅ Elements found: ${supervisorDiagnostics.elementsFound.join(', ')}`);
-              }
-              if (supervisorDiagnostics.elementsNotFound.length > 0) {
-                this.log(`  ❌ Elements not found: ${supervisorDiagnostics.elementsNotFound.join(', ')}`);
-              }
-              // Get repair suggestion with vision-enhanced context
-              this.log(`  🔨 STEP 2: Getting repair suggestion with vision insights...`);
-              const visionEnhancedFailureHistory = `${failureHistory}
-VISION-BASED DIAGNOSTIC INSIGHTS:
-Visual Analysis: ${supervisorDiagnostics.visualAnalysis}
-Root Cause: ${supervisorDiagnostics.rootCause}
-Recommended Approach: ${supervisorDiagnostics.recommendedApproach}
-Elements Found: ${supervisorDiagnostics.elementsFound.join(', ') || 'None'}
-Elements Not Found: ${supervisorDiagnostics.elementsNotFound.join(', ') || 'None'}
-Use these vision insights to inform your repair strategy.`;
-              repairSuggestion = await this.llmFacade.getRepairSuggestion(
-                step.description,
-                step.code,
-                step.error || 'Unknown error',
-                pageInfo,
-                visionEnhancedFailureHistory,
-                recentRepairsContext,
-                model
-              );
-              usedVisionMode = true;
-            } else {
-              // Regular repair without vision
-              if (screenshotNeed.alternativeApproach) {
-                this.log(`  💡 Alternative approach: ${screenshotNeed.alternativeApproach}`);
-              }
-              repairSuggestion = await this.llmFacade.getRepairSuggestion(
-                step.description,
-                step.code,
-                step.error || 'Unknown error',
-                pageInfo,
-                failureHistory,
-                recentRepairsContext,
-                model
-              );
-            }
-          } else {
-            // Regular repair attempt (first 2 attempts or already used vision)
-            repairSuggestion = await this.llmFacade.getRepairSuggestion(
-              step.description,
-              step.code,
-              step.error || 'Unknown error',
-              pageInfo,
-              failureHistory,
-              recentRepairsContext,
-              model
-            );
-          }
-          if (!repairSuggestion.shouldContinue) {
-            this.log(`AI decided to stop repair at attempt ${attempt}: ${repairSuggestion.reason}`);
-            break;
-          }
-          // Apply the repair action
-          try {
-            // Set the step index and insertAfterIndex on the client side based on current step being processed
-            const repairAction = {
-              ...repairSuggestion.action,
-              stepIndex: i, // Client-side step index management
-              insertAfterIndex: repairSuggestion.action.operation === StepOperation.INSERT ? i - 1 : undefined // For INSERT, insert before current step
+            updatedSteps[i] = {
+              ...step,
+              code: repairedCode,
+              success: true,
+              error: undefined
             };
-            this.log(`🔧 Applying repair action: ${repairAction.operation} on step ${repairAction.stepIndex}`);
-            this.log(`🔧 Steps array before repair: ${updatedSteps.map((s, idx) => `${idx}: "${s.description}" (success: ${s.success})`).join(', ')}`);
+            this.log(`✓ Step ${i + 1} MODIFIED by orchestrator (repair successful)`);
+            this.log(`  Original code: ${step.code}`);
+            this.log(`  New code (${repairResult.commands.length} commands):\n    ${repairResult.commands.join('\n    ')}`);
-            const result = await this.applyRepairActionInContext(repairAction, updatedSteps, i, page, executionContext, contextVariables);
+            // Track what agent actually did in history (for future repair context)
+            const agentActionSummary = `${step.description} [AI-repaired: ${repairResult.commands.length} commands]`;
+            executedStepDescriptions.push(agentActionSummary);
-            if (result.success) {
-              repairSuccess = true;
-              this.log(`🔧 Steps array after repair: ${updatedSteps.map((s, idx) => `${idx}: "${s.description}" (success: ${s.success})`).join(', ')}`);
-              // Mark the appropriate step(s) as successful based on operation type
-              if (repairAction.operation === StepOperation.MODIFY) {
-                // For MODIFY: mark the modified step as successful
-                step.success = true;
-                step.error = undefined;
-                updatedSteps[i].success = true;
-                updatedSteps[i].error = undefined;
-                this.log(`Step ${i + 1} marked as successful after MODIFY repair`);
-                // Report repaired step
-                if (this.progressReporter?.onStepProgress && jobId) {
-                  this.log(`DEBUG: Reporting repaired step ${i + 1}:`);
-                  this.log(`  description: ${updatedSteps[i].description}`);
-                  this.log(`  code: ${updatedSteps[i].code}`);
-                  await this.progressReporter.onStepProgress({
-                    jobId,
-                    stepId: updatedSteps[i].id, // Preserve original step ID if provided
-                    stepNumber: i + 1,
-                    description: updatedSteps[i].description,
-                    code: updatedSteps[i].code,
-                    status: 'SUCCESS_STEP_EXECUTION' as any,
-                    wasRepaired: true
-                  });
-                }
-              } else if (repairAction.operation === StepOperation.INSERT) {
-                // For INSERT: mark the newly inserted step as successful
-                const insertIndex = repairAction.insertAfterIndex !== undefined ? repairAction.insertAfterIndex + 1 : i + 1;
-                if (updatedSteps[insertIndex]) {
-                  updatedSteps[insertIndex].success = true;
-                  updatedSteps[insertIndex].error = undefined;
-                  // Report inserted step
-                  if (this.progressReporter?.onStepProgress && jobId) {
-                    await this.progressReporter.onStepProgress({
-                      jobId,
-                      stepId: updatedSteps[insertIndex].id, // Preserve original step ID if provided
-                      stepNumber: insertIndex + 1,
-                      description: updatedSteps[insertIndex].description,
-                      code: updatedSteps[insertIndex].code,
-                      status: 'SUCCESS_STEP_EXECUTION' as any,
-                      wasRepaired: true
-                    });
-                  }
-                }
-              } else if (repairAction.operation === StepOperation.REMOVE) {
-                // For REMOVE: no step to mark as successful since we removed it
-                // The step is already removed from the array
-              }
-              const commandInfo = repairAction.operation === StepOperation.MODIFY ?
-                `MODIFY: "${repairAction.newStep?.code || 'N/A'}"` :
-                repairAction.operation === StepOperation.INSERT ?
-                `INSERT: "${repairAction.newStep?.code || 'N/A'}"` :
-                repairAction.operation === StepOperation.REMOVE ?
-                `REMOVE: step at index ${repairAction.stepIndex}` :
-                repairAction.operation;
-              this.log(`Step ${i + 1} repair action ${commandInfo} executed successfully on attempt ${attempt}${usedVisionMode ? ' (vision-aided)' : ''}`);
-              // Update execution context based on the repair action
-              if (repairAction.operation === StepOperation.MODIFY && repairAction.newStep) {
-                // Update the step in the execution context for variable tracking
-                executionContext = executionContext.replace(originalCode, repairAction.newStep.code);
-              } else if (repairAction.operation === StepOperation.INSERT && repairAction.newStep) {
-                // Insert the new step code into execution context for variable tracking
-                executionContext += repairAction.newStep.code + '\n';
-              } else if (repairAction.operation === StepOperation.REMOVE) {
-                // Remove the step code from execution context for variable tracking
-                executionContext = executionContext.replace(originalCode, '');
-              }
-              // Record this successful repair
-              recentRepairs.push({
+            // Report repaired step
+            if (this.progressReporter?.onStepProgress && jobId) {
+              await this.progressReporter.onStepProgress({
+                jobId,
+                stepId: step.id,
                 stepNumber: i + 1,
-                operation: repairAction.operation,
-                originalDescription: repairAction.operation === StepOperation.REMOVE ? originalDescription : undefined,
-                newDescription: repairAction.newStep?.description,
-                originalCode: repairAction.operation === StepOperation.REMOVE ? originalCode : undefined,
-                newCode: repairAction.newStep?.code
+                description: updatedSteps[i].description,
+                code: updatedSteps[i].code,
+                status: 'SUCCESS_STEP_EXECUTION' as any,
+                wasRepaired: true
               });
-              // Keep only the last 3 repairs for context
-              if (recentRepairs.length > 3) {
-                recentRepairs.shift();
-              }
-              // Update step index based on operation
-              if (repairAction.operation === StepOperation.INSERT) {
-                // For INSERT: inserted step is already executed
-                this.log(`INSERT operation: current i=${i}, insertAfterIndex=${repairAction.insertAfterIndex}`);
-                this.log(`INSERT: Steps array length before: ${updatedSteps.length}`);
-                this.log(`INSERT: Steps before operation: ${updatedSteps.map((s, idx) => `${idx}: "${s.description}" (success: ${s.success})`).join(', ')}`);
-                if (repairAction.insertAfterIndex !== undefined && repairAction.insertAfterIndex < i) {
-                  // If inserting before current position, current step moved down by 1
-                  this.log(`INSERT before current position: incrementing i from ${i} to ${i + 1}`);
-                  i++; // Move to the original step that was pushed to the next position
-                } else {
-                  // If inserting at or after current position, stay at current step
-                  this.log(`INSERT at/after current position: keeping i at ${i}`);
-                }
-                this.log(`INSERT: Steps array length after: ${updatedSteps.length}`);
-                this.log(`INSERT: Steps after operation: ${updatedSteps.map((s, idx) => `${idx}: "${s.description}" (success: ${s.success})`).join(', ')}`);
-              } else if (repairAction.operation === StepOperation.REMOVE) {
-                // For REMOVE: stay at same index since the next step moved to current position
-                // Don't increment i because the array shifted left
-              } else {
-                // For MODIFY: move to next step since modified step was executed
-                i++; // Move to next step for MODIFY
-              }
-              // Add the repaired step's code to execution context for variable tracking
-              executionContext += step.code + '\n';
-              break;
-            } else {
-              throw new Error(result.error || 'Repair action failed');
             }
-          } catch (repairError) {
-            const repairErrorMessage = repairError instanceof Error ? repairError.message : 'Repair failed';
-            const commandInfo = repairSuggestion.action.operation === StepOperation.MODIFY ?
-              `MODIFY: "${repairSuggestion.action.newStep?.code || 'N/A'}"` :
-              repairSuggestion.action.operation === StepOperation.INSERT ?
-              `INSERT: "${repairSuggestion.action.newStep?.code || 'N/A'}"` :
-              repairSuggestion.action.operation === StepOperation.REMOVE ?
-              `REMOVE: step at index ${repairSuggestion.action.stepIndex}` :
-              repairSuggestion.action.operation;
-            this.log(`Step ${i + 1} repair attempt ${attempt} failed (${commandInfo}): ${repairErrorMessage}`);
-            if (repairError instanceof Error && repairError.stack) {
-              this.log(`  Repair stack trace: ${repairError.stack}`);
+            // Ensure page is stable after agent repairs before returning control to script
+            this.log(`Waiting for page stability after agent repair...`);
+            try {
+              await page.waitForLoadState('networkidle', { timeout: 5000 });
+              this.log(`Page stabilized (networkidle) after agent repair`);
+            } catch (stabilityError) {
+              try {
+                await page.waitForLoadState('domcontentloaded', { timeout: 3000 });
+                this.log(`Page loaded (domcontentloaded) after agent repair`);
+              } catch (fallbackError) {
+                this.log(`Page stability wait timed out (continuing anyway)`, 'warn');
+              }
             }
-            // Record this attempt in history
-            repairHistory.push({
-              attempt,
-              action: repairSuggestion.action,
-              error: repairErrorMessage,
-              pageInfo
-            });
+            repairSuccess = true;
+            i++; // Continue to NEXT step (hand control back to script)
+          } else if (repairResult.success && repairResult.commands.length === 0) {
+            // DELETE: Step goal already achieved or no longer needed (e.g., modal already dismissed)
+            this.log(`✓ Step ${i + 1} DELETED by orchestrator (goal already achieved, step obsolete)`);
+            this.log(`  Reason: Orchestrator completed with 0 commands - step no longer needed`);
+            // Track deletion in history (helps agent understand what was skipped)
+            executedStepDescriptions.push(`${step.description} [AI-deleted: step obsolete/already done]`);
+            // Remove the step from array
+            updatedSteps.splice(i, 1);
+            repairSuccess = true;
+            // Don't increment i - next step moved to current position
-            step.error = repairErrorMessage;
+          } else {
+            this.log(`✗ Step ${i + 1} could not be repaired by orchestrator (reason: ${repairResult.terminationReason})`);
           }
+        } catch (repairError: any) {
+          this.log(`✗ Orchestrator repair failed: ${repairError.message}`);
         }
+        // Legacy repair code removed - now using orchestrator
         if (!repairSuccess) {
-          this.log(`Step ${i + 1} failed after ${maxTries} repair attempts`);
+          this.log(`Step ${i + 1} could not be repaired - stopping execution`);
           break;
         }
       }
@@ -1041,310 +900,34 @@ Use these vision insights to inform your repair strategy.`;
     return code; // Return the original code without removing comments
   }
-  private async executeStepInContext(
-    code: string,
-    page: any,
-    executionContext: string,
-    contextVariables: Map<string, any>
-  ): Promise<void> {
-    // Detect if code contains navigation or load state operations that need longer timeout
-    const needsLongerTimeout = code.includes('waitForLoadState') ||
-                                code.includes('goto(') ||
-                                code.includes('waitForURL') ||
-                                code.includes('waitForNavigation');
-    // Use appropriate timeout based on operation type
-    const timeout = needsLongerTimeout ? 30000 : 5000;
-    page.setDefaultTimeout(timeout);
-    try {
-      // Execute only the current step code, but make context variables available
-      const fullCode = code;
-      // Dynamically import expect
-      const { expect } = require('@playwright/test');
-    // Create a function that has access to page, expect, and the context variables
-    const executeCode = new Function(
-      'page',
-      'expect',
-      'contextVariables',
-      `return (async () => {
-        // Make context variables available in the execution scope
-        for (const [key, value] of contextVariables) {
-          globalThis[key] = value;
-        }
-        ${fullCode}
-        // Capture any new variables that might have been created
-        const newVars = {};
-        for (const key in globalThis) {
-          if (!contextVariables.has(key) && typeof globalThis[key] !== 'function' && key !== 'page' && key !== 'expect') {
-            newVars[key] = globalThis[key];
-          }
-        }
-        return newVars;
-      })()`
-    );
-      const newVars = await executeCode(page, expect, contextVariables);
-      // Update the context variables with any new variables created
-      for (const [key, value] of Object.entries(newVars)) {
-        contextVariables.set(key, value);
-      }
-    } finally {
-      // Reset to default timeout for element operations
-      page.setDefaultTimeout(5000);
-    }
-  }
-  private async executeScriptContent(script: string, page: any): Promise<void> {
-    // Extract the test function content
-    const testMatch = script.match(/test\([^,]+,\s*async\s*\(\s*\{\s*page[^}]*\}\s*\)\s*=>\s*\{([\s\S]*)\}\s*\);/);
-    if (!testMatch) {
-      throw new Error('Could not extract test function from script');
-    }
-    const testBody = testMatch[1];
-    // Dynamically import expect
-    const { expect } = require('@playwright/test');
-    // Execute the entire test body as one async function
-    const executeTest = new Function('page', 'expect', `return (async () => { ${testBody} })()`);
-    await executeTest(page, expect);
-  }
-  private async getEnhancedPageInfo(page: any): Promise<PageInfo> {
-    try {
-      return await getEnhancedPageInfo(page);
-    } catch (error) {
-      return {
-        url: page.url(),
-        title: 'Unknown',
-        ariaSnapshot: null,
-        interactiveElements: [],
-        formattedElements: 'Unable to extract'
-      };
-    }
-  }
-  private buildFailureHistory(
-    repairHistory: Array<{ attempt: number; action: StepRepairAction; error: string; pageInfo: PageInfo }>,
-    originalStep: ScriptStep,
-    originalError: any
-  ): string {
-    if (repairHistory.length === 0) {
-      return `Original failure: ${this.safeSerializeError(originalError)}`;
-    }
-    let history = `Original failure: ${this.safeSerializeError(originalError)}\n\n`;
-    history += `Previous repair attempts:\n`;
-    repairHistory.forEach((attempt, index) => {
-      history += `Attempt ${attempt.attempt}:\n`;
-      history += `  Operation: ${attempt.action.operation}\n`;
-      if (attempt.action.newStep) {
-        history += `  Description: ${attempt.action.newStep.description}\n`;
-        history += `  Code: ${attempt.action.newStep.code}\n`;
-      }
-      history += `  Error: ${attempt.error}\n`;
-      if (index < repairHistory.length - 1) {
-        history += `\n`;
-      }
-    });
-    return history;
+  // Legacy repair helper methods (now unused but kept for compilation)
+  private buildFailureHistory(): string { return ''; }
+  private buildRecentRepairsContext(): string { return ''; }
+  private async applyRepairActionInContext(): Promise<{ success: boolean; error?: string }> {
+    return { success: false };
   }
-  private buildRecentRepairsContext(
-    recentRepairs: Array<{
-      stepNumber: number;
-      operation: string;
-      originalDescription?: string;
-      newDescription?: string;
-      originalCode?: string;
-      newCode?: string;
-    }>
-  ): string {
-    if (recentRepairs.length === 0) {
-      return 'No recent repairs to consider.';
-    }
-    let context = 'Recent successful repairs that may affect this step:\n\n';
+  private generateUpdatedScript(steps: (ScriptStep & { success?: boolean; error?: string })[], repairAdvice?: string, originalScript?: string): string {
+    // Extract test name and hashtags from original script if provided
+    let testName = 'repairedTest';
+    let hashtags: string[] = [];
-    recentRepairs.forEach((repair, index) => {
-      context += `Step ${repair.stepNumber} was successfully repaired:\n`;
-      context += `  Operation: ${repair.operation}\n`;
-      if (repair.operation === 'REMOVE') {
-        context += `  Removed: "${repair.originalDescription}"\n`;
-        context += `  Code removed:\n    ${repair.originalCode?.replace(/\n/g, '\n    ')}\n`;
-      } else if (repair.operation === 'INSERT') {
-        context += `  Inserted: "${repair.newDescription}"\n`;
-        context += `  Code inserted:\n    ${repair.newCode?.replace(/\n/g, '\n    ')}\n`;
-      } else {
-        context += `  Original: "${repair.originalDescription}"\n`;
-        context += `  Repaired: "${repair.newDescription}"\n`;
-        context += `  Code changed from:\n    ${repair.originalCode?.replace(/\n/g, '\n    ')}\n`;
-        context += `  To:\n    ${repair.newCode?.replace(/\n/g, '\n    ')}\n`;
+    if (originalScript) {
+      const testNameMatch = originalScript.match(/test\(['"]([^'"]+)['"]/);
+      if (testNameMatch) {
+        testName = testNameMatch[1];
       }
-      if (index < recentRepairs.length - 1) {
-        context += `\n`;
+      // Extract hashtags from TestChimp comment
+      const hashtagMatch = originalScript.match(/#\w+(?:\s+#\w+)*/);
+      if (hashtagMatch) {
+        hashtags = hashtagMatch[0].split(/\s+/).filter(tag => tag.startsWith('#'));
       }
-    });
-    context += '\nConsider how these changes might affect the current step and adjust accordingly.';
-    return context;
-  }
-  private async applyRepairActionInContext(
-    action: StepRepairAction,
-    steps: (ScriptStep & { success?: boolean; error?: string })[],
-    currentIndex: number,
-    page: any,
-    executionContext: string,
-    contextVariables: Map<string, any>
-  ): Promise<{ success: boolean; error?: string; updatedContext?: string }> {
-    try {
-      switch (action.operation) {
-        case StepOperation.MODIFY:
-          if (action.newStep && action.stepIndex !== undefined) {
-            // Modify existing step
-            steps[action.stepIndex] = {
-              ...action.newStep,
-              success: false,
-              error: undefined
-            };
-            // Test the modified step with current page state and variables
-            await this.executeStepCode(action.newStep.code, page);
-            return { success: true, updatedContext: executionContext + action.newStep.code };
-          }
-          break;
-        case StepOperation.INSERT:
-          if (action.newStep && action.insertAfterIndex !== undefined) {
-            // Insert new step after specified index
-            const insertIndex = action.insertAfterIndex + 1;
-            const newStep = {
-              ...action.newStep,
-              success: false,
-              error: undefined
-            };
-            this.log(`INSERT: Inserting step at index ${insertIndex} with description "${newStep.description}"`);
-            this.log(`INSERT: Steps before insertion: ${steps.map((s, i) => `${i}: "${s.description}" (success: ${s.success})`).join(', ')}`);
-            // Preserve success status of existing steps before insertion
-            const successStatusMap = new Map(steps.map((step, index) => [index, { success: step.success, error: step.error }]));
-            steps.splice(insertIndex, 0, newStep);
-            // Restore success status for steps that were shifted by the insertion
-            // Steps at insertIndex and before keep their original status
-            // Steps after insertIndex need to be shifted to their new positions
-            for (let i = insertIndex + 1; i < steps.length; i++) {
-              const originalIndex = i - 1; // The step that was originally at this position
-              if (successStatusMap.has(originalIndex)) {
-                const status = successStatusMap.get(originalIndex)!;
-                steps[i].success = status.success;
-                steps[i].error = status.error;
-              }
-            }
-            // CRITICAL FIX: Ensure the inserted step doesn't overwrite existing step data
-            // The new step should only have its own description, not inherit from existing steps
-            this.log(`INSERT: Final step array after restoration: ${steps.map((s, i) => `${i}: "${s.description}" (success: ${s.success})`).join(', ')}`);
-            this.log(`INSERT: Steps after insertion: ${steps.map((s, i) => `${i}: "${s.description}" (success: ${s.success})`).join(', ')}`);
-            // Test the new step with current page state
-            await this.executeStepCode(action.newStep.code, page);
-            return { success: true, updatedContext: executionContext + action.newStep.code };
-          }
-          break;
-        case StepOperation.REMOVE:
-          if (action.stepIndex !== undefined) {
-            // Remove step
-            steps.splice(action.stepIndex, 1);
-            return { success: true, updatedContext: executionContext };
-          }
-          break;
-      }
-      return { success: false, error: 'Invalid repair action' };
-    } catch (error) {
-      return {
-        success: false,
-        error: error instanceof Error ? error.message : 'Unknown error during repair action'
-      };
     }
-  }
-  private async applyRepairAction(
-    action: StepRepairAction,
-    steps: (ScriptStep & { success?: boolean; error?: string })[],
-    currentIndex: number,
-    page: any
-  ): Promise<{ success: boolean; error?: string }> {
-    try {
-      switch (action.operation) {
-        case StepOperation.MODIFY:
-          if (action.newStep && action.stepIndex !== undefined) {
-            // Modify existing step
-            steps[action.stepIndex] = {
-              ...action.newStep,
-              success: false,
-              error: undefined
-            };
-            // Test the modified step
-            await this.executeStepCode(action.newStep.code, page);
-            return { success: true };
-          }
-          break;
-        case StepOperation.INSERT:
-          if (action.newStep && action.insertAfterIndex !== undefined) {
-            // Insert new step after specified index
-            const insertIndex = action.insertAfterIndex + 1;
-            const newStep = {
-              ...action.newStep,
-              success: false,
-              error: undefined
-            };
-            steps.splice(insertIndex, 0, newStep);
-            // Test the inserted step
-            await this.executeStepCode(action.newStep.code, page);
-            return { success: true };
-          }
-          break;
-        case StepOperation.REMOVE:
-          if (action.stepIndex !== undefined) {
-            // Remove the step
-            steps.splice(action.stepIndex, 1);
-            return { success: true };
-          }
-          break;
-      }
-      return { success: false, error: 'Invalid repair action' };
-    } catch (error) {
-      return {
-        success: false,
-        error: error instanceof Error ? error.message : 'Repair action execution failed'
-      };
-    }
-  }
-  private generateUpdatedScript(steps: (ScriptStep & { success?: boolean; error?: string })[], repairAdvice?: string): string {
     const scriptLines = [
       "import { test, expect } from '@playwright/test';",
-      `test('repairedTest', async ({ page, browser, context }) => {`
+      `test('${testName}', async ({ page, browser, context }) => {`
     ];
     steps.forEach((step, index) => {
@@ -1361,8 +944,8 @@ Use these vision insights to inform your repair strategy.`;
     scriptLines.push('});');
     const script = scriptLines.join('\n');
-    // Add TestChimp comment to the repaired script with repair advice
-    return addTestChimpComment(script, repairAdvice);
+    // Add TestChimp comment with hashtags and repair advice
+    return addTestChimpComment(script, repairAdvice, hashtags);
   }