npm - testchimp-runner-core - Versions diffs - 0.0.32 → 0.0.34 - Mend

testchimp-runner-core 0.0.32 → 0.0.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

package/dist/llm-facade.d.ts.map +1 -1
package/dist/llm-facade.js +7 -7
package/dist/llm-facade.js.map +1 -1
package/dist/llm-provider.d.ts +9 -0
package/dist/llm-provider.d.ts.map +1 -1
package/dist/model-constants.d.ts +16 -5
package/dist/model-constants.d.ts.map +1 -1
package/dist/model-constants.js +17 -6
package/dist/model-constants.js.map +1 -1
package/dist/orchestrator/index.d.ts +1 -1
package/dist/orchestrator/index.d.ts.map +1 -1
package/dist/orchestrator/index.js +3 -2
package/dist/orchestrator/index.js.map +1 -1
package/dist/orchestrator/orchestrator-agent.d.ts +0 -8
package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
package/dist/orchestrator/orchestrator-agent.js +206 -405
package/dist/orchestrator/orchestrator-agent.js.map +1 -1
package/dist/orchestrator/orchestrator-prompts.d.ts +20 -0
package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -0
package/dist/orchestrator/orchestrator-prompts.js +455 -0
package/dist/orchestrator/orchestrator-prompts.js.map +1 -0
package/dist/orchestrator/tools/index.d.ts +2 -1
package/dist/orchestrator/tools/index.d.ts.map +1 -1
package/dist/orchestrator/tools/index.js +4 -2
package/dist/orchestrator/tools/index.js.map +1 -1
package/dist/orchestrator/tools/verify-action-result.d.ts +17 -0
package/dist/orchestrator/tools/verify-action-result.d.ts.map +1 -0
package/dist/orchestrator/tools/verify-action-result.js +140 -0
package/dist/orchestrator/tools/verify-action-result.js.map +1 -0
package/dist/orchestrator/types.d.ts +26 -0
package/dist/orchestrator/types.d.ts.map +1 -1
package/dist/orchestrator/types.js.map +1 -1
package/dist/prompts.d.ts.map +1 -1
package/dist/prompts.js +87 -37
package/dist/prompts.js.map +1 -1
package/dist/scenario-worker-class.d.ts.map +1 -1
package/dist/scenario-worker-class.js +4 -1
package/dist/scenario-worker-class.js.map +1 -1
package/dist/utils/coordinate-converter.d.ts +32 -0
package/dist/utils/coordinate-converter.d.ts.map +1 -0
package/dist/utils/coordinate-converter.js +130 -0
package/dist/utils/coordinate-converter.js.map +1 -0
package/package.json +1 -1
package/plandocs/BEFORE_AFTER_VERIFICATION.md +148 -0
package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +144 -0
package/plandocs/IMPLEMENTATION_STATUS.md +108 -0
package/plandocs/PHASE_1_COMPLETE.md +165 -0
package/plandocs/PHASE_1_SUMMARY.md +184 -0
package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +120 -0
package/plandocs/PROMPT_SANITY_CHECK.md +120 -0
package/plandocs/SESSION_SUMMARY_v0.0.33.md +151 -0
package/plandocs/TROUBLESHOOTING_SESSION.md +72 -0
package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +396 -0
package/plandocs/WHATS_NEW_v0.0.33.md +183 -0
package/src/llm-facade.ts +8 -8
package/src/llm-provider.ts +11 -1
package/src/model-constants.ts +17 -5
package/src/orchestrator/index.ts +3 -2
package/src/orchestrator/orchestrator-agent.ts +249 -424
package/src/orchestrator/orchestrator-agent.ts.backup +1386 -0
package/src/orchestrator/orchestrator-prompts.ts +474 -0
package/src/orchestrator/tools/index.ts +2 -1
package/src/orchestrator/tools/verify-action-result.ts +159 -0
package/src/orchestrator/types.ts +48 -0
package/src/prompts.ts +87 -37
package/src/scenario-worker-class.ts +7 -2
package/src/utils/coordinate-converter.ts +162 -0
package/testchimp-runner-core-0.0.33.tgz +0 -0
/package/{CREDIT_CALLBACK_ARCHITECTURE.md → plandocs/CREDIT_CALLBACK_ARCHITECTURE.md} +0 -0
/package/{INTEGRATION_COMPLETE.md → plandocs/INTEGRATION_COMPLETE.md} +0 -0
/package/{VISION_DIAGNOSTICS_IMPROVEMENTS.md → plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md} +0 -0

package/dist/utils/coordinate-converter.d.ts ADDED Viewed

@@ -0,0 +1,32 @@
+/**
+ * Coordinate Converter Utility
+ * Converts percentage-based coordinates to pixel coordinates and generates Playwright commands
+ */
+import { CoordinateAction } from '../orchestrator/types';
+export declare class CoordinateConverter {
+    /**
+     * Convert percentage coordinates to actual pixel coordinates
+     *
+     * Each axis is computed as (percent / 100) * dimension and rounded to the
+     * nearest integer. No range check or clamping is applied, so a percentage
+     * outside 0-100 yields a coordinate outside the viewport.
+     *
+     * @param xPercent - Horizontal position as a percentage of viewportWidth
+     * @param yPercent - Vertical position as a percentage of viewportHeight
+     * @param viewportWidth - Viewport width in pixels
+     * @param viewportHeight - Viewport height in pixels
+     * @returns Integer pixel coordinates { x, y }
+     */
+    static percentToPixels(xPercent: number, yPercent: number, viewportWidth: number, viewportHeight: number): {
+        x: number;
+        y: number;
+    };
+    /**
+     * Get viewport dimensions from page
+     *
+     * The implementation evaluates a function in the page and reads
+     * window.innerWidth / window.innerHeight.
+     *
+     * @param page - Object exposing an async evaluate(fn) method (typed as
+     *   any; presumably a Playwright Page - confirm against callers)
+     * @returns Promise resolving to the viewport { width, height }
+     */
+    static getViewportSize(page: any): Promise<{
+        width: number;
+        height: number;
+    }>;
+    /**
+     * Convert coordinate action with percentages to Playwright commands
+     * Returns array of command strings
+     *
+     * The pixel coordinates are computed from the viewport size at the time of
+     * the call and written into the returned strings as literals. The action
+     * itself is not performed; only the viewport size is queried.
+     *
+     * @param action - Coordinate action; action.action selects the command
+     *   shape (click, doubleClick, rightClick, hover, drag, fill, scroll)
+     * @param page - Page object used only to read the viewport size
+     * @returns Promise resolving to one or more source-code command strings
+     * @throws Error when a drag lacks toXPercent/toYPercent, when a fill has
+     *   no value, or when action.action is not one of the supported names
+     */
+    static generateCommands(action: CoordinateAction, page: any): Promise<string[]>;
+    /**
+     * Execute coordinate action directly on page
+     * Used during agent execution (converts and runs immediately)
+     *
+     * Performs the same mouse/keyboard sequence that generateCommands
+     * describes as strings, but invokes it on the given page.
+     *
+     * @param action - Coordinate action to perform
+     * @param page - Page object exposing evaluate, mouse, keyboard and
+     *   waitForTimeout (typed as any)
+     * @returns Promise that resolves once the action sequence has been issued
+     * @throws Error when a drag lacks toXPercent/toYPercent, when a fill has
+     *   no value, or when action.action is not one of the supported names
+     */
+    static executeAction(action: CoordinateAction, page: any): Promise<void>;
+}
+//# sourceMappingURL=coordinate-converter.d.ts.map

package/dist/utils/coordinate-converter.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"coordinate-converter.d.ts","sourceRoot":"","sources":["../../src/utils/coordinate-converter.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAEzD,qBAAa,mBAAmB;IAE9B;;OAEG;IACH,MAAM,CAAC,eAAe,CACpB,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,MAAM,GACrB;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE;IAO3B;;OAEG;WACU,eAAe,CAAC,IAAI,EAAE,GAAG,GAAG,OAAO,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAUnF;;;OAGG;WACU,gBAAgB,CAC3B,MAAM,EAAE,gBAAgB,EACxB,IAAI,EAAE,GAAG,GACR,OAAO,CAAC,MAAM,EAAE,CAAC;IA0DpB;;;OAGG;WACU,aAAa,CACxB,MAAM,EAAE,gBAAgB,EACxB,IAAI,EAAE,GAAG,GACR,OAAO,CAAC,IAAI,CAAC;CAmDjB"}

package/dist/utils/coordinate-converter.js ADDED Viewed

@@ -0,0 +1,130 @@
+"use strict";
+/**
+ * Coordinate Converter Utility
+ * Converts percentage-based coordinates to pixel coordinates and generates Playwright commands
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.CoordinateConverter = void 0;
+class CoordinateConverter {
+    /**
+     * Convert percentage coordinates to actual pixel coordinates
+     *
+     * Each axis is (percent / 100) * dimension, rounded with Math.round.
+     * There is no validation or clamping: percentages outside 0-100 produce
+     * coordinates outside the viewport, and non-numeric input produces NaN.
+     *
+     * @param xPercent Horizontal position as a percentage of viewportWidth
+     * @param yPercent Vertical position as a percentage of viewportHeight
+     * @param viewportWidth Viewport width in pixels
+     * @param viewportHeight Viewport height in pixels
+     * @returns Object with integer pixel coordinates x and y
+     */
+    static percentToPixels(xPercent, yPercent, viewportWidth, viewportHeight) {
+        return {
+            x: Math.round((xPercent / 100) * viewportWidth),
+            y: Math.round((yPercent / 100) * viewportHeight)
+        };
+    }
+    /**
+     * Get viewport dimensions from page
+     *
+     * Runs a callback through page.evaluate that reads window.innerWidth and
+     * window.innerHeight (reached via globalThis.window).
+     *
+     * @param page Object exposing an async evaluate(fn) method (presumably a
+     *   Playwright Page - the type is not enforced here)
+     * @returns Promise resolving to { width, height }
+     */
+    static async getViewportSize(page) {
+        return await page.evaluate(() => {
+            const win = globalThis.window;
+            return {
+                width: win.innerWidth,
+                height: win.innerHeight
+            };
+        });
+    }
+    /**
+     * Convert coordinate action with percentages to Playwright commands
+     * Returns array of command strings
+     *
+     * The viewport is queried once and the resulting pixel values are written
+     * into the command strings as literals, so the output reflects the viewport
+     * size at the time of this call. Nothing other than the viewport query is
+     * run against the page.
+     *
+     * Commands emitted per action.action:
+     *   click       - page.mouse.click(x, y)
+     *   doubleClick - page.mouse.dblclick(x, y)
+     *   rightClick  - page.mouse.click(x, y, { button: 'right' })
+     *   hover       - page.mouse.move(x, y)
+     *   drag        - move to (x, y), mouse down, move to target, mouse up
+     *   fill        - click (x, y), waitForTimeout(100), keyboard.type(value)
+     *                 with the value embedded via JSON.stringify
+     *   scroll      - move to (x, y), then mouse.wheel(0, scrollAmount);
+     *                 scrollAmount falls back to 100 when falsy (including 0)
+     *
+     * @param action Coordinate action with xPercent, yPercent, action and the
+     *   optional toXPercent, toYPercent, value, scrollAmount fields read below
+     * @param page Page object, used here only to obtain the viewport size
+     * @returns Promise resolving to an array of command strings
+     * @throws Error if a drag lacks toXPercent or toYPercent, if a fill has a
+     *   falsy value (an empty string is rejected too), or if action.action is
+     *   not one of the names listed above
+     */
+    static async generateCommands(action, page) {
+        const viewport = await this.getViewportSize(page);
+        const { x, y } = this.percentToPixels(action.xPercent, action.yPercent, viewport.width, viewport.height);
+        const commands = [];
+        switch (action.action) {
+            case 'click':
+                commands.push(`await page.mouse.click(${x}, ${y});`);
+                break;
+            case 'doubleClick':
+                commands.push(`await page.mouse.dblclick(${x}, ${y});`);
+                break;
+            case 'rightClick':
+                commands.push(`await page.mouse.click(${x}, ${y}, { button: 'right' });`);
+                break;
+            case 'hover':
+                commands.push(`await page.mouse.move(${x}, ${y});`);
+                break;
+            case 'drag':
+                if (action.toXPercent === undefined || action.toYPercent === undefined) {
+                    throw new Error('Drag action requires toXPercent and toYPercent');
+                }
+                const to = this.percentToPixels(action.toXPercent, action.toYPercent, viewport.width, viewport.height);
+                commands.push(`await page.mouse.move(${x}, ${y});`);
+                commands.push(`await page.mouse.down();`);
+                commands.push(`await page.mouse.move(${to.x}, ${to.y});`);
+                commands.push(`await page.mouse.up();`);
+                break;
+            case 'fill':
+                if (!action.value) {
+                    throw new Error('Fill action requires value');
+                }
+                // Click to focus, wait briefly, then type
+                commands.push(`await page.mouse.click(${x}, ${y});`);
+                commands.push(`await page.waitForTimeout(100);`);
+                commands.push(`await page.keyboard.type(${JSON.stringify(action.value)});`);
+                break;
+            case 'scroll':
+                const scrollAmount = action.scrollAmount || 100;
+                // Move to position, then scroll
+                commands.push(`await page.mouse.move(${x}, ${y});`);
+                commands.push(`await page.mouse.wheel(0, ${scrollAmount});`);
+                break;
+            default:
+                throw new Error(`Unknown coordinate action: ${action.action}`);
+        }
+        return commands;
+    }
+    /**
+     * Execute coordinate action directly on page
+     * Used during agent execution (converts and runs immediately)
+     *
+     * Issues the same mouse/keyboard sequence per action type that
+     * generateCommands emits as strings, but calls it on the page. The switch
+     * is duplicated rather than shared, so the two methods must be kept in
+     * sync by hand; note the error messages differ slightly between them.
+     *
+     * @param action Coordinate action with xPercent, yPercent, action and the
+     *   optional toXPercent, toYPercent, value, scrollAmount fields read below
+     * @param page Page object exposing evaluate, mouse, keyboard and
+     *   waitForTimeout
+     * @returns Promise that resolves after the last awaited page call
+     * @throws Error if a drag lacks toXPercent or toYPercent, if a fill has a
+     *   falsy value (an empty string is rejected too), or if action.action is
+     *   not a supported name; errors from the page calls are not caught here
+     */
+    static async executeAction(action, page) {
+        const viewport = await this.getViewportSize(page);
+        const { x, y } = this.percentToPixels(action.xPercent, action.yPercent, viewport.width, viewport.height);
+        switch (action.action) {
+            case 'click':
+                await page.mouse.click(x, y);
+                break;
+            case 'doubleClick':
+                await page.mouse.dblclick(x, y);
+                break;
+            case 'rightClick':
+                await page.mouse.click(x, y, { button: 'right' });
+                break;
+            case 'hover':
+                await page.mouse.move(x, y);
+                break;
+            case 'drag':
+                if (action.toXPercent === undefined || action.toYPercent === undefined) {
+                    throw new Error('Drag requires toXPercent and toYPercent');
+                }
+                const to = this.percentToPixels(action.toXPercent, action.toYPercent, viewport.width, viewport.height);
+                await page.mouse.move(x, y);
+                await page.mouse.down();
+                await page.mouse.move(to.x, to.y);
+                await page.mouse.up();
+                break;
+            case 'fill':
+                if (!action.value) {
+                    throw new Error('Fill requires value');
+                }
+                await page.mouse.click(x, y);
+                await page.waitForTimeout(100);
+                await page.keyboard.type(action.value);
+                break;
+            case 'scroll':
+                const scrollAmount = action.scrollAmount || 100;
+                await page.mouse.move(x, y);
+                await page.mouse.wheel(0, scrollAmount);
+                break;
+            default:
+                throw new Error(`Unknown coordinate action: ${action.action}`);
+        }
+    }
+}
+exports.CoordinateConverter = CoordinateConverter;
+//# sourceMappingURL=coordinate-converter.js.map

package/dist/utils/coordinate-converter.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"coordinate-converter.js","sourceRoot":"","sources":["../../src/utils/coordinate-converter.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAIH,MAAa,mBAAmB;IAE9B;;OAEG;IACH,MAAM,CAAC,eAAe,CACpB,QAAgB,EAChB,QAAgB,EAChB,aAAqB,EACrB,cAAsB;QAEtB,OAAO;YACL,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,QAAQ,GAAG,GAAG,CAAC,GAAG,aAAa,CAAC;YAC/C,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,QAAQ,GAAG,GAAG,CAAC,GAAG,cAAc,CAAC;SACjD,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,KAAK,CAAC,eAAe,CAAC,IAAS;QACpC,OAAO,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAsC,EAAE;YACjE,MAAM,GAAG,GAAI,UAAkB,CAAC,MAAM,CAAC;YACvC,OAAO;gBACL,KAAK,EAAE,GAAG,CAAC,UAAoB;gBAC/B,MAAM,EAAE,GAAG,CAAC,WAAqB;aAClC,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,KAAK,CAAC,gBAAgB,CAC3B,MAAwB,EACxB,IAAS;QAET,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;QAClD,MAAM,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEzG,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,QAAQ,MAAM,CAAC,MAAM,EAAE,CAAC;YACtB,KAAK,OAAO;gBACV,QAAQ,CAAC,IAAI,CAAC,0BAA0B,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACrD,MAAM;YAER,KAAK,aAAa;gBAChB,QAAQ,CAAC,IAAI,CAAC,6BAA6B,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACxD,MAAM;YAER,KAAK,YAAY;gBACf,QAAQ,CAAC,IAAI,CAAC,0BAA0B,CAAC,KAAK,CAAC,yBAAyB,CAAC,CAAC;gBAC1E,MAAM;YAER,KAAK,OAAO;gBACV,QAAQ,CAAC,IAAI,CAAC,yBAAyB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACpD,MAAM;YAER,KAAK,MAAM;gBACT,IAAI,MAAM,CAAC,UAAU,KAAK,SAAS,IAAI,MAAM,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;oBACvE,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;gBACpE,CAAC;gBACD,MAAM,EAAE,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;gBACvG,QAAQ,CAAC,IAAI,CAAC,yBAAyB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACpD,QAAQ,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;gBAC1C,QAAQ,CAAC,IAAI,CAAC,yBAAyB,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;gBAC1D,QAAQ,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;gBACxC,MAAM;YAER,KAAK,MAAM;gBACT,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;oBAClB,MAAM,IAAI,KAAK,CAAC
,4BAA4B,CAAC,CAAC;gBAChD,CAAC;gBACD,0CAA0C;gBAC1C,QAAQ,CAAC,IAAI,CAAC,0BAA0B,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACrD,QAAQ,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;gBACjD,QAAQ,CAAC,IAAI,CAAC,4BAA4B,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC5E,MAAM;YAER,KAAK,QAAQ;gBACX,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,GAAG,CAAC;gBAChD,gCAAgC;gBAChC,QAAQ,CAAC,IAAI,CAAC,yBAAyB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACpD,QAAQ,CAAC,IAAI,CAAC,6BAA6B,YAAY,IAAI,CAAC,CAAC;gBAC7D,MAAM;YAER;gBACE,MAAM,IAAI,KAAK,CAAC,8BAA8B,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QACnE,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,KAAK,CAAC,aAAa,CACxB,MAAwB,EACxB,IAAS;QAET,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;QAClD,MAAM,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEzG,QAAQ,MAAM,CAAC,MAAM,EAAE,CAAC;YACtB,KAAK,OAAO;gBACV,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC7B,MAAM;YAER,KAAK,aAAa;gBAChB,MAAM,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAChC,MAAM;YAER,KAAK,YAAY;gBACf,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC;gBAClD,MAAM;YAER,KAAK,OAAO;gBACV,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC5B,MAAM;YAER,KAAK,MAAM;gBACT,IAAI,MAAM,CAAC,UAAU,KAAK,SAAS,IAAI,MAAM,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;oBACvE,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;gBAC7D,CAAC;gBACD,MAAM,EAAE,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;gBACvG,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC5B,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;gBACxB,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;gBAClC,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,EAAE,CAAC;gBACtB,MAAM;YAER,KAAK,MAAM;gBACT,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;oBAClB,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;gBACzC,CAAC;gBACD,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC7B
,MAAM,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;gBAC/B,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBACvC,MAAM;YAER,KAAK,QAAQ;gBACX,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,GAAG,CAAC;gBAChD,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC5B,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC;gBACxC,MAAM;YAER;gBACE,MAAM,IAAI,KAAK,CAAC,8BAA8B,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QACnE,CAAC;IACH,CAAC;CACF;AAzJD,kDAyJC"}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "testchimp-runner-core",
-  "version": "0.0.32",
+  "version": "0.0.34",
   "description": "Core TestChimp functionality for test generation and AI repair",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",

package/plandocs/BEFORE_AFTER_VERIFICATION.md ADDED Viewed

@@ -0,0 +1,148 @@
+# Before/After Screenshot Verification
+## Feature: Visual Goal Verification for Coordinate Actions
+### Problem Solved:
+When using coordinate-based actions (clicking at x,y%), the agent has no way to know if the click achieved the goal:
+- No element reference to check state
+- No selector feedback
+- Can't verify if expected page loaded or modal opened
+This led to:
+- False positives (click succeeded but goal not achieved)
+- Infinite loops (agent keeps clicking, unsure if it worked)
+### Solution:
+Automatic before/after screenshot comparison after coordinate clicks.
+## How It Works:
+### 1. **Automatic Trigger** (No Agent Action Required)
+When agent uses coordinate action:
+```text
+Iteration 4: 🎯 Coordinate mode activated
+  Step 1: Capture BEFORE screenshot
+  Step 2: Execute coordinate click (x%, y%)
+  Step 3: Wait 1000ms for UI to settle
+  Step 4: Capture AFTER screenshot
+  Step 5: Call LLM with both images (labeled "BEFORE", "AFTER")
+  Step 6: LLM responds: { goalAchieved: true/false, reasoning: "..." }
+  Step 7a: If TRUE → Mark complete, exit step ✅
+  Step 7b: If FALSE → Continue to next iteration, try different coordinates
+```
+### 2. **LLM Prompt for Verification**
+```
+Goal: [Current step goal]
+Compare the BEFORE and AFTER screenshots.
+Did the action achieve the goal? Respond with JSON:
+{
+  "goalAchieved": boolean,
+  "reasoning": "What changed (or didn't change)",
+  "visibleChanges": ["List of UI changes observed"]
+}
+Focus on:
+- Did expected elements appear/disappear?
+- Did page navigate or content change?
+- Visual indicators of success (new panels, forms, highlights)?
+Be strict: Only return true if you clearly see the expected change.
+```
+### 3. **Multi-Image LLM Interface**
+```typescript
+// NEW: LabeledImage interface
+export interface LabeledImage {
+  label: string;      // "Before", "After", etc.
+  dataUrl: string;    // Base64 data URL
+}
+// UPDATED: LLMRequest
+export interface LLMRequest {
+  imageUrl?: string;         // Backward compatible (single image)
+  images?: LabeledImage[];   // NEW - multi-image support
+}
+```
+### 4. **Provider Implementation** (scriptservice-llm-provider.ts)
+```typescript
+if (request.images && request.images.length > 0) {
+  for (const img of request.images) {
+    contentParts.push({ type: 'text', text: `\n[${img.label}]:` });
+    contentParts.push({ type: 'image_url', image_url: { url: img.dataUrl } });
+  }
+  // Sends: [BEFORE]: <image1>, [AFTER]: <image2>
+}
+```
+## When Verification Happens:
+✅ **Always**: After first coordinate action attempt
+❌ **Never**: After selector-based actions (have element state to check)
+⚠️ **Conditional**: Can add for other scenarios where goal verification is unclear
+## Cost Considerations:
+**Per verification call:**
+- 2 viewport screenshots (~50-100KB each)
+- Vision model (gpt-5-mini): ~$0.001 per call
+- Used only when coordinate mode activates (after 3 selector failures)
+**Typical scenario:**
+- Steps resolved with regular selectors (the common case) → No verification cost
+- If one step (e.g. Step 5) gets stuck → Coordinate mode → 1 verification call → ~$0.001
+- Overall impact: Minimal, used sparingly
+## Example Flow:
+**Step 5: "Select Employee Information"**
+```
+Iteration 1: getByText('Employee Information') → Strict mode ❌
+Iteration 2: locator('#collapse-1').getByText('Employee Information') → Click succeeds ✅
+           BUT: Didn't navigate to Employee Information page (false positive)
+Iteration 3: Selector fails again
+Iteration 4: 🎯 Coordinate mode
+  → BEFORE: Homepage with sidebar
+  → Click at (19.3%, 22.9%)
+  → Wait 1s
+  → AFTER: Check screenshot
+  → LLM: "goalAchieved": true, "reasoning": "Employee Information page loaded with form"
+  → ✅ Mark complete, exit
+```
+## Backward Compatibility:
+✅ **Single image still works:**
+```typescript
+const request = {
+  imageUrl: 'data:image/png;base64,...'  // Old way
+};
+```
+✅ **Multi-image NEW:**
+```typescript
+const request = {
+  images: [
+    { label: 'BEFORE', dataUrl: '...' },
+    { label: 'AFTER', dataUrl: '...' }
+  ]
+};
+```
+## Files Modified:
+1. `runner-core/src/llm-provider.ts` - Added LabeledImage interface and images field
+2. `scriptservice/providers/scriptservice-llm-provider.ts` - Handle multiple images in OpenAI API
+3. `runner-core/src/orchestrator/orchestrator-agent.ts` - Added verifyGoalWithScreenshotComparison method
+4. Automatic trigger after coordinate actions
+## Next Steps:
+- ✅ Infrastructure ready
+- ⏳ Need to test with real scenario
+- 🔮 Future: Could expose as agent-callable tool if needed

package/plandocs/COORDINATE_MODE_DIAGNOSIS.md ADDED Viewed

@@ -0,0 +1,144 @@
+# Coordinate Mode Diagnosis - Live Test Results
+## Test Scenario: PeopleHR Employee Information Flow
+### ✅ What Worked:
+1. **Coordinate fallback DID activate** (after fix from >= 3 to >= 5)
+2. **Agent successfully used coordinates** at (87.5%, 23.438%)
+3. **Physical clicks succeeded** - page.mouse.click(1120, 169)
+4. **Agent learned** to stick with coordinates after selectors failed
+### ❌ What Didn't Work:
+**Agent hit max iterations (8) without marking "complete"**
+## Detailed Step 6 Flow:
+```
+Iteration 1: Selector attempt → Timeout ❌
+Iteration 2: Selector attempt → Timeout ❌
+Iteration 3: Selector attempt → Timeout ❌
+Iteration 4: 🎯 COORDINATE MODE → Click (87.5%, 23.438%) → ✅ Success
+Iteration 5: Repeat coordinate → ✅ Success
+Iteration 6: Repeat coordinate → ✅ Success (?)
+Iteration 7: Repeat coordinate → ✅ Success
+Iteration 8: Repeat coordinate → ✅ Success
+Result: ⚠️ Max iterations → system_limit
+```
+## Root Cause Analysis:
+### Problem: **No Goal Verification After Coordinate Success**
+**With selectors:**
+```typescript
+await page.getByRole('button').click();
+// Can verify: await expect(button).toHaveAttribute('aria-pressed', 'true')
+// Can check: New elements appeared, URL changed, etc.
+```
+**With coordinates:**
+```typescript
+await page.mouse.click(1120, 169);
+// ❓ Did it work? No element reference!
+// ❓ How to verify? Can't check button state
+// ❓ What changed? Need to inspect DOM/screenshot
+```
+### Why Agent Kept Retrying:
+**Agent's reasoning (iterations 5-8):**
+- "Coordinate click succeeded (executed without error)"
+- "But I don't know if goal was achieved"
+- "Step says 'Click on New' - did the New form open?"
+- "I should try again to be sure..."
+- → **Loops until max iterations**
+## Solutions to Consider:
+### Option 1: **Trust Coordinate Success** (Simple)
+After coordinate click succeeds:
+- Wait 500ms for UI response
+- Mark status="complete" automatically
+- Assume click worked (trust the coordinates)
+```typescript
+if (coordinateAction && coordResult.allSucceeded) {
+  await page.waitForTimeout(500); // Let UI respond
+  return { status: 'complete', reasoning: 'Coordinate click succeeded' };
+}
+```
+**Pros**: Simple, fast
+**Cons**: No verification of actual goal achievement
+### Option 2: **Visual Verification** (Better)
+After coordinate click:
+- Wait 500ms
+- Take screenshot
+- Compare before/after
+- If changed → complete, else → retry with different coords
+```typescript
+const beforeScreenshot = await page.screenshot();
+await page.mouse.click(x, y);
+await page.waitForTimeout(500);
+const afterScreenshot = await page.screenshot();
+if (screenshotsAreDifferent(beforeScreenshot, afterScreenshot)) {
+  return { status: 'complete' };
+}
+```
+**Pros**: Validates something changed
+**Cons**: Slower, more LLM calls
+### Option 3: **DOM Change Detection** (Balanced)
+After coordinate click:
+- Capture DOM snapshot before
+- Click coordinates
+- Capture DOM snapshot after
+- If new elements/navigation → complete
+```typescript
+const beforeUrl = page.url();
+const beforeElements = await getEnhancedPageInfo(page);
+await page.mouse.click(x, y);
+await page.waitForTimeout(500);
+const afterUrl = page.url();
+const afterElements = await getEnhancedPageInfo(page);
+if (afterUrl !== beforeUrl || afterElements.count !== beforeElements.count) {
+  return { status: 'complete', reasoning: 'Page state changed after coordinate click' };
+}
+```
+**Pros**: Fast, objective verification
+**Cons**: Might miss subtle changes (modal opens without URL/element count change)
+### Option 4: **Prompt Guidance** (Immediate)
+Update prompt to tell agent:
+"After coordinate click succeeds, mark status='complete' unless you can clearly verify it failed"
+**Pros**: No code changes
+**Cons**: Relies on LLM judgment
+## Recommendation:
+**Hybrid approach:**
+1. **Immediate** (Prompt): Tell agent to trust coordinate success
+2. **Phase 2** (Code): Add DOM change detection for validation
+## Current Status:
+- ✅ Coordinate fallback works technically
+- ✅ Physical clicks succeed
+- ❌ Agent doesn't know when to stop
+- 🔧 Need completion detection logic
+## Test Results Summary:
+**Steps 1-5**: ✅ All completed successfully
+**Step 6**: ⚠️ Coordinates worked but hit max iterations (no completion detection)
+**Overall**: Coordinate mode is functional but needs completion logic

package/plandocs/IMPLEMENTATION_STATUS.md ADDED Viewed

@@ -0,0 +1,108 @@
+# Runner-Core Visual Agent Implementation Status
+## Phase 1: ✅ COMPLETE (v0.0.33)
+### Implemented Features:
+1. **Note to Future Self** - Tactical iteration memory
+2. **Percentage-Based Coordinates** - Last-resort fallback with 3-decimal precision
+3. **Two-Tier Auto-Escalation** - Code-controlled mode switching
+### Current Behavior (Phase 1):
+```
+Iteration 1-3: Normal Playwright selectors + note-to-self (3 attempts)
+    ↓ (after 3 failures)
+Iteration 4-5: Percentage coordinates (2 attempts max)
+    ↓ (if both coordinate attempts fail)
+Give up - mark as stuck
+Total: Maximum 5 iterations per step
+```
+---
+## Phase 2: 📋 PLANNED (Not Started)
+### Will Add:
+1. **ElementDetector** - Detect interactive elements with z-index awareness
+2. **VisualMarkerInjector** - Number elements [1], [2], [3] on screenshot
+3. **SelectorResolver** - Translate index → native Playwright selector
+4. **IndexCommandTranslator** - Convert CLICK[3] → native Playwright command
+### Future Behavior (Phase 2):
+```
+Iteration 1: Playwright selector (1 attempt) → 70% success
+    ↓ (on first failure)
+Iteration 2-3: Index commands CLICK[3] (2 attempts) → 25% success
+    ↓ (after 3 total failures)
+Iteration 4-5: Percentage coordinates (2 attempts max) → 5% success
+    ↓ (if all fail)
+Give up - mark as stuck
+Total: Maximum 5 iterations per step (down from 8)
+Average: ~1.5 iterations per step (fast!)
+```
+### Key Design Principle for Phase 2:
+**During Execution:**
+- Agent clicks using `data-testchimp-el="[3]"` (reliable, we inject it)
+**In Generated Script:**
+- Translator outputs NATIVE selector: `getByRole('button', {name: 'Menu'})`
+- Script works standalone without data-testchimp-el
+**Why Two-Stage:**
+1. Agent needs reliability during exploration → use data attribute
+2. Generated script must be portable → use native selectors
+3. Best of both worlds: reliable execution + maintainable output
+---
+## Optimizations vs Original Plan
+### Original Plan:
+- Tier 1: iterations 1-2
+- Tier 2: iterations 3-4
+- Tier 3: iterations 5+
+- Average: ~4 iterations per step
+### Optimized Plan (Current):
+- Tier 1: iteration 1 ONLY (fast path)
+- Tier 2: iterations 2-3 (reliable fallback)
+- Tier 3: iterations 4+ (absolute last resort)
+- **Target: ~1.5 average iterations per step**
+**Rationale:** Don't waste time! Simple tasks finish in 1 iteration, complex tasks escalate quickly to more reliable methods.
+---
+## Testing Checklist
+### Phase 1 (Ready Now):
+- [ ] Run PeopleHR scenario - verify note-to-self helps
+- [ ] Test coordinate fallback on deliberately difficult case
+- [ ] Measure iteration reduction (expect 20-30%)
+- [ ] Verify timeout fixes for waitForLoadState
+### Phase 2 (When Implemented):
+- [ ] Test ElementDetector on modals/overlays
+- [ ] Verify z-index occlusion detection
+- [ ] Validate native selector generation (no data-testchimp-el in output)
+- [ ] Run generated scripts standalone - must work!
+- [ ] Measure tier distribution: 70/25/5
+---
+## Current Version
+**Runner-Core:** v0.0.33
+**Status:** Built and ready to test
+**Phase 1:** ✅ Complete
+**Phase 2:** 📋 Planned but not started
+**Next Step:** Test Phase 1 with PeopleHR scenario to validate improvements before implementing Phase 2.