testchimp-runner-core 0.0.32 → 0.0.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/dist/llm-facade.d.ts.map +1 -1
  2. package/dist/llm-facade.js +7 -7
  3. package/dist/llm-facade.js.map +1 -1
  4. package/dist/llm-provider.d.ts +9 -0
  5. package/dist/llm-provider.d.ts.map +1 -1
  6. package/dist/model-constants.d.ts +16 -5
  7. package/dist/model-constants.d.ts.map +1 -1
  8. package/dist/model-constants.js +17 -6
  9. package/dist/model-constants.js.map +1 -1
  10. package/dist/orchestrator/index.d.ts +1 -1
  11. package/dist/orchestrator/index.d.ts.map +1 -1
  12. package/dist/orchestrator/index.js +3 -2
  13. package/dist/orchestrator/index.js.map +1 -1
  14. package/dist/orchestrator/orchestrator-agent.d.ts +0 -8
  15. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  16. package/dist/orchestrator/orchestrator-agent.js +206 -405
  17. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  18. package/dist/orchestrator/orchestrator-prompts.d.ts +20 -0
  19. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -0
  20. package/dist/orchestrator/orchestrator-prompts.js +455 -0
  21. package/dist/orchestrator/orchestrator-prompts.js.map +1 -0
  22. package/dist/orchestrator/tools/index.d.ts +2 -1
  23. package/dist/orchestrator/tools/index.d.ts.map +1 -1
  24. package/dist/orchestrator/tools/index.js +4 -2
  25. package/dist/orchestrator/tools/index.js.map +1 -1
  26. package/dist/orchestrator/tools/verify-action-result.d.ts +17 -0
  27. package/dist/orchestrator/tools/verify-action-result.d.ts.map +1 -0
  28. package/dist/orchestrator/tools/verify-action-result.js +140 -0
  29. package/dist/orchestrator/tools/verify-action-result.js.map +1 -0
  30. package/dist/orchestrator/types.d.ts +26 -0
  31. package/dist/orchestrator/types.d.ts.map +1 -1
  32. package/dist/orchestrator/types.js.map +1 -1
  33. package/dist/prompts.d.ts.map +1 -1
  34. package/dist/prompts.js +87 -37
  35. package/dist/prompts.js.map +1 -1
  36. package/dist/scenario-worker-class.d.ts.map +1 -1
  37. package/dist/scenario-worker-class.js +4 -1
  38. package/dist/scenario-worker-class.js.map +1 -1
  39. package/dist/utils/coordinate-converter.d.ts +32 -0
  40. package/dist/utils/coordinate-converter.d.ts.map +1 -0
  41. package/dist/utils/coordinate-converter.js +130 -0
  42. package/dist/utils/coordinate-converter.js.map +1 -0
  43. package/package.json +1 -1
  44. package/plandocs/BEFORE_AFTER_VERIFICATION.md +148 -0
  45. package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +144 -0
  46. package/plandocs/IMPLEMENTATION_STATUS.md +108 -0
  47. package/plandocs/PHASE_1_COMPLETE.md +165 -0
  48. package/plandocs/PHASE_1_SUMMARY.md +184 -0
  49. package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +120 -0
  50. package/plandocs/PROMPT_SANITY_CHECK.md +120 -0
  51. package/plandocs/SESSION_SUMMARY_v0.0.33.md +151 -0
  52. package/plandocs/TROUBLESHOOTING_SESSION.md +72 -0
  53. package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +396 -0
  54. package/plandocs/WHATS_NEW_v0.0.33.md +183 -0
  55. package/src/llm-facade.ts +8 -8
  56. package/src/llm-provider.ts +11 -1
  57. package/src/model-constants.ts +17 -5
  58. package/src/orchestrator/index.ts +3 -2
  59. package/src/orchestrator/orchestrator-agent.ts +249 -424
  60. package/src/orchestrator/orchestrator-agent.ts.backup +1386 -0
  61. package/src/orchestrator/orchestrator-prompts.ts +474 -0
  62. package/src/orchestrator/tools/index.ts +2 -1
  63. package/src/orchestrator/tools/verify-action-result.ts +159 -0
  64. package/src/orchestrator/types.ts +48 -0
  65. package/src/prompts.ts +87 -37
  66. package/src/scenario-worker-class.ts +7 -2
  67. package/src/utils/coordinate-converter.ts +162 -0
  68. package/testchimp-runner-core-0.0.33.tgz +0 -0
  69. /package/{CREDIT_CALLBACK_ARCHITECTURE.md → plandocs/CREDIT_CALLBACK_ARCHITECTURE.md} +0 -0
  70. /package/{INTEGRATION_COMPLETE.md → plandocs/INTEGRATION_COMPLETE.md} +0 -0
  71. /package/{VISION_DIAGNOSTICS_IMPROVEMENTS.md → plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md} +0 -0
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Coordinate Converter Utility
3
+ * Converts percentage-based coordinates to pixel coordinates and generates Playwright commands
4
+ */
5
+ import { CoordinateAction } from '../orchestrator/types';
6
+ export declare class CoordinateConverter {
7
+ /**
8
+ * Convert percentage coordinates to actual pixel coordinates
9
+ */
10
+ static percentToPixels(xPercent: number, yPercent: number, viewportWidth: number, viewportHeight: number): {
11
+ x: number;
12
+ y: number;
13
+ };
14
+ /**
15
+ * Get viewport dimensions from page
16
+ */
17
+ static getViewportSize(page: any): Promise<{
18
+ width: number;
19
+ height: number;
20
+ }>;
21
+ /**
22
+ * Convert coordinate action with percentages to Playwright commands
23
+ * Returns array of command strings
24
+ */
25
+ static generateCommands(action: CoordinateAction, page: any): Promise<string[]>;
26
+ /**
27
+ * Execute coordinate action directly on page
28
+ * Used during agent execution (converts and runs immediately)
29
+ */
30
+ static executeAction(action: CoordinateAction, page: any): Promise<void>;
31
+ }
32
+ //# sourceMappingURL=coordinate-converter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"coordinate-converter.d.ts","sourceRoot":"","sources":["../../src/utils/coordinate-converter.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAEzD,qBAAa,mBAAmB;IAE9B;;OAEG;IACH,MAAM,CAAC,eAAe,CACpB,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,MAAM,GACrB;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE;IAO3B;;OAEG;WACU,eAAe,CAAC,IAAI,EAAE,GAAG,GAAG,OAAO,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAUnF;;;OAGG;WACU,gBAAgB,CAC3B,MAAM,EAAE,gBAAgB,EACxB,IAAI,EAAE,GAAG,GACR,OAAO,CAAC,MAAM,EAAE,CAAC;IA0DpB;;;OAGG;WACU,aAAa,CACxB,MAAM,EAAE,gBAAgB,EACxB,IAAI,EAAE,GAAG,GACR,OAAO,CAAC,IAAI,CAAC;CAmDjB"}
@@ -0,0 +1,130 @@
1
+ "use strict";
2
+ /**
3
+ * Coordinate Converter Utility
4
+ * Converts percentage-based coordinates to pixel coordinates and generates Playwright commands
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.CoordinateConverter = void 0;
8
+ class CoordinateConverter {
9
+ /**
10
+ * Convert percentage coordinates to actual pixel coordinates
11
+ */
12
+ static percentToPixels(xPercent, yPercent, viewportWidth, viewportHeight) {
13
+ return {
14
+ x: Math.round((xPercent / 100) * viewportWidth),
15
+ y: Math.round((yPercent / 100) * viewportHeight)
16
+ };
17
+ }
18
+ /**
19
+ * Get viewport dimensions from page
20
+ */
21
+ static async getViewportSize(page) {
22
+ return await page.evaluate(() => {
23
+ const win = globalThis.window;
24
+ return {
25
+ width: win.innerWidth,
26
+ height: win.innerHeight
27
+ };
28
+ });
29
+ }
30
+ /**
31
+ * Convert coordinate action with percentages to Playwright commands
32
+ * Returns array of command strings
33
+ */
34
+ static async generateCommands(action, page) {
35
+ const viewport = await this.getViewportSize(page);
36
+ const { x, y } = this.percentToPixels(action.xPercent, action.yPercent, viewport.width, viewport.height);
37
+ const commands = [];
38
+ switch (action.action) {
39
+ case 'click':
40
+ commands.push(`await page.mouse.click(${x}, ${y});`);
41
+ break;
42
+ case 'doubleClick':
43
+ commands.push(`await page.mouse.dblclick(${x}, ${y});`);
44
+ break;
45
+ case 'rightClick':
46
+ commands.push(`await page.mouse.click(${x}, ${y}, { button: 'right' });`);
47
+ break;
48
+ case 'hover':
49
+ commands.push(`await page.mouse.move(${x}, ${y});`);
50
+ break;
51
+ case 'drag':
52
+ if (action.toXPercent === undefined || action.toYPercent === undefined) {
53
+ throw new Error('Drag action requires toXPercent and toYPercent');
54
+ }
55
+ const to = this.percentToPixels(action.toXPercent, action.toYPercent, viewport.width, viewport.height);
56
+ commands.push(`await page.mouse.move(${x}, ${y});`);
57
+ commands.push(`await page.mouse.down();`);
58
+ commands.push(`await page.mouse.move(${to.x}, ${to.y});`);
59
+ commands.push(`await page.mouse.up();`);
60
+ break;
61
+ case 'fill':
62
+ if (!action.value) {
63
+ throw new Error('Fill action requires value');
64
+ }
65
+ // Click to focus, wait briefly, then type
66
+ commands.push(`await page.mouse.click(${x}, ${y});`);
67
+ commands.push(`await page.waitForTimeout(100);`);
68
+ commands.push(`await page.keyboard.type(${JSON.stringify(action.value)});`);
69
+ break;
70
+ case 'scroll':
71
+ const scrollAmount = action.scrollAmount || 100;
72
+ // Move to position, then scroll
73
+ commands.push(`await page.mouse.move(${x}, ${y});`);
74
+ commands.push(`await page.mouse.wheel(0, ${scrollAmount});`);
75
+ break;
76
+ default:
77
+ throw new Error(`Unknown coordinate action: ${action.action}`);
78
+ }
79
+ return commands;
80
+ }
81
+ /**
82
+ * Execute coordinate action directly on page
83
+ * Used during agent execution (converts and runs immediately)
84
+ */
85
+ static async executeAction(action, page) {
86
+ const viewport = await this.getViewportSize(page);
87
+ const { x, y } = this.percentToPixels(action.xPercent, action.yPercent, viewport.width, viewport.height);
88
+ switch (action.action) {
89
+ case 'click':
90
+ await page.mouse.click(x, y);
91
+ break;
92
+ case 'doubleClick':
93
+ await page.mouse.dblclick(x, y);
94
+ break;
95
+ case 'rightClick':
96
+ await page.mouse.click(x, y, { button: 'right' });
97
+ break;
98
+ case 'hover':
99
+ await page.mouse.move(x, y);
100
+ break;
101
+ case 'drag':
102
+ if (action.toXPercent === undefined || action.toYPercent === undefined) {
103
+ throw new Error('Drag requires toXPercent and toYPercent');
104
+ }
105
+ const to = this.percentToPixels(action.toXPercent, action.toYPercent, viewport.width, viewport.height);
106
+ await page.mouse.move(x, y);
107
+ await page.mouse.down();
108
+ await page.mouse.move(to.x, to.y);
109
+ await page.mouse.up();
110
+ break;
111
+ case 'fill':
112
+ if (!action.value) {
113
+ throw new Error('Fill requires value');
114
+ }
115
+ await page.mouse.click(x, y);
116
+ await page.waitForTimeout(100);
117
+ await page.keyboard.type(action.value);
118
+ break;
119
+ case 'scroll':
120
+ const scrollAmount = action.scrollAmount || 100;
121
+ await page.mouse.move(x, y);
122
+ await page.mouse.wheel(0, scrollAmount);
123
+ break;
124
+ default:
125
+ throw new Error(`Unknown coordinate action: ${action.action}`);
126
+ }
127
+ }
128
+ }
129
+ exports.CoordinateConverter = CoordinateConverter;
130
+ //# sourceMappingURL=coordinate-converter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"coordinate-converter.js","sourceRoot":"","sources":["../../src/utils/coordinate-converter.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAIH,MAAa,mBAAmB;IAE9B;;OAEG;IACH,MAAM,CAAC,eAAe,CACpB,QAAgB,EAChB,QAAgB,EAChB,aAAqB,EACrB,cAAsB;QAEtB,OAAO;YACL,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,QAAQ,GAAG,GAAG,CAAC,GAAG,aAAa,CAAC;YAC/C,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,QAAQ,GAAG,GAAG,CAAC,GAAG,cAAc,CAAC;SACjD,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,KAAK,CAAC,eAAe,CAAC,IAAS;QACpC,OAAO,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAsC,EAAE;YACjE,MAAM,GAAG,GAAI,UAAkB,CAAC,MAAM,CAAC;YACvC,OAAO;gBACL,KAAK,EAAE,GAAG,CAAC,UAAoB;gBAC/B,MAAM,EAAE,GAAG,CAAC,WAAqB;aAClC,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,KAAK,CAAC,gBAAgB,CAC3B,MAAwB,EACxB,IAAS;QAET,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;QAClD,MAAM,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEzG,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,QAAQ,MAAM,CAAC,MAAM,EAAE,CAAC;YACtB,KAAK,OAAO;gBACV,QAAQ,CAAC,IAAI,CAAC,0BAA0B,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACrD,MAAM;YAER,KAAK,aAAa;gBAChB,QAAQ,CAAC,IAAI,CAAC,6BAA6B,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACxD,MAAM;YAER,KAAK,YAAY;gBACf,QAAQ,CAAC,IAAI,CAAC,0BAA0B,CAAC,KAAK,CAAC,yBAAyB,CAAC,CAAC;gBAC1E,MAAM;YAER,KAAK,OAAO;gBACV,QAAQ,CAAC,IAAI,CAAC,yBAAyB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACpD,MAAM;YAER,KAAK,MAAM;gBACT,IAAI,MAAM,CAAC,UAAU,KAAK,SAAS,IAAI,MAAM,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;oBACvE,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;gBACpE,CAAC;gBACD,MAAM,EAAE,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;gBACvG,QAAQ,CAAC,IAAI,CAAC,yBAAyB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACpD,QAAQ,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;gBAC1C,QAAQ,CAAC,IAAI,CAAC,yBAAyB,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;gBAC1D,QAAQ,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;gBACxC,MAAM;YAER,KAAK,MAAM;gBACT,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;oBAClB,MAAM,IAAI,KAAK,CAAC
,4BAA4B,CAAC,CAAC;gBAChD,CAAC;gBACD,0CAA0C;gBAC1C,QAAQ,CAAC,IAAI,CAAC,0BAA0B,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACrD,QAAQ,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;gBACjD,QAAQ,CAAC,IAAI,CAAC,4BAA4B,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC5E,MAAM;YAER,KAAK,QAAQ;gBACX,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,GAAG,CAAC;gBAChD,gCAAgC;gBAChC,QAAQ,CAAC,IAAI,CAAC,yBAAyB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACpD,QAAQ,CAAC,IAAI,CAAC,6BAA6B,YAAY,IAAI,CAAC,CAAC;gBAC7D,MAAM;YAER;gBACE,MAAM,IAAI,KAAK,CAAC,8BAA8B,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QACnE,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,KAAK,CAAC,aAAa,CACxB,MAAwB,EACxB,IAAS;QAET,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;QAClD,MAAM,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEzG,QAAQ,MAAM,CAAC,MAAM,EAAE,CAAC;YACtB,KAAK,OAAO;gBACV,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC7B,MAAM;YAER,KAAK,aAAa;gBAChB,MAAM,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAChC,MAAM;YAER,KAAK,YAAY;gBACf,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC;gBAClD,MAAM;YAER,KAAK,OAAO;gBACV,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC5B,MAAM;YAER,KAAK,MAAM;gBACT,IAAI,MAAM,CAAC,UAAU,KAAK,SAAS,IAAI,MAAM,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;oBACvE,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;gBAC7D,CAAC;gBACD,MAAM,EAAE,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;gBACvG,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC5B,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;gBACxB,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;gBAClC,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,EAAE,CAAC;gBACtB,MAAM;YAER,KAAK,MAAM;gBACT,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;oBAClB,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;gBACzC,CAAC;gBACD,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC7B
,MAAM,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;gBAC/B,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBACvC,MAAM;YAER,KAAK,QAAQ;gBACX,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,GAAG,CAAC;gBAChD,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC5B,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC;gBACxC,MAAM;YAER;gBACE,MAAM,IAAI,KAAK,CAAC,8BAA8B,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QACnE,CAAC;IACH,CAAC;CACF;AAzJD,kDAyJC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "testchimp-runner-core",
3
- "version": "0.0.32",
3
+ "version": "0.0.34",
4
4
  "description": "Core TestChimp functionality for test generation and AI repair",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -0,0 +1,148 @@
1
+ # Before/After Screenshot Verification
2
+
3
+ ## Feature: Visual Goal Verification for Coordinate Actions
4
+
5
+ ### Problem Solved:
6
+ When using coordinate-based actions (clicking at x%, y% of the viewport), the agent has no way to know whether the click achieved the goal:
7
+ - No element reference to check state
8
+ - No selector feedback
9
+ - Can't verify if expected page loaded or modal opened
10
+
11
+ This led to:
12
+ - False positives (click succeeded but goal not achieved)
13
+ - Infinite loops (agent keeps clicking, unsure if it worked)
14
+
15
+ ### Solution:
16
+ Automatic before/after screenshot comparison after coordinate clicks.
17
+
18
+ ## How It Works:
19
+
20
+ ### 1. **Automatic Trigger** (No Agent Action Required)
21
+ When agent uses coordinate action:
22
+ ```text
23
+ Iteration 4: 🎯 Coordinate mode activated
24
+ Step 1: Capture BEFORE screenshot
25
+ Step 2: Execute coordinate click (x%, y%)
26
+ Step 3: Wait 1000ms for UI to settle
27
+ Step 4: Capture AFTER screenshot
28
+ Step 5: Call LLM with both images (labeled "BEFORE", "AFTER")
29
+ Step 6: LLM responds: { goalAchieved: true/false, reasoning: "..." }
30
+ Step 7a: If TRUE → Mark complete, exit step ✅
31
+ Step 7b: If FALSE → Continue to next iteration, try different coordinates
32
+ ```
33
+
34
+ ### 2. **LLM Prompt for Verification**
35
+ ```
36
+ Goal: [Current step goal]
37
+
38
+ Compare the BEFORE and AFTER screenshots.
39
+
40
+ Did the action achieve the goal? Respond with JSON:
41
+ {
42
+ "goalAchieved": boolean,
43
+ "reasoning": "What changed (or didn't change)",
44
+ "visibleChanges": ["List of UI changes observed"]
45
+ }
46
+
47
+ Focus on:
48
+ - Did expected elements appear/disappear?
49
+ - Did page navigate or content change?
50
+ - Visual indicators of success (new panels, forms, highlights)?
51
+
52
+ Be strict: Only return true if you clearly see the expected change.
53
+ ```
54
+
55
+ ### 3. **Multi-Image LLM Interface**
56
+ ```typescript
57
+ // NEW: LabeledImage interface
58
+ export interface LabeledImage {
59
+ label: string; // "Before", "After", etc.
60
+ dataUrl: string; // Base64 data URL
61
+ }
62
+
63
+ // UPDATED: LLMRequest
64
+ export interface LLMRequest {
65
+ imageUrl?: string; // Backward compatible (single image)
66
+ images?: LabeledImage[]; // NEW - multi-image support
67
+ }
68
+ ```
69
+
70
+ ### 4. **Provider Implementation** (scriptservice-llm-provider.ts)
71
+ ```typescript
72
+ if (request.images && request.images.length > 0) {
73
+ for (const img of request.images) {
74
+ contentParts.push({ type: 'text', text: `\n[${img.label}]:` });
75
+ contentParts.push({ type: 'image_url', image_url: { url: img.dataUrl } });
76
+ }
77
+ // Sends: [BEFORE]: <image1>, [AFTER]: <image2>
78
+ }
79
+ ```
80
+
81
+ ## When Verification Happens:
82
+
83
+ - ✅ **Always**: After the first coordinate action attempt
84
+ - ❌ **Never**: After selector-based actions (these have element state to check)
85
+ - ⚠️ **Conditional**: Can be added for other scenarios where goal verification is unclear
86
+
87
+ ## Cost Considerations:
88
+
89
+ **Per verification call:**
90
+ - 2 viewport screenshots (~50-100KB each)
91
+ - Vision model (gpt-5-mini): ~$0.001 per call
92
+ - Used only when coordinate mode activates (after 3 selector failures)
93
+
94
+ **Typical scenario:**
95
+ - Steps where regular selectors succeed (e.g. 9 of 10 steps) → No verification cost
96
+ - A step that gets stuck (e.g. Step 5) → Coordinate mode → 1 verification call → $0.001
97
+ - Overall impact: Minimal, used sparingly
98
+
99
+ ## Example Flow:
100
+
101
+ **Step 5: "Select Employee Information"**
102
+ ```
103
+ Iteration 1: getByText('Employee Information') → Strict mode ❌
104
+ Iteration 2: locator('#collapse-1').getByText('Employee Information') → Click succeeds ✅
105
+ BUT: Didn't navigate to Employee Information page (false positive)
106
+
107
+ Iteration 3: Selector fails again
108
+ Iteration 4: 🎯 Coordinate mode
109
+ → BEFORE: Homepage with sidebar
110
+ → Click at (19.3%, 22.9%)
111
+ → Wait 1s
112
+ → AFTER: Check screenshot
113
+ → LLM: "goalAchieved": true, "reasoning": "Employee Information page loaded with form"
114
+ → ✅ Mark complete, exit
115
+ ```
116
+
117
+ ## Backward Compatibility:
118
+
119
+ ✅ **Single image still works:**
120
+ ```typescript
121
+ const request = {
122
+ imageUrl: 'data:image/png;base64,...' // Old way
123
+ };
124
+ ```
125
+
126
+ ✅ **Multi-image NEW:**
127
+ ```typescript
128
+ const request = {
129
+ images: [
130
+ { label: 'BEFORE', dataUrl: '...' },
131
+ { label: 'AFTER', dataUrl: '...' }
132
+ ]
133
+ };
134
+ ```
135
+
136
+ ## Files Modified:
137
+
138
+ 1. `runner-core/src/llm-provider.ts` - Added LabeledImage interface and images field
139
+ 2. `scriptservice/providers/scriptservice-llm-provider.ts` - Handle multiple images in OpenAI API
140
+ 3. `runner-core/src/orchestrator/orchestrator-agent.ts` - Added verifyGoalWithScreenshotComparison method
141
+ 4. Automatic trigger after coordinate actions
142
+
143
+ ## Next Steps:
144
+
145
+ - ✅ Infrastructure ready
146
+ - ⏳ Need to test with real scenario
147
+ - 🔮 Future: Could expose as agent-callable tool if needed
148
+
@@ -0,0 +1,144 @@
1
+ # Coordinate Mode Diagnosis - Live Test Results
2
+
3
+ ## Test Scenario: PeopleHR Employee Information Flow
4
+
5
+ ### ✅ What Worked:
6
+
7
+ 1. **Coordinate fallback DID activate** (after fix from >= 3 to >= 5)
8
+ 2. **Agent successfully used coordinates** at (87.5%, 23.438%)
9
+ 3. **Physical clicks succeeded** - page.mouse.click(1120, 169)
10
+ 4. **Agent learned** to stick with coordinates after selectors failed
11
+
12
+ ### ❌ What Didn't Work:
13
+
14
+ **Agent hit max iterations (8) without marking "complete"**
15
+
16
+ ## Detailed Step 6 Flow:
17
+
18
+ ```
19
+ Iteration 1: Selector attempt → Timeout ❌
20
+ Iteration 2: Selector attempt → Timeout ❌
21
+ Iteration 3: Selector attempt → Timeout ❌
22
+ Iteration 4: 🎯 COORDINATE MODE → Click (87.5%, 23.438%) → ✅ Success
23
+ Iteration 5: Repeat coordinate → ✅ Success
24
+ Iteration 6: Repeat coordinate → ✅ Success (?)
25
+ Iteration 7: Repeat coordinate → ✅ Success
26
+ Iteration 8: Repeat coordinate → ✅ Success
27
+ Result: ⚠️ Max iterations → system_limit
28
+ ```
29
+
30
+ ## Root Cause Analysis:
31
+
32
+ ### Problem: **No Goal Verification After Coordinate Success**
33
+
34
+ **With selectors:**
35
+ ```typescript
36
+ await page.getByRole('button').click();
37
+ // Can verify: await expect(button).toHaveAttribute('aria-pressed', 'true')
38
+ // Can check: New elements appeared, URL changed, etc.
39
+ ```
40
+
41
+ **With coordinates:**
42
+ ```typescript
43
+ await page.mouse.click(1120, 169);
44
+ // ❓ Did it work? No element reference!
45
+ // ❓ How to verify? Can't check button state
46
+ // ❓ What changed? Need to inspect DOM/screenshot
47
+ ```
48
+
49
+ ### Why Agent Kept Retrying:
50
+
51
+ **Agent's reasoning (iterations 5-8):**
52
+ - "Coordinate click succeeded (executed without error)"
53
+ - "But I don't know if goal was achieved"
54
+ - "Step says 'Click on New' - did the New form open?"
55
+ - "I should try again to be sure..."
56
+ - → **Loops until max iterations**
57
+
58
+ ## Solutions to Consider:
59
+
60
+ ### Option 1: **Trust Coordinate Success** (Simple)
61
+ After coordinate click succeeds:
62
+ - Wait 500ms for UI response
63
+ - Mark status="complete" automatically
64
+ - Assume click worked (trust the coordinates)
65
+
66
+ ```typescript
67
+ if (coordinateAction && coordResult.allSucceeded) {
68
+ await page.waitForTimeout(500); // Let UI respond
69
+ return { status: 'complete', reasoning: 'Coordinate click succeeded' };
70
+ }
71
+ ```
72
+
73
+ **Pros**: Simple, fast
74
+ **Cons**: No verification of actual goal achievement
75
+
76
+ ### Option 2: **Visual Verification** (Better)
77
+ After coordinate click:
78
+ - Wait 500ms
79
+ - Take screenshot
80
+ - Compare before/after
81
+ - If changed → complete, else → retry with different coords
82
+
83
+ ```typescript
84
+ const beforeScreenshot = await page.screenshot();
85
+ await page.mouse.click(x, y);
86
+ await page.waitForTimeout(500);
87
+ const afterScreenshot = await page.screenshot();
88
+ if (screenshotsAreDifferent(beforeScreenshot, afterScreenshot)) {
89
+ return { status: 'complete' };
90
+ }
91
+ ```
92
+
93
+ **Pros**: Validates something changed
94
+ **Cons**: Slower, more LLM calls
95
+
96
+ ### Option 3: **DOM Change Detection** (Balanced)
97
+ After coordinate click:
98
+ - Capture DOM snapshot before
99
+ - Click coordinates
100
+ - Capture DOM snapshot after
101
+ - If new elements/navigation → complete
102
+
103
+ ```typescript
104
+ const beforeUrl = page.url();
105
+ const beforeElements = await getEnhancedPageInfo(page);
106
+ await page.mouse.click(x, y);
107
+ await page.waitForTimeout(500);
108
+ const afterUrl = page.url();
109
+ const afterElements = await getEnhancedPageInfo(page);
110
+
111
+ if (afterUrl !== beforeUrl || afterElements.count !== beforeElements.count) {
112
+ return { status: 'complete', reasoning: 'Page state changed after coordinate click' };
113
+ }
114
+ ```
115
+
116
+ **Pros**: Fast, objective verification
117
+ **Cons**: Might miss subtle changes (modal opens without URL/element count change)
118
+
119
+ ### Option 4: **Prompt Guidance** (Immediate)
120
+ Update prompt to tell agent:
121
+ "After coordinate click succeeds, mark status='complete' unless you can clearly verify it failed"
122
+
123
+ **Pros**: No code changes
124
+ **Cons**: Relies on LLM judgment
125
+
126
+ ## Recommendation:
127
+
128
+ **Hybrid approach:**
129
+ 1. **Immediate** (Prompt): Tell agent to trust coordinate success
130
+ 2. **Phase 2** (Code): Add DOM change detection for validation
131
+
132
+ ## Current Status:
133
+
134
+ - ✅ Coordinate fallback works technically
135
+ - ✅ Physical clicks succeed
136
+ - ❌ Agent doesn't know when to stop
137
+ - 🔧 Need completion detection logic
138
+
139
+ ## Test Results Summary:
140
+
141
+ **Steps 1-5**: ✅ All completed successfully
142
+ **Step 6**: ⚠️ Coordinates worked but hit max iterations (no completion detection)
143
+ **Overall**: Coordinate mode is functional but needs completion logic
144
+
@@ -0,0 +1,108 @@
1
+ # Runner-Core Visual Agent Implementation Status
2
+
3
+ ## Phase 1: ✅ COMPLETE (v0.0.33)
4
+
5
+ ### Implemented Features:
6
+
7
+ 1. **Note to Future Self** - Tactical iteration memory
8
+ 2. **Percentage-Based Coordinates** - Last-resort fallback with 3-decimal precision
9
+ 3. **Two-Tier Auto-Escalation** - Code-controlled mode switching
10
+
11
+ ### Current Behavior (Phase 1):
12
+
13
+ ```
14
+ Iterations 1-3: Normal Playwright selectors + note-to-self (3 attempts)
15
+ ↓ (after 3 failures)
16
+ Iterations 4-5: Percentage coordinates (2 attempts max)
17
+ ↓ (if both coordinate attempts fail)
18
+ Give up - mark as stuck
19
+
20
+ Total: Maximum 5 iterations per step
21
+ ```
22
+
23
+ ---
24
+
25
+ ## Phase 2: 📋 PLANNED (Not Started)
26
+
27
+ ### Will Add:
28
+
29
+ 1. **ElementDetector** - Detect interactive elements with z-index awareness
30
+ 2. **VisualMarkerInjector** - Number elements [1], [2], [3] on screenshot
31
+ 3. **SelectorResolver** - Translate index → native Playwright selector
32
+ 4. **IndexCommandTranslator** - Convert CLICK[3] → native Playwright command
33
+
34
+ ### Future Behavior (Phase 2):
35
+
36
+ ```
37
+ Iteration 1: Playwright selector (1 attempt) → 70% success
38
+ ↓ (on first failure)
39
+ Iterations 2-3: Index commands CLICK[3] (2 attempts) → 25% success
40
+ ↓ (after 3 total failures)
41
+ Iterations 4-5: Percentage coordinates (2 attempts max) → 5% success
42
+ ↓ (if all fail)
43
+ Give up - mark as stuck
44
+
45
+ Total: Maximum 5 iterations per step (down from 8)
46
+ Average: ~1.5 iterations per step (fast!)
47
+ ```
48
+
49
+ ### Key Design Principle for Phase 2:
50
+
51
+ **During Execution:**
52
+ - Agent clicks using `data-testchimp-el="[3]"` (reliable, we inject it)
53
+
54
+ **In Generated Script:**
55
+ - Translator outputs NATIVE selector: `getByRole('button', {name: 'Menu'})`
56
+ - Script works standalone without data-testchimp-el
57
+
58
+ **Why Two-Stage:**
59
+ 1. Agent needs reliability during exploration → use data attribute
60
+ 2. Generated script must be portable → use native selectors
61
+ 3. Best of both worlds: reliable execution + maintainable output
62
+
63
+ ---
64
+
65
+ ## Optimizations vs Original Plan
66
+
67
+ ### Original Plan:
68
+ - Tier 1: iterations 1-2
69
+ - Tier 2: iterations 3-4
70
+ - Tier 3: iterations 5+
71
+ - Average: ~4 iterations per step
72
+
73
+ ### Optimized Plan (Current):
74
+ - Tier 1: iteration 1 ONLY (fast path)
75
+ - Tier 2: iterations 2-3 (reliable fallback)
76
+ - Tier 3: iterations 4+ (absolute last resort)
77
+ - **Target: ~1.5 average iterations per step**
78
+
79
+ **Rationale:** Don't waste time! Simple tasks finish in 1 iteration, complex tasks escalate quickly to more reliable methods.
80
+
81
+ ---
82
+
83
+ ## Testing Checklist
84
+
85
+ ### Phase 1 (Ready Now):
86
+ - [ ] Run PeopleHR scenario - verify note-to-self helps
87
+ - [ ] Test coordinate fallback on deliberately difficult case
88
+ - [ ] Measure iteration reduction (expect 20-30%)
89
+ - [ ] Verify timeout fixes for waitForLoadState
90
+
91
+ ### Phase 2 (When Implemented):
92
+ - [ ] Test ElementDetector on modals/overlays
93
+ - [ ] Verify z-index occlusion detection
94
+ - [ ] Validate native selector generation (no data-testchimp-el in output)
95
+ - [ ] Run generated scripts standalone - must work!
96
+ - [ ] Measure tier distribution: 70/25/5
97
+
98
+ ---
99
+
100
+ ## Current Version
101
+
102
+ **Runner-Core:** v0.0.33
103
+ **Status:** Built and ready to test
104
+ **Phase 1:** ✅ Complete
105
+ **Phase 2:** 📋 Planned but not started
106
+
107
+ **Next Step:** Test Phase 1 with PeopleHR scenario to validate improvements before implementing Phase 2.
108
+