testchimp-runner-core 0.0.33 → 0.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. package/dist/execution-service.d.ts +1 -4
  2. package/dist/execution-service.d.ts.map +1 -1
  3. package/dist/execution-service.js +155 -468
  4. package/dist/execution-service.js.map +1 -1
  5. package/dist/index.d.ts +3 -1
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +11 -1
  8. package/dist/index.js.map +1 -1
  9. package/dist/llm-facade.d.ts.map +1 -1
  10. package/dist/llm-facade.js +7 -7
  11. package/dist/llm-facade.js.map +1 -1
  12. package/dist/llm-provider.d.ts +9 -0
  13. package/dist/llm-provider.d.ts.map +1 -1
  14. package/dist/model-constants.d.ts +16 -5
  15. package/dist/model-constants.d.ts.map +1 -1
  16. package/dist/model-constants.js +17 -6
  17. package/dist/model-constants.js.map +1 -1
  18. package/dist/orchestrator/decision-parser.d.ts +18 -0
  19. package/dist/orchestrator/decision-parser.d.ts.map +1 -0
  20. package/dist/orchestrator/decision-parser.js +127 -0
  21. package/dist/orchestrator/decision-parser.js.map +1 -0
  22. package/dist/orchestrator/index.d.ts +4 -2
  23. package/dist/orchestrator/index.d.ts.map +1 -1
  24. package/dist/orchestrator/index.js +15 -2
  25. package/dist/orchestrator/index.js.map +1 -1
  26. package/dist/orchestrator/orchestrator-agent.d.ts +17 -22
  27. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  28. package/dist/orchestrator/orchestrator-agent.js +708 -577
  29. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  30. package/dist/orchestrator/orchestrator-prompts.d.ts +32 -0
  31. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -0
  32. package/dist/orchestrator/orchestrator-prompts.js +737 -0
  33. package/dist/orchestrator/orchestrator-prompts.js.map +1 -0
  34. package/dist/orchestrator/page-som-handler.d.ts +106 -0
  35. package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
  36. package/dist/orchestrator/page-som-handler.js +1353 -0
  37. package/dist/orchestrator/page-som-handler.js.map +1 -0
  38. package/dist/orchestrator/som-types.d.ts +149 -0
  39. package/dist/orchestrator/som-types.d.ts.map +1 -0
  40. package/dist/orchestrator/som-types.js +87 -0
  41. package/dist/orchestrator/som-types.js.map +1 -0
  42. package/dist/orchestrator/tool-registry.d.ts +2 -0
  43. package/dist/orchestrator/tool-registry.d.ts.map +1 -1
  44. package/dist/orchestrator/tool-registry.js.map +1 -1
  45. package/dist/orchestrator/tools/index.d.ts +5 -1
  46. package/dist/orchestrator/tools/index.d.ts.map +1 -1
  47. package/dist/orchestrator/tools/index.js +9 -2
  48. package/dist/orchestrator/tools/index.js.map +1 -1
  49. package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
  50. package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
  51. package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
  52. package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
  53. package/dist/orchestrator/tools/verify-action-result.d.ts +17 -0
  54. package/dist/orchestrator/tools/verify-action-result.d.ts.map +1 -0
  55. package/dist/orchestrator/tools/verify-action-result.js +140 -0
  56. package/dist/orchestrator/tools/verify-action-result.js.map +1 -0
  57. package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
  58. package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
  59. package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
  60. package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
  61. package/dist/orchestrator/types.d.ts +49 -1
  62. package/dist/orchestrator/types.d.ts.map +1 -1
  63. package/dist/orchestrator/types.js +11 -1
  64. package/dist/orchestrator/types.js.map +1 -1
  65. package/dist/prompts.d.ts.map +1 -1
  66. package/dist/prompts.js +40 -34
  67. package/dist/prompts.js.map +1 -1
  68. package/dist/scenario-service.d.ts +5 -0
  69. package/dist/scenario-service.d.ts.map +1 -1
  70. package/dist/scenario-service.js +17 -0
  71. package/dist/scenario-service.js.map +1 -1
  72. package/dist/scenario-worker-class.d.ts +4 -0
  73. package/dist/scenario-worker-class.d.ts.map +1 -1
  74. package/dist/scenario-worker-class.js +21 -3
  75. package/dist/scenario-worker-class.js.map +1 -1
  76. package/dist/testing/agent-tester.d.ts +35 -0
  77. package/dist/testing/agent-tester.d.ts.map +1 -0
  78. package/dist/testing/agent-tester.js +84 -0
  79. package/dist/testing/agent-tester.js.map +1 -0
  80. package/dist/testing/ref-translator-tester.d.ts +44 -0
  81. package/dist/testing/ref-translator-tester.d.ts.map +1 -0
  82. package/dist/testing/ref-translator-tester.js +104 -0
  83. package/dist/testing/ref-translator-tester.js.map +1 -0
  84. package/dist/utils/coordinate-converter.d.ts +32 -0
  85. package/dist/utils/coordinate-converter.d.ts.map +1 -0
  86. package/dist/utils/coordinate-converter.js +130 -0
  87. package/dist/utils/coordinate-converter.js.map +1 -0
  88. package/dist/utils/hierarchical-selector.d.ts +47 -0
  89. package/dist/utils/hierarchical-selector.d.ts.map +1 -0
  90. package/dist/utils/hierarchical-selector.js +212 -0
  91. package/dist/utils/hierarchical-selector.js.map +1 -0
  92. package/dist/utils/page-info-retry.d.ts +14 -0
  93. package/dist/utils/page-info-retry.d.ts.map +1 -0
  94. package/dist/utils/page-info-retry.js +60 -0
  95. package/dist/utils/page-info-retry.js.map +1 -0
  96. package/dist/utils/page-info-utils.d.ts +1 -0
  97. package/dist/utils/page-info-utils.d.ts.map +1 -1
  98. package/dist/utils/page-info-utils.js +46 -18
  99. package/dist/utils/page-info-utils.js.map +1 -1
  100. package/dist/utils/ref-attacher.d.ts +21 -0
  101. package/dist/utils/ref-attacher.d.ts.map +1 -0
  102. package/dist/utils/ref-attacher.js +149 -0
  103. package/dist/utils/ref-attacher.js.map +1 -0
  104. package/dist/utils/ref-translator.d.ts +49 -0
  105. package/dist/utils/ref-translator.d.ts.map +1 -0
  106. package/dist/utils/ref-translator.js +276 -0
  107. package/dist/utils/ref-translator.js.map +1 -0
  108. package/package.json +1 -1
  109. package/plandocs/BEFORE_AFTER_VERIFICATION.md +148 -0
  110. package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +144 -0
  111. package/plandocs/IMPLEMENTATION_STATUS.md +108 -0
  112. package/plandocs/PHASE_1_COMPLETE.md +165 -0
  113. package/plandocs/PHASE_1_SUMMARY.md +184 -0
  114. package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +120 -0
  115. package/plandocs/PROMPT_SANITY_CHECK.md +120 -0
  116. package/plandocs/SESSION_SUMMARY_v0.0.33.md +151 -0
  117. package/plandocs/TROUBLESHOOTING_SESSION.md +72 -0
  118. package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +396 -0
  119. package/plandocs/WHATS_NEW_v0.0.33.md +183 -0
  120. package/plandocs/exploratory-mode-support-v2.plan.md +953 -0
  121. package/plandocs/exploratory-mode-support.plan.md +928 -0
  122. package/plandocs/journey-id-tracking-addendum.md +227 -0
  123. package/src/execution-service.ts +179 -596
  124. package/src/index.ts +10 -0
  125. package/src/llm-facade.ts +8 -8
  126. package/src/llm-provider.ts +11 -1
  127. package/src/model-constants.ts +17 -5
  128. package/src/orchestrator/decision-parser.ts +139 -0
  129. package/src/orchestrator/index.ts +27 -2
  130. package/src/orchestrator/orchestrator-agent.ts +868 -623
  131. package/src/orchestrator/orchestrator-prompts.ts +786 -0
  132. package/src/orchestrator/page-som-handler.ts +1565 -0
  133. package/src/orchestrator/som-types.ts +188 -0
  134. package/src/orchestrator/tool-registry.ts +2 -0
  135. package/src/orchestrator/tools/index.ts +5 -1
  136. package/src/orchestrator/tools/refresh-som-markers.ts +69 -0
  137. package/src/orchestrator/tools/verify-action-result.ts +159 -0
  138. package/src/orchestrator/tools/view-previous-screenshot.ts +103 -0
  139. package/src/orchestrator/types.ts +95 -4
  140. package/src/prompts.ts +40 -34
  141. package/src/scenario-service.ts +20 -0
  142. package/src/scenario-worker-class.ts +30 -4
  143. package/src/utils/coordinate-converter.ts +162 -0
  144. package/src/utils/page-info-retry.ts +65 -0
  145. package/src/utils/page-info-utils.ts +53 -18
  146. package/testchimp-runner-core-0.0.35.tgz +0 -0
  147. /package/{CREDIT_CALLBACK_ARCHITECTURE.md → plandocs/CREDIT_CALLBACK_ARCHITECTURE.md} +0 -0
  148. /package/{INTEGRATION_COMPLETE.md → plandocs/INTEGRATION_COMPLETE.md} +0 -0
  149. /package/{VISION_DIAGNOSTICS_IMPROVEMENTS.md → plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md} +0 -0
  150. /package/{RELEASE_0.0.26.md → releasenotes/RELEASE_0.0.26.md} +0 -0
  151. /package/{RELEASE_0.0.27.md → releasenotes/RELEASE_0.0.27.md} +0 -0
  152. /package/{RELEASE_0.0.28.md → releasenotes/RELEASE_0.0.28.md} +0 -0
@@ -12,6 +12,42 @@ export interface SelfReflection {
12
12
  loopReasoning?: string; // "Tried text-based selectors 3 times, switching to IDs"
13
13
  }
14
14
 
15
/**
 * Note to future self: iteration-specific memory for tactical continuity.
 *
 * Deliberately looser than the structured SelfReflection type, so the agent
 * can write whatever it needs in its own words.
 */
export interface NoteToFutureSelf {
  /** Iteration the note originates from. */
  fromIteration: number;

  /**
   * FREE-FORM tactical information the agent wants available in the next iteration.
   *
   * Examples:
   * - "Tried #menu-btn, failed. Will try SVG child next."
   * - "Plan: Hover over menu to reveal dropdown, then click Profile option."
   * - "Element loads async. Wait 2s after page load before clicking."
   */
  content: string;
}
27
+
28
/**
 * Coordinate-based action, used as a fallback when selectors fail.
 *
 * Positions are expressed as percentages of the viewport so the same action
 * is independent of screen resolution.
 */
export interface CoordinateAction {
  /** Discriminator marking this as a coordinate action. */
  type: 'coordinate';

  /** Which pointer/keyboard gesture to perform at the given position. */
  action: 'click' | 'doubleClick' | 'rightClick' | 'hover' | 'drag' | 'fill' | 'scroll';

  /**
   * Horizontal position as a percentage: 0 = left edge, 100 = right edge.
   * Range 0.0 to 100.0; 3 decimal places give roughly 1px accuracy.
   */
  xPercent: number;

  /**
   * Vertical position as a percentage: 0 = top edge, 100 = bottom edge.
   * Range 0.0 to 100.0; 3 decimal places give roughly 1px accuracy.
   */
  yPercent: number;

  /** Drag actions only: horizontal percentage of the drop position. */
  toXPercent?: number;

  /** Drag actions only: vertical percentage of the drop position. */
  toYPercent?: number;

  /** Fill actions only: text to type after clicking the position. */
  value?: string;

  /** Scroll actions only: positive = scroll down, negative = scroll up. */
  scrollAmount?: number;
}
50
+
15
51
  /**
16
52
  * Journey memory - tracks the agent's journey through the scenario
17
53
  */
@@ -24,6 +60,9 @@ export interface JourneyMemory {
24
60
 
25
61
  // Data extracted and saved for later steps
26
62
  extractedData: Record<string, string>;
63
+
64
+ // Latest note from agent (persists across steps for continuity)
65
+ latestNote?: NoteToFutureSelf;
27
66
  }
28
67
 
29
68
  /**
@@ -69,12 +108,18 @@ export interface AgentDecision {
69
108
  needsToolResults?: boolean; // Wait for tool results before proceeding with commands
70
109
 
71
110
  // Command batch (executed sequentially)
72
- commands?: string[];
111
+ commands?: string[]; // Plain Playwright commands
73
112
  commandReasoning?: string;
74
113
 
75
114
  // Self-reflection for next iteration
76
115
  selfReflection?: SelfReflection;
77
116
 
117
+ // Note to future self (NEW - free-form iteration memory)
118
+ noteToFutureSelf?: string; // Free-form tactical note for next iteration
119
+
120
+ // Coordinate-based action (NEW - fallback when selectors fail)
121
+ coordinateAction?: CoordinateAction;
122
+
78
123
  // Memory updates
79
124
  memoryUpdate?: {
80
125
  action: string;
@@ -102,6 +147,12 @@ export interface AgentDecision {
102
147
  issue: 'prior_incomplete' | 'already_done' | 'wrong_order' | null;
103
148
  explanation: string; // Why agent thinks step order is off
104
149
  };
150
+
151
+ // Meta-learning: Suggested prompt improvements based on journey learnings
152
+ debugInfo?: {
153
+ suggestedPromptUpdates?: string; // Confident suggestions for improving system/user prompts
154
+ reasoning?: string; // Why these updates would help
155
+ };
105
156
  }
106
157
 
107
158
  /**
@@ -127,13 +178,34 @@ export interface AgentContext {
127
178
  experiences: string[];
128
179
  extractedData: Record<string, string>;
129
180
 
130
- // Self-reflection from previous iteration
131
- previousIterationGuidance?: SelfReflection;
181
+ // Note from previous iteration (tactical continuity)
182
+ noteFromPreviousIteration?: NoteToFutureSelf;
183
+
184
+ // Test data / credentials for exploration
185
+ testDataPrompt?: string;
186
+
187
+ // SoM (Set-of-Marks) screenshot with visual markers
188
+ somScreenshot?: string; // Data URL of screenshot with SoM markers
189
+ somElementMap?: string; // Text map of SoM IDs to element details for disambiguation
190
+
191
+ // Repair mode context (undefined for script gen/exploration)
192
+ priorSteps?: string[]; // Steps completed before current (e.g., ["1. Navigate", "2. Login"])
193
+ nextSteps?: string[]; // Steps after current (e.g., ["5. Submit", "6. Verify"])
132
194
 
133
195
  // Tool results from this iteration (if any)
134
196
  toolResults?: Record<string, ToolResult>;
135
197
  }
136
198
 
199
/**
 * Exploration mode configuration.
 */
export interface ExplorationMode {
  /** Whether exploration mode is active. */
  enabled: boolean;

  /** Journey-specific focus, e.g. "Explore Dashboard and test all widgets". */
  explorationPrompt: string;

  /** Test data / credentials context handed to the agent. */
  testDataPrompt?: string;

  /** Budget limit (default: 50); the agent can stop earlier. */
  maxExplorationSteps?: number;
}
208
+
137
209
  /**
138
210
  * Configurable guardrails
139
211
  */
@@ -163,6 +235,15 @@ export interface AgentConfig {
163
235
  // Allowed actions
164
236
  allowedExplorationActions?: string[]; // Default: ['hover', 'click_info', 'click_menu', 'focus'] (Phase 2)
165
237
  allowedDomains?: string[]; // For navigate_to_url validation
238
+
239
+ // Feature flags
240
+ enableCoordinateMode?: boolean; // Default: false (experimental - disable until stable)
241
+ useSoM?: boolean; // Default: true (Set-of-Marks visual mode)
242
+ somUseSomIdBasedCommands?: boolean; // Default: false (use semantic selectors first)
243
+ somRestrictCoordinates?: boolean; // Default: false (if true, strongly discourage coord commands except as absolute last resort)
244
+
245
+ // Exploration mode (NEW)
246
+ explorationMode?: ExplorationMode;
166
247
  }
167
248
 
168
249
  /**
@@ -195,6 +276,16 @@ export const DEFAULT_AGENT_CONFIG: Required<AgentConfig> = {
195
276
  commandTimeout: 30000,
196
277
  explorationTimeout: 2000,
197
278
  allowedExplorationActions: ['hover', 'click_info', 'click_menu', 'focus'],
198
- allowedDomains: []
279
+ allowedDomains: [],
280
+ enableCoordinateMode: false, // Disabled by default - experimental feature
281
+ useSoM: true, // Enabled by default - use Set-of-Marks visual mode
282
+ somUseSomIdBasedCommands: false, // Use semantic selectors first
283
+ somRestrictCoordinates: false, // Allow coords as valid fallback (for exploration)
284
+ explorationMode: {
285
+ enabled: false,
286
+ explorationPrompt: '',
287
+ testDataPrompt: undefined,
288
+ maxExplorationSteps: 50
289
+ }
199
290
  };
200
291
 
package/src/prompts.ts CHANGED
@@ -329,10 +329,10 @@ ${pageInfo.formattedElements}
329
329
 
330
330
  CRITICAL: Extract and use exact values mentioned in the goal.
331
331
 
332
- Examples:
333
- - Goal: "Login with credentials: Willy, Willy@1234"
334
- ✅ Use: await page.fill('username', 'Willy'); await page.fill('password', 'Willy@1234');
335
- ❌ NOT: await page.fill('username', process.env.USERNAME);
332
+ Examples:
333
+ - Goal: "Login with credentials: alice, pass123"
334
+ ✅ Use: await page.fill('username', 'alice'); await page.fill('password', 'pass123');
335
+ ❌ NOT: await page.fill('username', process.env.USERNAME);
336
336
 
337
337
  - Goal: "Enter name: John Doe"
338
338
  ✅ Use: await page.fill('[name="name"]', 'John Doe');
@@ -362,36 +362,42 @@ ${pageInfo.formattedElements}
362
362
 
363
363
  Playwright throws "strict mode violation" when a selector matches MULTIPLE elements.
364
364
 
365
- **PROACTIVE DETECTION** - Check BEFORE generating command:
366
- - Review the "CURRENT PAGE STATE" section below (accessibility tree / aria snapshot)
367
- - Look for duplicate elements with same role/text (e.g., multiple links with "Employee Information")
368
- - If duplicates exist, generate a MORE SPECIFIC selector from the start
369
- - Don't wait for strict mode error - prevent it by analyzing the DOM structure
370
-
371
- 🚨 ERROR PATTERNS:
372
- - "strict mode violation" → Your selector matched multiple elements
373
- - "Multiple elements found" → Same issue
374
- - Command chain with multiple strategiesSign of selector problems
375
-
376
- ✅ SOLUTIONS (in order of preference):
377
-
378
- **Option 1: Be More Specific** (BEST):
379
- - BAD: page.locator('a', { hasText: 'Employee Information' }).click() Matches multiple links
380
- - GOOD: page.locator('nav a', { hasText: 'Employee Information' }).click() → Scoped to nav
381
- - GOOD: page.getByRole('navigation').getByRole('link', { name: 'Employee Information' }).click() → Role-based scoping
382
- - GOOD: page.locator('a[href*="/employee"]', { hasText: 'Employee Information' }).click() → Combined attributes
383
-
384
- **Option 2: Use Position-Based Selection**:
385
- - If multiple matches are expected: page.locator('a', { hasText: 'Employee Information' }).first().click()
386
- - Or use: .nth(0) for first, .last() for last
387
-
388
- **Option 3: Filter by Visibility/State**:
389
- - page.locator('button', { hasText: 'Submit' }).filter({ hasNotText: 'Draft' }).click()
390
-
391
- 🚫 **ANTI-PATTERNS (DON'T DO THIS)**:
392
- - BAD: Chaining multiple selector strategies in one command with semicolons
393
- - BAD: Using page.evaluate() to find/click elements (defeats Playwright's auto-waiting)
394
- - GOOD: ONE clear, specific selector like page.locator('nav a', { hasText: 'Employee Information' }).click()
365
+ **PROACTIVE DETECTION** - Check BEFORE generating command:
366
+ - Review the "CURRENT PAGE STATE" section below (accessibility tree / aria snapshot)
367
+ - Look for duplicate elements with same role/text (e.g., multiple links with "Settings")
368
+ - If duplicates exist, generate a MORE SPECIFIC selector from the start
369
+ - Don't wait for strict mode error - prevent it by analyzing the DOM structure
370
+
371
+ 🚨 ERROR PATTERNS:
372
+ - "strict mode violation" → Your selector matched multiple elements
373
+ - "Multiple elements found" → Same issue
374
+ - "locator resolved to 2 elements" → Often one is a tooltip/popover with duplicate text
375
+ - Command chain with multiple strategies → Sign of selector problems
376
+
377
+ ✅ SOLUTIONS (in order of preference):
378
+
379
+ **Option 1: Use Role-Based Selectors** (BEST - avoids tooltips):
380
+ - BAD: page.getByText('Settings').click() → Matches button AND its tooltip
381
+ - GOOD: page.getByRole('button', { name: 'Settings' }).click() → Only matches button role
382
+ - GOOD: page.locator('button').getByText('Settings').click() → Scoped to button tag
383
+ - GOOD: page.locator('[role="button"]').getByText('Settings').click() → Scoped to role
384
+
385
+ **Option 2: Scope to Container**:
386
+ - BAD: page.locator('a', { hasText: 'Settings' }).click() → Matches multiple links
387
+ - GOOD: page.locator('nav a', { hasText: 'Settings' }).click() → Scoped to nav
388
+ - GOOD: page.locator('a[href*="/settings"]').click() → Use unique attribute
389
+
390
+ **Option 3: Use Position-Based Selection**:
391
+ - If multiple matches are expected: page.locator('a', { hasText: 'Settings' }).first().click()
392
+ - Or use: .nth(0) for first, .last() for last
393
+
394
+ **Option 4: Filter by Visibility/State**:
395
+ - page.locator('button', { hasText: 'Submit' }).filter({ hasNotText: 'Draft' }).click()
396
+
397
+ 🚫 **ANTI-PATTERNS (DON'T DO THIS)**:
398
+ - BAD: Chaining multiple selector strategies in one command with semicolons
399
+ - BAD: Using page.evaluate() to find/click elements (defeats Playwright's auto-waiting)
400
+ - GOOD: ONE clear, specific selector like page.locator('nav a', { hasText: 'Settings' }).click()
395
401
 
396
402
  **When You See Strict Mode Errors:**
397
403
  1. Analyze - Why did my selector match multiple elements?
@@ -218,6 +218,26 @@ export class ScenarioService extends EventEmitter {
218
218
  this.processNextJob();
219
219
  }
220
220
 
221
+ /**
222
+ * Execute exploration mode using orchestrator
223
+ * Requires orchestrator to be enabled via useOrchestrator option
224
+ */
225
+ async executeExploration(page: any, explorationConfig: any, jobId: string): Promise<any> {
226
+ if (!this.useOrchestrator) {
227
+ throw new Error('Exploration mode requires orchestrator to be enabled');
228
+ }
229
+
230
+ // Get an available worker (or create one if needed)
231
+ let worker = this.workers.find(w => !this.busyWorkers.has(w));
232
+ if (!worker) {
233
+ await this.createWorker();
234
+ worker = this.workers[this.workers.length - 1];
235
+ }
236
+
237
+ // Execute exploration via worker's orchestrator
238
+ return worker.executeExploration(page, explorationConfig, jobId);
239
+ }
240
+
221
241
  async shutdown(): Promise<void> {
222
242
  this.log('Shutting down scenario service...');
223
243
 
@@ -18,10 +18,13 @@ import {
18
18
  JourneyMemory,
19
19
  AgentConfig,
20
20
  TakeScreenshotTool,
21
+ ViewPreviousScreenshotTool,
22
+ RefreshSomMarkersTool,
21
23
  RecallHistoryTool,
22
24
  InspectPageTool,
23
25
  CheckPageReadyTool,
24
- ExtractDataTool
26
+ ExtractDataTool,
27
+ VerifyActionResultTool
25
28
  } from './orchestrator';
26
29
 
27
30
  // Define a simple logging interface for compatibility
@@ -104,12 +107,23 @@ export class ScenarioWorker extends EventEmitter {
104
107
  const takeScreenshotTool = new TakeScreenshotTool();
105
108
  takeScreenshotTool.setLLMFacade(this.llmFacade); // Inject LLM for vision analysis
106
109
 
107
- // Register 5 information-gathering tools (state changes via Playwright commands)
110
+ const viewPreviousScreenshotTool = new ViewPreviousScreenshotTool();
111
+ viewPreviousScreenshotTool.setLLMFacade(this.llmFacade); // Inject LLM for vision analysis
112
+
113
+ const refreshSomMarkersTool = new RefreshSomMarkersTool();
114
+
115
+ const verifyActionTool = new VerifyActionResultTool();
116
+ verifyActionTool.setLLMFacade(this.llmFacade); // Inject LLM for vision comparison
117
+
118
+ // Register 8 information-gathering tools (state changes via Playwright commands)
108
119
  this.toolRegistry.register(takeScreenshotTool);
120
+ this.toolRegistry.register(viewPreviousScreenshotTool);
121
+ this.toolRegistry.register(refreshSomMarkersTool);
109
122
  this.toolRegistry.register(new RecallHistoryTool());
110
123
  this.toolRegistry.register(new InspectPageTool());
111
124
  this.toolRegistry.register(new CheckPageReadyTool());
112
125
  this.toolRegistry.register(new ExtractDataTool());
126
+ this.toolRegistry.register(verifyActionTool);
113
127
 
114
128
  // Create orchestrator agent
115
129
  this.orchestratorAgent = new OrchestratorAgent(
@@ -312,12 +326,12 @@ export class ScenarioWorker extends EventEmitter {
312
326
  page = job.existingPage;
313
327
  } else {
314
328
  // Create new browser (default behavior for local clients)
315
- // Default to headed mode (headless: false) for better debugging
329
+ // Let the playwrightConfig control headless mode (don't override with hardcoded value)
316
330
  // Create logger function from outputChannel for browser initialization
317
331
  const logger = this.outputChannel ? (message: string, level?: 'log' | 'error' | 'warn') => {
318
332
  this.outputChannel!.appendLine(`[Browser] ${message}`);
319
333
  } : undefined;
320
- const browserInstance = await initializeBrowser(job.playwrightConfig, false, undefined, logger);
334
+ const browserInstance = await initializeBrowser(job.playwrightConfig, undefined, undefined, logger);
321
335
  browser = browserInstance.browser;
322
336
  context = browserInstance.context;
323
337
  page = browserInstance.page;
@@ -1077,6 +1091,18 @@ export class ScenarioWorker extends EventEmitter {
1077
1091
 
1078
1092
 
1079
1093
 
1094
+ /**
1095
+ * Execute exploration mode using orchestrator
1096
+ */
1097
+ async executeExploration(page: any, explorationConfig: any, jobId: string): Promise<any> {
1098
+ if (!this.useOrchestrator || !this.orchestratorAgent) {
1099
+ throw new Error('Orchestrator not available - exploration mode requires orchestrator');
1100
+ }
1101
+
1102
+ // Execute exploration via orchestrator
1103
+ return this.orchestratorAgent.executeExploration(page, explorationConfig, jobId);
1104
+ }
1105
+
1080
1106
  async cleanup(): Promise<void> {
1081
1107
  this.initialized = false;
1082
1108
  this.sessionId = null;
@@ -0,0 +1,162 @@
1
+ /**
2
+ * Coordinate Converter Utility
3
+ * Converts percentage-based coordinates to pixel coordinates and generates Playwright commands
4
+ */
5
+
6
+ import { CoordinateAction } from '../orchestrator/types';
7
+
8
/**
 * Primitive pointer/keyboard steps that a coordinate action expands into.
 * Both the command generator and the direct executor consume the same step
 * list, so the two code paths cannot drift apart.
 */
type CoordinateStep =
  | { kind: 'click'; x: number; y: number; button?: 'right' }
  | { kind: 'dblclick'; x: number; y: number }
  | { kind: 'move'; x: number; y: number }
  | { kind: 'down' }
  | { kind: 'up' }
  | { kind: 'wait'; ms: number }
  | { kind: 'type'; text: string }
  | { kind: 'wheel'; deltaY: number };

export class CoordinateConverter {
  /** Pause between the focusing click and the typing of a `fill` action. */
  private static readonly FILL_FOCUS_DELAY_MS = 100;

  /** Wheel delta used when a `scroll` action does not specify `scrollAmount`. */
  private static readonly DEFAULT_SCROLL_AMOUNT = 100;

  /**
   * Convert percentage coordinates to actual pixel coordinates.
   *
   * @param xPercent - Horizontal position as a percentage of the viewport width.
   * @param yPercent - Vertical position as a percentage of the viewport height.
   * @param viewportWidth - Viewport width in pixels.
   * @param viewportHeight - Viewport height in pixels.
   * @returns Pixel coordinates, each rounded to the nearest integer.
   */
  static percentToPixels(
    xPercent: number,
    yPercent: number,
    viewportWidth: number,
    viewportHeight: number
  ): { x: number; y: number } {
    return {
      x: Math.round((xPercent / 100) * viewportWidth),
      y: Math.round((yPercent / 100) * viewportHeight)
    };
  }

  /**
   * Get viewport dimensions from the page.
   *
   * @param page - Object exposing `evaluate`; the callback reads
   *   `window.innerWidth` / `window.innerHeight` in the page context.
   * @returns The viewport width and height reported by the page.
   */
  static async getViewportSize(page: any): Promise<{ width: number; height: number }> {
    return await page.evaluate((): { width: number; height: number } => {
      const win = (globalThis as any).window;
      return {
        width: win.innerWidth as number,
        height: win.innerHeight as number
      };
    });
  }

  /**
   * Convert a percentage-based coordinate action to Playwright commands.
   *
   * @param action - The coordinate action to translate.
   * @param page - Page used only to read the current viewport size.
   * @returns Command strings, in execution order.
   * @throws Error if a drag lacks a target, a fill lacks a value, the action
   *   is unknown, or the resolved coordinates are not finite numbers.
   */
  static async generateCommands(
    action: CoordinateAction,
    page: any
  ): Promise<string[]> {
    const viewport = await this.getViewportSize(page);
    return this.planSteps(action, viewport, ' action').map(step => this.renderStep(step));
  }

  /**
   * Execute a coordinate action directly on the page.
   * Used during agent execution (converts and runs immediately).
   *
   * @param action - The coordinate action to perform.
   * @param page - Page exposing `evaluate`, `mouse`, `keyboard` and `waitForTimeout`.
   * @throws Error if a drag lacks a target, a fill lacks a value, the action
   *   is unknown, or the resolved coordinates are not finite numbers.
   */
  static async executeAction(
    action: CoordinateAction,
    page: any
  ): Promise<void> {
    const viewport = await this.getViewportSize(page);
    for (const step of this.planSteps(action, viewport, '')) {
      await this.runStep(page, step);
    }
  }

  /**
   * Expand an action into primitive steps.
   * `errorNoun` keeps each entry point's original error wording
   * ("Drag action requires ..." vs "Drag requires ...").
   */
  private static planSteps(
    action: CoordinateAction,
    viewport: { width: number; height: number },
    errorNoun: '' | ' action'
  ): CoordinateStep[] {
    const { x, y } = this.percentToPixels(action.xPercent, action.yPercent, viewport.width, viewport.height);
    this.assertFinitePoint(action.action, x, y);

    switch (action.action) {
      case 'click':
        return [{ kind: 'click', x, y }];

      case 'doubleClick':
        return [{ kind: 'dblclick', x, y }];

      case 'rightClick':
        return [{ kind: 'click', x, y, button: 'right' }];

      case 'hover':
        return [{ kind: 'move', x, y }];

      case 'drag': {
        if (action.toXPercent === undefined || action.toYPercent === undefined) {
          throw new Error(`Drag${errorNoun} requires toXPercent and toYPercent`);
        }
        const to = this.percentToPixels(action.toXPercent, action.toYPercent, viewport.width, viewport.height);
        this.assertFinitePoint(action.action, to.x, to.y);
        return [
          { kind: 'move', x, y },
          { kind: 'down' },
          { kind: 'move', x: to.x, y: to.y },
          { kind: 'up' }
        ];
      }

      case 'fill':
        if (!action.value) {
          throw new Error(`Fill${errorNoun} requires value`);
        }
        // Click to focus, wait briefly, then type
        return [
          { kind: 'click', x, y },
          { kind: 'wait', ms: this.FILL_FOCUS_DELAY_MS },
          { kind: 'type', text: action.value }
        ];

      case 'scroll':
        // `??` rather than `||`: an explicit scrollAmount of 0 is respected.
        // Move to position, then scroll
        return [
          { kind: 'move', x, y },
          { kind: 'wheel', deltaY: action.scrollAmount ?? this.DEFAULT_SCROLL_AMOUNT }
        ];

      default:
        throw new Error(`Unknown coordinate action: ${action.action}`);
    }
  }

  /** Reject NaN/Infinity so they never reach the page or a generated command. */
  private static assertFinitePoint(actionName: string, x: number, y: number): void {
    if (!Number.isFinite(x) || !Number.isFinite(y)) {
      throw new Error(`Invalid coordinates for ${actionName}: (${x}, ${y})`);
    }
  }

  /** Render one primitive step as a Playwright command string. */
  private static renderStep(step: CoordinateStep): string {
    switch (step.kind) {
      case 'click':
        return step.button
          ? `await page.mouse.click(${step.x}, ${step.y}, { button: '${step.button}' });`
          : `await page.mouse.click(${step.x}, ${step.y});`;
      case 'dblclick':
        return `await page.mouse.dblclick(${step.x}, ${step.y});`;
      case 'move':
        return `await page.mouse.move(${step.x}, ${step.y});`;
      case 'down':
        return `await page.mouse.down();`;
      case 'up':
        return `await page.mouse.up();`;
      case 'wait':
        return `await page.waitForTimeout(${step.ms});`;
      case 'type':
        // JSON.stringify yields a safely quoted/escaped string literal.
        return `await page.keyboard.type(${JSON.stringify(step.text)});`;
      case 'wheel':
        return `await page.mouse.wheel(0, ${step.deltaY});`;
    }
  }

  /** Perform one primitive step against the page. */
  private static async runStep(page: any, step: CoordinateStep): Promise<void> {
    switch (step.kind) {
      case 'click':
        if (step.button) {
          await page.mouse.click(step.x, step.y, { button: step.button });
        } else {
          await page.mouse.click(step.x, step.y);
        }
        return;
      case 'dblclick':
        await page.mouse.dblclick(step.x, step.y);
        return;
      case 'move':
        await page.mouse.move(step.x, step.y);
        return;
      case 'down':
        await page.mouse.down();
        return;
      case 'up':
        await page.mouse.up();
        return;
      case 'wait':
        await page.waitForTimeout(step.ms);
        return;
      case 'type':
        await page.keyboard.type(step.text);
        return;
      case 'wheel':
        await page.mouse.wheel(0, step.deltaY);
        return;
    }
  }
}
162
+
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Page Info Retry Utility
3
+ * Handles adaptive page loading with exponential backoff
4
+ */
5
+
6
+ import { getEnhancedPageInfo, PageInfo } from './page-info-utils';
7
+
8
export class PageInfoRetry {
  /** First wait between attempts, in milliseconds. */
  private static readonly INITIAL_BACKOFF_MS = 1000;

  /** Growth factor applied to the wait after every unsuccessful attempt. */
  private static readonly BACKOFF_FACTOR = 1.6;

  /** Upper bound for a single wait, in milliseconds. */
  private static readonly MAX_BACKOFF_MS = 15000;

  /** Number of interactive elements at which the page counts as loaded. */
  private static readonly MIN_INTERACTIVE_ELEMENTS = 3;

  /**
   * Get page info with retry logic - waits for interactive elements to appear.
   *
   * First waits (best effort, up to 20s) for `domcontentloaded`. Then page
   * info is extracted up to `maxAttempts` times, sleeping with exponential
   * backoff between attempts, until at least 3 interactive elements are
   * reported. After the last attempt the most recent page info is returned
   * even if it has fewer elements.
   *
   * @param page - Page exposing `waitForLoadState` and `waitForTimeout`; also
   *   passed to `getEnhancedPageInfo`.
   * @param maxAttempts - Maximum number of extraction attempts (default 6).
   *   If it is not a positive number, a single extraction is performed.
   * @returns The extracted page info.
   */
  static async getWithRetry(page: any, maxAttempts: number = 6): Promise<PageInfo> {
    // Wait for initial page load (generous timeout for slow apps).
    // Best effort: a failed or timed-out wait must not abort extraction.
    try {
      await page.waitForLoadState('domcontentloaded', { timeout: 20000 });
    } catch {
      // Continue even if wait fails
    }

    let backoffMs = this.INITIAL_BACKOFF_MS;

    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      const pageInfo = await getEnhancedPageInfo(page);
      const found = pageInfo.interactiveElements?.length ?? 0;

      // If we got a reasonable number of elements, we're done
      if (found >= this.MIN_INTERACTIVE_ELEMENTS) {
        if (attempt > 1) {
          console.log(`[PageInfoRetry] ✓ Page elements loaded after ${attempt} attempts`);
        }
        return pageInfo;
      }

      // If this is the last attempt, return what we have
      if (attempt >= maxAttempts) {
        const totalWait = this.calculateTotalWaitTime(maxAttempts);
        console.log(`[PageInfoRetry] ⚠️ Only found ${found} elements after ${maxAttempts} attempts (total wait: ~${totalWait}ms)`);
        return pageInfo;
      }

      // Wait with exponential backoff before retrying
      console.log(`[PageInfoRetry] Only ${found} elements found (attempt ${attempt}/${maxAttempts}), waiting ${backoffMs}ms...`);
      await page.waitForTimeout(backoffMs);
      backoffMs = this.nextBackoff(backoffMs);
    }

    // Reached only when maxAttempts allows no loop iteration
    return await getEnhancedPageInfo(page);
  }

  /**
   * Next wait in the schedule: grow by the backoff factor, round to whole
   * milliseconds (so logs and timers never see values like 6553.6), and cap
   * at the per-attempt maximum.
   */
  private static nextBackoff(currentMs: number): number {
    return Math.min(Math.round(currentMs * this.BACKOFF_FACTOR), this.MAX_BACKOFF_MS);
  }

  /**
   * Total time slept across `maxAttempts` attempts, i.e. the sum of the
   * `maxAttempts - 1` waits that happen between them.
   */
  private static calculateTotalWaitTime(maxAttempts: number): number {
    let total = 0;
    let backoffMs = this.INITIAL_BACKOFF_MS;
    for (let i = 1; i < maxAttempts; i++) {
      total += backoffMs;
      backoffMs = this.nextBackoff(backoffMs);
    }
    return Math.round(total);
  }
}
65
+