testchimp-runner-core 0.0.34 → 0.0.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/dist/execution-service.d.ts +1 -4
  2. package/dist/execution-service.d.ts.map +1 -1
  3. package/dist/execution-service.js +155 -468
  4. package/dist/execution-service.js.map +1 -1
  5. package/dist/index.d.ts +3 -1
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +11 -1
  8. package/dist/index.js.map +1 -1
  9. package/dist/orchestrator/decision-parser.d.ts +18 -0
  10. package/dist/orchestrator/decision-parser.d.ts.map +1 -0
  11. package/dist/orchestrator/decision-parser.js +127 -0
  12. package/dist/orchestrator/decision-parser.js.map +1 -0
  13. package/dist/orchestrator/index.d.ts +4 -2
  14. package/dist/orchestrator/index.d.ts.map +1 -1
  15. package/dist/orchestrator/index.js +14 -2
  16. package/dist/orchestrator/index.js.map +1 -1
  17. package/dist/orchestrator/orchestrator-agent.d.ts +17 -14
  18. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  19. package/dist/orchestrator/orchestrator-agent.js +534 -204
  20. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  21. package/dist/orchestrator/orchestrator-prompts.d.ts +14 -2
  22. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
  23. package/dist/orchestrator/orchestrator-prompts.js +529 -247
  24. package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
  25. package/dist/orchestrator/page-som-handler.d.ts +106 -0
  26. package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
  27. package/dist/orchestrator/page-som-handler.js +1353 -0
  28. package/dist/orchestrator/page-som-handler.js.map +1 -0
  29. package/dist/orchestrator/som-types.d.ts +149 -0
  30. package/dist/orchestrator/som-types.d.ts.map +1 -0
  31. package/dist/orchestrator/som-types.js +87 -0
  32. package/dist/orchestrator/som-types.js.map +1 -0
  33. package/dist/orchestrator/tool-registry.d.ts +2 -0
  34. package/dist/orchestrator/tool-registry.d.ts.map +1 -1
  35. package/dist/orchestrator/tool-registry.js.map +1 -1
  36. package/dist/orchestrator/tools/index.d.ts +4 -1
  37. package/dist/orchestrator/tools/index.d.ts.map +1 -1
  38. package/dist/orchestrator/tools/index.js +7 -2
  39. package/dist/orchestrator/tools/index.js.map +1 -1
  40. package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
  41. package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
  42. package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
  43. package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
  44. package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
  45. package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
  46. package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
  47. package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
  48. package/dist/orchestrator/types.d.ts +23 -1
  49. package/dist/orchestrator/types.d.ts.map +1 -1
  50. package/dist/orchestrator/types.js +11 -1
  51. package/dist/orchestrator/types.js.map +1 -1
  52. package/dist/scenario-service.d.ts +5 -0
  53. package/dist/scenario-service.d.ts.map +1 -1
  54. package/dist/scenario-service.js +17 -0
  55. package/dist/scenario-service.js.map +1 -1
  56. package/dist/scenario-worker-class.d.ts +4 -0
  57. package/dist/scenario-worker-class.d.ts.map +1 -1
  58. package/dist/scenario-worker-class.js +18 -3
  59. package/dist/scenario-worker-class.js.map +1 -1
  60. package/dist/testing/agent-tester.d.ts +35 -0
  61. package/dist/testing/agent-tester.d.ts.map +1 -0
  62. package/dist/testing/agent-tester.js +84 -0
  63. package/dist/testing/agent-tester.js.map +1 -0
  64. package/dist/testing/ref-translator-tester.d.ts +44 -0
  65. package/dist/testing/ref-translator-tester.d.ts.map +1 -0
  66. package/dist/testing/ref-translator-tester.js +104 -0
  67. package/dist/testing/ref-translator-tester.js.map +1 -0
  68. package/dist/utils/hierarchical-selector.d.ts +47 -0
  69. package/dist/utils/hierarchical-selector.d.ts.map +1 -0
  70. package/dist/utils/hierarchical-selector.js +212 -0
  71. package/dist/utils/hierarchical-selector.js.map +1 -0
  72. package/dist/utils/page-info-retry.d.ts +14 -0
  73. package/dist/utils/page-info-retry.d.ts.map +1 -0
  74. package/dist/utils/page-info-retry.js +60 -0
  75. package/dist/utils/page-info-retry.js.map +1 -0
  76. package/dist/utils/page-info-utils.d.ts +1 -0
  77. package/dist/utils/page-info-utils.d.ts.map +1 -1
  78. package/dist/utils/page-info-utils.js +46 -18
  79. package/dist/utils/page-info-utils.js.map +1 -1
  80. package/dist/utils/ref-attacher.d.ts +21 -0
  81. package/dist/utils/ref-attacher.d.ts.map +1 -0
  82. package/dist/utils/ref-attacher.js +149 -0
  83. package/dist/utils/ref-attacher.js.map +1 -0
  84. package/dist/utils/ref-translator.d.ts +49 -0
  85. package/dist/utils/ref-translator.d.ts.map +1 -0
  86. package/dist/utils/ref-translator.js +276 -0
  87. package/dist/utils/ref-translator.js.map +1 -0
  88. package/package.json +6 -1
  89. package/RELEASE_0.0.26.md +0 -165
  90. package/RELEASE_0.0.27.md +0 -236
  91. package/RELEASE_0.0.28.md +0 -286
  92. package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
  93. package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
  94. package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
  95. package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
  96. package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
  97. package/plandocs/INTEGRATION_COMPLETE.md +0 -322
  98. package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
  99. package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
  100. package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
  101. package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
  102. package/plandocs/PHASE_1_COMPLETE.md +0 -165
  103. package/plandocs/PHASE_1_SUMMARY.md +0 -184
  104. package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
  105. package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
  106. package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
  107. package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
  108. package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
  109. package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
  110. package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
  111. package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
  112. package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
  113. package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
  114. package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
  115. package/src/auth-config.ts +0 -84
  116. package/src/credit-usage-service.ts +0 -188
  117. package/src/env-loader.ts +0 -103
  118. package/src/execution-service.ts +0 -1413
  119. package/src/file-handler.ts +0 -104
  120. package/src/index.ts +0 -422
  121. package/src/llm-facade.ts +0 -821
  122. package/src/llm-provider.ts +0 -53
  123. package/src/model-constants.ts +0 -35
  124. package/src/orchestrator/index.ts +0 -34
  125. package/src/orchestrator/orchestrator-agent.ts +0 -862
  126. package/src/orchestrator/orchestrator-agent.ts.backup +0 -1386
  127. package/src/orchestrator/orchestrator-prompts.ts +0 -474
  128. package/src/orchestrator/tool-registry.ts +0 -182
  129. package/src/orchestrator/tools/check-page-ready.ts +0 -75
  130. package/src/orchestrator/tools/extract-data.ts +0 -92
  131. package/src/orchestrator/tools/index.ts +0 -12
  132. package/src/orchestrator/tools/inspect-page.ts +0 -42
  133. package/src/orchestrator/tools/recall-history.ts +0 -72
  134. package/src/orchestrator/tools/take-screenshot.ts +0 -128
  135. package/src/orchestrator/tools/verify-action-result.ts +0 -159
  136. package/src/orchestrator/types.ts +0 -248
  137. package/src/playwright-mcp-service.ts +0 -224
  138. package/src/progress-reporter.ts +0 -144
  139. package/src/prompts.ts +0 -842
  140. package/src/providers/backend-proxy-llm-provider.ts +0 -91
  141. package/src/providers/local-llm-provider.ts +0 -38
  142. package/src/scenario-service.ts +0 -232
  143. package/src/scenario-worker-class.ts +0 -1089
  144. package/src/script-utils.ts +0 -203
  145. package/src/types.ts +0 -239
  146. package/src/utils/browser-utils.ts +0 -348
  147. package/src/utils/coordinate-converter.ts +0 -162
  148. package/src/utils/page-info-utils.ts +0 -250
  149. package/testchimp-runner-core-0.0.33.tgz +0 -0
  150. package/tsconfig.json +0 -19
@@ -5,13 +5,16 @@
5
5
  */
6
6
  Object.defineProperty(exports, "__esModule", { value: true });
7
7
  exports.OrchestratorAgent = void 0;
8
- const test_1 = require("@playwright/test");
9
8
  const progress_reporter_1 = require("../progress-reporter");
10
9
  const page_info_utils_1 = require("../utils/page-info-utils");
11
10
  const coordinate_converter_1 = require("../utils/coordinate-converter");
12
11
  const model_constants_1 = require("../model-constants");
13
12
  const types_1 = require("./types");
14
13
  const orchestrator_prompts_1 = require("./orchestrator-prompts");
14
+ const page_info_retry_1 = require("../utils/page-info-retry");
15
+ const decision_parser_1 = require("./decision-parser");
16
+ const page_som_handler_1 = require("./page-som-handler");
17
+ const som_types_1 = require("./som-types");
15
18
  /**
16
19
  * Orchestrator Agent - manages step execution with tool use and memory
17
20
  */
@@ -24,6 +27,11 @@ class OrchestratorAgent {
24
27
  this.progressReporter = progressReporter;
25
28
  this.logger = logger;
26
29
  this.debugMode = debugMode || false;
30
+ this.decisionParser = new decision_parser_1.DecisionParser(logger);
31
+ // Initialize SoM handler if enabled
32
+ if (this.config.useSoM) {
33
+ this.somHandler = new page_som_handler_1.PageSoMHandler(null, this.logger);
34
+ }
27
35
  }
28
36
  setDebugMode(enabled) {
29
37
  this.debugMode = enabled;
@@ -31,11 +39,12 @@ class OrchestratorAgent {
31
39
  /**
32
40
  * Execute a single step of the scenario
33
41
  */
34
- async executeStep(page, stepDescription, stepNumber, totalSteps, scenarioSteps, memory, jobId) {
42
+ async executeStep(page, stepDescription, stepNumber, totalSteps, scenarioSteps, memory, jobId, priorSteps, // NEW: For repair mode (undefined for script gen)
43
+ nextSteps // NEW: For repair mode (undefined for script gen)
44
+ ) {
35
45
  this.logger?.(`\n[Orchestrator] ========== STEP ${stepNumber}/${totalSteps} ==========`);
36
46
  this.logger?.(`[Orchestrator] 🎯 Goal: ${stepDescription}`);
37
47
  let iteration = 0;
38
- let previousReflection = undefined;
39
48
  let noteToSelf = memory.latestNote; // Start with note from previous step
40
49
  const commandsExecuted = [];
41
50
  let consecutiveFailures = 0; // Track consecutive iterations with failed commands
@@ -44,24 +53,45 @@ class OrchestratorAgent {
44
53
  iteration++;
45
54
  this.logger?.(`\n[Orchestrator] === Iteration ${iteration}/${this.config.maxIterationsPerStep} ===`);
46
55
  // Build context for agent
47
- const context = await this.buildAgentContext(page, stepDescription, stepNumber, totalSteps, scenarioSteps, memory, previousReflection, consecutiveFailures, noteToSelf // NEW: Pass note from previous iteration
56
+ const context = await this.buildAgentContext(page, stepDescription, stepNumber, totalSteps, scenarioSteps, memory, consecutiveFailures, noteToSelf, // Pass note from previous iteration
57
+ priorSteps, // NEW: Pass repair context
58
+ nextSteps // NEW: Pass repair context
48
59
  );
49
60
  // Call agent to make decision
50
61
  const decision = await this.callAgent(context, jobId, stepNumber, iteration, consecutiveFailures);
51
62
  // Log agent's reasoning
52
- this.logAgentDecision(decision, iteration);
63
+ this.decisionParser.log(decision, iteration);
53
64
  // Report progress
54
65
  await this.reportStepProgress(jobId, stepNumber, stepDescription, decision, iteration);
55
66
  // Execute tools if requested (tools are READ-ONLY, they don't change state)
56
67
  let toolResults = {};
57
- // ANTI-LOOP: Detect if agent is taking screenshots repeatedly without acting
68
+ // ANTI-LOOP: Detect and BLOCK screenshot loops (PER STEP)
69
+ const screenshotsThisStep = memory.history.filter(s => s.stepNumber === stepNumber &&
70
+ (s.code.includes('take_screenshot') || s.action.includes('Screenshot')));
58
71
  const recentScreenshots = memory.history.slice(-3).filter(s => s.code.includes('take_screenshot') || s.action.includes('Screenshot'));
59
- if (recentScreenshots.length >= 2 && iteration >= 3) {
72
+ if (screenshotsThisStep.length >= 3) {
73
+ this.logger?.(`[Orchestrator] 🚨 SCREENSHOT LOOP - ${screenshotsThisStep.length} screenshots THIS STEP! BLOCKING further screenshots`, 'error');
74
+ }
75
+ else if (recentScreenshots.length >= 2 && iteration >= 3) {
60
76
  this.logger?.(`[Orchestrator] ⚠️ WARNING: ${recentScreenshots.length} screenshots in last 3 iterations - agent may be looping`, 'warn');
61
- this.logger?.(`[Orchestrator] 💭 System: Stop gathering info, START ACTING with available selectors`);
62
77
  }
63
78
  if (decision.toolCalls && decision.toolCalls.length > 0) {
64
- toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber);
79
+ // ENFORCE: Block screenshot tool calls if too many taken IN THIS STEP
80
+ if (screenshotsThisStep.length >= 3) {
81
+ decision.toolCalls = decision.toolCalls.filter(tc => tc.name !== 'take_screenshot');
82
+ if (decision.toolCalls.length === 0) {
83
+ this.logger?.(`[Orchestrator] 🚫 REJECTED screenshot tool call - loop detected. Agent must ACT.`, 'warn');
84
+ toolResults = [{
85
+ toolName: 'take_screenshot',
86
+ success: false,
87
+ error: 'SYSTEM BLOCKED: Too many screenshots taken. You must use existing DOM snapshots and execute commands now. Analysis paralysis detected.',
88
+ data: null
89
+ }];
90
+ }
91
+ }
92
+ if (decision.toolCalls.length > 0) {
93
+ toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber, context.currentPageInfo.refMap);
94
+ }
65
95
  // If agent wants to wait for tool results before proceeding, call agent again
66
96
  if (decision.needsToolResults) {
67
97
  const updatedContext = { ...context, toolResults };
@@ -80,7 +110,7 @@ class OrchestratorAgent {
80
110
  if (decision.blockerDetected && decision.blockerDetected.clearingCommands && decision.blockerDetected.clearingCommands.length > 0) {
81
111
  this.logger?.(`[Orchestrator] 🚧 BLOCKER DETECTED: ${decision.blockerDetected.description}`);
82
112
  this.logger?.(`[Orchestrator] 🧹 Clearing blocker with ${decision.blockerDetected.clearingCommands.length} command(s)...`);
83
- const blockerResult = await this.executeCommandsSequentially(decision.blockerDetected.clearingCommands, page, memory, stepNumber, iteration, jobId);
113
+ const blockerResult = await this.executeCommands(decision.blockerDetected.clearingCommands, page, memory, stepNumber, iteration, jobId);
84
114
  // Add blocker commands with comment to output
85
115
  if (blockerResult.executed.length > 0) {
86
116
  commandsExecuted.push(`// Blocker: ${decision.blockerDetected.description}`);
@@ -94,8 +124,8 @@ class OrchestratorAgent {
94
124
  }
95
125
  }
96
126
  // Execute main commands (only if no blocker failure)
97
- if (decision.commands && decision.commands.length > 0 && !iterationHadFailure) {
98
- const executeResult = await this.executeCommandsSequentially(decision.commands, page, memory, stepNumber, iteration, jobId);
127
+ if (!iterationHadFailure && decision.commands && decision.commands.length > 0) {
128
+ const executeResult = await this.executeCommands(decision.commands, page, memory, stepNumber, iteration, jobId);
99
129
  commandsExecuted.push(...executeResult.executed);
100
130
  // Track failures
101
131
  if (!executeResult.allSucceeded) {
@@ -107,8 +137,8 @@ class OrchestratorAgent {
107
137
  consecutiveFailures = 0; // Reset on success
108
138
  }
109
139
  }
110
- // Handle coordinate-based actions (NEW - fallback when selectors fail)
111
- if (decision.coordinateAction && !iterationHadFailure) {
140
+ // Handle coordinate-based actions (NEW - fallback when selectors fail) - ONLY if enabled
141
+ if (this.config.enableCoordinateMode && decision.coordinateAction && !iterationHadFailure) {
112
142
  coordinateAttempts++;
113
143
  this.logger?.(`[Orchestrator] 🎯 Coordinate Action (attempt ${coordinateAttempts}/2): ${decision.coordinateAction.action} at (${decision.coordinateAction.xPercent}%, ${decision.coordinateAction.yPercent}%)`);
114
144
  try {
@@ -120,7 +150,7 @@ class OrchestratorAgent {
120
150
  this.logger?.(`[Orchestrator] Generated commands:`);
121
151
  coordCommands.forEach(cmd => this.logger?.(` ${cmd}`));
122
152
  // Execute coordinate commands
123
- const coordResult = await this.executeCommandsSequentially(coordCommands, page, memory, stepNumber, iteration, jobId);
153
+ const coordResult = await this.executeCommands(coordCommands, page, memory, stepNumber, iteration, jobId);
124
154
  commandsExecuted.push(...coordResult.executed);
125
155
  if (!coordResult.allSucceeded) {
126
156
  this.logger?.(`[Orchestrator] ❌ Coordinate action failed (Playwright error)`);
@@ -263,9 +293,7 @@ class OrchestratorAgent {
263
293
  memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
264
294
  }
265
295
  }
266
- // Store self-reflection for next iteration
267
- previousReflection = decision.selfReflection;
268
- // Store note to future self (NEW - tactical memory across iterations AND steps)
296
+ // Store note to future self (tactical memory across iterations AND steps)
269
297
  if (decision.noteToFutureSelf) {
270
298
  noteToSelf = {
271
299
  fromIteration: iteration,
@@ -312,15 +340,46 @@ class OrchestratorAgent {
312
340
  /**
313
341
  * Build context for agent
314
342
  */
315
- async buildAgentContext(page, currentStepGoal, stepNumber, totalSteps, scenarioSteps, memory, previousReflection, consecutiveFailures, noteFromPreviousIteration // NEW
343
+ async buildAgentContext(page, currentStepGoal, stepNumber, totalSteps, scenarioSteps, memory, consecutiveFailures, noteFromPreviousIteration, priorSteps, // NEW: For repair mode
344
+ nextSteps // NEW: For repair mode
316
345
  ) {
317
346
  // Get fresh DOM
318
347
  const currentPageInfo = await (0, page_info_utils_1.getEnhancedPageInfo)(page);
319
348
  const currentURL = page.url();
320
349
  // Get recent steps
321
350
  const recentSteps = memory.history.slice(-this.config.recentStepsCount);
351
+ // SoM integration: Update markers and capture screenshot with visual IDs
352
+ let somScreenshot = undefined;
353
+ let somElementMap = undefined;
354
+ if (this.config.useSoM && this.somHandler) {
355
+ try {
356
+ if (!this.somHandler) {
357
+ this.somHandler = new page_som_handler_1.PageSoMHandler(page, this.logger);
358
+ }
359
+ else {
360
+ this.somHandler.setPage(page);
361
+ }
362
+ // Wait briefly for page stability (handles first iteration + safety net for fast SPAs)
363
+ try {
364
+ await page.waitForLoadState('domcontentloaded', { timeout: 5000 });
365
+ }
366
+ catch (error) {
367
+ // Page already loaded or timeout - continue
368
+ }
369
+ // Update SoM markers
370
+ await this.somHandler.updateSom();
371
+ // Get screenshot WITH markers (viewport only - agent can scroll or use take_screenshot for full page)
372
+ somScreenshot = await this.somHandler.getScreenshot(true, false, 60);
373
+ // Get element map for disambiguation
374
+ somElementMap = this.somHandler.getSomElementMap();
375
+ this.logger?.(`[Orchestrator] SoM screenshot captured for agent decision-making`, 'log');
376
+ }
377
+ catch (error) {
378
+ this.logger?.(`[Orchestrator] Failed to capture SoM screenshot: ${error.message}`, 'error');
379
+ }
380
+ }
322
381
  // Build context
323
- return {
382
+ const context = {
324
383
  overallGoal: scenarioSteps.join('\n'),
325
384
  currentStepGoal,
326
385
  stepNumber,
@@ -332,9 +391,17 @@ class OrchestratorAgent {
332
391
  recentSteps,
333
392
  experiences: memory.experiences,
334
393
  extractedData: memory.extractedData,
335
- previousIterationGuidance: previousReflection,
336
- noteFromPreviousIteration // NEW: Pass tactical note from previous iteration
394
+ noteFromPreviousIteration, // Pass tactical note from previous iteration
395
+ somScreenshot, // SoM screenshot with visual markers (current)
396
+ somElementMap, // SoM element details for disambiguation
397
+ priorSteps, // NEW: Repair context (undefined for script gen)
398
+ nextSteps // NEW: Repair context (undefined for script gen)
337
399
  };
400
+ // Save current screenshot as previous for next iteration (for tool access)
401
+ if (somScreenshot) {
402
+ this.previousSomScreenshot = somScreenshot;
403
+ }
404
+ return context;
338
405
  }
339
406
  /**
340
407
  * Call agent to make decision
@@ -346,10 +413,20 @@ class OrchestratorAgent {
346
413
  const useCoordinateMode = consecutiveFailures !== undefined && consecutiveFailures >= 3;
347
414
  // Build appropriate system prompt based on mode
348
415
  const toolDescriptions = this.toolRegistry.generateToolDescriptions();
349
- const systemPrompt = useCoordinateMode
350
- ? orchestrator_prompts_1.OrchestratorPrompts.buildCoordinateSystemPrompt()
351
- : orchestrator_prompts_1.OrchestratorPrompts.buildSystemPrompt(toolDescriptions);
352
- const userPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures);
416
+ let systemPrompt;
417
+ if (this.config.useSoM) {
418
+ // SoM mode: Use visual element identification
419
+ systemPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates);
420
+ }
421
+ else if (useCoordinateMode) {
422
+ // Coordinate mode: Fallback when selectors fail
423
+ systemPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildCoordinateSystemPrompt();
424
+ }
425
+ else {
426
+ // Standard mode: DOM-based selectors
427
+ systemPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildSystemPrompt(toolDescriptions, this.config.enableCoordinateMode);
428
+ }
429
+ const userPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures, this.config.enableCoordinateMode);
353
430
  // Log prompt lengths for monitoring
354
431
  const systemLength = systemPrompt.length;
355
432
  const userLength = userPrompt.length;
@@ -363,6 +440,11 @@ class OrchestratorAgent {
363
440
  systemPrompt,
364
441
  userPrompt
365
442
  };
443
+ // Include current SoM screenshot as image
444
+ if (context.somScreenshot) {
445
+ llmRequest.imageUrl = context.somScreenshot;
446
+ this.logger?.(`[Orchestrator] Including SoM screenshot in LLM request`, 'log');
447
+ }
366
448
  const response = await this.llmFacade.llmProvider.callLLM(llmRequest);
367
449
  // Report token usage
368
450
  if (response.usage && this.progressReporter?.onTokensUsed) {
@@ -383,7 +465,7 @@ class OrchestratorAgent {
383
465
  this.logger?.(`[Orchestrator] ⚠ No usage data in LLM response`, 'warn');
384
466
  }
385
467
  // Parse response
386
- return this.parseAgentDecision(response.answer);
468
+ return this.decisionParser.parse(response.answer);
387
469
  }
388
470
  catch (error) {
389
471
  this.logger?.(`[Orchestrator] ✗ Agent call failed: ${error.message}`, 'error');
@@ -395,59 +477,20 @@ class OrchestratorAgent {
395
477
  };
396
478
  }
397
479
  }
398
- /**
399
- * Parse agent decision from LLM response
400
- */
401
- parseAgentDecision(response) {
402
- try {
403
- // Extract JSON from response
404
- const jsonMatch = response.match(/\{[\s\S]*\}/);
405
- if (!jsonMatch) {
406
- this.logger?.(`[Orchestrator] ✗ No JSON found in LLM response`, 'error');
407
- this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
408
- throw new Error('No JSON found in response');
409
- }
410
- const parsed = JSON.parse(jsonMatch[0]);
411
- // Validate required fields
412
- // Accept either "reasoning" or "statusReasoning" (LLMs sometimes only provide one)
413
- if (!parsed.status || (!parsed.reasoning && !parsed.statusReasoning)) {
414
- this.logger?.(`[Orchestrator] ✗ Missing required fields in parsed JSON`, 'error');
415
- this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
416
- this.logger?.(`[Orchestrator] 📄 PARSED JSON:\n${JSON.stringify(parsed, null, 2)}`, 'error');
417
- this.logger?.(`[Orchestrator] ❌ Has status: ${!!parsed.status}, Has reasoning: ${!!parsed.reasoning}, Has statusReasoning: ${!!parsed.statusReasoning}`, 'error');
418
- throw new Error('Missing required fields: status and (reasoning or statusReasoning)');
419
- }
420
- // Normalize: if reasoning is missing but statusReasoning exists, use statusReasoning as reasoning
421
- if (!parsed.reasoning && parsed.statusReasoning) {
422
- parsed.reasoning = parsed.statusReasoning;
423
- }
424
- return parsed;
425
- }
426
- catch (error) {
427
- this.logger?.(`[Orchestrator] ✗ Failed to parse agent decision: ${error.message}`, 'error');
428
- // Only log full response if not already logged above
429
- if (!error.message.includes('Missing required fields') && !error.message.includes('No JSON found')) {
430
- this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
431
- }
432
- // Return fallback
433
- return {
434
- status: 'stuck',
435
- statusReasoning: 'Failed to parse agent response',
436
- reasoning: `Parse error: ${error.message}`
437
- };
438
- }
439
- }
440
480
  /**
441
481
  * Execute tools
442
482
  */
443
- async executeTools(toolCalls, page, memory, stepNumber) {
483
+ async executeTools(toolCalls, page, memory, stepNumber, refMap) {
444
484
  this.logger?.(`[Orchestrator] 🔧 Executing ${toolCalls.length} tool(s)`);
445
485
  const results = {};
446
486
  const toolContext = {
447
487
  page,
448
488
  memory,
449
489
  stepNumber,
450
- logger: this.logger
490
+ logger: this.logger,
491
+ refMap, // Pass refMap for interact_with_ref tool
492
+ previousSomScreenshot: this.previousSomScreenshot, // For view_previous_screenshot tool
493
+ somHandler: this.somHandler // For refresh_som_markers tool
451
494
  };
452
495
  for (const toolCall of toolCalls.slice(0, this.config.maxToolCallsPerIteration)) {
453
496
  this.logger?.(`[Orchestrator] ▶ ${toolCall.name}(${JSON.stringify(toolCall.params).substring(0, 50)}...)`);
@@ -463,45 +506,214 @@ class OrchestratorAgent {
463
506
  return results;
464
507
  }
465
508
  /**
466
- * Execute commands sequentially with SHARED context (variables persist across commands)
509
+ * Parse SomCommand from command object
467
510
  */
468
- async executeCommandsSequentially(commands, page, memory, stepNumber, iteration, jobId) {
469
- this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s) in shared context`);
511
+ parseSomCommand(cmd) {
512
+ if (typeof cmd === 'object' && cmd.action) {
513
+ // Valid if: has elementRef, OR has coord, OR is navigation action
514
+ const isNavigationAction = ['navigate', 'goBack', 'goForward', 'reload'].includes(cmd.action);
515
+ const hasValidTarget = cmd.elementRef || cmd.coord || isNavigationAction;
516
+ if (hasValidTarget) {
517
+ return {
518
+ elementRef: cmd.elementRef,
519
+ coord: cmd.coord,
520
+ action: cmd.action,
521
+ value: cmd.value,
522
+ fromCoord: cmd.fromCoord,
523
+ toCoord: cmd.toCoord,
524
+ force: cmd.force,
525
+ scrollAmount: cmd.scrollAmount,
526
+ scrollDirection: cmd.scrollDirection,
527
+ button: cmd.button,
528
+ clickCount: cmd.clickCount,
529
+ modifiers: cmd.modifiers,
530
+ delay: cmd.delay,
531
+ timeout: cmd.timeout
532
+ };
533
+ }
534
+ }
535
+ return null;
536
+ }
537
+ /**
538
+ * Execute commands (mix of ref and playwright commands)
539
+ */
540
+ async executeCommands(commands, page, memory, stepNumber, iteration, jobId) {
541
+ this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s)`);
470
542
  const executed = [];
471
- const limitedCommands = commands.slice(0, this.config.maxCommandsPerIteration);
472
- // Build execution with shared context (all commands share scope - variables persist)
473
- const commandsWithTracking = limitedCommands.map((cmd, i) => {
474
- return `
475
- // Command ${i + 1}/${limitedCommands.length}
543
+ if (commands.length === 0) {
544
+ return { executed: [], allSucceeded: true };
545
+ }
546
+ // SoM mode: Execute commands through PageSoMHandler
547
+ if (this.config.useSoM && this.somHandler) {
548
+ this.logger?.(`[Orchestrator] Using SoM mode for command execution`, 'log');
549
+ for (let i = 0; i < commands.length; i++) {
550
+ const cmd = commands[i];
551
+ // Check if verification or action command
552
+ if ((0, som_types_1.isSomVerification)(cmd)) {
553
+ // Handle verification command
554
+ try {
555
+ const result = await this.somHandler.executeVerification(cmd);
556
+ // Always add command to executed array (even if verification failed)
557
+ // Scripts should contain the expect even if it fails during generation
558
+ if (result.playwrightCommand) {
559
+ executed.push(result.playwrightCommand);
560
+ }
561
+ if (result.success) {
562
+ this.logger?.(`[Orchestrator] ✓ [${i + 1}/${commands.length}] Verification passed`, 'log');
563
+ memory.history.push({
564
+ stepNumber,
565
+ iteration,
566
+ action: `Verification ${i + 1}/${commands.length}: ${cmd.verificationType}`,
567
+ code: result.playwrightCommand,
568
+ result: 'success',
569
+ observation: `Verified: ${cmd.description || cmd.expected}`,
570
+ url: page.url(),
571
+ timestamp: Date.now()
572
+ });
573
+ }
574
+ else {
575
+ this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] Verification failed (non-fatal): ${result.error}`, 'warn');
576
+ memory.history.push({
577
+ stepNumber,
578
+ iteration,
579
+ action: `Verification ${i + 1}/${commands.length} - FAILED`,
580
+ code: result.playwrightCommand || JSON.stringify(cmd),
581
+ result: 'failure',
582
+ observation: `Failed: ${result.error}`,
583
+ error: result.error,
584
+ url: page.url(),
585
+ timestamp: Date.now()
586
+ });
587
+ // Continue anyway - verification failures are non-blocking for script generation
588
+ }
589
+ // Small delay between commands
590
+ if (i < commands.length - 1) {
591
+ await page.waitForTimeout(300);
592
+ }
593
+ }
594
+ catch (error) {
595
+ this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] Verification exception: ${error.message}`, 'error');
596
+ }
597
+ }
598
+ else if ((0, som_types_1.isSomCommand)(cmd)) {
599
+ // Handle action command (existing logic)
600
+ const somCommand = cmd;
601
+ try {
602
+ const result = await this.somHandler.runCommand(somCommand, this.config.somUseSomIdBasedCommands || false);
603
+ if (result.status === som_types_1.CommandRunStatus.SUCCESS && result.successAttempt) {
604
+ this.logger?.(`[Orchestrator] ✓ [${i + 1}/${commands.length}] SoM action succeeded`, 'log');
605
+ executed.push(result.successAttempt.command);
606
+ memory.history.push({
607
+ stepNumber,
608
+ iteration,
609
+ action: `SoM Action ${i + 1}/${commands.length}: ${somCommand.action}`,
610
+ code: result.successAttempt.command,
611
+ result: 'success',
612
+ observation: 'Executed successfully',
613
+ url: page.url(),
614
+ timestamp: Date.now()
615
+ });
616
+ // Small delay for form validation/animations
617
+ if (i < commands.length - 1) {
618
+ await page.waitForTimeout(300);
619
+ }
620
+ }
621
+ else {
622
+ this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] SoM action failed: ${result.error}`, 'error');
623
+ memory.history.push({
624
+ stepNumber,
625
+ iteration,
626
+ action: `SoM Action ${i + 1}/${commands.length}: ${somCommand.action} - FAILED`,
627
+ code: JSON.stringify(somCommand),
628
+ result: 'failure',
629
+ observation: `Failed: ${result.error}`,
630
+ error: result.error,
631
+ url: page.url(),
632
+ timestamp: Date.now()
633
+ });
634
+ // Refresh SoM after batch (DOM may have changed)
635
+ if (this.somHandler && page) {
636
+ this.somHandler.setPage(page);
637
+ await this.somHandler.updateSom();
638
+ }
639
+ return { executed, allSucceeded: false };
640
+ }
641
+ }
642
+ catch (error) {
643
+ this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] SoM action exception: ${error.message}`, 'error');
644
+ memory.history.push({
645
+ stepNumber,
646
+ iteration,
647
+ action: `SoM Action ${i + 1}/${commands.length} - EXCEPTION`,
648
+ code: JSON.stringify(somCommand),
649
+ result: 'failure',
650
+ observation: `Exception: ${error.message}`,
651
+ error: error.message,
652
+ url: page.url(),
653
+ timestamp: Date.now()
654
+ });
655
+ // Refresh SoM after batch (DOM may have changed)
656
+ if (this.somHandler && page) {
657
+ this.somHandler.setPage(page);
658
+ await this.somHandler.updateSom();
659
+ }
660
+ return { executed, allSucceeded: false };
661
+ }
662
+ }
663
+ else {
664
+ this.logger?.(`[Orchestrator] ⚠ [${i + 1}/${commands.length}] Not a valid SoM command/verification, skipping`, 'warn');
665
+ }
666
+ }
667
+ // Always wait for page to stabilize after command batch
668
+ // This handles both explicit navigation AND clicks that trigger navigation/SPA routes
669
+ try {
670
+ this.logger?.(`[Orchestrator] Waiting for page to stabilize...`, 'log');
671
+ // Use networkidle with short timeout to catch navigation without blocking on SPAs with continuous requests
672
+ await page.waitForLoadState('networkidle', { timeout: 3000 });
673
+ this.logger?.(`[Orchestrator] Page stabilized (networkidle)`, 'log');
674
+ }
675
+ catch (error) {
676
+ // If networkidle times out, fall back to domcontentloaded
677
+ try {
678
+ await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
679
+ this.logger?.(`[Orchestrator] Page loaded (domcontentloaded)`, 'log');
680
+ }
681
+ catch (error2) {
682
+ this.logger?.(`[Orchestrator] Page load wait timeout (continuing anyway)`, 'warn');
683
+ }
684
+ }
685
+ // Refresh SoM after batch (DOM may have changed and page is now stable)
686
+ if (this.somHandler && page) {
687
+ this.somHandler.setPage(page);
688
+ await this.somHandler.updateSom();
689
+ }
690
+ return { executed, allSucceeded: true };
691
+ }
692
+ // Standard mode: Execute all commands in sequence with small delay between them
693
+ // Delay helps with form validation, button enabling, and animations
694
+ const wrappedCode = commands.map((cmd, i) => `
695
+ // Command ${i + 1}/${commands.length}
476
696
  try {
477
697
  ${cmd}
478
698
  __results.push({ index: ${i}, success: true });
699
+ ${i < commands.length - 1 ? 'await page.waitForTimeout(300);' : ''} // Small delay for form validation/animations
479
700
  } catch (error) {
480
701
  __results.push({ index: ${i}, success: false, error: error.message });
481
- throw error; // Stop on first failure
482
- }`;
483
- }).join('\n');
484
- const wrappedCode = `
485
- const __results = [];
486
- ${commandsWithTracking}
487
- return __results;
488
- `;
702
+ throw error;
703
+ }`).join('\n');
704
+ const fullCode = `const __results = []; ${wrappedCode} return __results;`;
489
705
  try {
490
- // Execute in shared context - variables declared here persist for entire scenario
491
- // Pass both page and expect to make Playwright assertions available
492
- const func = new Function('page', 'expect', 'return (async () => { ' + wrappedCode + ' })()');
493
- const results = await func(page, test_1.expect);
494
- // Record results for each command
495
- for (let i = 0; i < limitedCommands.length; i++) {
496
- const cmd = limitedCommands[i];
706
+ const func = new Function('page', 'expect', 'return (async () => { ' + fullCode + ' })()');
707
+ const results = await func(page, global.expect);
708
+ for (let i = 0; i < commands.length; i++) {
709
+ const cmd = commands[i];
497
710
  const result = results[i];
498
711
  if (result && result.success) {
499
- this.logger?.(`[Orchestrator] ✓ [${i + 1}/${limitedCommands.length}] Success`);
500
- // Record in history
712
+ this.logger?.(`[Orchestrator] ✓ [${i + 1}/${commands.length}] Success`);
501
713
  memory.history.push({
502
714
  stepNumber,
503
715
  iteration,
504
- action: `Command ${i + 1}/${limitedCommands.length}`,
716
+ action: `Command ${i + 1}/${commands.length}`,
505
717
  code: cmd,
506
718
  result: 'success',
507
719
  observation: 'Executed successfully',
@@ -511,122 +723,27 @@ return __results;
511
723
  executed.push(cmd);
512
724
  }
513
725
  }
514
- // Cap history
515
726
  if (memory.history.length > this.config.maxHistorySize) {
516
727
  memory.history = memory.history.slice(-this.config.maxHistorySize);
517
728
  }
518
729
  return { executed, allSucceeded: true };
519
730
  }
520
731
  catch (error) {
521
- // One of the commands failed - find which one
522
732
  const errorMessage = error.message || String(error);
523
- // Capture page state for debug logging
524
- let pageStateDebug = '';
525
- if (this.debugMode) {
526
- try {
527
- const pageInfo = await (0, page_info_utils_1.getEnhancedPageInfo)(page);
528
- pageStateDebug = `
529
- === DEBUG: PAGE STATE AT FAILURE ===
530
- URL: ${page.url()}
531
- Title: ${pageInfo.title}
532
-
533
- INTERACTIVE ELEMENTS:
534
- ${pageInfo.formattedElements}
535
-
536
- ARIA SNAPSHOT:
537
- ${JSON.stringify(pageInfo.ariaSnapshot, null, 2)}
538
-
539
- ====================================`;
540
- }
541
- catch (debugError) {
542
- pageStateDebug = `Failed to capture page state: ${debugError.message}`;
543
- }
544
- }
545
- // Record all that succeeded, then the failure
546
- for (let i = 0; i < limitedCommands.length; i++) {
547
- const cmd = limitedCommands[i];
548
- // This is a failed command (error happened here or earlier)
549
- if (executed.length <= i) {
550
- this.logger?.(`[Orchestrator] ✗ [${i + 1}/${limitedCommands.length}] Failed: ${errorMessage}`, 'error');
551
- // Log detailed debug info
552
- if (this.debugMode && pageStateDebug) {
553
- this.logger?.(pageStateDebug, 'debug');
554
- }
555
- memory.history.push({
556
- stepNumber,
557
- iteration,
558
- action: `Command ${i + 1}/${limitedCommands.length} - FAILED`,
559
- code: cmd,
560
- result: 'failure',
561
- observation: `Failed with error: ${errorMessage}. This selector likely doesn't exist or is incorrect.`,
562
- error: errorMessage,
563
- url: page.url(),
564
- timestamp: Date.now()
565
- });
566
- if (i < limitedCommands.length - 1) {
567
- this.logger?.(`[Orchestrator] ⚠ Skipping remaining ${limitedCommands.length - i - 1} command(s)`, 'warn');
568
- }
569
- break;
570
- }
571
- }
572
- return { executed, allSucceeded: false };
573
- }
574
- }
575
- /**
576
- * Execute a single command
577
- */
578
- async executeCommand(cmd, page) {
579
- // Wrap in async function and execute
580
- const wrapped = `(async () => { ${cmd} })()`;
581
- try {
582
- await eval(wrapped);
583
- }
584
- catch (error) {
585
- // If eval fails, try direct execution with page context
586
- // Pass both page and expect to make Playwright assertions available
587
- const func = new Function('page', 'expect', `return (async () => { ${cmd} })()`);
588
- await func(page, test_1.expect);
589
- }
590
- }
591
- /**
592
- * Log agent decision
593
- */
594
- logAgentDecision(decision, iteration) {
595
- this.logger?.(`[Orchestrator] 💭 REASONING: ${decision.reasoning}`);
596
- if (decision.selfReflection) {
597
- this.logger?.(`[Orchestrator] 🧠 SELF-REFLECTION:`);
598
- this.logger?.(`[Orchestrator] Next: ${decision.selfReflection.guidanceForNext}`);
599
- if (decision.selfReflection.detectingLoop) {
600
- this.logger?.(`[Orchestrator] 🔄 LOOP DETECTED: ${decision.selfReflection.loopReasoning}`, 'warn');
601
- }
602
- }
603
- if (decision.toolCalls && decision.toolCalls.length > 0) {
604
- this.logger?.(`[Orchestrator] 🔧 TOOLS: ${decision.toolCalls.map(t => t.name).join(', ')}`);
605
- if (decision.toolReasoning) {
606
- this.logger?.(`[Orchestrator] 📋 Why: ${decision.toolReasoning}`);
607
- }
608
- }
609
- if (decision.blockerDetected) {
610
- this.logger?.(`[Orchestrator] 🚧 BLOCKER: ${decision.blockerDetected.description}`, 'warn');
611
- this.logger?.(`[Orchestrator] 🧹 Clearing with ${decision.blockerDetected.clearingCommands.length} command(s)`);
612
- }
613
- if (decision.stepReEvaluation?.detected) {
614
- this.logger?.(`[Orchestrator] 🔍 STEP RE-EVALUATION: ${decision.stepReEvaluation.issue}`, 'warn');
615
- this.logger?.(`[Orchestrator] 📝 Explanation: ${decision.stepReEvaluation.explanation}`);
616
- }
617
- if (decision.commands && decision.commands.length > 0) {
618
- this.logger?.(`[Orchestrator] 📝 COMMANDS (${decision.commands.length}):`);
619
- decision.commands.slice(0, 3).forEach((cmd, i) => {
620
- this.logger?.(`[Orchestrator] ${i + 1}. ${cmd.substring(0, 80)}...`);
733
+ this.logger?.(`[Orchestrator] Command execution failed: ${errorMessage}`, 'error');
734
+ memory.history.push({
735
+ stepNumber,
736
+ iteration,
737
+ action: `Command - FAILED`,
738
+ code: commands[executed.length] || '',
739
+ result: 'failure',
740
+ observation: `Failed: ${errorMessage}`,
741
+ error: errorMessage,
742
+ url: page.url(),
743
+ timestamp: Date.now()
621
744
  });
622
- if (decision.commands.length > 3) {
623
- this.logger?.(`[Orchestrator] ... and ${decision.commands.length - 3} more`);
624
- }
625
- if (decision.commandReasoning) {
626
- this.logger?.(`[Orchestrator] 💡 Why: ${decision.commandReasoning}`);
627
- }
745
+ return { executed, allSucceeded: false };
628
746
  }
629
- // Experiences will be logged when added to memory, no need to log here
630
747
  }
631
748
  /**
632
749
  * Report step progress
@@ -651,6 +768,219 @@ ${JSON.stringify(pageInfo.ariaSnapshot, null, 2)}
651
768
  agentStatus: decision.status
652
769
  });
653
770
  }
771
+ /**
772
+ * Execute exploration mode - agent autonomously explores to achieve journey goal
773
+ * Fires onStepProgress callbacks for each autonomous action (transparent to caller)
774
+ */
775
+ async executeExploration(page, explorationConfig, jobId) {
776
+ this.logger?.(`\n[Orchestrator] ========== EXPLORATION MODE ==========`);
777
+ this.logger?.(`[Orchestrator] 🎯 Journey Goal: ${explorationConfig.explorationPrompt}`);
778
+ if (explorationConfig.testDataPrompt) {
779
+ this.logger?.(`[Orchestrator] 📋 Test Data: ${explorationConfig.testDataPrompt}`);
780
+ }
781
+ const memory = {
782
+ history: [],
783
+ experiences: [],
784
+ extractedData: {}
785
+ };
786
+ const maxSteps = explorationConfig.maxExplorationSteps || 50;
787
+ let stepNumber = 0;
788
+ const commandsExecuted = [];
789
+ while (stepNumber < maxSteps) {
790
+ stepNumber++;
791
+ this.logger?.(`\n[Orchestrator] === Exploration Step ${stepNumber}/${maxSteps} ===`);
792
+ // Build exploratory context
793
+ const context = await this.buildExploratoryContext(page, explorationConfig.explorationPrompt, explorationConfig.testDataPrompt, memory, stepNumber, maxSteps);
794
+ // Call agent with exploratory prompt
795
+ const decision = await this.callExploratoryAgent(context, jobId, stepNumber);
796
+ this.decisionParser.log(decision, stepNumber);
797
+ // Report step start (fires JourneyRunner's beforeStepStart callback)
798
+ if (this.progressReporter?.onStepProgress) {
799
+ const stepInfo = {
800
+ jobId,
801
+ stepNumber,
802
+ stepId: `exploration-${stepNumber}-${Date.now()}`,
803
+ description: decision.reasoning,
804
+ code: '', // Will be filled after commands execute
805
+ status: progress_reporter_1.StepExecutionStatus.IN_PROGRESS,
806
+ wasRepaired: false
807
+ };
808
+ await this.progressReporter.onStepProgress(stepInfo);
809
+ }
810
+ // Execute tools if requested
811
+ if (decision.toolCalls && decision.toolCalls.length > 0) {
812
+ const toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber);
813
+ // If needs tool results, call agent again
814
+ if (decision.needsToolResults) {
815
+ const updatedContext = { ...context, toolResults };
816
+ const continuedDecision = await this.callExploratoryAgent(updatedContext, jobId, stepNumber);
817
+ decision.commands = continuedDecision.commands || decision.commands;
818
+ decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
819
+ decision.status = continuedDecision.status;
820
+ }
821
+ }
822
+ // Handle blocker clearing
823
+ if (decision.blockerDetected && decision.blockerDetected.clearingCommands) {
824
+ this.logger?.(`[Orchestrator] 🚧 Clearing blocker: ${decision.blockerDetected.description}`);
825
+ const blockerResult = await this.executeCommands(decision.blockerDetected.clearingCommands, page, memory, stepNumber, 1, jobId);
826
+ commandsExecuted.push(...blockerResult.executed);
827
+ }
828
+ // Execute exploration commands
829
+ let commandsSucceeded = true;
830
+ if (decision.commands && decision.commands.length > 0) {
831
+ const executeResult = await this.executeCommands(decision.commands, page, memory, stepNumber, 1, jobId);
832
+ commandsExecuted.push(...executeResult.executed);
833
+ commandsSucceeded = executeResult.allSucceeded;
834
+ }
835
+ // Report step completion (fires JourneyRunner's onStepComplete callback)
836
+ if (this.progressReporter?.onStepProgress) {
837
+ const stepInfo = {
838
+ jobId,
839
+ stepNumber,
840
+ stepId: `exploration-${stepNumber}-${Date.now()}`,
841
+ description: decision.reasoning,
842
+ code: decision.commands?.join('\n') || '',
843
+ status: commandsSucceeded ? progress_reporter_1.StepExecutionStatus.SUCCESS : progress_reporter_1.StepExecutionStatus.FAILURE,
844
+ error: commandsSucceeded ? undefined : 'Command execution failed',
845
+ wasRepaired: false
846
+ };
847
+ await this.progressReporter.onStepProgress(stepInfo);
848
+ }
849
+ // Add experiences (both app patterns AND exploration progress)
850
+ if (decision.experiences) {
851
+ memory.experiences.push(...decision.experiences);
852
+ if (memory.experiences.length > this.config.maxExperiences) {
853
+ memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
854
+ }
855
+ }
856
+ // Store note for next iteration
857
+ if (decision.noteToFutureSelf) {
858
+ memory.latestNote = {
859
+ fromIteration: stepNumber,
860
+ content: decision.noteToFutureSelf
861
+ };
862
+ }
863
+ // Check termination
864
+ if (decision.status === 'complete') {
865
+ this.logger?.(`[Orchestrator] ✅ Journey exploration complete: ${decision.statusReasoning}`);
866
+ return {
867
+ success: true,
868
+ commands: commandsExecuted,
869
+ iterations: stepNumber,
870
+ terminationReason: 'complete',
871
+ memory
872
+ };
873
+ }
874
+ else if (decision.status === 'stuck') {
875
+ this.logger?.(`[Orchestrator] ❌ Exploration stuck: ${decision.statusReasoning}`);
876
+ return {
877
+ success: false,
878
+ commands: commandsExecuted,
879
+ iterations: stepNumber,
880
+ terminationReason: 'agent_stuck',
881
+ memory,
882
+ error: decision.statusReasoning
883
+ };
884
+ }
885
+ }
886
+ // Hit max steps - not necessarily a failure
887
+ this.logger?.(`[Orchestrator] ⚠ Maximum exploration steps reached (budget limit)`);
888
+ return {
889
+ success: true, // Not a failure - just budget limit
890
+ commands: commandsExecuted,
891
+ iterations: stepNumber,
892
+ terminationReason: 'system_limit',
893
+ memory
894
+ };
895
+ }
896
+ async buildExploratoryContext(page, explorationPrompt, testDataPrompt, memory, stepNumber, maxSteps) {
897
+ // Wait for page to be ready and elements to appear (especially important after navigation)
898
+ const currentPageInfo = await page_info_retry_1.PageInfoRetry.getWithRetry(page);
899
+ const currentURL = page.url();
900
+ const recentSteps = memory.history.slice(-this.config.recentStepsCount);
901
+ // SoM integration for exploratory mode
902
+ let somScreenshot = undefined;
903
+ let somElementMap = undefined;
904
+ if (this.config.useSoM && this.somHandler) {
905
+ try {
906
+ this.somHandler.setPage(page);
907
+ // Wait briefly for page stability (handles first iteration + safety net for fast SPAs)
908
+ try {
909
+ await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
910
+ }
911
+ catch (error) {
912
+ // Page already loaded or timeout - continue
913
+ }
914
+ // Update SoM markers
915
+ await this.somHandler.updateSom();
916
+ somScreenshot = await this.somHandler.getScreenshot(true, false, 60); // Viewport only - agent can scroll or request full page
917
+ // Get element map for disambiguation
918
+ somElementMap = this.somHandler.getSomElementMap();
919
+ this.logger?.(`[Orchestrator] SoM screenshot captured for exploratory agent`, 'log');
920
+ }
921
+ catch (error) {
922
+ this.logger?.(`[Orchestrator] Failed to capture SoM screenshot: ${error.message}`, 'error');
923
+ }
924
+ }
925
+ const context = {
926
+ overallGoal: explorationPrompt,
927
+ currentStepGoal: explorationPrompt, // Same as overall for single journey
928
+ stepNumber,
929
+ totalSteps: maxSteps,
930
+ completedSteps: [],
931
+ remainingSteps: [],
932
+ currentPageInfo,
933
+ currentURL,
934
+ recentSteps,
935
+ experiences: memory.experiences,
936
+ extractedData: memory.extractedData,
937
+ noteFromPreviousIteration: memory.latestNote,
938
+ testDataPrompt, // CRITICAL: Store testDataPrompt in context
939
+ somScreenshot, // SoM screenshot for exploratory mode (current)
940
+ somElementMap // SoM element details for disambiguation
941
+ };
942
+ // Save current screenshot as previous for next iteration (for tool access)
943
+ if (somScreenshot) {
944
+ this.previousSomScreenshot = somScreenshot;
945
+ }
946
+ return context;
947
+ }
948
+ async callExploratoryAgent(context, jobId, stepNumber) {
949
+ const toolDescriptions = this.toolRegistry.generateToolDescriptions();
950
+ // Use SoM system prompt if in SoM mode, otherwise use standard exploratory prompt
951
+ const systemPrompt = this.config.useSoM
952
+ ? orchestrator_prompts_1.OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates)
953
+ : orchestrator_prompts_1.OrchestratorPrompts.buildExploratorySystemPrompt(toolDescriptions);
954
+ const userPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildExploratoryUserPrompt(context, context.overallGoal, context.testDataPrompt, // Pass testDataPrompt from context
955
+ stepNumber, context.totalSteps);
956
+ const llmRequest = {
957
+ model: model_constants_1.DEFAULT_MODEL,
958
+ systemPrompt,
959
+ userPrompt
960
+ };
961
+ // Include current SoM screenshot as image
962
+ if (context.somScreenshot) {
963
+ llmRequest.imageUrl = context.somScreenshot;
964
+ this.logger?.(`[Orchestrator] Including SoM screenshot in exploratory LLM request`, 'log');
965
+ }
966
+ const response = await this.llmFacade.llmProvider.callLLM(llmRequest);
967
+ // Report token usage
968
+ if (response.usage && this.progressReporter?.onTokensUsed) {
969
+ await this.progressReporter.onTokensUsed({
970
+ jobId,
971
+ stepNumber,
972
+ iteration: 1,
973
+ inputTokens: response.usage.inputTokens,
974
+ outputTokens: response.usage.outputTokens,
975
+ includesImage: false,
976
+ model: model_constants_1.DEFAULT_MODEL,
977
+ timestamp: Date.now()
978
+ });
979
+ }
980
+ // Parse response (same JSON format as regular mode)
981
+ const decision = this.decisionParser.parse(response.answer);
982
+ return decision;
983
+ }
654
984
  }
655
985
  exports.OrchestratorAgent = OrchestratorAgent;
656
986
  //# sourceMappingURL=orchestrator-agent.js.map