testchimp-runner-core 0.0.34 → 0.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. package/dist/execution-service.d.ts +1 -4
  2. package/dist/execution-service.d.ts.map +1 -1
  3. package/dist/execution-service.js +155 -468
  4. package/dist/execution-service.js.map +1 -1
  5. package/dist/index.d.ts +3 -1
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +11 -1
  8. package/dist/index.js.map +1 -1
  9. package/dist/orchestrator/decision-parser.d.ts +18 -0
  10. package/dist/orchestrator/decision-parser.d.ts.map +1 -0
  11. package/dist/orchestrator/decision-parser.js +127 -0
  12. package/dist/orchestrator/decision-parser.js.map +1 -0
  13. package/dist/orchestrator/index.d.ts +4 -2
  14. package/dist/orchestrator/index.d.ts.map +1 -1
  15. package/dist/orchestrator/index.js +14 -2
  16. package/dist/orchestrator/index.js.map +1 -1
  17. package/dist/orchestrator/orchestrator-agent.d.ts +17 -14
  18. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  19. package/dist/orchestrator/orchestrator-agent.js +534 -204
  20. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  21. package/dist/orchestrator/orchestrator-prompts.d.ts +14 -2
  22. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
  23. package/dist/orchestrator/orchestrator-prompts.js +529 -247
  24. package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
  25. package/dist/orchestrator/page-som-handler.d.ts +106 -0
  26. package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
  27. package/dist/orchestrator/page-som-handler.js +1353 -0
  28. package/dist/orchestrator/page-som-handler.js.map +1 -0
  29. package/dist/orchestrator/som-types.d.ts +149 -0
  30. package/dist/orchestrator/som-types.d.ts.map +1 -0
  31. package/dist/orchestrator/som-types.js +87 -0
  32. package/dist/orchestrator/som-types.js.map +1 -0
  33. package/dist/orchestrator/tool-registry.d.ts +2 -0
  34. package/dist/orchestrator/tool-registry.d.ts.map +1 -1
  35. package/dist/orchestrator/tool-registry.js.map +1 -1
  36. package/dist/orchestrator/tools/index.d.ts +4 -1
  37. package/dist/orchestrator/tools/index.d.ts.map +1 -1
  38. package/dist/orchestrator/tools/index.js +7 -2
  39. package/dist/orchestrator/tools/index.js.map +1 -1
  40. package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
  41. package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
  42. package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
  43. package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
  44. package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
  45. package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
  46. package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
  47. package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
  48. package/dist/orchestrator/types.d.ts +23 -1
  49. package/dist/orchestrator/types.d.ts.map +1 -1
  50. package/dist/orchestrator/types.js +11 -1
  51. package/dist/orchestrator/types.js.map +1 -1
  52. package/dist/scenario-service.d.ts +5 -0
  53. package/dist/scenario-service.d.ts.map +1 -1
  54. package/dist/scenario-service.js +17 -0
  55. package/dist/scenario-service.js.map +1 -1
  56. package/dist/scenario-worker-class.d.ts +4 -0
  57. package/dist/scenario-worker-class.d.ts.map +1 -1
  58. package/dist/scenario-worker-class.js +18 -3
  59. package/dist/scenario-worker-class.js.map +1 -1
  60. package/dist/testing/agent-tester.d.ts +35 -0
  61. package/dist/testing/agent-tester.d.ts.map +1 -0
  62. package/dist/testing/agent-tester.js +84 -0
  63. package/dist/testing/agent-tester.js.map +1 -0
  64. package/dist/testing/ref-translator-tester.d.ts +44 -0
  65. package/dist/testing/ref-translator-tester.d.ts.map +1 -0
  66. package/dist/testing/ref-translator-tester.js +104 -0
  67. package/dist/testing/ref-translator-tester.js.map +1 -0
  68. package/dist/utils/hierarchical-selector.d.ts +47 -0
  69. package/dist/utils/hierarchical-selector.d.ts.map +1 -0
  70. package/dist/utils/hierarchical-selector.js +212 -0
  71. package/dist/utils/hierarchical-selector.js.map +1 -0
  72. package/dist/utils/page-info-retry.d.ts +14 -0
  73. package/dist/utils/page-info-retry.d.ts.map +1 -0
  74. package/dist/utils/page-info-retry.js +60 -0
  75. package/dist/utils/page-info-retry.js.map +1 -0
  76. package/dist/utils/page-info-utils.d.ts +1 -0
  77. package/dist/utils/page-info-utils.d.ts.map +1 -1
  78. package/dist/utils/page-info-utils.js +46 -18
  79. package/dist/utils/page-info-utils.js.map +1 -1
  80. package/dist/utils/ref-attacher.d.ts +21 -0
  81. package/dist/utils/ref-attacher.d.ts.map +1 -0
  82. package/dist/utils/ref-attacher.js +149 -0
  83. package/dist/utils/ref-attacher.js.map +1 -0
  84. package/dist/utils/ref-translator.d.ts +49 -0
  85. package/dist/utils/ref-translator.d.ts.map +1 -0
  86. package/dist/utils/ref-translator.js +276 -0
  87. package/dist/utils/ref-translator.js.map +1 -0
  88. package/package.json +1 -1
  89. package/plandocs/exploratory-mode-support-v2.plan.md +953 -0
  90. package/plandocs/exploratory-mode-support.plan.md +928 -0
  91. package/plandocs/journey-id-tracking-addendum.md +227 -0
  92. package/src/execution-service.ts +179 -596
  93. package/src/index.ts +10 -0
  94. package/src/orchestrator/decision-parser.ts +139 -0
  95. package/src/orchestrator/index.ts +25 -1
  96. package/src/orchestrator/orchestrator-agent.ts +656 -236
  97. package/src/orchestrator/orchestrator-prompts.ts +559 -247
  98. package/src/orchestrator/page-som-handler.ts +1565 -0
  99. package/src/orchestrator/som-types.ts +188 -0
  100. package/src/orchestrator/tool-registry.ts +2 -0
  101. package/src/orchestrator/tools/index.ts +4 -1
  102. package/src/orchestrator/tools/refresh-som-markers.ts +69 -0
  103. package/src/orchestrator/tools/view-previous-screenshot.ts +103 -0
  104. package/src/orchestrator/types.ts +49 -6
  105. package/src/scenario-service.ts +20 -0
  106. package/src/scenario-worker-class.ts +24 -3
  107. package/src/utils/page-info-retry.ts +65 -0
  108. package/src/utils/page-info-utils.ts +53 -18
  109. package/testchimp-runner-core-0.0.35.tgz +0 -0
  110. package/src/orchestrator/orchestrator-agent.ts.backup +0 -1386
  111. package/testchimp-runner-core-0.0.33.tgz +0 -0
  112. /package/{RELEASE_0.0.26.md → releasenotes/RELEASE_0.0.26.md} +0 -0
  113. /package/{RELEASE_0.0.27.md → releasenotes/RELEASE_0.0.27.md} +0 -0
  114. /package/{RELEASE_0.0.28.md → releasenotes/RELEASE_0.0.28.md} +0 -0
@@ -6,7 +6,7 @@
6
6
  import { expect } from '@playwright/test';
7
7
  import { LLMFacade } from '../llm-facade';
8
8
  import { ProgressReporter, TokenUsage, StepExecutionStatus } from '../progress-reporter';
9
- import { getEnhancedPageInfo } from '../utils/page-info-utils';
9
+ import { getEnhancedPageInfo, PageInfo } from '../utils/page-info-utils';
10
10
  import { CoordinateConverter } from '../utils/coordinate-converter';
11
11
  import { ToolRegistry, ToolExecutionContext } from './tool-registry';
12
12
  import { DEFAULT_MODEL } from '../model-constants';
@@ -20,9 +20,14 @@ import {
20
20
  SelfReflection,
21
21
  NoteToFutureSelf,
22
22
  CoordinateAction,
23
+ ExplorationMode,
23
24
  DEFAULT_AGENT_CONFIG
24
25
  } from './types';
25
26
  import { OrchestratorPrompts } from './orchestrator-prompts';
27
+ import { PageInfoRetry } from '../utils/page-info-retry';
28
+ import { DecisionParser } from './decision-parser';
29
+ import { PageSoMHandler } from './page-som-handler';
30
+ import { SomCommand, CommandRunStatus, InteractionAction, isSomVerification, isSomCommand, SomVerification } from './som-types';
26
31
 
27
32
  /**
28
33
  * Orchestrator Agent - manages step execution with tool use and memory
@@ -34,6 +39,9 @@ export class OrchestratorAgent {
34
39
  private config: Required<AgentConfig>;
35
40
  private logger?: (message: string, level?: 'log' | 'error' | 'warn' | 'debug') => void;
36
41
  private debugMode: boolean = false;
42
+ private decisionParser: DecisionParser;
43
+ private somHandler?: PageSoMHandler;
44
+ private previousSomScreenshot?: string; // Track previous iteration's screenshot
37
45
 
38
46
  constructor(
39
47
  llmFacade: LLMFacade,
@@ -49,6 +57,12 @@ export class OrchestratorAgent {
49
57
  this.progressReporter = progressReporter;
50
58
  this.logger = logger;
51
59
  this.debugMode = debugMode || false;
60
+ this.decisionParser = new DecisionParser(logger);
61
+
62
+ // Initialize SoM handler if enabled
63
+ if (this.config.useSoM) {
64
+ this.somHandler = new PageSoMHandler(null as any, this.logger);
65
+ }
52
66
  }
53
67
 
54
68
  setDebugMode(enabled: boolean): void {
@@ -65,13 +79,14 @@ export class OrchestratorAgent {
65
79
  totalSteps: number,
66
80
  scenarioSteps: string[],
67
81
  memory: JourneyMemory,
68
- jobId: string
82
+ jobId: string,
83
+ priorSteps?: string[], // NEW: For repair mode (undefined for script gen)
84
+ nextSteps?: string[] // NEW: For repair mode (undefined for script gen)
69
85
  ): Promise<OrchestratorStepResult> {
70
86
  this.logger?.(`\n[Orchestrator] ========== STEP ${stepNumber}/${totalSteps} ==========`);
71
87
  this.logger?.(`[Orchestrator] 🎯 Goal: ${stepDescription}`);
72
88
 
73
89
  let iteration = 0;
74
- let previousReflection: SelfReflection | undefined = undefined;
75
90
  let noteToSelf: NoteToFutureSelf | undefined = memory.latestNote; // Start with note from previous step
76
91
  const commandsExecuted: string[] = [];
77
92
  let consecutiveFailures = 0; // Track consecutive iterations with failed commands
@@ -90,9 +105,10 @@ export class OrchestratorAgent {
90
105
  totalSteps,
91
106
  scenarioSteps,
92
107
  memory,
93
- previousReflection,
94
108
  consecutiveFailures,
95
- noteToSelf // NEW: Pass note from previous iteration
109
+ noteToSelf, // Pass note from previous iteration
110
+ priorSteps, // NEW: Pass repair context
111
+ nextSteps // NEW: Pass repair context
96
112
  );
97
113
 
98
114
  // Call agent to make decision
@@ -105,7 +121,7 @@ export class OrchestratorAgent {
105
121
  );
106
122
 
107
123
  // Log agent's reasoning
108
- this.logAgentDecision(decision, iteration);
124
+ this.decisionParser.log(decision, iteration);
109
125
 
110
126
  // Report progress
111
127
  await this.reportStepProgress(jobId, stepNumber, stepDescription, decision, iteration);
@@ -113,17 +129,39 @@ export class OrchestratorAgent {
113
129
  // Execute tools if requested (tools are READ-ONLY, they don't change state)
114
130
  let toolResults: Record<string, any> = {};
115
131
 
116
- // ANTI-LOOP: Detect if agent is taking screenshots repeatedly without acting
132
+ // ANTI-LOOP: Detect and BLOCK screenshot loops (PER STEP)
133
+ const screenshotsThisStep = memory.history.filter(s =>
134
+ s.stepNumber === stepNumber &&
135
+ (s.code.includes('take_screenshot') || s.action.includes('Screenshot'))
136
+ );
117
137
  const recentScreenshots = memory.history.slice(-3).filter(s =>
118
138
  s.code.includes('take_screenshot') || s.action.includes('Screenshot')
119
139
  );
120
- if (recentScreenshots.length >= 2 && iteration >= 3) {
140
+
141
+ if (screenshotsThisStep.length >= 3) {
142
+ this.logger?.(`[Orchestrator] 🚨 SCREENSHOT LOOP - ${screenshotsThisStep.length} screenshots THIS STEP! BLOCKING further screenshots`, 'error');
143
+ } else if (recentScreenshots.length >= 2 && iteration >= 3) {
121
144
  this.logger?.(`[Orchestrator] ⚠️ WARNING: ${recentScreenshots.length} screenshots in last 3 iterations - agent may be looping`, 'warn');
122
- this.logger?.(`[Orchestrator] 💭 System: Stop gathering info, START ACTING with available selectors`);
123
145
  }
124
146
 
125
147
  if (decision.toolCalls && decision.toolCalls.length > 0) {
126
- toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber);
148
+ // ENFORCE: Block screenshot tool calls if too many taken IN THIS STEP
149
+ if (screenshotsThisStep.length >= 3) {
150
+ decision.toolCalls = decision.toolCalls.filter(tc => tc.name !== 'take_screenshot');
151
+ if (decision.toolCalls.length === 0) {
152
+ this.logger?.(`[Orchestrator] 🚫 REJECTED screenshot tool call - loop detected. Agent must ACT.`, 'warn');
153
+ toolResults = [{
154
+ toolName: 'take_screenshot',
155
+ success: false,
156
+ error: 'SYSTEM BLOCKED: Too many screenshots taken. You must use existing DOM snapshots and execute commands now. Analysis paralysis detected.',
157
+ data: null
158
+ }];
159
+ }
160
+ }
161
+
162
+ if (decision.toolCalls.length > 0) {
163
+ toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber, context.currentPageInfo.refMap);
164
+ }
127
165
 
128
166
  // If agent wants to wait for tool results before proceeding, call agent again
129
167
  if (decision.needsToolResults) {
@@ -147,7 +185,7 @@ export class OrchestratorAgent {
147
185
  this.logger?.(`[Orchestrator] 🚧 BLOCKER DETECTED: ${decision.blockerDetected.description}`);
148
186
  this.logger?.(`[Orchestrator] 🧹 Clearing blocker with ${decision.blockerDetected.clearingCommands.length} command(s)...`);
149
187
 
150
- const blockerResult = await this.executeCommandsSequentially(
188
+ const blockerResult = await this.executeCommands(
151
189
  decision.blockerDetected.clearingCommands,
152
190
  page,
153
191
  memory,
@@ -171,8 +209,8 @@ export class OrchestratorAgent {
171
209
  }
172
210
 
173
211
  // Execute main commands (only if no blocker failure)
174
- if (decision.commands && decision.commands.length > 0 && !iterationHadFailure) {
175
- const executeResult = await this.executeCommandsSequentially(
212
+ if (!iterationHadFailure && decision.commands && decision.commands.length > 0) {
213
+ const executeResult = await this.executeCommands(
176
214
  decision.commands,
177
215
  page,
178
216
  memory,
@@ -193,8 +231,8 @@ export class OrchestratorAgent {
193
231
  }
194
232
  }
195
233
 
196
- // Handle coordinate-based actions (NEW - fallback when selectors fail)
197
- if (decision.coordinateAction && !iterationHadFailure) {
234
+ // Handle coordinate-based actions (NEW - fallback when selectors fail) - ONLY if enabled
235
+ if (this.config.enableCoordinateMode && decision.coordinateAction && !iterationHadFailure) {
198
236
  coordinateAttempts++;
199
237
 
200
238
  this.logger?.(`[Orchestrator] 🎯 Coordinate Action (attempt ${coordinateAttempts}/2): ${decision.coordinateAction.action} at (${decision.coordinateAction.xPercent}%, ${decision.coordinateAction.yPercent}%)`);
@@ -211,7 +249,7 @@ export class OrchestratorAgent {
211
249
  coordCommands.forEach(cmd => this.logger?.(` ${cmd}`));
212
250
 
213
251
  // Execute coordinate commands
214
- const coordResult = await this.executeCommandsSequentially(
252
+ const coordResult = await this.executeCommands(
215
253
  coordCommands,
216
254
  page,
217
255
  memory,
@@ -380,10 +418,7 @@ export class OrchestratorAgent {
380
418
  }
381
419
  }
382
420
 
383
- // Store self-reflection for next iteration
384
- previousReflection = decision.selfReflection;
385
-
386
- // Store note to future self (NEW - tactical memory across iterations AND steps)
421
+ // Store note to future self (tactical memory across iterations AND steps)
387
422
  if (decision.noteToFutureSelf) {
388
423
  noteToSelf = {
389
424
  fromIteration: iteration,
@@ -441,9 +476,10 @@ export class OrchestratorAgent {
441
476
  totalSteps: number,
442
477
  scenarioSteps: string[],
443
478
  memory: JourneyMemory,
444
- previousReflection?: SelfReflection,
445
479
  consecutiveFailures?: number,
446
- noteFromPreviousIteration?: NoteToFutureSelf // NEW
480
+ noteFromPreviousIteration?: NoteToFutureSelf,
481
+ priorSteps?: string[], // NEW: For repair mode
482
+ nextSteps?: string[] // NEW: For repair mode
447
483
  ): Promise<AgentContext> {
448
484
  // Get fresh DOM
449
485
  const currentPageInfo = await getEnhancedPageInfo(page);
@@ -452,8 +488,41 @@ export class OrchestratorAgent {
452
488
  // Get recent steps
453
489
  const recentSteps = memory.history.slice(-this.config.recentStepsCount);
454
490
 
491
+ // SoM integration: Update markers and capture screenshot with visual IDs
492
+ let somScreenshot: string | undefined = undefined;
493
+ let somElementMap: string | undefined = undefined;
494
+ if (this.config.useSoM && this.somHandler) {
495
+ try {
496
+ if (!this.somHandler) {
497
+ this.somHandler = new PageSoMHandler(page, this.logger);
498
+ } else {
499
+ this.somHandler.setPage(page);
500
+ }
501
+
502
+ // Wait briefly for page stability (handles first iteration + safety net for fast SPAs)
503
+ try {
504
+ await page.waitForLoadState('domcontentloaded', { timeout: 5000 });
505
+ } catch (error: any) {
506
+ // Page already loaded or timeout - continue
507
+ }
508
+
509
+ // Update SoM markers
510
+ await this.somHandler.updateSom();
511
+
512
+ // Get screenshot WITH markers (viewport only - agent can scroll or use take_screenshot for full page)
513
+ somScreenshot = await this.somHandler.getScreenshot(true, false, 60);
514
+
515
+ // Get element map for disambiguation
516
+ somElementMap = this.somHandler.getSomElementMap();
517
+
518
+ this.logger?.(`[Orchestrator] SoM screenshot captured for agent decision-making`, 'log');
519
+ } catch (error: any) {
520
+ this.logger?.(`[Orchestrator] Failed to capture SoM screenshot: ${error.message}`, 'error');
521
+ }
522
+ }
523
+
455
524
  // Build context
456
- return {
525
+ const context = {
457
526
  overallGoal: scenarioSteps.join('\n'),
458
527
  currentStepGoal,
459
528
  stepNumber,
@@ -465,9 +534,19 @@ export class OrchestratorAgent {
465
534
  recentSteps,
466
535
  experiences: memory.experiences,
467
536
  extractedData: memory.extractedData,
468
- previousIterationGuidance: previousReflection,
469
- noteFromPreviousIteration // NEW: Pass tactical note from previous iteration
537
+ noteFromPreviousIteration, // Pass tactical note from previous iteration
538
+ somScreenshot, // SoM screenshot with visual markers (current)
539
+ somElementMap, // SoM element details for disambiguation
540
+ priorSteps, // NEW: Repair context (undefined for script gen)
541
+ nextSteps // NEW: Repair context (undefined for script gen)
470
542
  };
543
+
544
+ // Save current screenshot as previous for next iteration (for tool access)
545
+ if (somScreenshot) {
546
+ this.previousSomScreenshot = somScreenshot;
547
+ }
548
+
549
+ return context;
471
550
  }
472
551
 
473
552
  /**
@@ -487,10 +566,20 @@ export class OrchestratorAgent {
487
566
 
488
567
  // Build appropriate system prompt based on mode
489
568
  const toolDescriptions = this.toolRegistry.generateToolDescriptions();
490
- const systemPrompt = useCoordinateMode
491
- ? OrchestratorPrompts.buildCoordinateSystemPrompt()
492
- : OrchestratorPrompts.buildSystemPrompt(toolDescriptions);
493
- const userPrompt = OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures);
569
+ let systemPrompt: string;
570
+
571
+ if (this.config.useSoM) {
572
+ // SoM mode: Use visual element identification
573
+ systemPrompt = OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates);
574
+ } else if (useCoordinateMode) {
575
+ // Coordinate mode: Fallback when selectors fail
576
+ systemPrompt = OrchestratorPrompts.buildCoordinateSystemPrompt();
577
+ } else {
578
+ // Standard mode: DOM-based selectors
579
+ systemPrompt = OrchestratorPrompts.buildSystemPrompt(toolDescriptions, this.config.enableCoordinateMode);
580
+ }
581
+
582
+ const userPrompt = OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures, this.config.enableCoordinateMode);
494
583
 
495
584
  // Log prompt lengths for monitoring
496
585
  const systemLength = systemPrompt.length;
@@ -502,12 +591,18 @@ export class OrchestratorAgent {
502
591
 
503
592
  try {
504
593
  // Call LLM directly via provider
505
- const llmRequest = {
594
+ const llmRequest: any = {
506
595
  model: DEFAULT_MODEL,
507
596
  systemPrompt,
508
597
  userPrompt
509
598
  };
510
599
 
600
+ // Include current SoM screenshot as image
601
+ if (context.somScreenshot) {
602
+ llmRequest.imageUrl = context.somScreenshot;
603
+ this.logger?.(`[Orchestrator] Including SoM screenshot in LLM request`, 'log');
604
+ }
605
+
511
606
  const response = await this.llmFacade.llmProvider.callLLM(llmRequest);
512
607
 
513
608
  // Report token usage
@@ -529,7 +624,7 @@ export class OrchestratorAgent {
529
624
  }
530
625
 
531
626
  // Parse response
532
- return this.parseAgentDecision(response.answer);
627
+ return this.decisionParser.parse(response.answer);
533
628
 
534
629
  } catch (error: any) {
535
630
  this.logger?.(`[Orchestrator] ✗ Agent call failed: ${error.message}`, 'error');
@@ -543,57 +638,6 @@ export class OrchestratorAgent {
543
638
  }
544
639
  }
545
640
 
546
-
547
-
548
- /**
549
- * Parse agent decision from LLM response
550
- */
551
- private parseAgentDecision(response: string): AgentDecision {
552
- try {
553
- // Extract JSON from response
554
- const jsonMatch = response.match(/\{[\s\S]*\}/);
555
- if (!jsonMatch) {
556
- this.logger?.(`[Orchestrator] ✗ No JSON found in LLM response`, 'error');
557
- this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
558
- throw new Error('No JSON found in response');
559
- }
560
-
561
- const parsed = JSON.parse(jsonMatch[0]);
562
-
563
- // Validate required fields
564
- // Accept either "reasoning" or "statusReasoning" (LLMs sometimes only provide one)
565
- if (!parsed.status || (!parsed.reasoning && !parsed.statusReasoning)) {
566
- this.logger?.(`[Orchestrator] ✗ Missing required fields in parsed JSON`, 'error');
567
- this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
568
- this.logger?.(`[Orchestrator] 📄 PARSED JSON:\n${JSON.stringify(parsed, null, 2)}`, 'error');
569
- this.logger?.(`[Orchestrator] ❌ Has status: ${!!parsed.status}, Has reasoning: ${!!parsed.reasoning}, Has statusReasoning: ${!!parsed.statusReasoning}`, 'error');
570
- throw new Error('Missing required fields: status and (reasoning or statusReasoning)');
571
- }
572
-
573
- // Normalize: if reasoning is missing but statusReasoning exists, use statusReasoning as reasoning
574
- if (!parsed.reasoning && parsed.statusReasoning) {
575
- parsed.reasoning = parsed.statusReasoning;
576
- }
577
-
578
- return parsed as AgentDecision;
579
-
580
- } catch (error: any) {
581
- this.logger?.(`[Orchestrator] ✗ Failed to parse agent decision: ${error.message}`, 'error');
582
-
583
- // Only log full response if not already logged above
584
- if (!error.message.includes('Missing required fields') && !error.message.includes('No JSON found')) {
585
- this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
586
- }
587
-
588
- // Return fallback
589
- return {
590
- status: 'stuck',
591
- statusReasoning: 'Failed to parse agent response',
592
- reasoning: `Parse error: ${error.message}`
593
- };
594
- }
595
- }
596
-
597
641
  /**
598
642
  * Execute tools
599
643
  */
@@ -601,16 +645,20 @@ export class OrchestratorAgent {
601
645
  toolCalls: any[],
602
646
  page: any,
603
647
  memory: JourneyMemory,
604
- stepNumber: number
648
+ stepNumber: number,
649
+ refMap?: Map<string, any>
605
650
  ): Promise<Record<string, any>> {
606
651
  this.logger?.(`[Orchestrator] 🔧 Executing ${toolCalls.length} tool(s)`);
607
652
 
608
653
  const results: Record<string, any> = {};
609
- const toolContext: ToolExecutionContext = {
654
+ const toolContext: ToolExecutionContext & { refMap?: Map<string, any>; previousSomScreenshot?: string; somHandler?: any } = {
610
655
  page,
611
656
  memory,
612
657
  stepNumber,
613
- logger: this.logger
658
+ logger: this.logger,
659
+ refMap, // Pass refMap for interact_with_ref tool
660
+ previousSomScreenshot: this.previousSomScreenshot, // For view_previous_screenshot tool
661
+ somHandler: this.somHandler // For refresh_som_markers tool
614
662
  };
615
663
 
616
664
  for (const toolCall of toolCalls.slice(0, this.config.maxToolCallsPerIteration)) {
@@ -630,71 +678,259 @@ export class OrchestratorAgent {
630
678
  }
631
679
 
632
680
  /**
633
- * Execute commands sequentially with SHARED context (variables persist across commands)
681
+ * Parse SomCommand from command object
634
682
  */
635
- private async executeCommandsSequentially(
636
- commands: string[],
683
+ private parseSomCommand(cmd: any): SomCommand | null {
684
+ if (typeof cmd === 'object' && cmd.action) {
685
+ // Valid if: has elementRef, OR has coord, OR is navigation action
686
+ const isNavigationAction = ['navigate', 'goBack', 'goForward', 'reload'].includes(cmd.action);
687
+ const hasValidTarget = cmd.elementRef || cmd.coord || isNavigationAction;
688
+
689
+ if (hasValidTarget) {
690
+ return {
691
+ elementRef: cmd.elementRef,
692
+ coord: cmd.coord,
693
+ action: cmd.action,
694
+ value: cmd.value,
695
+ fromCoord: cmd.fromCoord,
696
+ toCoord: cmd.toCoord,
697
+ force: cmd.force,
698
+ scrollAmount: cmd.scrollAmount,
699
+ scrollDirection: cmd.scrollDirection,
700
+ button: cmd.button,
701
+ clickCount: cmd.clickCount,
702
+ modifiers: cmd.modifiers,
703
+ delay: cmd.delay,
704
+ timeout: cmd.timeout
705
+ };
706
+ }
707
+ }
708
+ return null;
709
+ }
710
+
711
+ /**
712
+ * Execute commands (mix of ref and playwright commands)
713
+ */
714
+ private async executeCommands(
715
+ commands: string[] | any[],
637
716
  page: any,
638
717
  memory: JourneyMemory,
639
718
  stepNumber: number,
640
719
  iteration: number,
641
720
  jobId: string
642
721
  ): Promise<{ executed: string[]; allSucceeded: boolean }> {
643
- this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s) in shared context`);
722
+ this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s)`);
644
723
 
645
724
  const executed: string[] = [];
646
- const limitedCommands = commands.slice(0, this.config.maxCommandsPerIteration);
647
725
 
648
- // Build execution with shared context (all commands share scope - variables persist)
649
- const commandsWithTracking = limitedCommands.map((cmd, i) => {
650
- return `
651
- // Command ${i + 1}/${limitedCommands.length}
726
+ if (commands.length === 0) {
727
+ return { executed: [], allSucceeded: true };
728
+ }
729
+
730
+ // SoM mode: Execute commands through PageSoMHandler
731
+ if (this.config.useSoM && this.somHandler) {
732
+ this.logger?.(`[Orchestrator] Using SoM mode for command execution`, 'log');
733
+
734
+ for (let i = 0; i < commands.length; i++) {
735
+ const cmd = commands[i];
736
+
737
+ // Check if verification or action command
738
+ if (isSomVerification(cmd)) {
739
+ // Handle verification command
740
+ try {
741
+ const result = await this.somHandler.executeVerification(cmd);
742
+
743
+ // Always add command to executed array (even if verification failed)
744
+ // Scripts should contain the expect even if it fails during generation
745
+ if (result.playwrightCommand) {
746
+ executed.push(result.playwrightCommand);
747
+ }
748
+
749
+ if (result.success) {
750
+ this.logger?.(`[Orchestrator] ✓ [${i + 1}/${commands.length}] Verification passed`, 'log');
751
+
752
+ memory.history.push({
753
+ stepNumber,
754
+ iteration,
755
+ action: `Verification ${i + 1}/${commands.length}: ${cmd.verificationType}`,
756
+ code: result.playwrightCommand,
757
+ result: 'success',
758
+ observation: `Verified: ${cmd.description || cmd.expected}`,
759
+ url: page.url(),
760
+ timestamp: Date.now()
761
+ });
762
+ } else {
763
+ this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] Verification failed (non-fatal): ${result.error}`, 'warn');
764
+
765
+ memory.history.push({
766
+ stepNumber,
767
+ iteration,
768
+ action: `Verification ${i + 1}/${commands.length} - FAILED`,
769
+ code: result.playwrightCommand || JSON.stringify(cmd),
770
+ result: 'failure',
771
+ observation: `Failed: ${result.error}`,
772
+ error: result.error,
773
+ url: page.url(),
774
+ timestamp: Date.now()
775
+ });
776
+
777
+ // Continue anyway - verification failures are non-blocking for script generation
778
+ }
779
+
780
+ // Small delay between commands
781
+ if (i < commands.length - 1) {
782
+ await page.waitForTimeout(300);
783
+ }
784
+
785
+ } catch (error: any) {
786
+ this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] Verification exception: ${error.message}`, 'error');
787
+ }
788
+
789
+ } else if (isSomCommand(cmd)) {
790
+ // Handle action command (existing logic)
791
+ const somCommand = cmd as SomCommand;
792
+
793
+ try {
794
+ const result = await this.somHandler.runCommand(
795
+ somCommand,
796
+ this.config.somUseSomIdBasedCommands || false
797
+ );
798
+
799
+ if (result.status === CommandRunStatus.SUCCESS && result.successAttempt) {
800
+ this.logger?.(`[Orchestrator] ✓ [${i + 1}/${commands.length}] SoM action succeeded`, 'log');
801
+ executed.push(result.successAttempt.command!);
802
+
803
+ memory.history.push({
804
+ stepNumber,
805
+ iteration,
806
+ action: `SoM Action ${i + 1}/${commands.length}: ${somCommand.action}`,
807
+ code: result.successAttempt.command!,
808
+ result: 'success',
809
+ observation: 'Executed successfully',
810
+ url: page.url(),
811
+ timestamp: Date.now()
812
+ });
813
+
814
+ // Small delay for form validation/animations
815
+ if (i < commands.length - 1) {
816
+ await page.waitForTimeout(300);
817
+ }
818
+ } else {
819
+ this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] SoM action failed: ${result.error}`, 'error');
820
+
821
+ memory.history.push({
822
+ stepNumber,
823
+ iteration,
824
+ action: `SoM Action ${i + 1}/${commands.length}: ${somCommand.action} - FAILED`,
825
+ code: JSON.stringify(somCommand),
826
+ result: 'failure',
827
+ observation: `Failed: ${result.error}`,
828
+ error: result.error,
829
+ url: page.url(),
830
+ timestamp: Date.now()
831
+ });
832
+
833
+ // Refresh SoM after batch (DOM may have changed)
834
+ if (this.somHandler && page) {
835
+ this.somHandler.setPage(page);
836
+ await this.somHandler.updateSom();
837
+ }
838
+
839
+ return { executed, allSucceeded: false };
840
+ }
841
+ } catch (error: any) {
842
+ this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] SoM action exception: ${error.message}`, 'error');
843
+
844
+ memory.history.push({
845
+ stepNumber,
846
+ iteration,
847
+ action: `SoM Action ${i + 1}/${commands.length} - EXCEPTION`,
848
+ code: JSON.stringify(somCommand),
849
+ result: 'failure',
850
+ observation: `Exception: ${error.message}`,
851
+ error: error.message,
852
+ url: page.url(),
853
+ timestamp: Date.now()
854
+ });
855
+
856
+ // Refresh SoM after batch (DOM may have changed)
857
+ if (this.somHandler && page) {
858
+ this.somHandler.setPage(page);
859
+ await this.somHandler.updateSom();
860
+ }
861
+
862
+ return { executed, allSucceeded: false };
863
+ }
864
+ } else {
865
+ this.logger?.(`[Orchestrator] ⚠ [${i + 1}/${commands.length}] Not a valid SoM command/verification, skipping`, 'warn');
866
+ }
867
+ }
868
+
869
+ // Always wait for page to stabilize after command batch
870
+ // This handles both explicit navigation AND clicks that trigger navigation/SPA routes
871
+ try {
872
+ this.logger?.(`[Orchestrator] Waiting for page to stabilize...`, 'log');
873
+ // Use networkidle with short timeout to catch navigation without blocking on SPAs with continuous requests
874
+ await page.waitForLoadState('networkidle', { timeout: 3000 });
875
+ this.logger?.(`[Orchestrator] Page stabilized (networkidle)`, 'log');
876
+ } catch (error: any) {
877
+ // If networkidle times out, fall back to domcontentloaded
878
+ try {
879
+ await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
880
+ this.logger?.(`[Orchestrator] Page loaded (domcontentloaded)`, 'log');
881
+ } catch (error2: any) {
882
+ this.logger?.(`[Orchestrator] Page load wait timeout (continuing anyway)`, 'warn');
883
+ }
884
+ }
885
+
886
+ // Refresh SoM after batch (DOM may have changed and page is now stable)
887
+ if (this.somHandler && page) {
888
+ this.somHandler.setPage(page);
889
+ await this.somHandler.updateSom();
890
+ }
891
+
892
+ return { executed, allSucceeded: true };
893
+ }
894
+
895
+ // Standard mode: Execute all commands in sequence with small delay between them
896
+ // Delay helps with form validation, button enabling, and animations
897
+ const wrappedCode = (commands as string[]).map((cmd, i) => `
898
+ // Command ${i + 1}/${commands.length}
652
899
  try {
653
900
  ${cmd}
654
901
  __results.push({ index: ${i}, success: true });
902
+ ${i < commands.length - 1 ? 'await page.waitForTimeout(300);' : ''} // Small delay for form validation/animations
655
903
  } catch (error) {
656
904
  __results.push({ index: ${i}, success: false, error: error.message });
657
- throw error; // Stop on first failure
658
- }`;
659
- }).join('\n');
905
+ throw error;
906
+ }`).join('\n');
660
907
 
661
- const wrappedCode = `
662
- const __results = [];
663
- ${commandsWithTracking}
664
- return __results;
665
- `;
908
+ const fullCode = `const __results = []; ${wrappedCode} return __results;`;
666
909
 
667
910
  try {
668
- // Execute in shared context - variables declared here persist for entire scenario
669
- // Pass both page and expect to make Playwright assertions available
670
- const func = new Function('page', 'expect', 'return (async () => { ' + wrappedCode + ' })()');
671
- const results = await func(page, expect);
672
-
673
- // Record results for each command
674
- for (let i = 0; i < limitedCommands.length; i++) {
675
- const cmd = limitedCommands[i];
911
+ const func = new Function('page', 'expect', 'return (async () => { ' + fullCode + ' })()');
912
+ const results = await func(page, (global as any).expect);
913
+
914
+ for (let i = 0; i < commands.length; i++) {
915
+ const cmd = commands[i];
676
916
  const result = results[i];
677
917
 
678
918
  if (result && result.success) {
679
- this.logger?.(`[Orchestrator] ✓ [${i + 1}/${limitedCommands.length}] Success`);
680
-
681
- // Record in history
919
+ this.logger?.(`[Orchestrator] ✓ [${i + 1}/${commands.length}] Success`);
682
920
  memory.history.push({
683
921
  stepNumber,
684
922
  iteration,
685
- action: `Command ${i + 1}/${limitedCommands.length}`,
923
+ action: `Command ${i + 1}/${commands.length}`,
686
924
  code: cmd,
687
925
  result: 'success',
688
926
  observation: 'Executed successfully',
689
927
  url: page.url(),
690
928
  timestamp: Date.now()
691
929
  });
692
-
693
930
  executed.push(cmd);
694
931
  }
695
932
  }
696
933
 
697
- // Cap history
698
934
  if (memory.history.length > this.config.maxHistorySize) {
699
935
  memory.history = memory.history.slice(-this.config.maxHistorySize);
700
936
  }
@@ -702,132 +938,25 @@ return __results;
702
938
  return { executed, allSucceeded: true };
703
939
 
704
940
  } catch (error: any) {
705
- // One of the commands failed - find which one
706
941
  const errorMessage = error.message || String(error);
942
+ this.logger?.(`[Orchestrator] ❌ Command execution failed: ${errorMessage}`, 'error');
707
943
 
708
- // Capture page state for debug logging
709
- let pageStateDebug = '';
710
- if (this.debugMode) {
711
- try {
712
- const pageInfo = await getEnhancedPageInfo(page);
713
- pageStateDebug = `
714
- === DEBUG: PAGE STATE AT FAILURE ===
715
- URL: ${page.url()}
716
- Title: ${pageInfo.title}
717
-
718
- INTERACTIVE ELEMENTS:
719
- ${pageInfo.formattedElements}
720
-
721
- ARIA SNAPSHOT:
722
- ${JSON.stringify(pageInfo.ariaSnapshot, null, 2)}
723
-
724
- ====================================`;
725
- } catch (debugError: any) {
726
- pageStateDebug = `Failed to capture page state: ${debugError.message}`;
727
- }
728
- }
729
-
730
- // Record all that succeeded, then the failure
731
- for (let i = 0; i < limitedCommands.length; i++) {
732
- const cmd = limitedCommands[i];
733
-
734
- // This is a failed command (error happened here or earlier)
735
- if (executed.length <= i) {
736
- this.logger?.(`[Orchestrator] ✗ [${i + 1}/${limitedCommands.length}] Failed: ${errorMessage}`, 'error');
737
-
738
- // Log detailed debug info
739
- if (this.debugMode && pageStateDebug) {
740
- this.logger?.(pageStateDebug, 'debug');
741
- }
742
-
743
- memory.history.push({
744
- stepNumber,
745
- iteration,
746
- action: `Command ${i + 1}/${limitedCommands.length} - FAILED`,
747
- code: cmd,
748
- result: 'failure',
749
- observation: `Failed with error: ${errorMessage}. This selector likely doesn't exist or is incorrect.`,
750
- error: errorMessage,
751
- url: page.url(),
752
- timestamp: Date.now()
753
- });
754
-
755
- if (i < limitedCommands.length - 1) {
756
- this.logger?.(`[Orchestrator] ⚠ Skipping remaining ${limitedCommands.length - i - 1} command(s)`, 'warn');
757
- }
758
-
759
- break;
760
- }
761
- }
944
+ memory.history.push({
945
+ stepNumber,
946
+ iteration,
947
+ action: `Command - FAILED`,
948
+ code: commands[executed.length] || '',
949
+ result: 'failure',
950
+ observation: `Failed: ${errorMessage}`,
951
+ error: errorMessage,
952
+ url: page.url(),
953
+ timestamp: Date.now()
954
+ });
762
955
 
763
956
  return { executed, allSucceeded: false };
764
957
  }
765
958
  }
766
-
767
- /**
768
- * Execute a single command
769
- */
770
- private async executeCommand(cmd: string, page: any): Promise<void> {
771
- // Wrap in async function and execute
772
- const wrapped = `(async () => { ${cmd} })()`;
773
-
774
- try {
775
- await eval(wrapped);
776
- } catch (error: any) {
777
- // If eval fails, try direct execution with page context
778
- // Pass both page and expect to make Playwright assertions available
779
- const func = new Function('page', 'expect', `return (async () => { ${cmd} })()`);
780
- await func(page, expect);
781
- }
782
- }
783
-
784
- /**
785
- * Log agent decision
786
- */
787
- private logAgentDecision(decision: AgentDecision, iteration: number): void {
788
- this.logger?.(`[Orchestrator] 💭 REASONING: ${decision.reasoning}`);
789
-
790
- if (decision.selfReflection) {
791
- this.logger?.(`[Orchestrator] 🧠 SELF-REFLECTION:`);
792
- this.logger?.(`[Orchestrator] Next: ${decision.selfReflection.guidanceForNext}`);
793
- if (decision.selfReflection.detectingLoop) {
794
- this.logger?.(`[Orchestrator] 🔄 LOOP DETECTED: ${decision.selfReflection.loopReasoning}`, 'warn');
795
- }
796
- }
797
-
798
- if (decision.toolCalls && decision.toolCalls.length > 0) {
799
- this.logger?.(`[Orchestrator] 🔧 TOOLS: ${decision.toolCalls.map(t => t.name).join(', ')}`);
800
- if (decision.toolReasoning) {
801
- this.logger?.(`[Orchestrator] 📋 Why: ${decision.toolReasoning}`);
802
- }
803
- }
804
-
805
- if (decision.blockerDetected) {
806
- this.logger?.(`[Orchestrator] 🚧 BLOCKER: ${decision.blockerDetected.description}`, 'warn');
807
- this.logger?.(`[Orchestrator] 🧹 Clearing with ${decision.blockerDetected.clearingCommands.length} command(s)`);
808
- }
809
-
810
- if (decision.stepReEvaluation?.detected) {
811
- this.logger?.(`[Orchestrator] 🔍 STEP RE-EVALUATION: ${decision.stepReEvaluation.issue}`, 'warn');
812
- this.logger?.(`[Orchestrator] 📝 Explanation: ${decision.stepReEvaluation.explanation}`);
813
- }
814
-
815
- if (decision.commands && decision.commands.length > 0) {
816
- this.logger?.(`[Orchestrator] 📝 COMMANDS (${decision.commands.length}):`);
817
- decision.commands.slice(0, 3).forEach((cmd, i) => {
818
- this.logger?.(`[Orchestrator] ${i + 1}. ${cmd.substring(0, 80)}...`);
819
- });
820
- if (decision.commands.length > 3) {
821
- this.logger?.(`[Orchestrator] ... and ${decision.commands.length - 3} more`);
822
- }
823
- if (decision.commandReasoning) {
824
- this.logger?.(`[Orchestrator] 💡 Why: ${decision.commandReasoning}`);
825
- }
826
- }
827
-
828
- // Experiences will be logged when added to memory, no need to log here
829
- }
830
-
959
+
831
960
  /**
832
961
  * Report step progress
833
962
  */
@@ -857,6 +986,297 @@ ${JSON.stringify(pageInfo.ariaSnapshot, null, 2)}
857
986
  agentStatus: decision.status
858
987
  });
859
988
  }
989
+
990
+ /**
991
+ * Execute exploration mode - agent autonomously explores to achieve journey goal
992
+ * Fires onStepProgress callbacks for each autonomous action (transparent to caller)
993
+ */
994
+ async executeExploration(
995
+ page: any,
996
+ explorationConfig: ExplorationMode,
997
+ jobId: string
998
+ ): Promise<OrchestratorStepResult> {
999
+ this.logger?.(`\n[Orchestrator] ========== EXPLORATION MODE ==========`);
1000
+ this.logger?.(`[Orchestrator] 🎯 Journey Goal: ${explorationConfig.explorationPrompt}`);
1001
+ if (explorationConfig.testDataPrompt) {
1002
+ this.logger?.(`[Orchestrator] 📋 Test Data: ${explorationConfig.testDataPrompt}`);
1003
+ }
1004
+
1005
+ const memory: JourneyMemory = {
1006
+ history: [],
1007
+ experiences: [],
1008
+ extractedData: {}
1009
+ };
1010
+
1011
+ const maxSteps = explorationConfig.maxExplorationSteps || 50;
1012
+ let stepNumber = 0;
1013
+ const commandsExecuted: string[] = [];
1014
+
1015
+ while (stepNumber < maxSteps) {
1016
+ stepNumber++;
1017
+
1018
+ this.logger?.(`\n[Orchestrator] === Exploration Step ${stepNumber}/${maxSteps} ===`);
1019
+
1020
+ // Build exploratory context
1021
+ const context = await this.buildExploratoryContext(
1022
+ page,
1023
+ explorationConfig.explorationPrompt,
1024
+ explorationConfig.testDataPrompt,
1025
+ memory,
1026
+ stepNumber,
1027
+ maxSteps
1028
+ );
1029
+
1030
+ // Call agent with exploratory prompt
1031
+ const decision = await this.callExploratoryAgent(
1032
+ context,
1033
+ jobId,
1034
+ stepNumber
1035
+ );
1036
+
1037
+ this.decisionParser.log(decision, stepNumber);
1038
+
1039
+ // Report step start (fires JourneyRunner's beforeStepStart callback)
1040
+ if (this.progressReporter?.onStepProgress) {
1041
+ const stepInfo = {
1042
+ jobId,
1043
+ stepNumber,
1044
+ stepId: `exploration-${stepNumber}-${Date.now()}`,
1045
+ description: decision.reasoning,
1046
+ code: '', // Will be filled after commands execute
1047
+ status: StepExecutionStatus.IN_PROGRESS,
1048
+ wasRepaired: false
1049
+ };
1050
+ await this.progressReporter.onStepProgress(stepInfo);
1051
+ }
1052
+
1053
+ // Execute tools if requested
1054
+ if (decision.toolCalls && decision.toolCalls.length > 0) {
1055
+ const toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber);
1056
+
1057
+ // If needs tool results, call agent again
1058
+ if (decision.needsToolResults) {
1059
+ const updatedContext = { ...context, toolResults };
1060
+ const continuedDecision = await this.callExploratoryAgent(updatedContext, jobId, stepNumber);
1061
+
1062
+ decision.commands = continuedDecision.commands || decision.commands;
1063
+ decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
1064
+ decision.status = continuedDecision.status;
1065
+ }
1066
+ }
1067
+
1068
+ // Handle blocker clearing
1069
+ if (decision.blockerDetected && decision.blockerDetected.clearingCommands) {
1070
+ this.logger?.(`[Orchestrator] 🚧 Clearing blocker: ${decision.blockerDetected.description}`);
1071
+ const blockerResult = await this.executeCommands(
1072
+ decision.blockerDetected.clearingCommands,
1073
+ page,
1074
+ memory,
1075
+ stepNumber,
1076
+ 1,
1077
+ jobId
1078
+ );
1079
+ commandsExecuted.push(...blockerResult.executed);
1080
+ }
1081
+
1082
+ // Execute exploration commands
1083
+ let commandsSucceeded = true;
1084
+ if (decision.commands && decision.commands.length > 0) {
1085
+ const executeResult = await this.executeCommands(
1086
+ decision.commands,
1087
+ page,
1088
+ memory,
1089
+ stepNumber,
1090
+ 1,
1091
+ jobId
1092
+ );
1093
+ commandsExecuted.push(...executeResult.executed);
1094
+ commandsSucceeded = executeResult.allSucceeded;
1095
+ }
1096
+
1097
+ // Report step completion (fires JourneyRunner's onStepComplete callback)
1098
+ if (this.progressReporter?.onStepProgress) {
1099
+ const stepInfo = {
1100
+ jobId,
1101
+ stepNumber,
1102
+ stepId: `exploration-${stepNumber}-${Date.now()}`,
1103
+ description: decision.reasoning,
1104
+ code: decision.commands?.join('\n') || '',
1105
+ status: commandsSucceeded ? StepExecutionStatus.SUCCESS : StepExecutionStatus.FAILURE,
1106
+ error: commandsSucceeded ? undefined : 'Command execution failed',
1107
+ wasRepaired: false
1108
+ };
1109
+ await this.progressReporter.onStepProgress(stepInfo);
1110
+ }
1111
+
1112
+ // Add experiences (both app patterns AND exploration progress)
1113
+ if (decision.experiences) {
1114
+ memory.experiences.push(...decision.experiences);
1115
+ if (memory.experiences.length > this.config.maxExperiences) {
1116
+ memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
1117
+ }
1118
+ }
1119
+
1120
+ // Store note for next iteration
1121
+ if (decision.noteToFutureSelf) {
1122
+ memory.latestNote = {
1123
+ fromIteration: stepNumber,
1124
+ content: decision.noteToFutureSelf
1125
+ };
1126
+ }
1127
+
1128
+ // Check termination
1129
+ if (decision.status === 'complete') {
1130
+ this.logger?.(`[Orchestrator] ✅ Journey exploration complete: ${decision.statusReasoning}`);
1131
+ return {
1132
+ success: true,
1133
+ commands: commandsExecuted,
1134
+ iterations: stepNumber,
1135
+ terminationReason: 'complete',
1136
+ memory
1137
+ };
1138
+ } else if (decision.status === 'stuck') {
1139
+ this.logger?.(`[Orchestrator] ❌ Exploration stuck: ${decision.statusReasoning}`);
1140
+ return {
1141
+ success: false,
1142
+ commands: commandsExecuted,
1143
+ iterations: stepNumber,
1144
+ terminationReason: 'agent_stuck',
1145
+ memory,
1146
+ error: decision.statusReasoning
1147
+ };
1148
+ }
1149
+ }
1150
+
1151
+ // Hit max steps - not necessarily a failure
1152
+ this.logger?.(`[Orchestrator] ⚠ Maximum exploration steps reached (budget limit)`);
1153
+ return {
1154
+ success: true, // Not a failure - just budget limit
1155
+ commands: commandsExecuted,
1156
+ iterations: stepNumber,
1157
+ terminationReason: 'system_limit',
1158
+ memory
1159
+ };
1160
+ }
1161
+
1162
+ private async buildExploratoryContext(
1163
+ page: any,
1164
+ explorationPrompt: string,
1165
+ testDataPrompt: string | undefined,
1166
+ memory: JourneyMemory,
1167
+ stepNumber: number,
1168
+ maxSteps: number
1169
+ ): Promise<AgentContext> {
1170
+ // Wait for page to be ready and elements to appear (especially important after navigation)
1171
+ const currentPageInfo = await PageInfoRetry.getWithRetry(page);
1172
+ const currentURL = page.url();
1173
+ const recentSteps = memory.history.slice(-this.config.recentStepsCount);
1174
+
1175
+ // SoM integration for exploratory mode
1176
+ let somScreenshot: string | undefined = undefined;
1177
+ let somElementMap: string | undefined = undefined;
1178
+ if (this.config.useSoM && this.somHandler) {
1179
+ try {
1180
+ this.somHandler.setPage(page);
1181
+
1182
+ // Wait briefly for page stability (handles first iteration + safety net for fast SPAs)
1183
+ try {
1184
+ await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
1185
+ } catch (error: any) {
1186
+ // Page already loaded or timeout - continue
1187
+ }
1188
+
1189
+ // Update SoM markers
1190
+ await this.somHandler.updateSom();
1191
+ somScreenshot = await this.somHandler.getScreenshot(true, false, 60); // Viewport only - agent can scroll or request full page
1192
+
1193
+ // Get element map for disambiguation
1194
+ somElementMap = this.somHandler.getSomElementMap();
1195
+
1196
+ this.logger?.(`[Orchestrator] SoM screenshot captured for exploratory agent`, 'log');
1197
+ } catch (error: any) {
1198
+ this.logger?.(`[Orchestrator] Failed to capture SoM screenshot: ${error.message}`, 'error');
1199
+ }
1200
+ }
1201
+
1202
+ const context = {
1203
+ overallGoal: explorationPrompt,
1204
+ currentStepGoal: explorationPrompt, // Same as overall for single journey
1205
+ stepNumber,
1206
+ totalSteps: maxSteps,
1207
+ completedSteps: [],
1208
+ remainingSteps: [],
1209
+ currentPageInfo,
1210
+ currentURL,
1211
+ recentSteps,
1212
+ experiences: memory.experiences,
1213
+ extractedData: memory.extractedData,
1214
+ noteFromPreviousIteration: memory.latestNote,
1215
+ testDataPrompt, // CRITICAL: Store testDataPrompt in context
1216
+ somScreenshot, // SoM screenshot for exploratory mode (current)
1217
+ somElementMap // SoM element details for disambiguation
1218
+ };
1219
+
1220
+ // Save current screenshot as previous for next iteration (for tool access)
1221
+ if (somScreenshot) {
1222
+ this.previousSomScreenshot = somScreenshot;
1223
+ }
1224
+
1225
+ return context;
1226
+ }
1227
+
1228
+ private async callExploratoryAgent(
1229
+ context: AgentContext,
1230
+ jobId: string,
1231
+ stepNumber: number
1232
+ ): Promise<AgentDecision> {
1233
+ const toolDescriptions = this.toolRegistry.generateToolDescriptions();
1234
+
1235
+ // Use SoM system prompt if in SoM mode, otherwise use standard exploratory prompt
1236
+ const systemPrompt = this.config.useSoM
1237
+ ? OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates)
1238
+ : OrchestratorPrompts.buildExploratorySystemPrompt(toolDescriptions);
1239
+
1240
+ const userPrompt = OrchestratorPrompts.buildExploratoryUserPrompt(
1241
+ context,
1242
+ context.overallGoal,
1243
+ context.testDataPrompt, // Pass testDataPrompt from context
1244
+ stepNumber,
1245
+ context.totalSteps
1246
+ );
1247
+
1248
+ const llmRequest: any = {
1249
+ model: DEFAULT_MODEL,
1250
+ systemPrompt,
1251
+ userPrompt
1252
+ };
1253
+
1254
+ // Include current SoM screenshot as image
1255
+ if (context.somScreenshot) {
1256
+ llmRequest.imageUrl = context.somScreenshot;
1257
+ this.logger?.(`[Orchestrator] Including SoM screenshot in exploratory LLM request`, 'log');
1258
+ }
1259
+
1260
+ const response = await this.llmFacade.llmProvider.callLLM(llmRequest);
1261
+
1262
+ // Report token usage
1263
+ if (response.usage && this.progressReporter?.onTokensUsed) {
1264
+ await this.progressReporter.onTokensUsed({
1265
+ jobId,
1266
+ stepNumber,
1267
+ iteration: 1,
1268
+ inputTokens: response.usage.inputTokens,
1269
+ outputTokens: response.usage.outputTokens,
1270
+ includesImage: false,
1271
+ model: DEFAULT_MODEL,
1272
+ timestamp: Date.now()
1273
+ });
1274
+ }
1275
+
1276
+ // Parse response (same JSON format as regular mode)
1277
+ const decision = this.decisionParser.parse(response.answer);
1278
+ return decision;
1279
+ }
860
1280
  }
861
1281
 
862
1282