testchimp-runner-core 0.0.35 → 0.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81):
  1. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  2. package/dist/orchestrator/orchestrator-agent.js +7 -4
  3. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  4. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
  5. package/dist/orchestrator/orchestrator-prompts.js +73 -15
  6. package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
  7. package/dist/orchestrator/page-som-handler.d.ts +1 -2
  8. package/dist/orchestrator/page-som-handler.d.ts.map +1 -1
  9. package/dist/orchestrator/page-som-handler.js +51 -25
  10. package/dist/orchestrator/page-som-handler.js.map +1 -1
  11. package/package.json +6 -1
  12. package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
  13. package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
  14. package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
  15. package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
  16. package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
  17. package/plandocs/INTEGRATION_COMPLETE.md +0 -322
  18. package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
  19. package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
  20. package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
  21. package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
  22. package/plandocs/PHASE_1_COMPLETE.md +0 -165
  23. package/plandocs/PHASE_1_SUMMARY.md +0 -184
  24. package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
  25. package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
  26. package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
  27. package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
  28. package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
  29. package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
  30. package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
  31. package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
  32. package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
  33. package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
  34. package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
  35. package/plandocs/exploratory-mode-support-v2.plan.md +0 -953
  36. package/plandocs/exploratory-mode-support.plan.md +0 -928
  37. package/plandocs/journey-id-tracking-addendum.md +0 -227
  38. package/releasenotes/RELEASE_0.0.26.md +0 -165
  39. package/releasenotes/RELEASE_0.0.27.md +0 -236
  40. package/releasenotes/RELEASE_0.0.28.md +0 -286
  41. package/src/auth-config.ts +0 -84
  42. package/src/credit-usage-service.ts +0 -188
  43. package/src/env-loader.ts +0 -103
  44. package/src/execution-service.ts +0 -996
  45. package/src/file-handler.ts +0 -104
  46. package/src/index.ts +0 -432
  47. package/src/llm-facade.ts +0 -821
  48. package/src/llm-provider.ts +0 -53
  49. package/src/model-constants.ts +0 -35
  50. package/src/orchestrator/decision-parser.ts +0 -139
  51. package/src/orchestrator/index.ts +0 -58
  52. package/src/orchestrator/orchestrator-agent.ts +0 -1282
  53. package/src/orchestrator/orchestrator-prompts.ts +0 -786
  54. package/src/orchestrator/page-som-handler.ts +0 -1565
  55. package/src/orchestrator/som-types.ts +0 -188
  56. package/src/orchestrator/tool-registry.ts +0 -184
  57. package/src/orchestrator/tools/check-page-ready.ts +0 -75
  58. package/src/orchestrator/tools/extract-data.ts +0 -92
  59. package/src/orchestrator/tools/index.ts +0 -15
  60. package/src/orchestrator/tools/inspect-page.ts +0 -42
  61. package/src/orchestrator/tools/recall-history.ts +0 -72
  62. package/src/orchestrator/tools/refresh-som-markers.ts +0 -69
  63. package/src/orchestrator/tools/take-screenshot.ts +0 -128
  64. package/src/orchestrator/tools/verify-action-result.ts +0 -159
  65. package/src/orchestrator/tools/view-previous-screenshot.ts +0 -103
  66. package/src/orchestrator/types.ts +0 -291
  67. package/src/playwright-mcp-service.ts +0 -224
  68. package/src/progress-reporter.ts +0 -144
  69. package/src/prompts.ts +0 -842
  70. package/src/providers/backend-proxy-llm-provider.ts +0 -91
  71. package/src/providers/local-llm-provider.ts +0 -38
  72. package/src/scenario-service.ts +0 -252
  73. package/src/scenario-worker-class.ts +0 -1110
  74. package/src/script-utils.ts +0 -203
  75. package/src/types.ts +0 -239
  76. package/src/utils/browser-utils.ts +0 -348
  77. package/src/utils/coordinate-converter.ts +0 -162
  78. package/src/utils/page-info-retry.ts +0 -65
  79. package/src/utils/page-info-utils.ts +0 -285
  80. package/testchimp-runner-core-0.0.35.tgz +0 -0
  81. package/tsconfig.json +0 -19
@@ -1,1282 +0,0 @@
1
- /**
2
- * Orchestrator Agent
3
- * Single agent with tool-use capabilities that maintains journey memory and executes scenarios
4
- */
5
-
6
- import { expect } from '@playwright/test';
7
- import { LLMFacade } from '../llm-facade';
8
- import { ProgressReporter, TokenUsage, StepExecutionStatus } from '../progress-reporter';
9
- import { getEnhancedPageInfo, PageInfo } from '../utils/page-info-utils';
10
- import { CoordinateConverter } from '../utils/coordinate-converter';
11
- import { ToolRegistry, ToolExecutionContext } from './tool-registry';
12
- import { DEFAULT_MODEL } from '../model-constants';
13
- import {
14
- AgentConfig,
15
- AgentContext,
16
- AgentDecision,
17
- JourneyMemory,
18
- MemoryStep,
19
- OrchestratorStepResult,
20
- SelfReflection,
21
- NoteToFutureSelf,
22
- CoordinateAction,
23
- ExplorationMode,
24
- DEFAULT_AGENT_CONFIG
25
- } from './types';
26
- import { OrchestratorPrompts } from './orchestrator-prompts';
27
- import { PageInfoRetry } from '../utils/page-info-retry';
28
- import { DecisionParser } from './decision-parser';
29
- import { PageSoMHandler } from './page-som-handler';
30
- import { SomCommand, CommandRunStatus, InteractionAction, isSomVerification, isSomCommand, SomVerification } from './som-types';
31
-
32
- /**
33
- * Orchestrator Agent - manages step execution with tool use and memory
34
- */
35
- export class OrchestratorAgent {
36
- private llmFacade: LLMFacade;
37
- private toolRegistry: ToolRegistry;
38
- private progressReporter?: ProgressReporter;
39
- private config: Required<AgentConfig>;
40
- private logger?: (message: string, level?: 'log' | 'error' | 'warn' | 'debug') => void;
41
- private debugMode: boolean = false;
42
- private decisionParser: DecisionParser;
43
- private somHandler?: PageSoMHandler;
44
- private previousSomScreenshot?: string; // Track previous iteration's screenshot
45
-
46
/**
 * Create an orchestrator agent.
 *
 * @param llmFacade - Facade whose `llmProvider` performs the LLM calls.
 * @param toolRegistry - Registry used to describe and execute tools.
 * @param config - Optional overrides layered on top of `DEFAULT_AGENT_CONFIG`.
 *   Keys whose value is explicitly `undefined` are ignored so they cannot
 *   erase a default.
 * @param progressReporter - Optional sink for progress and token-usage events.
 * @param logger - Optional logging callback; also handed to the decision
 *   parser and the SoM handler.
 * @param debugMode - Initial debug flag (defaults to `false`).
 */
constructor(
  llmFacade: LLMFacade,
  toolRegistry: ToolRegistry,
  config?: Partial<AgentConfig>,
  progressReporter?: ProgressReporter,
  logger?: (message: string, level?: 'log' | 'error' | 'warn' | 'debug') => void,
  debugMode?: boolean
) {
  this.llmFacade = llmFacade;
  this.toolRegistry = toolRegistry;

  // Drop explicitly-undefined overrides: spreading `{ useSoM: undefined }`
  // over the defaults would otherwise leave a hole in Required<AgentConfig>.
  const overrides = Object.fromEntries(
    Object.entries(config ?? {}).filter(([, value]) => value !== undefined)
  ) as Partial<AgentConfig>;
  this.config = { ...DEFAULT_AGENT_CONFIG, ...overrides };

  this.progressReporter = progressReporter;
  this.logger = logger;
  this.debugMode = debugMode || false;
  this.decisionParser = new DecisionParser(logger);

  // Initialize SoM handler if enabled. No page exists yet at construction
  // time; buildAgentContext() calls setPage() before the handler is used.
  if (this.config.useSoM) {
    this.somHandler = new PageSoMHandler(null as any, this.logger);
  }
}
67
-
68
/**
 * Turn debug mode on or off for this agent instance.
 *
 * @param enabled - New value stored in the `debugMode` field.
 */
setDebugMode(enabled: boolean): void {
  this.debugMode = enabled;
}
71
-
72
/**
 * Execute a single step of the scenario.
 *
 * Runs up to `config.maxIterationsPerStep` iterations. Each iteration:
 *   1. builds a fresh agent context and asks the agent for a decision;
 *   2. runs any requested tools (screenshot calls are blocked once three
 *      screenshots were recorded for this step) and, when the agent asked
 *      for tool results, calls the agent again with them;
 *   3. runs blocker-clearing commands, then the main commands;
 *   4. if coordinate mode is enabled and the agent supplied a coordinate
 *      action, executes it and verifies the outcome with a before/after
 *      screenshot comparison by the LLM (at most two coordinate attempts);
 *   5. forces a "stuck" result after five consecutive failed iterations;
 *   6. records experiences and the agent's note-to-self in `memory`;
 *   7. returns when the agent reports a status other than `continue`,
 *      except that `complete` is overridden when a command failed in the
 *      same iteration.
 *
 * @param page - Playwright page the step runs against.
 * @param stepDescription - Goal of the current step.
 * @param stepNumber - 1-based index of the step within the scenario.
 * @param totalSteps - Total number of steps in the scenario.
 * @param scenarioSteps - All step descriptions of the scenario.
 * @param memory - Journey memory; mutated (experiences, latestNote).
 * @param jobId - Job identifier forwarded to reporting and execution.
 * @param priorSteps - Repair-mode context (undefined for script generation).
 * @param nextSteps - Repair-mode context (undefined for script generation).
 * @returns The step result with the commands executed, the iteration count
 *   and the termination reason.
 */
async executeStep(
  page: any,
  stepDescription: string,
  stepNumber: number,
  totalSteps: number,
  scenarioSteps: string[],
  memory: JourneyMemory,
  jobId: string,
  priorSteps?: string[],
  nextSteps?: string[]
): Promise<OrchestratorStepResult> {
  this.logger?.(`\n[Orchestrator] ========== STEP ${stepNumber}/${totalSteps} ==========`);
  this.logger?.(`[Orchestrator] 🎯 Goal: ${stepDescription}`);

  let iteration = 0;
  let noteToSelf: NoteToFutureSelf | undefined = memory.latestNote; // Start with note from previous step
  const commandsExecuted: string[] = [];
  let consecutiveFailures = 0; // Consecutive iterations with failed commands
  let coordinateAttempts = 0; // Coordinate mode attempts (max 2)

  // Shared shape of every "agent_stuck" result; reads `iteration` at call time.
  const stuckResult = (error: string): OrchestratorStepResult => ({
    success: false,
    commands: commandsExecuted,
    iterations: iteration,
    terminationReason: 'agent_stuck',
    memory,
    error
  });

  while (iteration < this.config.maxIterationsPerStep) {
    iteration++;

    this.logger?.(`\n[Orchestrator] === Iteration ${iteration}/${this.config.maxIterationsPerStep} ===`);

    // Build context for agent
    const context = await this.buildAgentContext(
      page,
      stepDescription,
      stepNumber,
      totalSteps,
      scenarioSteps,
      memory,
      consecutiveFailures,
      noteToSelf,
      priorSteps,
      nextSteps
    );

    // Call agent to make decision
    const decision = await this.callAgent(context, jobId, stepNumber, iteration, consecutiveFailures);

    // Log agent's reasoning
    this.decisionParser.log(decision, iteration);

    // Report progress
    await this.reportStepProgress(jobId, stepNumber, stepDescription, decision, iteration);

    // Tool results are keyed by tool name, matching what executeTools() returns.
    let toolResults: Record<string, any> = {};

    // ANTI-LOOP: detect screenshot loops (per step)
    const screenshotsThisStep = memory.history.filter(s =>
      s.stepNumber === stepNumber &&
      (s.code.includes('take_screenshot') || s.action.includes('Screenshot'))
    );
    const recentScreenshots = memory.history.slice(-3).filter(s =>
      s.code.includes('take_screenshot') || s.action.includes('Screenshot')
    );

    if (screenshotsThisStep.length >= 3) {
      this.logger?.(`[Orchestrator] 🚨 SCREENSHOT LOOP - ${screenshotsThisStep.length} screenshots THIS STEP! BLOCKING further screenshots`, 'error');
    } else if (recentScreenshots.length >= 2 && iteration >= 3) {
      this.logger?.(`[Orchestrator] ⚠️ WARNING: ${recentScreenshots.length} screenshots in last 3 iterations - agent may be looping`, 'warn');
    }

    if (decision.toolCalls && decision.toolCalls.length > 0) {
      // ENFORCE: strip screenshot tool calls once too many were taken in this step
      let screenshotBlocked = false;
      if (screenshotsThisStep.length >= 3) {
        const allowedCalls = decision.toolCalls.filter(tc => tc.name !== 'take_screenshot');
        screenshotBlocked = allowedCalls.length < decision.toolCalls.length;
        decision.toolCalls = allowedCalls;
        if (screenshotBlocked) {
          this.logger?.(`[Orchestrator] 🚫 REJECTED screenshot tool call - loop detected. Agent must ACT.`, 'warn');
        }
      }

      if (decision.toolCalls.length > 0) {
        toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber, context.currentPageInfo.refMap);
      }

      // Always tell the agent its screenshot was refused, even when other
      // tools ran, using the same name-keyed shape as real tool results.
      if (screenshotBlocked) {
        toolResults['take_screenshot'] = {
          toolName: 'take_screenshot',
          success: false,
          error: 'SYSTEM BLOCKED: Too many screenshots taken. You must use existing DOM snapshots and execute commands now. Analysis paralysis detected.',
          data: null
        };
      }

      // If agent wants to wait for tool results before proceeding, call agent again
      if (decision.needsToolResults) {
        const updatedContext = { ...context, toolResults };
        const continuedDecision = await this.callAgent(updatedContext, jobId, stepNumber, iteration, consecutiveFailures);

        // Merge continued decision
        decision.commands = continuedDecision.commands || decision.commands;
        decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
        decision.status = continuedDecision.status;
        decision.statusReasoning = continuedDecision.statusReasoning;
        decision.reasoning = continuedDecision.reasoning;
      }
    }

    // Execute commands sequentially
    let iterationHadFailure = false;

    // Handle blocker if detected (clear blocker FIRST, then proceed with main commands)
    const blocker = decision.blockerDetected;
    if (blocker && blocker.clearingCommands && blocker.clearingCommands.length > 0) {
      this.logger?.(`[Orchestrator] 🚧 BLOCKER DETECTED: ${blocker.description}`);
      this.logger?.(`[Orchestrator] 🧹 Clearing blocker with ${blocker.clearingCommands.length} command(s)...`);

      const blockerResult = await this.executeCommands(
        blocker.clearingCommands,
        page,
        memory,
        stepNumber,
        iteration,
        jobId
      );

      // Add blocker commands with comment to output
      if (blockerResult.executed.length > 0) {
        commandsExecuted.push(`// Blocker: ${blocker.description}`);
        commandsExecuted.push(...blockerResult.executed);
      }

      // If blocker clearing failed, track it
      if (!blockerResult.allSucceeded) {
        this.logger?.(`[Orchestrator] ❌ Failed to clear blocker - continuing anyway`);
        consecutiveFailures++;
        iterationHadFailure = true;
      }
    }

    // Execute main commands (only if no blocker failure)
    if (!iterationHadFailure && decision.commands && decision.commands.length > 0) {
      const executeResult = await this.executeCommands(
        decision.commands,
        page,
        memory,
        stepNumber,
        iteration,
        jobId
      );

      commandsExecuted.push(...executeResult.executed);

      // Track failures
      if (!executeResult.allSucceeded) {
        this.logger?.(`[Orchestrator] ⚠ Command execution stopped at failure`);
        consecutiveFailures++;
        iterationHadFailure = true;
      } else {
        consecutiveFailures = 0; // Reset on success
      }
    }

    // Coordinate-based action: fallback when selectors fail, only if enabled
    if (this.config.enableCoordinateMode && decision.coordinateAction && !iterationHadFailure) {
      coordinateAttempts++;

      this.logger?.(`[Orchestrator] 🎯 Coordinate Action (attempt ${coordinateAttempts}/2): ${decision.coordinateAction.action} at (${decision.coordinateAction.xPercent}%, ${decision.coordinateAction.yPercent}%)`);

      try {
        // BEFORE screenshot for visual verification
        const beforeScreenshot = await page.screenshot({ encoding: 'base64', fullPage: false, type: 'jpeg', quality: 60 });
        const beforeDataUrl = `data:image/jpeg;base64,${beforeScreenshot}`;

        // Generate Playwright commands from coordinate action
        const coordCommands = await CoordinateConverter.generateCommands(decision.coordinateAction, page);

        this.logger?.(`[Orchestrator] Generated commands:`);
        coordCommands.forEach(cmd => this.logger?.(` ${cmd}`));

        // Execute coordinate commands
        const coordResult = await this.executeCommands(
          coordCommands,
          page,
          memory,
          stepNumber,
          iteration,
          jobId
        );

        commandsExecuted.push(...coordResult.executed);

        if (!coordResult.allSucceeded) {
          this.logger?.(`[Orchestrator] ❌ Coordinate action failed (Playwright error)`);
          consecutiveFailures++;
          iterationHadFailure = true;

          // Give up after 2 coordinate attempts
          if (coordinateAttempts >= 2) {
            this.logger?.(`[Orchestrator] 🛑 Coordinate mode exhausted (2 attempts) - marking stuck`);
            return stuckResult('Coordinate fallback failed after 2 attempts - unable to proceed');
          }
        } else {
          this.logger?.(`[Orchestrator] ✅ Coordinate action succeeded (no Playwright error)`);

          // A click that raised no error may still have hit the wrong place,
          // so verify visually. Wait for network idle first (max 10s).
          try {
            await page.waitForLoadState('networkidle', { timeout: 10000 });
          } catch {
            // Page may still be loading, but proceed with verification
            this.logger?.(`[Orchestrator] ⚠️ Network idle timeout after 10s, proceeding with verification`, 'warn');
          }

          const afterScreenshot = await page.screenshot({ encoding: 'base64', fullPage: false, type: 'jpeg', quality: 60 });
          const afterDataUrl = `data:image/jpeg;base64,${afterScreenshot}`;

          this.logger?.(`[Orchestrator] 📸 Verifying coordinate action visually...`);

          const verificationRequest = {
            model: 'gpt-5-mini',
            systemPrompt: 'You are a visual verification expert for web automation. Compare before/after screenshots to determine if an action achieved its goal.',
            userPrompt: `Goal: ${scenarioSteps[stepNumber - 1]}\n\nA coordinate-based action was just executed. Compare the BEFORE and AFTER screenshots.\n\nDid the action achieve the goal? Respond with JSON:\n{\n "verified": boolean,\n "reasoning": "What changed (or didn't change) between screenshots",\n "visibleChanges": ["List of UI changes observed"],\n "clickedWrongPlace": boolean\n}\n\nBe strict: Only return verified=true if you clearly see the expected change matching the goal.`,
            images: [
              { label: 'BEFORE', dataUrl: beforeDataUrl },
              { label: 'AFTER', dataUrl: afterDataUrl }
            ]
          };

          const verificationResponse = await this.llmFacade.llmProvider.callLLM(verificationRequest);
          const jsonMatch = verificationResponse.answer.match(/\{[\s\S]*\}/);

          if (jsonMatch) {
            // A malformed JSON body throws here and is handled by the outer
            // catch, which counts the attempt as a failure.
            const verificationResult = JSON.parse(jsonMatch[0]);
            const verified = verificationResult.verified === true;
            const reasoning = verificationResult.reasoning || 'No reasoning provided';
            const clickedWrongPlace = verificationResult.clickedWrongPlace === true;

            this.logger?.(`[Orchestrator] 📊 Visual verification: ${verified ? '✅ VERIFIED' : '❌ NOT VERIFIED'}`);
            this.logger?.(`[Orchestrator] 💭 Reasoning: ${reasoning}`);

            if (verified) {
              // Goal achieved
              consecutiveFailures = 0;

              // Store note for context
              noteToSelf = {
                fromIteration: iteration,
                content: `Coordinate action verified successful: ${reasoning}`
              };
              memory.latestNote = noteToSelf;
            } else {
              // Goal NOT achieved despite no error
              this.logger?.(`[Orchestrator] ⚠️ Coordinate click succeeded but goal NOT achieved`, 'warn');
              consecutiveFailures++;
              iterationHadFailure = true;

              // Store diagnostic note for next attempt
              const diagnostic = clickedWrongPlace
                ? `Clicked wrong place. ${reasoning}. Try different coordinates.`
                : `Action executed but goal not achieved. ${reasoning}. May need different approach.`;

              noteToSelf = {
                fromIteration: iteration,
                content: diagnostic
              };
              memory.latestNote = noteToSelf;

              // Give up after 2 coordinate attempts
              if (coordinateAttempts >= 2) {
                this.logger?.(`[Orchestrator] 🛑 Coordinate mode exhausted (2 attempts, none achieved goal) - marking stuck`);
                return stuckResult(`Coordinate actions clicked but didn't achieve goal: ${reasoning}`);
              }
            }
          } else {
            this.logger?.(`[Orchestrator] ⚠️ Could not parse verification response - treating as unverified`, 'warn');
            consecutiveFailures++;
            iterationHadFailure = true;
          }
        }
      } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        this.logger?.(`[Orchestrator] ❌ Coordinate action error: ${message}`, 'error');
        consecutiveFailures++;
        iterationHadFailure = true;

        // Give up after 2 coordinate attempts
        if (coordinateAttempts >= 2) {
          this.logger?.(`[Orchestrator] 🛑 Coordinate mode exhausted (2 attempts) - marking stuck`);
          return stuckResult('Coordinate fallback failed after 2 attempts - unable to proceed');
        }
      }
    }

    // System-enforced stuck detection (agent might not detect it)
    // Allow 5 failures: 3 selector attempts + 2 coordinate attempts
    if (consecutiveFailures >= 5) {
      this.logger?.(`[Orchestrator] 🛑 SYSTEM: ${consecutiveFailures} consecutive failures detected - forcing stuck`, 'warn');
      return stuckResult(`Failed ${consecutiveFailures} iterations in a row - unable to proceed`);
    }

    // Update memory with experiences
    if (decision.experiences && decision.experiences.length > 0) {
      for (const exp of decision.experiences) {
        // Deduplicate: skip when one text contains the other (case-insensitive)
        const expLower = exp.toLowerCase();
        const exists = memory.experiences.some(existing => {
          const existingLower = existing.toLowerCase();
          return existingLower.includes(expLower) || expLower.includes(existingLower);
        });

        if (!exists) {
          memory.experiences.push(exp);
          this.logger?.(`[Orchestrator] 📚 Experience: ${exp}`);
        }
      }

      // Cap experiences, keeping the most recent ones
      if (memory.experiences.length > this.config.maxExperiences) {
        memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
      }
    }

    // Store note to future self (tactical memory across iterations AND steps)
    if (decision.noteToFutureSelf) {
      noteToSelf = {
        fromIteration: iteration,
        content: decision.noteToFutureSelf
      };
      memory.latestNote = noteToSelf; // Persist in journey memory across steps
      this.logger?.(`[Orchestrator] 📝 Note to self: ${decision.noteToFutureSelf}`);
    }

    // Check termination
    if (decision.status !== 'continue') {
      this.logger?.(`[Orchestrator] 🎯 Status: ${decision.status}`);
      this.logger?.(`[Orchestrator] 💭 Reason: ${decision.statusReasoning}`);

      // SAFETY CHECK: Don't allow "complete" if commands failed this iteration
      if (decision.status === 'complete' && iterationHadFailure) {
        this.logger?.(`[Orchestrator] ⚠️ OVERRIDE: Agent said "complete" but commands FAILED - forcing "continue"`, 'warn');
        this.logger?.(`[Orchestrator] 💭 System: Commands must succeed before marking complete`);
        // Don't return - continue to next iteration
      } else {
        // Valid termination
        return {
          success: decision.status === 'complete',
          commands: commandsExecuted,
          iterations: iteration,
          terminationReason: decision.status === 'complete' ? 'complete' :
            decision.status === 'stuck' ? 'agent_stuck' :
            'infeasible',
          memory
        };
      }
    }
  }

  // Hit iteration limit
  this.logger?.(`[Orchestrator] ⚠ Maximum iterations reached (${this.config.maxIterationsPerStep})`);

  return {
    success: false,
    commands: commandsExecuted,
    iterations: iteration,
    terminationReason: 'system_limit',
    memory,
    error: 'Maximum iterations reached'
  };
}
468
-
469
/**
 * Build the context object handed to the agent for one iteration.
 *
 * Reads fresh page info and the current URL, takes the most recent
 * `config.recentStepsCount` history entries and, when SoM is enabled,
 * refreshes the SoM markers and captures a marked viewport screenshot plus
 * the element map. SoM failures are logged and leave the screenshot and
 * element map undefined.
 *
 * @param page - Playwright page to inspect.
 * @param currentStepGoal - Goal of the current step.
 * @param stepNumber - 1-based index of the current step.
 * @param totalSteps - Total number of steps in the scenario.
 * @param scenarioSteps - All step descriptions of the scenario.
 * @param memory - Journey memory (history, experiences, extracted data).
 * @param consecutiveFailures - Accepted for signature parity; not read here.
 * @param noteFromPreviousIteration - Tactical note carried into the context.
 * @param priorSteps - Repair-mode context (undefined for script generation).
 * @param nextSteps - Repair-mode context (undefined for script generation).
 * @returns The assembled agent context.
 */
private async buildAgentContext(
  page: any,
  currentStepGoal: string,
  stepNumber: number,
  totalSteps: number,
  scenarioSteps: string[],
  memory: JourneyMemory,
  consecutiveFailures?: number,
  noteFromPreviousIteration?: NoteToFutureSelf,
  priorSteps?: string[],
  nextSteps?: string[]
): Promise<AgentContext> {
  // Get fresh DOM
  const currentPageInfo = await getEnhancedPageInfo(page);
  const currentURL = page.url();

  // Get recent steps
  const recentSteps = memory.history.slice(-this.config.recentStepsCount);

  // SoM integration: update markers and capture screenshot with visual IDs
  let somScreenshot: string | undefined = undefined;
  let somElementMap: string | undefined = undefined;
  if (this.config.useSoM) {
    try {
      // Reuse the existing handler (re-pointing it at this page) or create
      // one lazily. Guarding on `useSoM` alone keeps the lazy path reachable.
      let handler = this.somHandler;
      if (handler) {
        handler.setPage(page);
      } else {
        handler = new PageSoMHandler(page, this.logger);
        this.somHandler = handler;
      }

      // Wait briefly for page stability (handles first iteration + safety net for fast SPAs)
      try {
        await page.waitForLoadState('domcontentloaded', { timeout: 5000 });
      } catch {
        // Deliberate best-effort: page already loaded or timeout - continue
      }

      // Update SoM markers
      await handler.updateSom();

      // Screenshot WITH markers (viewport only - agent can scroll or use take_screenshot for full page)
      somScreenshot = await handler.getScreenshot(true, false, 60);

      // Element map for disambiguation
      somElementMap = handler.getSomElementMap();

      this.logger?.(`[Orchestrator] SoM screenshot captured for agent decision-making`, 'log');
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      this.logger?.(`[Orchestrator] Failed to capture SoM screenshot: ${message}`, 'error');
    }
  }

  // Build context
  const context = {
    overallGoal: scenarioSteps.join('\n'),
    currentStepGoal,
    stepNumber,
    totalSteps,
    completedSteps: scenarioSteps.slice(0, stepNumber - 1),
    remainingSteps: scenarioSteps.slice(stepNumber),
    currentPageInfo,
    currentURL,
    recentSteps,
    experiences: memory.experiences,
    extractedData: memory.extractedData,
    noteFromPreviousIteration, // Tactical note from previous iteration
    somScreenshot, // SoM screenshot with visual markers (current)
    somElementMap, // SoM element details for disambiguation
    priorSteps, // Repair context (undefined for script gen)
    nextSteps // Repair context (undefined for script gen)
  };

  // Save current screenshot as previous for next iteration (for tool access).
  // NOTE(review): tools executed later in this same iteration will already
  // see the current screenshot under `previousSomScreenshot` - confirm that
  // this is what view_previous_screenshot expects.
  if (somScreenshot) {
    this.previousSomScreenshot = somScreenshot;
  }

  return context;
}
551
-
552
/**
 * Ask the LLM for the agent's next decision.
 *
 * Selects the system prompt by mode (SoM, coordinate fallback, or standard
 * DOM selectors), builds the user prompt from `context`, attaches the SoM
 * screenshot when present, reports token usage to the progress reporter
 * and parses the answer with the decision parser.
 *
 * @param context - Agent context for this iteration.
 * @param jobId - Job identifier included in the token-usage report.
 * @param stepNumber - Step number included in the token-usage report.
 * @param iteration - Iteration number included in the token-usage report.
 * @param consecutiveFailures - Failures so far; three or more switch to the
 *   coordinate prompt when coordinate mode is enabled and SoM is not in use.
 * @returns The parsed decision, or a fallback decision with status `stuck`
 *   when the LLM call or parsing throws.
 */
private async callAgent(
  context: AgentContext,
  jobId: string,
  stepNumber: number,
  iteration: number,
  consecutiveFailures?: number
): Promise<AgentDecision> {
  // Coordinate mode is the fallback tier after 3 selector failures.
  // Phase 2 will add an index tier in between and raise the threshold to 5.
  // It is gated on the config flag because executeStep() ignores coordinate
  // actions when coordinate mode is disabled.
  const useCoordinateMode =
    this.config.enableCoordinateMode &&
    consecutiveFailures !== undefined &&
    consecutiveFailures >= 3;

  // Build appropriate system prompt based on mode
  const toolDescriptions = this.toolRegistry.generateToolDescriptions();
  let systemPrompt: string;

  if (this.config.useSoM) {
    // SoM mode: visual element identification
    systemPrompt = OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates);
  } else if (useCoordinateMode) {
    // Coordinate mode: fallback when selectors fail
    systemPrompt = OrchestratorPrompts.buildCoordinateSystemPrompt();
  } else {
    // Standard mode: DOM-based selectors
    systemPrompt = OrchestratorPrompts.buildSystemPrompt(toolDescriptions, this.config.enableCoordinateMode);
  }

  const userPrompt = OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures, this.config.enableCoordinateMode);

  // Log prompt lengths for monitoring
  const systemLength = systemPrompt.length;
  const userLength = userPrompt.length;
  const totalLength = systemLength + userLength;
  const estimatedTokens = Math.ceil(totalLength / 4); // Rough estimate: 4 chars per token

  this.logger?.(`[Orchestrator] 📊 Prompt lengths: system=${systemLength} chars, user=${userLength} chars, total=${totalLength} chars (~${estimatedTokens} tokens)`, 'log');

  try {
    // Call LLM directly via provider
    const llmRequest: any = {
      model: DEFAULT_MODEL,
      systemPrompt,
      userPrompt
    };

    // Include current SoM screenshot as image
    const includesImage = Boolean(context.somScreenshot);
    if (includesImage) {
      llmRequest.imageUrl = context.somScreenshot;
      this.logger?.(`[Orchestrator] Including SoM screenshot in LLM request`, 'log');
    }

    const response = await this.llmFacade.llmProvider.callLLM(llmRequest);

    // Report token usage
    if (response.usage && this.progressReporter?.onTokensUsed) {
      const tokenUsage: TokenUsage = {
        jobId,
        stepNumber,
        iteration,
        inputTokens: response.usage.inputTokens,
        outputTokens: response.usage.outputTokens,
        includesImage, // Reflects whether a screenshot was actually attached
        model: DEFAULT_MODEL,
        timestamp: Date.now()
      };
      this.logger?.(`[Orchestrator] 💰 Reporting token usage: ${tokenUsage.inputTokens} + ${tokenUsage.outputTokens}`, 'log');
      await this.progressReporter.onTokensUsed(tokenUsage);
    } else if (!response.usage) {
      this.logger?.(`[Orchestrator] ⚠ No usage data in LLM response`, 'warn');
    }

    // Parse response
    return this.decisionParser.parse(response.answer);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    this.logger?.(`[Orchestrator] ✗ Agent call failed: ${message}`, 'error');

    // Return fallback decision
    return {
      status: 'stuck',
      statusReasoning: `Agent call failed: ${message}`,
      reasoning: 'LLM call failed'
    };
  }
}
640
-
641
/**
 * Execute the tool calls requested by the agent, one after another.
 *
 * At most `config.maxToolCallsPerIteration` calls are run; any excess is
 * dropped with a warning. Results are keyed by tool name, so when the same
 * tool is called more than once only the last result is kept.
 *
 * @param toolCalls - Tool calls from the agent decision (`name`, `params`).
 * @param page - Playwright page passed to the tools.
 * @param memory - Journey memory passed to the tools.
 * @param stepNumber - Current step number passed to the tools.
 * @param refMap - Optional element reference map passed to the tools.
 * @returns Tool results keyed by tool name.
 */
private async executeTools(
  toolCalls: any[],
  page: any,
  memory: JourneyMemory,
  stepNumber: number,
  refMap?: Map<string, any>
): Promise<Record<string, any>> {
  this.logger?.(`[Orchestrator] 🔧 Executing ${toolCalls.length} tool(s)`);

  const results: Record<string, any> = {};
  const toolContext: ToolExecutionContext & { refMap?: Map<string, any>; previousSomScreenshot?: string; somHandler?: any } = {
    page,
    memory,
    stepNumber,
    logger: this.logger,
    refMap, // For interact_with_ref tool
    previousSomScreenshot: this.previousSomScreenshot, // For view_previous_screenshot tool
    somHandler: this.somHandler // For refresh_som_markers tool
  };

  const callsToRun = toolCalls.slice(0, this.config.maxToolCallsPerIteration);
  if (callsToRun.length < toolCalls.length) {
    this.logger?.(`[Orchestrator] ⚠ Skipping ${toolCalls.length - callsToRun.length} tool call(s) beyond the per-iteration limit of ${this.config.maxToolCallsPerIteration}`, 'warn');
  }

  for (const toolCall of callsToRun) {
    // JSON.stringify(undefined) returns undefined, so guard before truncating.
    const paramsPreview = (JSON.stringify(toolCall.params) ?? '').substring(0, 50);
    this.logger?.(`[Orchestrator] ▶ ${toolCall.name}(${paramsPreview}...)`);

    const result = await this.toolRegistry.execute(toolCall, toolContext);
    results[toolCall.name] = result;

    if (result.success) {
      this.logger?.(`[Orchestrator] ✓ ${toolCall.name} succeeded`);
    } else {
      this.logger?.(`[Orchestrator] ✗ ${toolCall.name} failed: ${result.error}`, 'error');
    }
  }

  return results;
}
679
-
680
/**
 * Parse a SomCommand from a raw command object.
 *
 * A command is accepted when it is a non-null object with an `action` and
 * either an `elementRef`, a `coord`, or one of the navigation actions
 * (`navigate`, `goBack`, `goForward`, `reload`).
 *
 * @param cmd - Raw command value, typically from the agent's decision.
 * @returns A SomCommand holding the recognised fields, or `null` when the
 *   input is not a usable command.
 */
private parseSomCommand(cmd: any): SomCommand | null {
  // `typeof null` is 'object', so null must be excluded explicitly before
  // reading `cmd.action`.
  if (cmd === null || typeof cmd !== 'object' || !cmd.action) {
    return null;
  }

  // Valid if: has elementRef, OR has coord, OR is navigation action
  const isNavigationAction = ['navigate', 'goBack', 'goForward', 'reload'].includes(cmd.action);
  const hasValidTarget = cmd.elementRef || cmd.coord || isNavigationAction;
  if (!hasValidTarget) {
    return null;
  }

  return {
    elementRef: cmd.elementRef,
    coord: cmd.coord,
    action: cmd.action,
    value: cmd.value,
    fromCoord: cmd.fromCoord,
    toCoord: cmd.toCoord,
    force: cmd.force,
    scrollAmount: cmd.scrollAmount,
    scrollDirection: cmd.scrollDirection,
    button: cmd.button,
    clickCount: cmd.clickCount,
    modifiers: cmd.modifiers,
    delay: cmd.delay,
    timeout: cmd.timeout
  };
}
710
-
711
- /**
712
- * Execute commands (mix of ref and playwright commands)
713
- */
714
- private async executeCommands(
715
- commands: string[] | any[],
716
- page: any,
717
- memory: JourneyMemory,
718
- stepNumber: number,
719
- iteration: number,
720
- jobId: string
721
- ): Promise<{ executed: string[]; allSucceeded: boolean }> {
722
- this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s)`);
723
-
724
- const executed: string[] = [];
725
-
726
- if (commands.length === 0) {
727
- return { executed: [], allSucceeded: true };
728
- }
729
-
730
- // SoM mode: Execute commands through PageSoMHandler
731
- if (this.config.useSoM && this.somHandler) {
732
- this.logger?.(`[Orchestrator] Using SoM mode for command execution`, 'log');
733
-
734
- for (let i = 0; i < commands.length; i++) {
735
- const cmd = commands[i];
736
-
737
- // Check if verification or action command
738
- if (isSomVerification(cmd)) {
739
- // Handle verification command
740
- try {
741
- const result = await this.somHandler.executeVerification(cmd);
742
-
743
- // Always add command to executed array (even if verification failed)
744
- // Scripts should contain the expect even if it fails during generation
745
- if (result.playwrightCommand) {
746
- executed.push(result.playwrightCommand);
747
- }
748
-
749
- if (result.success) {
750
- this.logger?.(`[Orchestrator] ✓ [${i + 1}/${commands.length}] Verification passed`, 'log');
751
-
752
- memory.history.push({
753
- stepNumber,
754
- iteration,
755
- action: `Verification ${i + 1}/${commands.length}: ${cmd.verificationType}`,
756
- code: result.playwrightCommand,
757
- result: 'success',
758
- observation: `Verified: ${cmd.description || cmd.expected}`,
759
- url: page.url(),
760
- timestamp: Date.now()
761
- });
762
- } else {
763
- this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] Verification failed (non-fatal): ${result.error}`, 'warn');
764
-
765
- memory.history.push({
766
- stepNumber,
767
- iteration,
768
- action: `Verification ${i + 1}/${commands.length} - FAILED`,
769
- code: result.playwrightCommand || JSON.stringify(cmd),
770
- result: 'failure',
771
- observation: `Failed: ${result.error}`,
772
- error: result.error,
773
- url: page.url(),
774
- timestamp: Date.now()
775
- });
776
-
777
- // Continue anyway - verification failures are non-blocking for script generation
778
- }
779
-
780
- // Small delay between commands
781
- if (i < commands.length - 1) {
782
- await page.waitForTimeout(300);
783
- }
784
-
785
- } catch (error: any) {
786
- this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] Verification exception: ${error.message}`, 'error');
787
- }
788
-
789
- } else if (isSomCommand(cmd)) {
790
- // Handle action command (existing logic)
791
- const somCommand = cmd as SomCommand;
792
-
793
- try {
794
- const result = await this.somHandler.runCommand(
795
- somCommand,
796
- this.config.somUseSomIdBasedCommands || false
797
- );
798
-
799
- if (result.status === CommandRunStatus.SUCCESS && result.successAttempt) {
800
- this.logger?.(`[Orchestrator] ✓ [${i + 1}/${commands.length}] SoM action succeeded`, 'log');
801
- executed.push(result.successAttempt.command!);
802
-
803
- memory.history.push({
804
- stepNumber,
805
- iteration,
806
- action: `SoM Action ${i + 1}/${commands.length}: ${somCommand.action}`,
807
- code: result.successAttempt.command!,
808
- result: 'success',
809
- observation: 'Executed successfully',
810
- url: page.url(),
811
- timestamp: Date.now()
812
- });
813
-
814
- // Small delay for form validation/animations
815
- if (i < commands.length - 1) {
816
- await page.waitForTimeout(300);
817
- }
818
- } else {
819
- this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] SoM action failed: ${result.error}`, 'error');
820
-
821
- memory.history.push({
822
- stepNumber,
823
- iteration,
824
- action: `SoM Action ${i + 1}/${commands.length}: ${somCommand.action} - FAILED`,
825
- code: JSON.stringify(somCommand),
826
- result: 'failure',
827
- observation: `Failed: ${result.error}`,
828
- error: result.error,
829
- url: page.url(),
830
- timestamp: Date.now()
831
- });
832
-
833
- // Refresh SoM after batch (DOM may have changed)
834
- if (this.somHandler && page) {
835
- this.somHandler.setPage(page);
836
- await this.somHandler.updateSom();
837
- }
838
-
839
- return { executed, allSucceeded: false };
840
- }
841
- } catch (error: any) {
842
- this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] SoM action exception: ${error.message}`, 'error');
843
-
844
- memory.history.push({
845
- stepNumber,
846
- iteration,
847
- action: `SoM Action ${i + 1}/${commands.length} - EXCEPTION`,
848
- code: JSON.stringify(somCommand),
849
- result: 'failure',
850
- observation: `Exception: ${error.message}`,
851
- error: error.message,
852
- url: page.url(),
853
- timestamp: Date.now()
854
- });
855
-
856
- // Refresh SoM after batch (DOM may have changed)
857
- if (this.somHandler && page) {
858
- this.somHandler.setPage(page);
859
- await this.somHandler.updateSom();
860
- }
861
-
862
- return { executed, allSucceeded: false };
863
- }
864
- } else {
865
- this.logger?.(`[Orchestrator] ⚠ [${i + 1}/${commands.length}] Not a valid SoM command/verification, skipping`, 'warn');
866
- }
867
- }
868
-
869
- // Always wait for page to stabilize after command batch
870
- // This handles both explicit navigation AND clicks that trigger navigation/SPA routes
871
- try {
872
- this.logger?.(`[Orchestrator] Waiting for page to stabilize...`, 'log');
873
- // Use networkidle with short timeout to catch navigation without blocking on SPAs with continuous requests
874
- await page.waitForLoadState('networkidle', { timeout: 3000 });
875
- this.logger?.(`[Orchestrator] Page stabilized (networkidle)`, 'log');
876
- } catch (error: any) {
877
- // If networkidle times out, fall back to domcontentloaded
878
- try {
879
- await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
880
- this.logger?.(`[Orchestrator] Page loaded (domcontentloaded)`, 'log');
881
- } catch (error2: any) {
882
- this.logger?.(`[Orchestrator] Page load wait timeout (continuing anyway)`, 'warn');
883
- }
884
- }
885
-
886
- // Refresh SoM after batch (DOM may have changed and page is now stable)
887
- if (this.somHandler && page) {
888
- this.somHandler.setPage(page);
889
- await this.somHandler.updateSom();
890
- }
891
-
892
- return { executed, allSucceeded: true };
893
- }
894
-
895
- // Standard mode: Execute all commands in sequence with small delay between them
896
- // Delay helps with form validation, button enabling, and animations
897
- const wrappedCode = (commands as string[]).map((cmd, i) => `
898
- // Command ${i + 1}/${commands.length}
899
- try {
900
- ${cmd}
901
- __results.push({ index: ${i}, success: true });
902
- ${i < commands.length - 1 ? 'await page.waitForTimeout(300);' : ''} // Small delay for form validation/animations
903
- } catch (error) {
904
- __results.push({ index: ${i}, success: false, error: error.message });
905
- throw error;
906
- }`).join('\n');
907
-
908
- const fullCode = `const __results = []; ${wrappedCode} return __results;`;
909
-
910
- try {
911
- const func = new Function('page', 'expect', 'return (async () => { ' + fullCode + ' })()');
912
- const results = await func(page, (global as any).expect);
913
-
914
- for (let i = 0; i < commands.length; i++) {
915
- const cmd = commands[i];
916
- const result = results[i];
917
-
918
- if (result && result.success) {
919
- this.logger?.(`[Orchestrator] ✓ [${i + 1}/${commands.length}] Success`);
920
- memory.history.push({
921
- stepNumber,
922
- iteration,
923
- action: `Command ${i + 1}/${commands.length}`,
924
- code: cmd,
925
- result: 'success',
926
- observation: 'Executed successfully',
927
- url: page.url(),
928
- timestamp: Date.now()
929
- });
930
- executed.push(cmd);
931
- }
932
- }
933
-
934
- if (memory.history.length > this.config.maxHistorySize) {
935
- memory.history = memory.history.slice(-this.config.maxHistorySize);
936
- }
937
-
938
- return { executed, allSucceeded: true };
939
-
940
- } catch (error: any) {
941
- const errorMessage = error.message || String(error);
942
- this.logger?.(`[Orchestrator] ❌ Command execution failed: ${errorMessage}`, 'error');
943
-
944
- memory.history.push({
945
- stepNumber,
946
- iteration,
947
- action: `Command - FAILED`,
948
- code: commands[executed.length] || '',
949
- result: 'failure',
950
- observation: `Failed: ${errorMessage}`,
951
- error: errorMessage,
952
- url: page.url(),
953
- timestamp: Date.now()
954
- });
955
-
956
- return { executed, allSucceeded: false };
957
- }
958
- }
959
-
960
- /**
961
- * Report step progress
962
- */
963
- private async reportStepProgress(
964
- jobId: string,
965
- stepNumber: number,
966
- description: string,
967
- decision: AgentDecision,
968
- iteration: number
969
- ): Promise<void> {
970
- if (!this.progressReporter?.onStepProgress) return;
971
-
972
- await this.progressReporter.onStepProgress({
973
- jobId,
974
- stepNumber,
975
- description,
976
- status: decision.status === 'complete' ? StepExecutionStatus.SUCCESS :
977
- decision.status === 'stuck' || decision.status === 'infeasible' ? StepExecutionStatus.FAILURE :
978
- StepExecutionStatus.IN_PROGRESS,
979
- code: decision.commands?.join('\n'),
980
- // Include agent metadata for transparency
981
- agentIteration: iteration,
982
- agentReasoning: decision.reasoning,
983
- agentSelfReflection: decision.selfReflection,
984
- agentExperiences: decision.experiences,
985
- agentToolsUsed: decision.toolCalls?.map(t => t.name),
986
- agentStatus: decision.status
987
- });
988
- }
989
-
990
- /**
991
- * Execute exploration mode - agent autonomously explores to achieve journey goal
992
- * Fires onStepProgress callbacks for each autonomous action (transparent to caller)
993
- */
994
- async executeExploration(
995
- page: any,
996
- explorationConfig: ExplorationMode,
997
- jobId: string
998
- ): Promise<OrchestratorStepResult> {
999
- this.logger?.(`\n[Orchestrator] ========== EXPLORATION MODE ==========`);
1000
- this.logger?.(`[Orchestrator] 🎯 Journey Goal: ${explorationConfig.explorationPrompt}`);
1001
- if (explorationConfig.testDataPrompt) {
1002
- this.logger?.(`[Orchestrator] 📋 Test Data: ${explorationConfig.testDataPrompt}`);
1003
- }
1004
-
1005
- const memory: JourneyMemory = {
1006
- history: [],
1007
- experiences: [],
1008
- extractedData: {}
1009
- };
1010
-
1011
- const maxSteps = explorationConfig.maxExplorationSteps || 50;
1012
- let stepNumber = 0;
1013
- const commandsExecuted: string[] = [];
1014
-
1015
- while (stepNumber < maxSteps) {
1016
- stepNumber++;
1017
-
1018
- this.logger?.(`\n[Orchestrator] === Exploration Step ${stepNumber}/${maxSteps} ===`);
1019
-
1020
- // Build exploratory context
1021
- const context = await this.buildExploratoryContext(
1022
- page,
1023
- explorationConfig.explorationPrompt,
1024
- explorationConfig.testDataPrompt,
1025
- memory,
1026
- stepNumber,
1027
- maxSteps
1028
- );
1029
-
1030
- // Call agent with exploratory prompt
1031
- const decision = await this.callExploratoryAgent(
1032
- context,
1033
- jobId,
1034
- stepNumber
1035
- );
1036
-
1037
- this.decisionParser.log(decision, stepNumber);
1038
-
1039
- // Report step start (fires JourneyRunner's beforeStepStart callback)
1040
- if (this.progressReporter?.onStepProgress) {
1041
- const stepInfo = {
1042
- jobId,
1043
- stepNumber,
1044
- stepId: `exploration-${stepNumber}-${Date.now()}`,
1045
- description: decision.reasoning,
1046
- code: '', // Will be filled after commands execute
1047
- status: StepExecutionStatus.IN_PROGRESS,
1048
- wasRepaired: false
1049
- };
1050
- await this.progressReporter.onStepProgress(stepInfo);
1051
- }
1052
-
1053
- // Execute tools if requested
1054
- if (decision.toolCalls && decision.toolCalls.length > 0) {
1055
- const toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber);
1056
-
1057
- // If needs tool results, call agent again
1058
- if (decision.needsToolResults) {
1059
- const updatedContext = { ...context, toolResults };
1060
- const continuedDecision = await this.callExploratoryAgent(updatedContext, jobId, stepNumber);
1061
-
1062
- decision.commands = continuedDecision.commands || decision.commands;
1063
- decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
1064
- decision.status = continuedDecision.status;
1065
- }
1066
- }
1067
-
1068
- // Handle blocker clearing
1069
- if (decision.blockerDetected && decision.blockerDetected.clearingCommands) {
1070
- this.logger?.(`[Orchestrator] 🚧 Clearing blocker: ${decision.blockerDetected.description}`);
1071
- const blockerResult = await this.executeCommands(
1072
- decision.blockerDetected.clearingCommands,
1073
- page,
1074
- memory,
1075
- stepNumber,
1076
- 1,
1077
- jobId
1078
- );
1079
- commandsExecuted.push(...blockerResult.executed);
1080
- }
1081
-
1082
- // Execute exploration commands
1083
- let commandsSucceeded = true;
1084
- if (decision.commands && decision.commands.length > 0) {
1085
- const executeResult = await this.executeCommands(
1086
- decision.commands,
1087
- page,
1088
- memory,
1089
- stepNumber,
1090
- 1,
1091
- jobId
1092
- );
1093
- commandsExecuted.push(...executeResult.executed);
1094
- commandsSucceeded = executeResult.allSucceeded;
1095
- }
1096
-
1097
- // Report step completion (fires JourneyRunner's onStepComplete callback)
1098
- if (this.progressReporter?.onStepProgress) {
1099
- const stepInfo = {
1100
- jobId,
1101
- stepNumber,
1102
- stepId: `exploration-${stepNumber}-${Date.now()}`,
1103
- description: decision.reasoning,
1104
- code: decision.commands?.join('\n') || '',
1105
- status: commandsSucceeded ? StepExecutionStatus.SUCCESS : StepExecutionStatus.FAILURE,
1106
- error: commandsSucceeded ? undefined : 'Command execution failed',
1107
- wasRepaired: false
1108
- };
1109
- await this.progressReporter.onStepProgress(stepInfo);
1110
- }
1111
-
1112
- // Add experiences (both app patterns AND exploration progress)
1113
- if (decision.experiences) {
1114
- memory.experiences.push(...decision.experiences);
1115
- if (memory.experiences.length > this.config.maxExperiences) {
1116
- memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
1117
- }
1118
- }
1119
-
1120
- // Store note for next iteration
1121
- if (decision.noteToFutureSelf) {
1122
- memory.latestNote = {
1123
- fromIteration: stepNumber,
1124
- content: decision.noteToFutureSelf
1125
- };
1126
- }
1127
-
1128
- // Check termination
1129
- if (decision.status === 'complete') {
1130
- this.logger?.(`[Orchestrator] ✅ Journey exploration complete: ${decision.statusReasoning}`);
1131
- return {
1132
- success: true,
1133
- commands: commandsExecuted,
1134
- iterations: stepNumber,
1135
- terminationReason: 'complete',
1136
- memory
1137
- };
1138
- } else if (decision.status === 'stuck') {
1139
- this.logger?.(`[Orchestrator] ❌ Exploration stuck: ${decision.statusReasoning}`);
1140
- return {
1141
- success: false,
1142
- commands: commandsExecuted,
1143
- iterations: stepNumber,
1144
- terminationReason: 'agent_stuck',
1145
- memory,
1146
- error: decision.statusReasoning
1147
- };
1148
- }
1149
- }
1150
-
1151
- // Hit max steps - not necessarily a failure
1152
- this.logger?.(`[Orchestrator] ⚠ Maximum exploration steps reached (budget limit)`);
1153
- return {
1154
- success: true, // Not a failure - just budget limit
1155
- commands: commandsExecuted,
1156
- iterations: stepNumber,
1157
- terminationReason: 'system_limit',
1158
- memory
1159
- };
1160
- }
1161
-
1162
- private async buildExploratoryContext(
1163
- page: any,
1164
- explorationPrompt: string,
1165
- testDataPrompt: string | undefined,
1166
- memory: JourneyMemory,
1167
- stepNumber: number,
1168
- maxSteps: number
1169
- ): Promise<AgentContext> {
1170
- // Wait for page to be ready and elements to appear (especially important after navigation)
1171
- const currentPageInfo = await PageInfoRetry.getWithRetry(page);
1172
- const currentURL = page.url();
1173
- const recentSteps = memory.history.slice(-this.config.recentStepsCount);
1174
-
1175
- // SoM integration for exploratory mode
1176
- let somScreenshot: string | undefined = undefined;
1177
- let somElementMap: string | undefined = undefined;
1178
- if (this.config.useSoM && this.somHandler) {
1179
- try {
1180
- this.somHandler.setPage(page);
1181
-
1182
- // Wait briefly for page stability (handles first iteration + safety net for fast SPAs)
1183
- try {
1184
- await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
1185
- } catch (error: any) {
1186
- // Page already loaded or timeout - continue
1187
- }
1188
-
1189
- // Update SoM markers
1190
- await this.somHandler.updateSom();
1191
- somScreenshot = await this.somHandler.getScreenshot(true, false, 60); // Viewport only - agent can scroll or request full page
1192
-
1193
- // Get element map for disambiguation
1194
- somElementMap = this.somHandler.getSomElementMap();
1195
-
1196
- this.logger?.(`[Orchestrator] SoM screenshot captured for exploratory agent`, 'log');
1197
- } catch (error: any) {
1198
- this.logger?.(`[Orchestrator] Failed to capture SoM screenshot: ${error.message}`, 'error');
1199
- }
1200
- }
1201
-
1202
- const context = {
1203
- overallGoal: explorationPrompt,
1204
- currentStepGoal: explorationPrompt, // Same as overall for single journey
1205
- stepNumber,
1206
- totalSteps: maxSteps,
1207
- completedSteps: [],
1208
- remainingSteps: [],
1209
- currentPageInfo,
1210
- currentURL,
1211
- recentSteps,
1212
- experiences: memory.experiences,
1213
- extractedData: memory.extractedData,
1214
- noteFromPreviousIteration: memory.latestNote,
1215
- testDataPrompt, // CRITICAL: Store testDataPrompt in context
1216
- somScreenshot, // SoM screenshot for exploratory mode (current)
1217
- somElementMap // SoM element details for disambiguation
1218
- };
1219
-
1220
- // Save current screenshot as previous for next iteration (for tool access)
1221
- if (somScreenshot) {
1222
- this.previousSomScreenshot = somScreenshot;
1223
- }
1224
-
1225
- return context;
1226
- }
1227
-
1228
- private async callExploratoryAgent(
1229
- context: AgentContext,
1230
- jobId: string,
1231
- stepNumber: number
1232
- ): Promise<AgentDecision> {
1233
- const toolDescriptions = this.toolRegistry.generateToolDescriptions();
1234
-
1235
- // Use SoM system prompt if in SoM mode, otherwise use standard exploratory prompt
1236
- const systemPrompt = this.config.useSoM
1237
- ? OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates)
1238
- : OrchestratorPrompts.buildExploratorySystemPrompt(toolDescriptions);
1239
-
1240
- const userPrompt = OrchestratorPrompts.buildExploratoryUserPrompt(
1241
- context,
1242
- context.overallGoal,
1243
- context.testDataPrompt, // Pass testDataPrompt from context
1244
- stepNumber,
1245
- context.totalSteps
1246
- );
1247
-
1248
- const llmRequest: any = {
1249
- model: DEFAULT_MODEL,
1250
- systemPrompt,
1251
- userPrompt
1252
- };
1253
-
1254
- // Include current SoM screenshot as image
1255
- if (context.somScreenshot) {
1256
- llmRequest.imageUrl = context.somScreenshot;
1257
- this.logger?.(`[Orchestrator] Including SoM screenshot in exploratory LLM request`, 'log');
1258
- }
1259
-
1260
- const response = await this.llmFacade.llmProvider.callLLM(llmRequest);
1261
-
1262
- // Report token usage
1263
- if (response.usage && this.progressReporter?.onTokensUsed) {
1264
- await this.progressReporter.onTokensUsed({
1265
- jobId,
1266
- stepNumber,
1267
- iteration: 1,
1268
- inputTokens: response.usage.inputTokens,
1269
- outputTokens: response.usage.outputTokens,
1270
- includesImage: false,
1271
- model: DEFAULT_MODEL,
1272
- timestamp: Date.now()
1273
- });
1274
- }
1275
-
1276
- // Parse response (same JSON format as regular mode)
1277
- const decision = this.decisionParser.parse(response.answer);
1278
- return decision;
1279
- }
1280
- }
1281
-
1282
-