testchimp-runner-core 0.0.34 → 0.0.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/execution-service.d.ts +1 -4
- package/dist/execution-service.d.ts.map +1 -1
- package/dist/execution-service.js +155 -468
- package/dist/execution-service.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -1
- package/dist/index.js.map +1 -1
- package/dist/orchestrator/decision-parser.d.ts +18 -0
- package/dist/orchestrator/decision-parser.d.ts.map +1 -0
- package/dist/orchestrator/decision-parser.js +127 -0
- package/dist/orchestrator/decision-parser.js.map +1 -0
- package/dist/orchestrator/index.d.ts +4 -2
- package/dist/orchestrator/index.d.ts.map +1 -1
- package/dist/orchestrator/index.js +14 -2
- package/dist/orchestrator/index.js.map +1 -1
- package/dist/orchestrator/orchestrator-agent.d.ts +17 -14
- package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-agent.js +534 -204
- package/dist/orchestrator/orchestrator-agent.js.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.d.ts +14 -2
- package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.js +529 -247
- package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
- package/dist/orchestrator/page-som-handler.d.ts +106 -0
- package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
- package/dist/orchestrator/page-som-handler.js +1353 -0
- package/dist/orchestrator/page-som-handler.js.map +1 -0
- package/dist/orchestrator/som-types.d.ts +149 -0
- package/dist/orchestrator/som-types.d.ts.map +1 -0
- package/dist/orchestrator/som-types.js +87 -0
- package/dist/orchestrator/som-types.js.map +1 -0
- package/dist/orchestrator/tool-registry.d.ts +2 -0
- package/dist/orchestrator/tool-registry.d.ts.map +1 -1
- package/dist/orchestrator/tool-registry.js.map +1 -1
- package/dist/orchestrator/tools/index.d.ts +4 -1
- package/dist/orchestrator/tools/index.d.ts.map +1 -1
- package/dist/orchestrator/tools/index.js +7 -2
- package/dist/orchestrator/tools/index.js.map +1 -1
- package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
- package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
- package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
- package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
- package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
- package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
- package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
- package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
- package/dist/orchestrator/types.d.ts +23 -1
- package/dist/orchestrator/types.d.ts.map +1 -1
- package/dist/orchestrator/types.js +11 -1
- package/dist/orchestrator/types.js.map +1 -1
- package/dist/scenario-service.d.ts +5 -0
- package/dist/scenario-service.d.ts.map +1 -1
- package/dist/scenario-service.js +17 -0
- package/dist/scenario-service.js.map +1 -1
- package/dist/scenario-worker-class.d.ts +4 -0
- package/dist/scenario-worker-class.d.ts.map +1 -1
- package/dist/scenario-worker-class.js +18 -3
- package/dist/scenario-worker-class.js.map +1 -1
- package/dist/testing/agent-tester.d.ts +35 -0
- package/dist/testing/agent-tester.d.ts.map +1 -0
- package/dist/testing/agent-tester.js +84 -0
- package/dist/testing/agent-tester.js.map +1 -0
- package/dist/testing/ref-translator-tester.d.ts +44 -0
- package/dist/testing/ref-translator-tester.d.ts.map +1 -0
- package/dist/testing/ref-translator-tester.js +104 -0
- package/dist/testing/ref-translator-tester.js.map +1 -0
- package/dist/utils/hierarchical-selector.d.ts +47 -0
- package/dist/utils/hierarchical-selector.d.ts.map +1 -0
- package/dist/utils/hierarchical-selector.js +212 -0
- package/dist/utils/hierarchical-selector.js.map +1 -0
- package/dist/utils/page-info-retry.d.ts +14 -0
- package/dist/utils/page-info-retry.d.ts.map +1 -0
- package/dist/utils/page-info-retry.js +60 -0
- package/dist/utils/page-info-retry.js.map +1 -0
- package/dist/utils/page-info-utils.d.ts +1 -0
- package/dist/utils/page-info-utils.d.ts.map +1 -1
- package/dist/utils/page-info-utils.js +46 -18
- package/dist/utils/page-info-utils.js.map +1 -1
- package/dist/utils/ref-attacher.d.ts +21 -0
- package/dist/utils/ref-attacher.d.ts.map +1 -0
- package/dist/utils/ref-attacher.js +149 -0
- package/dist/utils/ref-attacher.js.map +1 -0
- package/dist/utils/ref-translator.d.ts +49 -0
- package/dist/utils/ref-translator.d.ts.map +1 -0
- package/dist/utils/ref-translator.js +276 -0
- package/dist/utils/ref-translator.js.map +1 -0
- package/package.json +1 -1
- package/plandocs/exploratory-mode-support-v2.plan.md +953 -0
- package/plandocs/exploratory-mode-support.plan.md +928 -0
- package/plandocs/journey-id-tracking-addendum.md +227 -0
- package/src/execution-service.ts +179 -596
- package/src/index.ts +10 -0
- package/src/orchestrator/decision-parser.ts +139 -0
- package/src/orchestrator/index.ts +25 -1
- package/src/orchestrator/orchestrator-agent.ts +656 -236
- package/src/orchestrator/orchestrator-prompts.ts +559 -247
- package/src/orchestrator/page-som-handler.ts +1565 -0
- package/src/orchestrator/som-types.ts +188 -0
- package/src/orchestrator/tool-registry.ts +2 -0
- package/src/orchestrator/tools/index.ts +4 -1
- package/src/orchestrator/tools/refresh-som-markers.ts +69 -0
- package/src/orchestrator/tools/view-previous-screenshot.ts +103 -0
- package/src/orchestrator/types.ts +49 -6
- package/src/scenario-service.ts +20 -0
- package/src/scenario-worker-class.ts +24 -3
- package/src/utils/page-info-retry.ts +65 -0
- package/src/utils/page-info-utils.ts +53 -18
- package/testchimp-runner-core-0.0.35.tgz +0 -0
- package/src/orchestrator/orchestrator-agent.ts.backup +0 -1386
- package/testchimp-runner-core-0.0.33.tgz +0 -0
- /package/{RELEASE_0.0.26.md → releasenotes/RELEASE_0.0.26.md} +0 -0
- /package/{RELEASE_0.0.27.md → releasenotes/RELEASE_0.0.27.md} +0 -0
- /package/{RELEASE_0.0.28.md → releasenotes/RELEASE_0.0.28.md} +0 -0
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
import { expect } from '@playwright/test';
|
|
7
7
|
import { LLMFacade } from '../llm-facade';
|
|
8
8
|
import { ProgressReporter, TokenUsage, StepExecutionStatus } from '../progress-reporter';
|
|
9
|
-
import { getEnhancedPageInfo } from '../utils/page-info-utils';
|
|
9
|
+
import { getEnhancedPageInfo, PageInfo } from '../utils/page-info-utils';
|
|
10
10
|
import { CoordinateConverter } from '../utils/coordinate-converter';
|
|
11
11
|
import { ToolRegistry, ToolExecutionContext } from './tool-registry';
|
|
12
12
|
import { DEFAULT_MODEL } from '../model-constants';
|
|
@@ -20,9 +20,14 @@ import {
|
|
|
20
20
|
SelfReflection,
|
|
21
21
|
NoteToFutureSelf,
|
|
22
22
|
CoordinateAction,
|
|
23
|
+
ExplorationMode,
|
|
23
24
|
DEFAULT_AGENT_CONFIG
|
|
24
25
|
} from './types';
|
|
25
26
|
import { OrchestratorPrompts } from './orchestrator-prompts';
|
|
27
|
+
import { PageInfoRetry } from '../utils/page-info-retry';
|
|
28
|
+
import { DecisionParser } from './decision-parser';
|
|
29
|
+
import { PageSoMHandler } from './page-som-handler';
|
|
30
|
+
import { SomCommand, CommandRunStatus, InteractionAction, isSomVerification, isSomCommand, SomVerification } from './som-types';
|
|
26
31
|
|
|
27
32
|
/**
|
|
28
33
|
* Orchestrator Agent - manages step execution with tool use and memory
|
|
@@ -34,6 +39,9 @@ export class OrchestratorAgent {
|
|
|
34
39
|
private config: Required<AgentConfig>;
|
|
35
40
|
private logger?: (message: string, level?: 'log' | 'error' | 'warn' | 'debug') => void;
|
|
36
41
|
private debugMode: boolean = false;
|
|
42
|
+
private decisionParser: DecisionParser;
|
|
43
|
+
private somHandler?: PageSoMHandler;
|
|
44
|
+
private previousSomScreenshot?: string; // Track previous iteration's screenshot
|
|
37
45
|
|
|
38
46
|
constructor(
|
|
39
47
|
llmFacade: LLMFacade,
|
|
@@ -49,6 +57,12 @@ export class OrchestratorAgent {
|
|
|
49
57
|
this.progressReporter = progressReporter;
|
|
50
58
|
this.logger = logger;
|
|
51
59
|
this.debugMode = debugMode || false;
|
|
60
|
+
this.decisionParser = new DecisionParser(logger);
|
|
61
|
+
|
|
62
|
+
// Initialize SoM handler if enabled
|
|
63
|
+
if (this.config.useSoM) {
|
|
64
|
+
this.somHandler = new PageSoMHandler(null as any, this.logger);
|
|
65
|
+
}
|
|
52
66
|
}
|
|
53
67
|
|
|
54
68
|
setDebugMode(enabled: boolean): void {
|
|
@@ -65,13 +79,14 @@ export class OrchestratorAgent {
|
|
|
65
79
|
totalSteps: number,
|
|
66
80
|
scenarioSteps: string[],
|
|
67
81
|
memory: JourneyMemory,
|
|
68
|
-
jobId: string
|
|
82
|
+
jobId: string,
|
|
83
|
+
priorSteps?: string[], // NEW: For repair mode (undefined for script gen)
|
|
84
|
+
nextSteps?: string[] // NEW: For repair mode (undefined for script gen)
|
|
69
85
|
): Promise<OrchestratorStepResult> {
|
|
70
86
|
this.logger?.(`\n[Orchestrator] ========== STEP ${stepNumber}/${totalSteps} ==========`);
|
|
71
87
|
this.logger?.(`[Orchestrator] 🎯 Goal: ${stepDescription}`);
|
|
72
88
|
|
|
73
89
|
let iteration = 0;
|
|
74
|
-
let previousReflection: SelfReflection | undefined = undefined;
|
|
75
90
|
let noteToSelf: NoteToFutureSelf | undefined = memory.latestNote; // Start with note from previous step
|
|
76
91
|
const commandsExecuted: string[] = [];
|
|
77
92
|
let consecutiveFailures = 0; // Track consecutive iterations with failed commands
|
|
@@ -90,9 +105,10 @@ export class OrchestratorAgent {
|
|
|
90
105
|
totalSteps,
|
|
91
106
|
scenarioSteps,
|
|
92
107
|
memory,
|
|
93
|
-
previousReflection,
|
|
94
108
|
consecutiveFailures,
|
|
95
|
-
noteToSelf //
|
|
109
|
+
noteToSelf, // Pass note from previous iteration
|
|
110
|
+
priorSteps, // NEW: Pass repair context
|
|
111
|
+
nextSteps // NEW: Pass repair context
|
|
96
112
|
);
|
|
97
113
|
|
|
98
114
|
// Call agent to make decision
|
|
@@ -105,7 +121,7 @@ export class OrchestratorAgent {
|
|
|
105
121
|
);
|
|
106
122
|
|
|
107
123
|
// Log agent's reasoning
|
|
108
|
-
this.
|
|
124
|
+
this.decisionParser.log(decision, iteration);
|
|
109
125
|
|
|
110
126
|
// Report progress
|
|
111
127
|
await this.reportStepProgress(jobId, stepNumber, stepDescription, decision, iteration);
|
|
@@ -113,17 +129,39 @@ export class OrchestratorAgent {
|
|
|
113
129
|
// Execute tools if requested (tools are READ-ONLY, they don't change state)
|
|
114
130
|
let toolResults: Record<string, any> = {};
|
|
115
131
|
|
|
116
|
-
// ANTI-LOOP: Detect
|
|
132
|
+
// ANTI-LOOP: Detect and BLOCK screenshot loops (PER STEP)
|
|
133
|
+
const screenshotsThisStep = memory.history.filter(s =>
|
|
134
|
+
s.stepNumber === stepNumber &&
|
|
135
|
+
(s.code.includes('take_screenshot') || s.action.includes('Screenshot'))
|
|
136
|
+
);
|
|
117
137
|
const recentScreenshots = memory.history.slice(-3).filter(s =>
|
|
118
138
|
s.code.includes('take_screenshot') || s.action.includes('Screenshot')
|
|
119
139
|
);
|
|
120
|
-
|
|
140
|
+
|
|
141
|
+
if (screenshotsThisStep.length >= 3) {
|
|
142
|
+
this.logger?.(`[Orchestrator] 🚨 SCREENSHOT LOOP - ${screenshotsThisStep.length} screenshots THIS STEP! BLOCKING further screenshots`, 'error');
|
|
143
|
+
} else if (recentScreenshots.length >= 2 && iteration >= 3) {
|
|
121
144
|
this.logger?.(`[Orchestrator] ⚠️ WARNING: ${recentScreenshots.length} screenshots in last 3 iterations - agent may be looping`, 'warn');
|
|
122
|
-
this.logger?.(`[Orchestrator] 💭 System: Stop gathering info, START ACTING with available selectors`);
|
|
123
145
|
}
|
|
124
146
|
|
|
125
147
|
if (decision.toolCalls && decision.toolCalls.length > 0) {
|
|
126
|
-
|
|
148
|
+
// ENFORCE: Block screenshot tool calls if too many taken IN THIS STEP
|
|
149
|
+
if (screenshotsThisStep.length >= 3) {
|
|
150
|
+
decision.toolCalls = decision.toolCalls.filter(tc => tc.name !== 'take_screenshot');
|
|
151
|
+
if (decision.toolCalls.length === 0) {
|
|
152
|
+
this.logger?.(`[Orchestrator] 🚫 REJECTED screenshot tool call - loop detected. Agent must ACT.`, 'warn');
|
|
153
|
+
toolResults = [{
|
|
154
|
+
toolName: 'take_screenshot',
|
|
155
|
+
success: false,
|
|
156
|
+
error: 'SYSTEM BLOCKED: Too many screenshots taken. You must use existing DOM snapshots and execute commands now. Analysis paralysis detected.',
|
|
157
|
+
data: null
|
|
158
|
+
}];
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
if (decision.toolCalls.length > 0) {
|
|
163
|
+
toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber, context.currentPageInfo.refMap);
|
|
164
|
+
}
|
|
127
165
|
|
|
128
166
|
// If agent wants to wait for tool results before proceeding, call agent again
|
|
129
167
|
if (decision.needsToolResults) {
|
|
@@ -147,7 +185,7 @@ export class OrchestratorAgent {
|
|
|
147
185
|
this.logger?.(`[Orchestrator] 🚧 BLOCKER DETECTED: ${decision.blockerDetected.description}`);
|
|
148
186
|
this.logger?.(`[Orchestrator] 🧹 Clearing blocker with ${decision.blockerDetected.clearingCommands.length} command(s)...`);
|
|
149
187
|
|
|
150
|
-
const blockerResult = await this.
|
|
188
|
+
const blockerResult = await this.executeCommands(
|
|
151
189
|
decision.blockerDetected.clearingCommands,
|
|
152
190
|
page,
|
|
153
191
|
memory,
|
|
@@ -171,8 +209,8 @@ export class OrchestratorAgent {
|
|
|
171
209
|
}
|
|
172
210
|
|
|
173
211
|
// Execute main commands (only if no blocker failure)
|
|
174
|
-
if (decision.commands && decision.commands.length > 0
|
|
175
|
-
const executeResult = await this.
|
|
212
|
+
if (!iterationHadFailure && decision.commands && decision.commands.length > 0) {
|
|
213
|
+
const executeResult = await this.executeCommands(
|
|
176
214
|
decision.commands,
|
|
177
215
|
page,
|
|
178
216
|
memory,
|
|
@@ -193,8 +231,8 @@ export class OrchestratorAgent {
|
|
|
193
231
|
}
|
|
194
232
|
}
|
|
195
233
|
|
|
196
|
-
// Handle coordinate-based actions (NEW - fallback when selectors fail)
|
|
197
|
-
if (decision.coordinateAction && !iterationHadFailure) {
|
|
234
|
+
// Handle coordinate-based actions (NEW - fallback when selectors fail) - ONLY if enabled
|
|
235
|
+
if (this.config.enableCoordinateMode && decision.coordinateAction && !iterationHadFailure) {
|
|
198
236
|
coordinateAttempts++;
|
|
199
237
|
|
|
200
238
|
this.logger?.(`[Orchestrator] 🎯 Coordinate Action (attempt ${coordinateAttempts}/2): ${decision.coordinateAction.action} at (${decision.coordinateAction.xPercent}%, ${decision.coordinateAction.yPercent}%)`);
|
|
@@ -211,7 +249,7 @@ export class OrchestratorAgent {
|
|
|
211
249
|
coordCommands.forEach(cmd => this.logger?.(` ${cmd}`));
|
|
212
250
|
|
|
213
251
|
// Execute coordinate commands
|
|
214
|
-
const coordResult = await this.
|
|
252
|
+
const coordResult = await this.executeCommands(
|
|
215
253
|
coordCommands,
|
|
216
254
|
page,
|
|
217
255
|
memory,
|
|
@@ -380,10 +418,7 @@ export class OrchestratorAgent {
|
|
|
380
418
|
}
|
|
381
419
|
}
|
|
382
420
|
|
|
383
|
-
// Store self
|
|
384
|
-
previousReflection = decision.selfReflection;
|
|
385
|
-
|
|
386
|
-
// Store note to future self (NEW - tactical memory across iterations AND steps)
|
|
421
|
+
// Store note to future self (tactical memory across iterations AND steps)
|
|
387
422
|
if (decision.noteToFutureSelf) {
|
|
388
423
|
noteToSelf = {
|
|
389
424
|
fromIteration: iteration,
|
|
@@ -441,9 +476,10 @@ export class OrchestratorAgent {
|
|
|
441
476
|
totalSteps: number,
|
|
442
477
|
scenarioSteps: string[],
|
|
443
478
|
memory: JourneyMemory,
|
|
444
|
-
previousReflection?: SelfReflection,
|
|
445
479
|
consecutiveFailures?: number,
|
|
446
|
-
noteFromPreviousIteration?: NoteToFutureSelf
|
|
480
|
+
noteFromPreviousIteration?: NoteToFutureSelf,
|
|
481
|
+
priorSteps?: string[], // NEW: For repair mode
|
|
482
|
+
nextSteps?: string[] // NEW: For repair mode
|
|
447
483
|
): Promise<AgentContext> {
|
|
448
484
|
// Get fresh DOM
|
|
449
485
|
const currentPageInfo = await getEnhancedPageInfo(page);
|
|
@@ -452,8 +488,41 @@ export class OrchestratorAgent {
|
|
|
452
488
|
// Get recent steps
|
|
453
489
|
const recentSteps = memory.history.slice(-this.config.recentStepsCount);
|
|
454
490
|
|
|
491
|
+
// SoM integration: Update markers and capture screenshot with visual IDs
|
|
492
|
+
let somScreenshot: string | undefined = undefined;
|
|
493
|
+
let somElementMap: string | undefined = undefined;
|
|
494
|
+
if (this.config.useSoM && this.somHandler) {
|
|
495
|
+
try {
|
|
496
|
+
if (!this.somHandler) {
|
|
497
|
+
this.somHandler = new PageSoMHandler(page, this.logger);
|
|
498
|
+
} else {
|
|
499
|
+
this.somHandler.setPage(page);
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
// Wait briefly for page stability (handles first iteration + safety net for fast SPAs)
|
|
503
|
+
try {
|
|
504
|
+
await page.waitForLoadState('domcontentloaded', { timeout: 5000 });
|
|
505
|
+
} catch (error: any) {
|
|
506
|
+
// Page already loaded or timeout - continue
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
// Update SoM markers
|
|
510
|
+
await this.somHandler.updateSom();
|
|
511
|
+
|
|
512
|
+
// Get screenshot WITH markers (viewport only - agent can scroll or use take_screenshot for full page)
|
|
513
|
+
somScreenshot = await this.somHandler.getScreenshot(true, false, 60);
|
|
514
|
+
|
|
515
|
+
// Get element map for disambiguation
|
|
516
|
+
somElementMap = this.somHandler.getSomElementMap();
|
|
517
|
+
|
|
518
|
+
this.logger?.(`[Orchestrator] SoM screenshot captured for agent decision-making`, 'log');
|
|
519
|
+
} catch (error: any) {
|
|
520
|
+
this.logger?.(`[Orchestrator] Failed to capture SoM screenshot: ${error.message}`, 'error');
|
|
521
|
+
}
|
|
522
|
+
}
|
|
523
|
+
|
|
455
524
|
// Build context
|
|
456
|
-
|
|
525
|
+
const context = {
|
|
457
526
|
overallGoal: scenarioSteps.join('\n'),
|
|
458
527
|
currentStepGoal,
|
|
459
528
|
stepNumber,
|
|
@@ -465,9 +534,19 @@ export class OrchestratorAgent {
|
|
|
465
534
|
recentSteps,
|
|
466
535
|
experiences: memory.experiences,
|
|
467
536
|
extractedData: memory.extractedData,
|
|
468
|
-
|
|
469
|
-
|
|
537
|
+
noteFromPreviousIteration, // Pass tactical note from previous iteration
|
|
538
|
+
somScreenshot, // SoM screenshot with visual markers (current)
|
|
539
|
+
somElementMap, // SoM element details for disambiguation
|
|
540
|
+
priorSteps, // NEW: Repair context (undefined for script gen)
|
|
541
|
+
nextSteps // NEW: Repair context (undefined for script gen)
|
|
470
542
|
};
|
|
543
|
+
|
|
544
|
+
// Save current screenshot as previous for next iteration (for tool access)
|
|
545
|
+
if (somScreenshot) {
|
|
546
|
+
this.previousSomScreenshot = somScreenshot;
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
return context;
|
|
471
550
|
}
|
|
472
551
|
|
|
473
552
|
/**
|
|
@@ -487,10 +566,20 @@ export class OrchestratorAgent {
|
|
|
487
566
|
|
|
488
567
|
// Build appropriate system prompt based on mode
|
|
489
568
|
const toolDescriptions = this.toolRegistry.generateToolDescriptions();
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
569
|
+
let systemPrompt: string;
|
|
570
|
+
|
|
571
|
+
if (this.config.useSoM) {
|
|
572
|
+
// SoM mode: Use visual element identification
|
|
573
|
+
systemPrompt = OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates);
|
|
574
|
+
} else if (useCoordinateMode) {
|
|
575
|
+
// Coordinate mode: Fallback when selectors fail
|
|
576
|
+
systemPrompt = OrchestratorPrompts.buildCoordinateSystemPrompt();
|
|
577
|
+
} else {
|
|
578
|
+
// Standard mode: DOM-based selectors
|
|
579
|
+
systemPrompt = OrchestratorPrompts.buildSystemPrompt(toolDescriptions, this.config.enableCoordinateMode);
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
const userPrompt = OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures, this.config.enableCoordinateMode);
|
|
494
583
|
|
|
495
584
|
// Log prompt lengths for monitoring
|
|
496
585
|
const systemLength = systemPrompt.length;
|
|
@@ -502,12 +591,18 @@ export class OrchestratorAgent {
|
|
|
502
591
|
|
|
503
592
|
try {
|
|
504
593
|
// Call LLM directly via provider
|
|
505
|
-
const llmRequest = {
|
|
594
|
+
const llmRequest: any = {
|
|
506
595
|
model: DEFAULT_MODEL,
|
|
507
596
|
systemPrompt,
|
|
508
597
|
userPrompt
|
|
509
598
|
};
|
|
510
599
|
|
|
600
|
+
// Include current SoM screenshot as image
|
|
601
|
+
if (context.somScreenshot) {
|
|
602
|
+
llmRequest.imageUrl = context.somScreenshot;
|
|
603
|
+
this.logger?.(`[Orchestrator] Including SoM screenshot in LLM request`, 'log');
|
|
604
|
+
}
|
|
605
|
+
|
|
511
606
|
const response = await this.llmFacade.llmProvider.callLLM(llmRequest);
|
|
512
607
|
|
|
513
608
|
// Report token usage
|
|
@@ -529,7 +624,7 @@ export class OrchestratorAgent {
|
|
|
529
624
|
}
|
|
530
625
|
|
|
531
626
|
// Parse response
|
|
532
|
-
return this.
|
|
627
|
+
return this.decisionParser.parse(response.answer);
|
|
533
628
|
|
|
534
629
|
} catch (error: any) {
|
|
535
630
|
this.logger?.(`[Orchestrator] ✗ Agent call failed: ${error.message}`, 'error');
|
|
@@ -543,57 +638,6 @@ export class OrchestratorAgent {
|
|
|
543
638
|
}
|
|
544
639
|
}
|
|
545
640
|
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
/**
|
|
549
|
-
* Parse agent decision from LLM response
|
|
550
|
-
*/
|
|
551
|
-
private parseAgentDecision(response: string): AgentDecision {
|
|
552
|
-
try {
|
|
553
|
-
// Extract JSON from response
|
|
554
|
-
const jsonMatch = response.match(/\{[\s\S]*\}/);
|
|
555
|
-
if (!jsonMatch) {
|
|
556
|
-
this.logger?.(`[Orchestrator] ✗ No JSON found in LLM response`, 'error');
|
|
557
|
-
this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
|
|
558
|
-
throw new Error('No JSON found in response');
|
|
559
|
-
}
|
|
560
|
-
|
|
561
|
-
const parsed = JSON.parse(jsonMatch[0]);
|
|
562
|
-
|
|
563
|
-
// Validate required fields
|
|
564
|
-
// Accept either "reasoning" or "statusReasoning" (LLMs sometimes only provide one)
|
|
565
|
-
if (!parsed.status || (!parsed.reasoning && !parsed.statusReasoning)) {
|
|
566
|
-
this.logger?.(`[Orchestrator] ✗ Missing required fields in parsed JSON`, 'error');
|
|
567
|
-
this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
|
|
568
|
-
this.logger?.(`[Orchestrator] 📄 PARSED JSON:\n${JSON.stringify(parsed, null, 2)}`, 'error');
|
|
569
|
-
this.logger?.(`[Orchestrator] ❌ Has status: ${!!parsed.status}, Has reasoning: ${!!parsed.reasoning}, Has statusReasoning: ${!!parsed.statusReasoning}`, 'error');
|
|
570
|
-
throw new Error('Missing required fields: status and (reasoning or statusReasoning)');
|
|
571
|
-
}
|
|
572
|
-
|
|
573
|
-
// Normalize: if reasoning is missing but statusReasoning exists, use statusReasoning as reasoning
|
|
574
|
-
if (!parsed.reasoning && parsed.statusReasoning) {
|
|
575
|
-
parsed.reasoning = parsed.statusReasoning;
|
|
576
|
-
}
|
|
577
|
-
|
|
578
|
-
return parsed as AgentDecision;
|
|
579
|
-
|
|
580
|
-
} catch (error: any) {
|
|
581
|
-
this.logger?.(`[Orchestrator] ✗ Failed to parse agent decision: ${error.message}`, 'error');
|
|
582
|
-
|
|
583
|
-
// Only log full response if not already logged above
|
|
584
|
-
if (!error.message.includes('Missing required fields') && !error.message.includes('No JSON found')) {
|
|
585
|
-
this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
|
|
586
|
-
}
|
|
587
|
-
|
|
588
|
-
// Return fallback
|
|
589
|
-
return {
|
|
590
|
-
status: 'stuck',
|
|
591
|
-
statusReasoning: 'Failed to parse agent response',
|
|
592
|
-
reasoning: `Parse error: ${error.message}`
|
|
593
|
-
};
|
|
594
|
-
}
|
|
595
|
-
}
|
|
596
|
-
|
|
597
641
|
/**
|
|
598
642
|
* Execute tools
|
|
599
643
|
*/
|
|
@@ -601,16 +645,20 @@ export class OrchestratorAgent {
|
|
|
601
645
|
toolCalls: any[],
|
|
602
646
|
page: any,
|
|
603
647
|
memory: JourneyMemory,
|
|
604
|
-
stepNumber: number
|
|
648
|
+
stepNumber: number,
|
|
649
|
+
refMap?: Map<string, any>
|
|
605
650
|
): Promise<Record<string, any>> {
|
|
606
651
|
this.logger?.(`[Orchestrator] 🔧 Executing ${toolCalls.length} tool(s)`);
|
|
607
652
|
|
|
608
653
|
const results: Record<string, any> = {};
|
|
609
|
-
const toolContext: ToolExecutionContext = {
|
|
654
|
+
const toolContext: ToolExecutionContext & { refMap?: Map<string, any>; previousSomScreenshot?: string; somHandler?: any } = {
|
|
610
655
|
page,
|
|
611
656
|
memory,
|
|
612
657
|
stepNumber,
|
|
613
|
-
logger: this.logger
|
|
658
|
+
logger: this.logger,
|
|
659
|
+
refMap, // Pass refMap for interact_with_ref tool
|
|
660
|
+
previousSomScreenshot: this.previousSomScreenshot, // For view_previous_screenshot tool
|
|
661
|
+
somHandler: this.somHandler // For refresh_som_markers tool
|
|
614
662
|
};
|
|
615
663
|
|
|
616
664
|
for (const toolCall of toolCalls.slice(0, this.config.maxToolCallsPerIteration)) {
|
|
@@ -630,71 +678,259 @@ export class OrchestratorAgent {
|
|
|
630
678
|
}
|
|
631
679
|
|
|
632
680
|
/**
|
|
633
|
-
*
|
|
681
|
+
* Parse SomCommand from command object
|
|
634
682
|
*/
|
|
635
|
-
private
|
|
636
|
-
|
|
683
|
+
private parseSomCommand(cmd: any): SomCommand | null {
|
|
684
|
+
if (typeof cmd === 'object' && cmd.action) {
|
|
685
|
+
// Valid if: has elementRef, OR has coord, OR is navigation action
|
|
686
|
+
const isNavigationAction = ['navigate', 'goBack', 'goForward', 'reload'].includes(cmd.action);
|
|
687
|
+
const hasValidTarget = cmd.elementRef || cmd.coord || isNavigationAction;
|
|
688
|
+
|
|
689
|
+
if (hasValidTarget) {
|
|
690
|
+
return {
|
|
691
|
+
elementRef: cmd.elementRef,
|
|
692
|
+
coord: cmd.coord,
|
|
693
|
+
action: cmd.action,
|
|
694
|
+
value: cmd.value,
|
|
695
|
+
fromCoord: cmd.fromCoord,
|
|
696
|
+
toCoord: cmd.toCoord,
|
|
697
|
+
force: cmd.force,
|
|
698
|
+
scrollAmount: cmd.scrollAmount,
|
|
699
|
+
scrollDirection: cmd.scrollDirection,
|
|
700
|
+
button: cmd.button,
|
|
701
|
+
clickCount: cmd.clickCount,
|
|
702
|
+
modifiers: cmd.modifiers,
|
|
703
|
+
delay: cmd.delay,
|
|
704
|
+
timeout: cmd.timeout
|
|
705
|
+
};
|
|
706
|
+
}
|
|
707
|
+
}
|
|
708
|
+
return null;
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
/**
|
|
712
|
+
* Execute commands (mix of ref and playwright commands)
|
|
713
|
+
*/
|
|
714
|
+
private async executeCommands(
|
|
715
|
+
commands: string[] | any[],
|
|
637
716
|
page: any,
|
|
638
717
|
memory: JourneyMemory,
|
|
639
718
|
stepNumber: number,
|
|
640
719
|
iteration: number,
|
|
641
720
|
jobId: string
|
|
642
721
|
): Promise<{ executed: string[]; allSucceeded: boolean }> {
|
|
643
|
-
this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s)
|
|
722
|
+
this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s)`);
|
|
644
723
|
|
|
645
724
|
const executed: string[] = [];
|
|
646
|
-
const limitedCommands = commands.slice(0, this.config.maxCommandsPerIteration);
|
|
647
725
|
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
726
|
+
if (commands.length === 0) {
|
|
727
|
+
return { executed: [], allSucceeded: true };
|
|
728
|
+
}
|
|
729
|
+
|
|
730
|
+
// SoM mode: Execute commands through PageSoMHandler
|
|
731
|
+
if (this.config.useSoM && this.somHandler) {
|
|
732
|
+
this.logger?.(`[Orchestrator] Using SoM mode for command execution`, 'log');
|
|
733
|
+
|
|
734
|
+
for (let i = 0; i < commands.length; i++) {
|
|
735
|
+
const cmd = commands[i];
|
|
736
|
+
|
|
737
|
+
// Check if verification or action command
|
|
738
|
+
if (isSomVerification(cmd)) {
|
|
739
|
+
// Handle verification command
|
|
740
|
+
try {
|
|
741
|
+
const result = await this.somHandler.executeVerification(cmd);
|
|
742
|
+
|
|
743
|
+
// Always add command to executed array (even if verification failed)
|
|
744
|
+
// Scripts should contain the expect even if it fails during generation
|
|
745
|
+
if (result.playwrightCommand) {
|
|
746
|
+
executed.push(result.playwrightCommand);
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
if (result.success) {
|
|
750
|
+
this.logger?.(`[Orchestrator] ✓ [${i + 1}/${commands.length}] Verification passed`, 'log');
|
|
751
|
+
|
|
752
|
+
memory.history.push({
|
|
753
|
+
stepNumber,
|
|
754
|
+
iteration,
|
|
755
|
+
action: `Verification ${i + 1}/${commands.length}: ${cmd.verificationType}`,
|
|
756
|
+
code: result.playwrightCommand,
|
|
757
|
+
result: 'success',
|
|
758
|
+
observation: `Verified: ${cmd.description || cmd.expected}`,
|
|
759
|
+
url: page.url(),
|
|
760
|
+
timestamp: Date.now()
|
|
761
|
+
});
|
|
762
|
+
} else {
|
|
763
|
+
this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] Verification failed (non-fatal): ${result.error}`, 'warn');
|
|
764
|
+
|
|
765
|
+
memory.history.push({
|
|
766
|
+
stepNumber,
|
|
767
|
+
iteration,
|
|
768
|
+
action: `Verification ${i + 1}/${commands.length} - FAILED`,
|
|
769
|
+
code: result.playwrightCommand || JSON.stringify(cmd),
|
|
770
|
+
result: 'failure',
|
|
771
|
+
observation: `Failed: ${result.error}`,
|
|
772
|
+
error: result.error,
|
|
773
|
+
url: page.url(),
|
|
774
|
+
timestamp: Date.now()
|
|
775
|
+
});
|
|
776
|
+
|
|
777
|
+
// Continue anyway - verification failures are non-blocking for script generation
|
|
778
|
+
}
|
|
779
|
+
|
|
780
|
+
// Small delay between commands
|
|
781
|
+
if (i < commands.length - 1) {
|
|
782
|
+
await page.waitForTimeout(300);
|
|
783
|
+
}
|
|
784
|
+
|
|
785
|
+
} catch (error: any) {
|
|
786
|
+
this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] Verification exception: ${error.message}`, 'error');
|
|
787
|
+
}
|
|
788
|
+
|
|
789
|
+
} else if (isSomCommand(cmd)) {
|
|
790
|
+
// Handle action command (existing logic)
|
|
791
|
+
const somCommand = cmd as SomCommand;
|
|
792
|
+
|
|
793
|
+
try {
|
|
794
|
+
const result = await this.somHandler.runCommand(
|
|
795
|
+
somCommand,
|
|
796
|
+
this.config.somUseSomIdBasedCommands || false
|
|
797
|
+
);
|
|
798
|
+
|
|
799
|
+
if (result.status === CommandRunStatus.SUCCESS && result.successAttempt) {
|
|
800
|
+
this.logger?.(`[Orchestrator] ✓ [${i + 1}/${commands.length}] SoM action succeeded`, 'log');
|
|
801
|
+
executed.push(result.successAttempt.command!);
|
|
802
|
+
|
|
803
|
+
memory.history.push({
|
|
804
|
+
stepNumber,
|
|
805
|
+
iteration,
|
|
806
|
+
action: `SoM Action ${i + 1}/${commands.length}: ${somCommand.action}`,
|
|
807
|
+
code: result.successAttempt.command!,
|
|
808
|
+
result: 'success',
|
|
809
|
+
observation: 'Executed successfully',
|
|
810
|
+
url: page.url(),
|
|
811
|
+
timestamp: Date.now()
|
|
812
|
+
});
|
|
813
|
+
|
|
814
|
+
// Small delay for form validation/animations
|
|
815
|
+
if (i < commands.length - 1) {
|
|
816
|
+
await page.waitForTimeout(300);
|
|
817
|
+
}
|
|
818
|
+
} else {
|
|
819
|
+
this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] SoM action failed: ${result.error}`, 'error');
|
|
820
|
+
|
|
821
|
+
memory.history.push({
|
|
822
|
+
stepNumber,
|
|
823
|
+
iteration,
|
|
824
|
+
action: `SoM Action ${i + 1}/${commands.length}: ${somCommand.action} - FAILED`,
|
|
825
|
+
code: JSON.stringify(somCommand),
|
|
826
|
+
result: 'failure',
|
|
827
|
+
observation: `Failed: ${result.error}`,
|
|
828
|
+
error: result.error,
|
|
829
|
+
url: page.url(),
|
|
830
|
+
timestamp: Date.now()
|
|
831
|
+
});
|
|
832
|
+
|
|
833
|
+
// Refresh SoM after batch (DOM may have changed)
|
|
834
|
+
if (this.somHandler && page) {
|
|
835
|
+
this.somHandler.setPage(page);
|
|
836
|
+
await this.somHandler.updateSom();
|
|
837
|
+
}
|
|
838
|
+
|
|
839
|
+
return { executed, allSucceeded: false };
|
|
840
|
+
}
|
|
841
|
+
} catch (error: any) {
|
|
842
|
+
this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] SoM action exception: ${error.message}`, 'error');
|
|
843
|
+
|
|
844
|
+
memory.history.push({
|
|
845
|
+
stepNumber,
|
|
846
|
+
iteration,
|
|
847
|
+
action: `SoM Action ${i + 1}/${commands.length} - EXCEPTION`,
|
|
848
|
+
code: JSON.stringify(somCommand),
|
|
849
|
+
result: 'failure',
|
|
850
|
+
observation: `Exception: ${error.message}`,
|
|
851
|
+
error: error.message,
|
|
852
|
+
url: page.url(),
|
|
853
|
+
timestamp: Date.now()
|
|
854
|
+
});
|
|
855
|
+
|
|
856
|
+
// Refresh SoM after batch (DOM may have changed)
|
|
857
|
+
if (this.somHandler && page) {
|
|
858
|
+
this.somHandler.setPage(page);
|
|
859
|
+
await this.somHandler.updateSom();
|
|
860
|
+
}
|
|
861
|
+
|
|
862
|
+
return { executed, allSucceeded: false };
|
|
863
|
+
}
|
|
864
|
+
} else {
|
|
865
|
+
this.logger?.(`[Orchestrator] ⚠ [${i + 1}/${commands.length}] Not a valid SoM command/verification, skipping`, 'warn');
|
|
866
|
+
}
|
|
867
|
+
}
|
|
868
|
+
|
|
869
|
+
// Always wait for page to stabilize after command batch
|
|
870
|
+
// This handles both explicit navigation AND clicks that trigger navigation/SPA routes
|
|
871
|
+
try {
|
|
872
|
+
this.logger?.(`[Orchestrator] Waiting for page to stabilize...`, 'log');
|
|
873
|
+
// Use networkidle with short timeout to catch navigation without blocking on SPAs with continuous requests
|
|
874
|
+
await page.waitForLoadState('networkidle', { timeout: 3000 });
|
|
875
|
+
this.logger?.(`[Orchestrator] Page stabilized (networkidle)`, 'log');
|
|
876
|
+
} catch (error: any) {
|
|
877
|
+
// If networkidle times out, fall back to domcontentloaded
|
|
878
|
+
try {
|
|
879
|
+
await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
|
|
880
|
+
this.logger?.(`[Orchestrator] Page loaded (domcontentloaded)`, 'log');
|
|
881
|
+
} catch (error2: any) {
|
|
882
|
+
this.logger?.(`[Orchestrator] Page load wait timeout (continuing anyway)`, 'warn');
|
|
883
|
+
}
|
|
884
|
+
}
|
|
885
|
+
|
|
886
|
+
// Refresh SoM after batch (DOM may have changed and page is now stable)
|
|
887
|
+
if (this.somHandler && page) {
|
|
888
|
+
this.somHandler.setPage(page);
|
|
889
|
+
await this.somHandler.updateSom();
|
|
890
|
+
}
|
|
891
|
+
|
|
892
|
+
return { executed, allSucceeded: true };
|
|
893
|
+
}
|
|
894
|
+
|
|
895
|
+
// Standard mode: Execute all commands in sequence with small delay between them
|
|
896
|
+
// Delay helps with form validation, button enabling, and animations
|
|
897
|
+
const wrappedCode = (commands as string[]).map((cmd, i) => `
|
|
898
|
+
// Command ${i + 1}/${commands.length}
|
|
652
899
|
try {
|
|
653
900
|
${cmd}
|
|
654
901
|
__results.push({ index: ${i}, success: true });
|
|
902
|
+
${i < commands.length - 1 ? 'await page.waitForTimeout(300);' : ''} // Small delay for form validation/animations
|
|
655
903
|
} catch (error) {
|
|
656
904
|
__results.push({ index: ${i}, success: false, error: error.message });
|
|
657
|
-
throw error;
|
|
658
|
-
}
|
|
659
|
-
}).join('\n');
|
|
905
|
+
throw error;
|
|
906
|
+
}`).join('\n');
|
|
660
907
|
|
|
661
|
-
const
|
|
662
|
-
const __results = [];
|
|
663
|
-
${commandsWithTracking}
|
|
664
|
-
return __results;
|
|
665
|
-
`;
|
|
908
|
+
const fullCode = `const __results = []; ${wrappedCode} return __results;`;
|
|
666
909
|
|
|
667
910
|
try {
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
// Record results for each command
|
|
674
|
-
for (let i = 0; i < limitedCommands.length; i++) {
|
|
675
|
-
const cmd = limitedCommands[i];
|
|
911
|
+
const func = new Function('page', 'expect', 'return (async () => { ' + fullCode + ' })()');
|
|
912
|
+
const results = await func(page, (global as any).expect);
|
|
913
|
+
|
|
914
|
+
for (let i = 0; i < commands.length; i++) {
|
|
915
|
+
const cmd = commands[i];
|
|
676
916
|
const result = results[i];
|
|
677
917
|
|
|
678
918
|
if (result && result.success) {
|
|
679
|
-
this.logger?.(`[Orchestrator] ✓ [${i + 1}/${
|
|
680
|
-
|
|
681
|
-
// Record in history
|
|
919
|
+
this.logger?.(`[Orchestrator] ✓ [${i + 1}/${commands.length}] Success`);
|
|
682
920
|
memory.history.push({
|
|
683
921
|
stepNumber,
|
|
684
922
|
iteration,
|
|
685
|
-
action: `Command ${i + 1}/${
|
|
923
|
+
action: `Command ${i + 1}/${commands.length}`,
|
|
686
924
|
code: cmd,
|
|
687
925
|
result: 'success',
|
|
688
926
|
observation: 'Executed successfully',
|
|
689
927
|
url: page.url(),
|
|
690
928
|
timestamp: Date.now()
|
|
691
929
|
});
|
|
692
|
-
|
|
693
930
|
executed.push(cmd);
|
|
694
931
|
}
|
|
695
932
|
}
|
|
696
933
|
|
|
697
|
-
// Cap history
|
|
698
934
|
if (memory.history.length > this.config.maxHistorySize) {
|
|
699
935
|
memory.history = memory.history.slice(-this.config.maxHistorySize);
|
|
700
936
|
}
|
|
@@ -702,132 +938,25 @@ return __results;
|
|
|
702
938
|
return { executed, allSucceeded: true };
|
|
703
939
|
|
|
704
940
|
} catch (error: any) {
|
|
705
|
-
// One of the commands failed - find which one
|
|
706
941
|
const errorMessage = error.message || String(error);
|
|
942
|
+
this.logger?.(`[Orchestrator] ❌ Command execution failed: ${errorMessage}`, 'error');
|
|
707
943
|
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
${pageInfo.formattedElements}
|
|
720
|
-
|
|
721
|
-
ARIA SNAPSHOT:
|
|
722
|
-
${JSON.stringify(pageInfo.ariaSnapshot, null, 2)}
|
|
723
|
-
|
|
724
|
-
====================================`;
|
|
725
|
-
} catch (debugError: any) {
|
|
726
|
-
pageStateDebug = `Failed to capture page state: ${debugError.message}`;
|
|
727
|
-
}
|
|
728
|
-
}
|
|
729
|
-
|
|
730
|
-
// Record all that succeeded, then the failure
|
|
731
|
-
for (let i = 0; i < limitedCommands.length; i++) {
|
|
732
|
-
const cmd = limitedCommands[i];
|
|
733
|
-
|
|
734
|
-
// This is a failed command (error happened here or earlier)
|
|
735
|
-
if (executed.length <= i) {
|
|
736
|
-
this.logger?.(`[Orchestrator] ✗ [${i + 1}/${limitedCommands.length}] Failed: ${errorMessage}`, 'error');
|
|
737
|
-
|
|
738
|
-
// Log detailed debug info
|
|
739
|
-
if (this.debugMode && pageStateDebug) {
|
|
740
|
-
this.logger?.(pageStateDebug, 'debug');
|
|
741
|
-
}
|
|
742
|
-
|
|
743
|
-
memory.history.push({
|
|
744
|
-
stepNumber,
|
|
745
|
-
iteration,
|
|
746
|
-
action: `Command ${i + 1}/${limitedCommands.length} - FAILED`,
|
|
747
|
-
code: cmd,
|
|
748
|
-
result: 'failure',
|
|
749
|
-
observation: `Failed with error: ${errorMessage}. This selector likely doesn't exist or is incorrect.`,
|
|
750
|
-
error: errorMessage,
|
|
751
|
-
url: page.url(),
|
|
752
|
-
timestamp: Date.now()
|
|
753
|
-
});
|
|
754
|
-
|
|
755
|
-
if (i < limitedCommands.length - 1) {
|
|
756
|
-
this.logger?.(`[Orchestrator] ⚠ Skipping remaining ${limitedCommands.length - i - 1} command(s)`, 'warn');
|
|
757
|
-
}
|
|
758
|
-
|
|
759
|
-
break;
|
|
760
|
-
}
|
|
761
|
-
}
|
|
944
|
+
memory.history.push({
|
|
945
|
+
stepNumber,
|
|
946
|
+
iteration,
|
|
947
|
+
action: `Command - FAILED`,
|
|
948
|
+
code: commands[executed.length] || '',
|
|
949
|
+
result: 'failure',
|
|
950
|
+
observation: `Failed: ${errorMessage}`,
|
|
951
|
+
error: errorMessage,
|
|
952
|
+
url: page.url(),
|
|
953
|
+
timestamp: Date.now()
|
|
954
|
+
});
|
|
762
955
|
|
|
763
956
|
return { executed, allSucceeded: false };
|
|
764
957
|
}
|
|
765
958
|
}
|
|
766
|
-
|
|
767
|
-
/**
|
|
768
|
-
* Execute a single command
|
|
769
|
-
*/
|
|
770
|
-
private async executeCommand(cmd: string, page: any): Promise<void> {
|
|
771
|
-
// Wrap in async function and execute
|
|
772
|
-
const wrapped = `(async () => { ${cmd} })()`;
|
|
773
|
-
|
|
774
|
-
try {
|
|
775
|
-
await eval(wrapped);
|
|
776
|
-
} catch (error: any) {
|
|
777
|
-
// If eval fails, try direct execution with page context
|
|
778
|
-
// Pass both page and expect to make Playwright assertions available
|
|
779
|
-
const func = new Function('page', 'expect', `return (async () => { ${cmd} })()`);
|
|
780
|
-
await func(page, expect);
|
|
781
|
-
}
|
|
782
|
-
}
|
|
783
|
-
|
|
784
|
-
/**
|
|
785
|
-
* Log agent decision
|
|
786
|
-
*/
|
|
787
|
-
private logAgentDecision(decision: AgentDecision, iteration: number): void {
|
|
788
|
-
this.logger?.(`[Orchestrator] 💭 REASONING: ${decision.reasoning}`);
|
|
789
|
-
|
|
790
|
-
if (decision.selfReflection) {
|
|
791
|
-
this.logger?.(`[Orchestrator] 🧠 SELF-REFLECTION:`);
|
|
792
|
-
this.logger?.(`[Orchestrator] Next: ${decision.selfReflection.guidanceForNext}`);
|
|
793
|
-
if (decision.selfReflection.detectingLoop) {
|
|
794
|
-
this.logger?.(`[Orchestrator] 🔄 LOOP DETECTED: ${decision.selfReflection.loopReasoning}`, 'warn');
|
|
795
|
-
}
|
|
796
|
-
}
|
|
797
|
-
|
|
798
|
-
if (decision.toolCalls && decision.toolCalls.length > 0) {
|
|
799
|
-
this.logger?.(`[Orchestrator] 🔧 TOOLS: ${decision.toolCalls.map(t => t.name).join(', ')}`);
|
|
800
|
-
if (decision.toolReasoning) {
|
|
801
|
-
this.logger?.(`[Orchestrator] 📋 Why: ${decision.toolReasoning}`);
|
|
802
|
-
}
|
|
803
|
-
}
|
|
804
|
-
|
|
805
|
-
if (decision.blockerDetected) {
|
|
806
|
-
this.logger?.(`[Orchestrator] 🚧 BLOCKER: ${decision.blockerDetected.description}`, 'warn');
|
|
807
|
-
this.logger?.(`[Orchestrator] 🧹 Clearing with ${decision.blockerDetected.clearingCommands.length} command(s)`);
|
|
808
|
-
}
|
|
809
|
-
|
|
810
|
-
if (decision.stepReEvaluation?.detected) {
|
|
811
|
-
this.logger?.(`[Orchestrator] 🔍 STEP RE-EVALUATION: ${decision.stepReEvaluation.issue}`, 'warn');
|
|
812
|
-
this.logger?.(`[Orchestrator] 📝 Explanation: ${decision.stepReEvaluation.explanation}`);
|
|
813
|
-
}
|
|
814
|
-
|
|
815
|
-
if (decision.commands && decision.commands.length > 0) {
|
|
816
|
-
this.logger?.(`[Orchestrator] 📝 COMMANDS (${decision.commands.length}):`);
|
|
817
|
-
decision.commands.slice(0, 3).forEach((cmd, i) => {
|
|
818
|
-
this.logger?.(`[Orchestrator] ${i + 1}. ${cmd.substring(0, 80)}...`);
|
|
819
|
-
});
|
|
820
|
-
if (decision.commands.length > 3) {
|
|
821
|
-
this.logger?.(`[Orchestrator] ... and ${decision.commands.length - 3} more`);
|
|
822
|
-
}
|
|
823
|
-
if (decision.commandReasoning) {
|
|
824
|
-
this.logger?.(`[Orchestrator] 💡 Why: ${decision.commandReasoning}`);
|
|
825
|
-
}
|
|
826
|
-
}
|
|
827
|
-
|
|
828
|
-
// Experiences will be logged when added to memory, no need to log here
|
|
829
|
-
}
|
|
830
|
-
|
|
959
|
+
|
|
831
960
|
/**
|
|
832
961
|
* Report step progress
|
|
833
962
|
*/
|
|
@@ -857,6 +986,297 @@ ${JSON.stringify(pageInfo.ariaSnapshot, null, 2)}
|
|
|
857
986
|
agentStatus: decision.status
|
|
858
987
|
});
|
|
859
988
|
}
|
|
989
|
+
|
|
990
|
+
/**
 * Execute exploration mode - the agent autonomously explores to achieve the journey goal.
 *
 * Every loop pass builds a fresh context, asks the exploratory agent for a decision,
 * reports the step as IN_PROGRESS, runs any requested tools (re-querying the agent when
 * it asked for the tool results), runs blocker-clearing commands, runs the decision's
 * commands, and finally reports the step as SUCCESS or FAILURE. Both progress reports
 * are sent through `progressReporter.onStepProgress` when that callback is configured.
 *
 * @param page - Browser page handed to the context builder, tools and command executor.
 * @param explorationConfig - Supplies `explorationPrompt`, optional `testDataPrompt` and
 *   optional `maxExplorationSteps` (a falsy value falls back to 50).
 * @param jobId - Identifier forwarded to the agent call, command execution and progress reports.
 * @returns A result whose `terminationReason` is `'complete'` (agent reported completion),
 *   `'agent_stuck'` (agent reported being stuck; `success` is false) or `'system_limit'`
 *   (step budget exhausted; still reported as `success: true`).
 */
async executeExploration(
  page: any,
  explorationConfig: ExplorationMode,
  jobId: string
): Promise<OrchestratorStepResult> {
  this.logger?.(`\n[Orchestrator] ========== EXPLORATION MODE ==========`);
  this.logger?.(`[Orchestrator] 🎯 Journey Goal: ${explorationConfig.explorationPrompt}`);
  if (explorationConfig.testDataPrompt) {
    this.logger?.(`[Orchestrator] 📋 Test Data: ${explorationConfig.testDataPrompt}`);
  }

  const memory: JourneyMemory = {
    history: [],
    experiences: [],
    extractedData: {}
  };

  const maxSteps = explorationConfig.maxExplorationSteps || 50;
  let stepNumber = 0;
  const commandsExecuted: string[] = [];

  while (stepNumber < maxSteps) {
    stepNumber++;

    this.logger?.(`\n[Orchestrator] === Exploration Step ${stepNumber}/${maxSteps} ===`);

    // Build exploratory context
    const context = await this.buildExploratoryContext(
      page,
      explorationConfig.explorationPrompt,
      explorationConfig.testDataPrompt,
      memory,
      stepNumber,
      maxSteps
    );

    // Call agent with exploratory prompt
    const decision = await this.callExploratoryAgent(
      context,
      jobId,
      stepNumber
    );

    this.decisionParser.log(decision, stepNumber);

    // One id per step, shared by the start and completion reports, so a consumer
    // can correlate the two events. (Previously each report minted its own
    // Date.now()-based id and the two never matched.)
    const stepId = `exploration-${stepNumber}-${Date.now()}`;

    // Report step start
    if (this.progressReporter?.onStepProgress) {
      const stepInfo = {
        jobId,
        stepNumber,
        stepId,
        description: decision.reasoning,
        code: '', // Will be filled after commands execute
        status: StepExecutionStatus.IN_PROGRESS,
        wasRepaired: false
      };
      await this.progressReporter.onStepProgress(stepInfo);
    }

    // Execute tools if requested
    if (decision.toolCalls && decision.toolCalls.length > 0) {
      const toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber);

      // If the agent needs the tool results, call it again with them attached
      if (decision.needsToolResults) {
        const updatedContext = { ...context, toolResults };
        const continuedDecision = await this.callExploratoryAgent(updatedContext, jobId, stepNumber);

        decision.commands = continuedDecision.commands || decision.commands;
        decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
        decision.status = continuedDecision.status;
        // Keep the reasoning in step with the status it explains; otherwise the
        // termination log/error below would quote the superseded decision.
        decision.statusReasoning = continuedDecision.statusReasoning || decision.statusReasoning;
      }
    }

    // Handle blocker clearing (its success flag is not consulted; only the executed commands are kept)
    if (decision.blockerDetected && decision.blockerDetected.clearingCommands) {
      this.logger?.(`[Orchestrator] 🚧 Clearing blocker: ${decision.blockerDetected.description}`);
      const blockerResult = await this.executeCommands(
        decision.blockerDetected.clearingCommands,
        page,
        memory,
        stepNumber,
        1,
        jobId
      );
      commandsExecuted.push(...blockerResult.executed);
    }

    // Execute exploration commands; a step without commands counts as succeeded
    let commandsSucceeded = true;
    if (decision.commands && decision.commands.length > 0) {
      const executeResult = await this.executeCommands(
        decision.commands,
        page,
        memory,
        stepNumber,
        1,
        jobId
      );
      commandsExecuted.push(...executeResult.executed);
      commandsSucceeded = executeResult.allSucceeded;
    }

    // Report step completion
    if (this.progressReporter?.onStepProgress) {
      const stepInfo = {
        jobId,
        stepNumber,
        stepId,
        description: decision.reasoning,
        code: decision.commands?.join('\n') || '',
        status: commandsSucceeded ? StepExecutionStatus.SUCCESS : StepExecutionStatus.FAILURE,
        error: commandsSucceeded ? undefined : 'Command execution failed',
        wasRepaired: false
      };
      await this.progressReporter.onStepProgress(stepInfo);
    }

    // Add experiences, keeping only the newest `maxExperiences` entries
    if (decision.experiences) {
      memory.experiences.push(...decision.experiences);
      if (memory.experiences.length > this.config.maxExperiences) {
        memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
      }
    }

    // Store note for next iteration
    if (decision.noteToFutureSelf) {
      memory.latestNote = {
        fromIteration: stepNumber,
        content: decision.noteToFutureSelf
      };
    }

    // Check termination
    if (decision.status === 'complete') {
      this.logger?.(`[Orchestrator] ✅ Journey exploration complete: ${decision.statusReasoning}`);
      return {
        success: true,
        commands: commandsExecuted,
        iterations: stepNumber,
        terminationReason: 'complete',
        memory
      };
    } else if (decision.status === 'stuck') {
      this.logger?.(`[Orchestrator] ❌ Exploration stuck: ${decision.statusReasoning}`);
      return {
        success: false,
        commands: commandsExecuted,
        iterations: stepNumber,
        terminationReason: 'agent_stuck',
        memory,
        error: decision.statusReasoning
      };
    }
  }

  // Hit max steps - not necessarily a failure
  this.logger?.(`[Orchestrator] ⚠ Maximum exploration steps reached (budget limit)`);
  return {
    success: true, // Not a failure - just budget limit
    commands: commandsExecuted,
    iterations: stepNumber,
    terminationReason: 'system_limit',
    memory
  };
}
|
|
1161
|
+
|
|
1162
|
+
/**
 * Assemble the AgentContext handed to the exploratory agent for a single step.
 *
 * Reads page info (via PageInfoRetry), the current URL and the tail of the step
 * history; when SoM is enabled and a handler exists it also refreshes the SoM
 * markers and captures a screenshot plus element map. A failure anywhere in the
 * SoM section is logged and leaves whatever was not yet captured as `undefined`.
 * A captured screenshot is additionally remembered in `previousSomScreenshot`.
 *
 * @param page - Page to inspect.
 * @param explorationPrompt - Used for both `overallGoal` and `currentStepGoal`.
 * @param testDataPrompt - Optional test data prompt, passed through on the context.
 * @param memory - Source of history, experiences, extracted data and the latest note.
 * @param stepNumber - Current exploration step.
 * @param maxSteps - Step budget, exposed on the context as `totalSteps`.
 * @returns The context for this step.
 */
private async buildExploratoryContext(
  page: any,
  explorationPrompt: string,
  testDataPrompt: string | undefined,
  memory: JourneyMemory,
  stepNumber: number,
  maxSteps: number
): Promise<AgentContext> {
  // Page info goes through the retry helper first (useful right after a navigation)
  const pageInfo = await PageInfoRetry.getWithRetry(page);
  const url = page.url();
  const historyTail = memory.history.slice(-this.config.recentStepsCount);

  // Populated only when the SoM section below gets far enough
  let markedScreenshot: string | undefined;
  let elementMap: string | undefined;

  if (this.config.useSoM && this.somHandler) {
    try {
      this.somHandler.setPage(page);

      // Brief, best-effort stability wait; an already-loaded page or a timeout is fine
      try {
        await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
      } catch (error: any) {
        // Deliberately ignored - carry on with the SoM refresh
      }

      // Refresh markers, then grab the screenshot (original note: viewport only -
      // the agent can scroll or request the full page) and the element map
      await this.somHandler.updateSom();
      markedScreenshot = await this.somHandler.getScreenshot(true, false, 60);
      elementMap = this.somHandler.getSomElementMap();

      this.logger?.(`[Orchestrator] SoM screenshot captured for exploratory agent`, 'log');
    } catch (error: any) {
      this.logger?.(`[Orchestrator] Failed to capture SoM screenshot: ${error.message}`, 'error');
    }
  }

  const context = {
    overallGoal: explorationPrompt,
    currentStepGoal: explorationPrompt, // single journey: step goal equals overall goal
    stepNumber,
    totalSteps: maxSteps,
    completedSteps: [],
    remainingSteps: [],
    currentPageInfo: pageInfo,
    currentURL: url,
    recentSteps: historyTail,
    experiences: memory.experiences,
    extractedData: memory.extractedData,
    noteFromPreviousIteration: memory.latestNote,
    testDataPrompt, // read back by callExploratoryAgent
    somScreenshot: markedScreenshot, // current SoM screenshot
    somElementMap: elementMap // SoM element details for disambiguation
  };

  // Remember this screenshot as the "previous" one for the next iteration
  if (markedScreenshot) {
    this.previousSomScreenshot = markedScreenshot;
  }

  return context;
}
|
|
1227
|
+
|
|
1228
|
+
/**
 * Ask the LLM for the next exploratory decision.
 *
 * Builds the system prompt (SoM variant when `config.useSoM` is set, otherwise the
 * standard exploratory prompt with tool descriptions) and the exploratory user prompt,
 * attaches the SoM screenshot from the context when one is present, calls the LLM
 * provider, reports token usage when the response carries usage data and an
 * `onTokensUsed` callback is configured, and parses the answer with the decision parser.
 *
 * @param context - Agent context for this step; `overallGoal`, `testDataPrompt`,
 *   `totalSteps` and `somScreenshot` are read from it.
 * @param jobId - Identifier included in the token usage report.
 * @param stepNumber - Step number passed to the prompt builder and the usage report.
 * @returns The parsed agent decision.
 */
private async callExploratoryAgent(
  context: AgentContext,
  jobId: string,
  stepNumber: number
): Promise<AgentDecision> {
  const toolDescriptions = this.toolRegistry.generateToolDescriptions();

  // Use SoM system prompt if in SoM mode, otherwise use standard exploratory prompt
  const systemPrompt = this.config.useSoM
    ? OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates)
    : OrchestratorPrompts.buildExploratorySystemPrompt(toolDescriptions);

  const userPrompt = OrchestratorPrompts.buildExploratoryUserPrompt(
    context,
    context.overallGoal,
    context.testDataPrompt, // Pass testDataPrompt from context
    stepNumber,
    context.totalSteps
  );

  const llmRequest: any = {
    model: DEFAULT_MODEL,
    systemPrompt,
    userPrompt
  };

  // Include current SoM screenshot as image; remember whether we did so the
  // usage report below reflects what was actually sent.
  const includesImage = Boolean(context.somScreenshot);
  if (includesImage) {
    llmRequest.imageUrl = context.somScreenshot;
    this.logger?.(`[Orchestrator] Including SoM screenshot in exploratory LLM request`, 'log');
  }

  const response = await this.llmFacade.llmProvider.callLLM(llmRequest);

  // Report token usage
  if (response.usage && this.progressReporter?.onTokensUsed) {
    await this.progressReporter.onTokensUsed({
      jobId,
      stepNumber,
      iteration: 1,
      inputTokens: response.usage.inputTokens,
      outputTokens: response.usage.outputTokens,
      includesImage, // was hard-coded to false even when a screenshot was attached
      model: DEFAULT_MODEL,
      timestamp: Date.now()
    });
  }

  // Parse response (same JSON format as regular mode)
  const decision = this.decisionParser.parse(response.answer);
  return decision;
}
|
|
860
1280
|
}
|
|
861
1281
|
|
|
862
1282
|
|