testchimp-runner-core 0.0.35 → 0.0.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-agent.js +7 -4
- package/dist/orchestrator/orchestrator-agent.js.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.js +73 -15
- package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
- package/dist/orchestrator/page-som-handler.d.ts +1 -2
- package/dist/orchestrator/page-som-handler.d.ts.map +1 -1
- package/dist/orchestrator/page-som-handler.js +51 -25
- package/dist/orchestrator/page-som-handler.js.map +1 -1
- package/package.json +6 -1
- package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
- package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
- package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
- package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
- package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
- package/plandocs/INTEGRATION_COMPLETE.md +0 -322
- package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
- package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
- package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
- package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
- package/plandocs/PHASE_1_COMPLETE.md +0 -165
- package/plandocs/PHASE_1_SUMMARY.md +0 -184
- package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
- package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
- package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
- package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
- package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
- package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
- package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
- package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
- package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
- package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
- package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
- package/plandocs/exploratory-mode-support-v2.plan.md +0 -953
- package/plandocs/exploratory-mode-support.plan.md +0 -928
- package/plandocs/journey-id-tracking-addendum.md +0 -227
- package/releasenotes/RELEASE_0.0.26.md +0 -165
- package/releasenotes/RELEASE_0.0.27.md +0 -236
- package/releasenotes/RELEASE_0.0.28.md +0 -286
- package/src/auth-config.ts +0 -84
- package/src/credit-usage-service.ts +0 -188
- package/src/env-loader.ts +0 -103
- package/src/execution-service.ts +0 -996
- package/src/file-handler.ts +0 -104
- package/src/index.ts +0 -432
- package/src/llm-facade.ts +0 -821
- package/src/llm-provider.ts +0 -53
- package/src/model-constants.ts +0 -35
- package/src/orchestrator/decision-parser.ts +0 -139
- package/src/orchestrator/index.ts +0 -58
- package/src/orchestrator/orchestrator-agent.ts +0 -1282
- package/src/orchestrator/orchestrator-prompts.ts +0 -786
- package/src/orchestrator/page-som-handler.ts +0 -1565
- package/src/orchestrator/som-types.ts +0 -188
- package/src/orchestrator/tool-registry.ts +0 -184
- package/src/orchestrator/tools/check-page-ready.ts +0 -75
- package/src/orchestrator/tools/extract-data.ts +0 -92
- package/src/orchestrator/tools/index.ts +0 -15
- package/src/orchestrator/tools/inspect-page.ts +0 -42
- package/src/orchestrator/tools/recall-history.ts +0 -72
- package/src/orchestrator/tools/refresh-som-markers.ts +0 -69
- package/src/orchestrator/tools/take-screenshot.ts +0 -128
- package/src/orchestrator/tools/verify-action-result.ts +0 -159
- package/src/orchestrator/tools/view-previous-screenshot.ts +0 -103
- package/src/orchestrator/types.ts +0 -291
- package/src/playwright-mcp-service.ts +0 -224
- package/src/progress-reporter.ts +0 -144
- package/src/prompts.ts +0 -842
- package/src/providers/backend-proxy-llm-provider.ts +0 -91
- package/src/providers/local-llm-provider.ts +0 -38
- package/src/scenario-service.ts +0 -252
- package/src/scenario-worker-class.ts +0 -1110
- package/src/script-utils.ts +0 -203
- package/src/types.ts +0 -239
- package/src/utils/browser-utils.ts +0 -348
- package/src/utils/coordinate-converter.ts +0 -162
- package/src/utils/page-info-retry.ts +0 -65
- package/src/utils/page-info-utils.ts +0 -285
- package/testchimp-runner-core-0.0.35.tgz +0 -0
- package/tsconfig.json +0 -19
|
@@ -1,1282 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Orchestrator Agent
|
|
3
|
-
* Single agent with tool-use capabilities that maintains journey memory and executes scenarios
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import { expect } from '@playwright/test';
|
|
7
|
-
import { LLMFacade } from '../llm-facade';
|
|
8
|
-
import { ProgressReporter, TokenUsage, StepExecutionStatus } from '../progress-reporter';
|
|
9
|
-
import { getEnhancedPageInfo, PageInfo } from '../utils/page-info-utils';
|
|
10
|
-
import { CoordinateConverter } from '../utils/coordinate-converter';
|
|
11
|
-
import { ToolRegistry, ToolExecutionContext } from './tool-registry';
|
|
12
|
-
import { DEFAULT_MODEL } from '../model-constants';
|
|
13
|
-
import {
|
|
14
|
-
AgentConfig,
|
|
15
|
-
AgentContext,
|
|
16
|
-
AgentDecision,
|
|
17
|
-
JourneyMemory,
|
|
18
|
-
MemoryStep,
|
|
19
|
-
OrchestratorStepResult,
|
|
20
|
-
SelfReflection,
|
|
21
|
-
NoteToFutureSelf,
|
|
22
|
-
CoordinateAction,
|
|
23
|
-
ExplorationMode,
|
|
24
|
-
DEFAULT_AGENT_CONFIG
|
|
25
|
-
} from './types';
|
|
26
|
-
import { OrchestratorPrompts } from './orchestrator-prompts';
|
|
27
|
-
import { PageInfoRetry } from '../utils/page-info-retry';
|
|
28
|
-
import { DecisionParser } from './decision-parser';
|
|
29
|
-
import { PageSoMHandler } from './page-som-handler';
|
|
30
|
-
import { SomCommand, CommandRunStatus, InteractionAction, isSomVerification, isSomCommand, SomVerification } from './som-types';
|
|
31
|
-
|
|
32
|
-
/**
|
|
33
|
-
* Orchestrator Agent - manages step execution with tool use and memory
|
|
34
|
-
*/
|
|
35
|
-
export class OrchestratorAgent {
|
|
36
|
-
private llmFacade: LLMFacade;
|
|
37
|
-
private toolRegistry: ToolRegistry;
|
|
38
|
-
private progressReporter?: ProgressReporter;
|
|
39
|
-
private config: Required<AgentConfig>;
|
|
40
|
-
private logger?: (message: string, level?: 'log' | 'error' | 'warn' | 'debug') => void;
|
|
41
|
-
private debugMode: boolean = false;
|
|
42
|
-
private decisionParser: DecisionParser;
|
|
43
|
-
private somHandler?: PageSoMHandler;
|
|
44
|
-
private previousSomScreenshot?: string; // Track previous iteration's screenshot
|
|
45
|
-
|
|
46
|
-
/**
 * Stores the agent's collaborators and resolves its effective configuration.
 *
 * @param llmFacade - facade kept for later LLM calls
 * @param toolRegistry - registry kept for tool lookups
 * @param config - partial overrides, spread on top of DEFAULT_AGENT_CONFIG
 * @param progressReporter - optional progress sink
 * @param logger - optional log callback; also handed to DecisionParser and the SoM handler
 * @param debugMode - optional debug flag; a falsy value is stored as `false`
 */
constructor(
  llmFacade: LLMFacade,
  toolRegistry: ToolRegistry,
  config?: Partial<AgentConfig>,
  progressReporter?: ProgressReporter,
  logger?: (message: string, level?: 'log' | 'error' | 'warn' | 'debug') => void,
  debugMode?: boolean
) {
  // Caller-supplied overrides win over the defaults.
  const effectiveConfig = { ...DEFAULT_AGENT_CONFIG, ...config };

  this.logger = logger;
  this.debugMode = debugMode ? debugMode : false;
  this.progressReporter = progressReporter;
  this.config = effectiveConfig;
  this.toolRegistry = toolRegistry;
  this.llmFacade = llmFacade;
  this.decisionParser = new DecisionParser(logger);

  // With SoM enabled the handler is created up front without a page;
  // buildAgentContext later points it at the live page via setPage().
  if (this.config.useSoM) {
    this.somHandler = new PageSoMHandler(null as any, this.logger);
  }
}
|
|
67
|
-
|
|
68
|
-
/**
 * Switches the agent's debug flag after construction.
 *
 * @param enabled - value written to `debugMode`
 */
setDebugMode(enabled: boolean): void {
  this.debugMode = enabled;
}
|
|
71
|
-
|
|
72
|
-
/**
 * Execute a single step of the scenario.
 *
 * Runs an observe → decide → act loop for at most `config.maxIterationsPerStep`
 * iterations. Each iteration:
 *   1. builds a fresh agent context and asks the agent for a decision,
 *   2. runs any requested tools (screenshot tool calls are dropped once three
 *      screenshots were recorded for this step),
 *   3. runs blocker-clearing commands, then the main commands,
 *   4. optionally runs a coordinate action and verifies it with before/after screenshots,
 *   5. folds experiences and the note-to-self into `memory`,
 *   6. returns when the agent reports a status other than `continue`
 *      (a `complete` reported in an iteration with a failure is ignored).
 *
 * The loop is also cut short after five consecutive failing iterations or
 * two unsuccessful coordinate attempts.
 *
 * @param page - browser page the step acts on
 * @param stepDescription - goal of this step
 * @param stepNumber - 1-based index of the step
 * @param totalSteps - number of steps in the scenario
 * @param scenarioSteps - all step descriptions of the scenario
 * @param memory - journey memory; mutated (experiences, latestNote)
 * @param jobId - job identifier forwarded to reporting and command execution
 * @param priorSteps - repair-mode context (undefined for script generation)
 * @param nextSteps - repair-mode context (undefined for script generation)
 * @returns the step outcome, including the commands executed and the termination reason
 */
async executeStep(
  page: any,
  stepDescription: string,
  stepNumber: number,
  totalSteps: number,
  scenarioSteps: string[],
  memory: JourneyMemory,
  jobId: string,
  priorSteps?: string[],
  nextSteps?: string[]
): Promise<OrchestratorStepResult> {
  // System-enforced limits for a single step.
  const MAX_SCREENSHOTS_PER_STEP = 3;
  const MAX_COORDINATE_ATTEMPTS = 2;
  // 3 selector attempts + 2 coordinate attempts.
  const MAX_CONSECUTIVE_FAILURES = 5;

  this.logger?.(`\n[Orchestrator] ========== STEP ${stepNumber}/${totalSteps} ==========`);
  this.logger?.(`[Orchestrator] 🎯 Goal: ${stepDescription}`);

  let iteration = 0;
  let noteToSelf: NoteToFutureSelf | undefined = memory.latestNote; // Start with note from previous step
  const commandsExecuted: string[] = [];
  let consecutiveFailures = 0; // Consecutive iterations with failed commands
  let coordinateAttempts = 0; // Coordinate mode attempts in this step

  // A history entry counts as a screenshot if its code or action mentions one.
  const isScreenshotEntry = (entry: { code: string; action: string }): boolean =>
    entry.code.includes('take_screenshot') || entry.action.includes('Screenshot');

  // Shared shape for every "give up, agent is stuck" exit. Reads `iteration`
  // at call time, so it always reports the current iteration count.
  const stuckResult = (error: string): OrchestratorStepResult => ({
    success: false,
    commands: commandsExecuted,
    iterations: iteration,
    terminationReason: 'agent_stuck',
    memory,
    error
  });

  while (iteration < this.config.maxIterationsPerStep) {
    iteration++;

    this.logger?.(`\n[Orchestrator] === Iteration ${iteration}/${this.config.maxIterationsPerStep} ===`);

    // Build context for agent
    const context = await this.buildAgentContext(
      page,
      stepDescription,
      stepNumber,
      totalSteps,
      scenarioSteps,
      memory,
      consecutiveFailures,
      noteToSelf, // Note from previous iteration
      priorSteps, // Repair context
      nextSteps // Repair context
    );

    // Call agent to make decision
    const decision = await this.callAgent(context, jobId, stepNumber, iteration, consecutiveFailures);

    // Log agent's reasoning
    this.decisionParser.log(decision, iteration);

    // Report progress
    await this.reportStepProgress(jobId, stepNumber, stepDescription, decision, iteration);

    // Execute tools if requested (tools are READ-ONLY, they don't change state)
    let toolResults: Record<string, any> = {};

    // ANTI-LOOP: detect and block screenshot loops (per step)
    const screenshotsThisStep = memory.history.filter(
      s => s.stepNumber === stepNumber && isScreenshotEntry(s)
    );
    const recentScreenshots = memory.history.slice(-3).filter(s => isScreenshotEntry(s));
    const screenshotsBlocked = screenshotsThisStep.length >= MAX_SCREENSHOTS_PER_STEP;

    if (screenshotsBlocked) {
      this.logger?.(`[Orchestrator] 🚨 SCREENSHOT LOOP - ${screenshotsThisStep.length} screenshots THIS STEP! BLOCKING further screenshots`, 'error');
    } else if (recentScreenshots.length >= 2 && iteration >= 3) {
      this.logger?.(`[Orchestrator] ⚠️ WARNING: ${recentScreenshots.length} screenshots in last 3 iterations - agent may be looping`, 'warn');
    }

    if (decision.toolCalls && decision.toolCalls.length > 0) {
      // ENFORCE: drop screenshot tool calls if too many were taken in this step
      if (screenshotsBlocked) {
        decision.toolCalls = decision.toolCalls.filter(tc => tc.name !== 'take_screenshot');
        if (decision.toolCalls.length === 0) {
          this.logger?.(`[Orchestrator] 🚫 REJECTED screenshot tool call - loop detected. Agent must ACT.`, 'warn');
          // NOTE(review): this is an array while executeTools is declared to return
          // Record<string, any>; shape kept as-is because the consumer of
          // `toolResults` is not visible here — confirm which shape it expects.
          toolResults = [{
            toolName: 'take_screenshot',
            success: false,
            error: 'SYSTEM BLOCKED: Too many screenshots taken. You must use existing DOM snapshots and execute commands now. Analysis paralysis detected.',
            data: null
          }];
        }
      }

      if (decision.toolCalls.length > 0) {
        toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber, context.currentPageInfo.refMap);
      }

      // If agent wants to see tool results before proceeding, call agent again
      if (decision.needsToolResults) {
        const updatedContext = { ...context, toolResults };
        const continuedDecision = await this.callAgent(updatedContext, jobId, stepNumber, iteration, consecutiveFailures);

        // Merge continued decision (commands fall back to the first decision's)
        decision.commands = continuedDecision.commands || decision.commands;
        decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
        decision.status = continuedDecision.status;
        decision.statusReasoning = continuedDecision.statusReasoning;
        decision.reasoning = continuedDecision.reasoning;
      }
    }

    // Execute commands sequentially
    let iterationHadFailure = false;

    // Handle blocker if detected (clear blocker FIRST, then proceed with main commands)
    const blocker = decision.blockerDetected;
    if (blocker && blocker.clearingCommands && blocker.clearingCommands.length > 0) {
      this.logger?.(`[Orchestrator] 🚧 BLOCKER DETECTED: ${blocker.description}`);
      this.logger?.(`[Orchestrator] 🧹 Clearing blocker with ${blocker.clearingCommands.length} command(s)...`);

      const blockerResult = await this.executeCommands(
        blocker.clearingCommands,
        page,
        memory,
        stepNumber,
        iteration,
        jobId
      );

      // Add blocker commands with comment to output
      if (blockerResult.executed.length > 0) {
        commandsExecuted.push(`// Blocker: ${blocker.description}`);
        commandsExecuted.push(...blockerResult.executed);
      }

      // If blocker clearing failed, track it
      if (!blockerResult.allSucceeded) {
        this.logger?.(`[Orchestrator] ❌ Failed to clear blocker - continuing anyway`);
        consecutiveFailures++;
        iterationHadFailure = true;
      }
    }

    // Execute main commands (only if no blocker failure)
    if (!iterationHadFailure && decision.commands && decision.commands.length > 0) {
      const executeResult = await this.executeCommands(
        decision.commands,
        page,
        memory,
        stepNumber,
        iteration,
        jobId
      );

      commandsExecuted.push(...executeResult.executed);

      // Track failures
      if (!executeResult.allSucceeded) {
        this.logger?.(`[Orchestrator] ⚠ Command execution stopped at failure`);
        consecutiveFailures++;
        iterationHadFailure = true;
      } else {
        consecutiveFailures = 0; // Reset on success
      }
    }

    // Handle coordinate-based actions (fallback when selectors fail) - ONLY if enabled
    const coordinateAction = decision.coordinateAction;
    if (this.config.enableCoordinateMode && coordinateAction && !iterationHadFailure) {
      coordinateAttempts++;
      const attemptsExhausted = coordinateAttempts >= MAX_COORDINATE_ATTEMPTS;

      this.logger?.(`[Orchestrator] 🎯 Coordinate Action (attempt ${coordinateAttempts}/${MAX_COORDINATE_ATTEMPTS}): ${coordinateAction.action} at (${coordinateAction.xPercent}%, ${coordinateAction.yPercent}%)`);

      try {
        // BEFORE screenshot for visual verification
        const beforeDataUrl = await this.captureViewportJpegDataUrl(page);

        // Generate Playwright commands from coordinate action
        const coordCommands = await CoordinateConverter.generateCommands(coordinateAction, page);

        this.logger?.(`[Orchestrator] Generated commands:`);
        for (const cmd of coordCommands) {
          this.logger?.(` ${cmd}`);
        }

        // Execute coordinate commands
        const coordResult = await this.executeCommands(
          coordCommands,
          page,
          memory,
          stepNumber,
          iteration,
          jobId
        );

        commandsExecuted.push(...coordResult.executed);

        if (!coordResult.allSucceeded) {
          this.logger?.(`[Orchestrator] ❌ Coordinate action failed (Playwright error)`);
          consecutiveFailures++;
          iterationHadFailure = true;

          if (attemptsExhausted) {
            this.logger?.(`[Orchestrator] 🛑 Coordinate mode exhausted (${MAX_COORDINATE_ATTEMPTS} attempts) - marking stuck`);
            return stuckResult(`Coordinate fallback failed after ${MAX_COORDINATE_ATTEMPTS} attempts - unable to proceed`);
          }
        } else {
          this.logger?.(`[Orchestrator] ✅ Coordinate action succeeded (no Playwright error)`);

          // CRITICAL: verify visually that the goal was achieved (the click may have
          // landed on the wrong place). Wait for network idle first, max 10s.
          try {
            await page.waitForLoadState('networkidle', { timeout: 10000 });
          } catch {
            // Page may still be loading; proceed with verification anyway.
            this.logger?.(`[Orchestrator] ⚠️ Network idle timeout after 10s, proceeding with verification`, 'warn');
          }

          const afterDataUrl = await this.captureViewportJpegDataUrl(page);

          this.logger?.(`[Orchestrator] 📸 Verifying coordinate action visually...`);

          // scenarioSteps may not line up with stepNumber; fall back to the step's
          // own description so the prompt never reads "Goal: undefined".
          const verificationGoal = scenarioSteps[stepNumber - 1] ?? stepDescription;

          const verificationRequest = {
            model: 'gpt-5-mini',
            systemPrompt: 'You are a visual verification expert for web automation. Compare before/after screenshots to determine if an action achieved its goal.',
            userPrompt: `Goal: ${verificationGoal}\n\nA coordinate-based action was just executed. Compare the BEFORE and AFTER screenshots.\n\nDid the action achieve the goal? Respond with JSON:\n{\n "verified": boolean,\n "reasoning": "What changed (or didn't change) between screenshots",\n "visibleChanges": ["List of UI changes observed"],\n "clickedWrongPlace": boolean\n}\n\nBe strict: Only return verified=true if you clearly see the expected change matching the goal.`,
            images: [
              { label: 'BEFORE', dataUrl: beforeDataUrl },
              { label: 'AFTER', dataUrl: afterDataUrl }
            ]
          };

          const verificationResponse = await this.llmFacade.llmProvider.callLLM(verificationRequest);
          const jsonMatch = verificationResponse.answer.match(/\{[\s\S]*\}/);

          if (jsonMatch) {
            // A malformed payload makes JSON.parse throw; the outer catch counts
            // that as a failed coordinate attempt.
            const verificationResult = JSON.parse(jsonMatch[0]);
            const verified = verificationResult.verified === true;
            const reasoning = verificationResult.reasoning || 'No reasoning provided';
            const clickedWrongPlace = verificationResult.clickedWrongPlace === true;

            this.logger?.(`[Orchestrator] 📊 Visual verification: ${verified ? '✅ VERIFIED' : '❌ NOT VERIFIED'}`);
            this.logger?.(`[Orchestrator] 💭 Reasoning: ${reasoning}`);

            if (verified) {
              // Goal achieved! Can mark complete
              consecutiveFailures = 0;

              // Store note for context
              noteToSelf = {
                fromIteration: iteration,
                content: `Coordinate action verified successful: ${reasoning}`
              };
              memory.latestNote = noteToSelf;
            } else {
              // Goal NOT achieved despite no error
              this.logger?.(`[Orchestrator] ⚠️ Coordinate click succeeded but goal NOT achieved`, 'warn');
              consecutiveFailures++;
              iterationHadFailure = true;

              // Store diagnostic note for next attempt
              const diagnostic = clickedWrongPlace
                ? `Clicked wrong place. ${reasoning}. Try different coordinates.`
                : `Action executed but goal not achieved. ${reasoning}. May need different approach.`;

              noteToSelf = {
                fromIteration: iteration,
                content: diagnostic
              };
              memory.latestNote = noteToSelf;

              if (attemptsExhausted) {
                this.logger?.(`[Orchestrator] 🛑 Coordinate mode exhausted (${MAX_COORDINATE_ATTEMPTS} attempts, none achieved goal) - marking stuck`);
                return stuckResult(`Coordinate actions clicked but didn't achieve goal: ${reasoning}`);
              }
            }
          } else {
            this.logger?.(`[Orchestrator] ⚠️ Could not parse verification response - treating as unverified`, 'warn');
            consecutiveFailures++;
            iterationHadFailure = true;

            // Same give-up rule as every other failed coordinate attempt.
            if (attemptsExhausted) {
              this.logger?.(`[Orchestrator] 🛑 Coordinate mode exhausted (${MAX_COORDINATE_ATTEMPTS} attempts) - marking stuck`);
              return stuckResult(`Coordinate fallback failed after ${MAX_COORDINATE_ATTEMPTS} attempts - unable to proceed`);
            }
          }
        }
      } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        this.logger?.(`[Orchestrator] ❌ Coordinate action error: ${message}`, 'error');
        consecutiveFailures++;
        iterationHadFailure = true;

        if (attemptsExhausted) {
          this.logger?.(`[Orchestrator] 🛑 Coordinate mode exhausted (${MAX_COORDINATE_ATTEMPTS} attempts) - marking stuck`);
          return stuckResult(`Coordinate fallback failed after ${MAX_COORDINATE_ATTEMPTS} attempts - unable to proceed`);
        }
      }
    }

    // System-enforced stuck detection (agent might not detect it)
    if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
      this.logger?.(`[Orchestrator] 🛑 SYSTEM: ${consecutiveFailures} consecutive failures detected - forcing stuck`, 'warn');
      return stuckResult(`Failed ${consecutiveFailures} iterations in a row - unable to proceed`);
    }

    // Update memory with experiences
    if (decision.experiences && decision.experiences.length > 0) {
      for (const exp of decision.experiences) {
        // Deduplicate - skip if one text contains the other (case-insensitive)
        const candidate = exp.toLowerCase();
        const exists = memory.experiences.some(existing => {
          const known = existing.toLowerCase();
          return known.includes(candidate) || candidate.includes(known);
        });

        if (!exists) {
          memory.experiences.push(exp);
          this.logger?.(`[Orchestrator] 📚 Experience: ${exp}`);
        }
      }

      // Cap experiences, keeping the most recent ones
      if (memory.experiences.length > this.config.maxExperiences) {
        memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
      }
    }

    // Store note to future self (tactical memory across iterations AND steps)
    if (decision.noteToFutureSelf) {
      noteToSelf = {
        fromIteration: iteration,
        content: decision.noteToFutureSelf
      };
      memory.latestNote = noteToSelf; // Persist in journey memory across steps
      this.logger?.(`[Orchestrator] 📝 Note to self: ${decision.noteToFutureSelf}`);
    }

    // Check termination
    if (decision.status !== 'continue') {
      this.logger?.(`[Orchestrator] 🎯 Status: ${decision.status}`);
      this.logger?.(`[Orchestrator] 💭 Reason: ${decision.statusReasoning}`);

      // SAFETY CHECK: don't allow "complete" if commands failed this iteration
      if (decision.status === 'complete' && iterationHadFailure) {
        this.logger?.(`[Orchestrator] ⚠️ OVERRIDE: Agent said "complete" but commands FAILED - forcing "continue"`, 'warn');
        this.logger?.(`[Orchestrator] 💭 System: Commands must succeed before marking complete`);
        continue; // next iteration
      }

      // Valid termination
      return {
        success: decision.status === 'complete',
        commands: commandsExecuted,
        iterations: iteration,
        terminationReason: decision.status === 'complete' ? 'complete' :
          decision.status === 'stuck' ? 'agent_stuck' :
          'infeasible',
        memory
      };
    }
  }

  // Hit iteration limit
  this.logger?.(`[Orchestrator] ⚠ Maximum iterations reached (${this.config.maxIterationsPerStep})`);

  return {
    success: false,
    commands: commandsExecuted,
    iterations: iteration,
    terminationReason: 'system_limit',
    memory,
    error: 'Maximum iterations reached'
  };
}

/**
 * Captures the current viewport as a JPEG (quality 60) and returns it as a
 * base64 data URL.
 *
 * The `encoding: 'base64'` option is still passed for drivers that honour it
 * and return a string. When the driver returns binary data instead, it is
 * base64-encoded here; interpolating a Buffer directly into the URL would
 * decode it as UTF-8 and produce an unusable image.
 */
private async captureViewportJpegDataUrl(page: any): Promise<string> {
  const shot = await page.screenshot({ encoding: 'base64', fullPage: false, type: 'jpeg', quality: 60 });
  const base64 = typeof shot === 'string' ? shot : Buffer.from(shot).toString('base64');
  return `data:image/jpeg;base64,${base64}`;
}
|
|
468
|
-
|
|
469
|
-
/**
 * Build the context handed to the agent for one iteration.
 *
 * Reads fresh page info and the current URL, takes the last
 * `config.recentStepsCount` history entries and, when SoM is enabled,
 * refreshes the SoM markers and captures a marked viewport screenshot plus
 * the element map. A failure in the SoM part is logged and the context is
 * still returned, just without the SoM fields.
 *
 * Side effect: the captured SoM screenshot is remembered in
 * `previousSomScreenshot`.
 *
 * @param page - browser page to inspect
 * @param currentStepGoal - goal of the step being executed
 * @param stepNumber - 1-based index of the step
 * @param totalSteps - number of steps in the scenario
 * @param scenarioSteps - all step descriptions; joined into `overallGoal`
 * @param memory - journey memory supplying history, experiences and extracted data
 * @param consecutiveFailures - accepted for signature compatibility; not read here
 * @param noteFromPreviousIteration - tactical note passed through to the context
 * @param priorSteps - repair-mode context (undefined for script generation)
 * @param nextSteps - repair-mode context (undefined for script generation)
 */
private async buildAgentContext(
  page: any,
  currentStepGoal: string,
  stepNumber: number,
  totalSteps: number,
  scenarioSteps: string[],
  memory: JourneyMemory,
  consecutiveFailures?: number,
  noteFromPreviousIteration?: NoteToFutureSelf,
  priorSteps?: string[],
  nextSteps?: string[]
): Promise<AgentContext> {
  // Get fresh DOM
  const currentPageInfo = await getEnhancedPageInfo(page);
  const currentURL = page.url();

  // Get recent steps
  const recentSteps = memory.history.slice(-this.config.recentStepsCount);

  // SoM integration: update markers and capture screenshot with visual IDs
  let somScreenshot: string | undefined;
  let somElementMap: string | undefined;
  if (this.config.useSoM) {
    try {
      // The outer guard used to also require an existing handler, which made
      // the lazy-creation branch unreachable. Create on demand, otherwise
      // re-point the existing handler at the current page.
      if (!this.somHandler) {
        this.somHandler = new PageSoMHandler(page, this.logger);
      } else {
        this.somHandler.setPage(page);
      }
      const somHandler = this.somHandler;

      // Wait briefly for page stability (first iteration + safety net for fast SPAs).
      // Best-effort: a timeout here is not an error, so it is deliberately ignored.
      try {
        await page.waitForLoadState('domcontentloaded', { timeout: 5000 });
      } catch {
        // Page already loaded or timeout - continue
      }

      // Update SoM markers
      await somHandler.updateSom();

      // Screenshot WITH markers (viewport only - agent can scroll or use take_screenshot for full page)
      somScreenshot = await somHandler.getScreenshot(true, false, 60);

      // Element map for disambiguation
      somElementMap = somHandler.getSomElementMap();

      this.logger?.(`[Orchestrator] SoM screenshot captured for agent decision-making`, 'log');
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      this.logger?.(`[Orchestrator] Failed to capture SoM screenshot: ${message}`, 'error');
    }
  }

  // Build context
  const context = {
    overallGoal: scenarioSteps.join('\n'),
    currentStepGoal,
    stepNumber,
    totalSteps,
    completedSteps: scenarioSteps.slice(0, stepNumber - 1),
    remainingSteps: scenarioSteps.slice(stepNumber),
    currentPageInfo,
    currentURL,
    recentSteps,
    experiences: memory.experiences,
    extractedData: memory.extractedData,
    noteFromPreviousIteration, // Tactical note from previous iteration
    somScreenshot, // SoM screenshot with visual markers (current)
    somElementMap, // SoM element details for disambiguation
    priorSteps, // Repair context (undefined for script gen)
    nextSteps // Repair context (undefined for script gen)
  };

  // Save current screenshot as previous for next iteration (for tool access)
  if (somScreenshot) {
    this.previousSomScreenshot = somScreenshot;
  }

  return context;
}
|
|
551
|
-
|
|
552
|
-
/**
 * Call the agent (LLM) to make a decision for the current iteration.
 *
 * Picks the system prompt by mode (SoM, coordinate fallback, or standard
 * DOM selectors), builds the user prompt from `context`, attaches the SoM
 * screenshot when one is present, reports token usage to the progress
 * reporter and parses the answer into an AgentDecision.
 *
 * LLM, reporting and parsing errors are not rethrown: they are logged and a
 * `stuck` decision carrying the error message is returned instead.
 *
 * @param context - agent context for this iteration
 * @param jobId - job identifier attached to the token-usage report
 * @param stepNumber - step index attached to the token-usage report
 * @param iteration - iteration index attached to the token-usage report
 * @param consecutiveFailures - failing iterations so far; three or more switch
 *   to the coordinate prompt when coordinate mode is enabled
 */
private async callAgent(
  context: AgentContext,
  jobId: string,
  stepNumber: number,
  iteration: number,
  consecutiveFailures?: number
): Promise<AgentDecision> {
  // Phase 1: only 2 tiers (selectors → coordinates), so activate after 3 failures.
  // Phase 2: will have 3 tiers (selectors → index → coordinates), threshold will be 5.
  // Gated on enableCoordinateMode: executeStep ignores coordinate actions when
  // the mode is disabled, so a coordinate-only prompt would waste the iteration.
  const useCoordinateMode =
    Boolean(this.config.enableCoordinateMode) &&
    consecutiveFailures !== undefined &&
    consecutiveFailures >= 3;

  // Build appropriate system prompt based on mode
  const toolDescriptions = this.toolRegistry.generateToolDescriptions();
  let systemPrompt: string;

  if (this.config.useSoM) {
    // SoM mode: visual element identification
    systemPrompt = OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates);
  } else if (useCoordinateMode) {
    // Coordinate mode: fallback when selectors fail
    systemPrompt = OrchestratorPrompts.buildCoordinateSystemPrompt();
  } else {
    // Standard mode: DOM-based selectors
    systemPrompt = OrchestratorPrompts.buildSystemPrompt(toolDescriptions, this.config.enableCoordinateMode);
  }

  const userPrompt = OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures, this.config.enableCoordinateMode);

  // Log prompt lengths for monitoring
  const systemLength = systemPrompt.length;
  const userLength = userPrompt.length;
  const totalLength = systemLength + userLength;
  const estimatedTokens = Math.ceil(totalLength / 4); // Rough estimate: 4 chars per token

  this.logger?.(`[Orchestrator] 📊 Prompt lengths: system=${systemLength} chars, user=${userLength} chars, total=${totalLength} chars (~${estimatedTokens} tokens)`, 'log');

  try {
    // Call LLM directly via provider
    const llmRequest: any = {
      model: DEFAULT_MODEL,
      systemPrompt,
      userPrompt
    };

    // Include current SoM screenshot as image
    const includesImage = Boolean(context.somScreenshot);
    if (includesImage) {
      llmRequest.imageUrl = context.somScreenshot;
      this.logger?.(`[Orchestrator] Including SoM screenshot in LLM request`, 'log');
    }

    const response = await this.llmFacade.llmProvider.callLLM(llmRequest);

    // Report token usage
    if (response.usage && this.progressReporter?.onTokensUsed) {
      const tokenUsage: TokenUsage = {
        jobId,
        stepNumber,
        iteration,
        inputTokens: response.usage.inputTokens,
        outputTokens: response.usage.outputTokens,
        // Reflects whether a screenshot was actually attached to this request.
        includesImage,
        model: DEFAULT_MODEL,
        timestamp: Date.now()
      };
      this.logger?.(`[Orchestrator] 💰 Reporting token usage: ${tokenUsage.inputTokens} + ${tokenUsage.outputTokens}`, 'log');
      await this.progressReporter.onTokensUsed(tokenUsage);
    } else if (!response.usage) {
      this.logger?.(`[Orchestrator] ⚠ No usage data in LLM response`, 'warn');
    }

    // Parse response
    return this.decisionParser.parse(response.answer);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    this.logger?.(`[Orchestrator] ✗ Agent call failed: ${message}`, 'error');

    // Return fallback decision
    return {
      status: 'stuck',
      statusReasoning: `Agent call failed: ${message}`,
      reasoning: 'LLM call failed'
    };
  }
}
|
|
640
|
-
|
|
641
|
-
/**
 * Run the tool calls requested by the agent, in order, and collect their results.
 *
 * At most `this.config.maxToolCallsPerIteration` calls are executed; any
 * additional calls are dropped. Results are keyed by tool name, so if the same
 * tool is requested more than once only the last result is kept.
 *
 * @param toolCalls - Tool invocations from the agent decision (each has `name` and `params`).
 * @param page - Page object made available to every tool.
 * @param memory - Journey memory made available to every tool.
 * @param stepNumber - Current step number, passed through to the tools.
 * @param refMap - Optional reference map, passed through to the tools.
 * @returns Map from tool name to the result returned by the tool registry.
 */
private async executeTools(
  toolCalls: any[],
  page: any,
  memory: JourneyMemory,
  stepNumber: number,
  refMap?: Map<string, any>
): Promise<Record<string, any>> {
  this.logger?.(`[Orchestrator] 🔧 Executing ${toolCalls.length} tool(s)`);

  const results: Record<string, any> = {};
  const toolContext: ToolExecutionContext & { refMap?: Map<string, any>; previousSomScreenshot?: string; somHandler?: any } = {
    page,
    memory,
    stepNumber,
    logger: this.logger,
    refMap, // Pass refMap for interact_with_ref tool
    previousSomScreenshot: this.previousSomScreenshot, // For view_previous_screenshot tool
    somHandler: this.somHandler // For refresh_som_markers tool
  };

  for (const toolCall of toolCalls.slice(0, this.config.maxToolCallsPerIteration)) {
    // JSON.stringify yields undefined when params is missing; fall back to an
    // empty preview so logging can never prevent the tool from running.
    const paramsPreview = (JSON.stringify(toolCall.params) ?? '').substring(0, 50);
    this.logger?.(`[Orchestrator] ▶ ${toolCall.name}(${paramsPreview}...)`);

    const result = await this.toolRegistry.execute(toolCall, toolContext);
    results[toolCall.name] = result;

    if (result.success) {
      this.logger?.(`[Orchestrator] ✓ ${toolCall.name} succeeded`);
    } else {
      this.logger?.(`[Orchestrator] ✗ ${toolCall.name} failed: ${result.error}`, 'error');
    }
  }

  return results;
}
|
|
679
|
-
|
|
680
|
-
/**
 * Parse a SomCommand from a loosely-typed command object.
 *
 * A command is accepted when it is a non-null object with an `action` and a
 * usable target: an `elementRef`, a `coord`, or a navigation action
 * (`navigate`, `goBack`, `goForward`, `reload`), which needs no target.
 *
 * @param cmd - Candidate command, typically taken from the agent's response.
 * @returns A SomCommand carrying the recognised fields, or `null` if `cmd` is not a valid command.
 */
private parseSomCommand(cmd: any): SomCommand | null {
  // typeof null === 'object', so null has to be rejected explicitly before
  // any property is read from cmd.
  if (cmd === null || typeof cmd !== 'object' || !cmd.action) {
    return null;
  }

  // Valid if: has elementRef, OR has coord, OR is navigation action
  const isNavigationAction = ['navigate', 'goBack', 'goForward', 'reload'].includes(cmd.action);
  const hasValidTarget = cmd.elementRef || cmd.coord || isNavigationAction;
  if (!hasValidTarget) {
    return null;
  }

  return {
    elementRef: cmd.elementRef,
    coord: cmd.coord,
    action: cmd.action,
    value: cmd.value,
    fromCoord: cmd.fromCoord,
    toCoord: cmd.toCoord,
    force: cmd.force,
    scrollAmount: cmd.scrollAmount,
    scrollDirection: cmd.scrollDirection,
    button: cmd.button,
    clickCount: cmd.clickCount,
    modifiers: cmd.modifiers,
    delay: cmd.delay,
    timeout: cmd.timeout
  };
}
|
|
710
|
-
|
|
711
|
-
/**
 * Execute a batch of agent commands and record every outcome in `memory.history`.
 *
 * Two execution paths exist:
 * - SoM mode (`config.useSoM` with a SoM handler present): each command is an
 *   object. Verification commands are non-blocking (a failed or throwing
 *   verification is logged and the batch continues); action commands stop the
 *   batch on the first failure. SoM markers are refreshed before returning.
 * - Standard mode: each command is a code string. All commands are compiled
 *   into one async function and run in sequence; the first failure stops the
 *   batch.
 *
 * History is trimmed to `config.maxHistorySize` on every exit path.
 *
 * @param commands - Code strings (standard mode) or SoM command/verification objects (SoM mode).
 * @param page - Page the commands run against.
 * @param memory - Journey memory; `history` receives one entry per recorded outcome.
 * @param stepNumber - Step number stamped on history entries.
 * @param iteration - Iteration number stamped on history entries.
 * @param jobId - Job identifier (not used by this method).
 * @returns `executed`: the commands that ran (in SoM mode the generated commands,
 *          including verification commands even when the verification failed);
 *          `allSucceeded`: false if an action/command failed.
 */
private async executeCommands(
  commands: string[] | any[],
  page: any,
  memory: JourneyMemory,
  stepNumber: number,
  iteration: number,
  jobId: string
): Promise<{ executed: string[]; allSucceeded: boolean }> {
  this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s)`);

  const executed: string[] = [];

  if (commands.length === 0) {
    return { executed: [], allSucceeded: true };
  }

  const total = commands.length;

  // Append one history entry stamped with the current step, iteration, URL and time.
  const record = (entry: {
    action: string;
    code: any;
    result: 'success' | 'failure';
    observation: string;
    error?: any;
  }): void => {
    memory.history.push({
      stepNumber,
      iteration,
      ...entry,
      url: page.url(),
      timestamp: Date.now()
    });
  };

  // Keep only the newest entries once the configured history size is exceeded.
  const trimHistory = (): void => {
    if (memory.history.length > this.config.maxHistorySize) {
      memory.history = memory.history.slice(-this.config.maxHistorySize);
    }
  };

  // SoM mode: Execute commands through PageSoMHandler
  if (this.config.useSoM && this.somHandler) {
    const somHandler = this.somHandler;
    this.logger?.(`[Orchestrator] Using SoM mode for command execution`, 'log');

    // Re-point the handler at the page and rebuild markers (DOM may have changed).
    const refreshSom = async (): Promise<void> => {
      if (page) {
        somHandler.setPage(page);
        await somHandler.updateSom();
      }
    };

    for (let i = 0; i < total; i++) {
      const cmd = commands[i];
      const position = `${i + 1}/${total}`;
      const isLast = i === total - 1;

      if (isSomVerification(cmd)) {
        try {
          const result = await somHandler.executeVerification(cmd);

          // Always keep the generated expect, even if it failed during
          // generation: the script should still contain the assertion.
          if (result.playwrightCommand) {
            executed.push(result.playwrightCommand);
          }

          if (result.success) {
            this.logger?.(`[Orchestrator] ✓ [${position}] Verification passed`, 'log');
            record({
              action: `Verification ${position}: ${cmd.verificationType}`,
              code: result.playwrightCommand,
              result: 'success',
              observation: `Verified: ${cmd.description || cmd.expected}`
            });
          } else {
            // Verification failures are non-blocking for script generation.
            this.logger?.(`[Orchestrator] ✗ [${position}] Verification failed (non-fatal): ${result.error}`, 'warn');
            record({
              action: `Verification ${position} - FAILED`,
              code: result.playwrightCommand || JSON.stringify(cmd),
              result: 'failure',
              observation: `Failed: ${result.error}`,
              error: result.error
            });
          }

          // Small delay between commands
          if (!isLast) {
            await page.waitForTimeout(300);
          }
        } catch (error: any) {
          this.logger?.(`[Orchestrator] ✗ [${position}] Verification exception: ${error.message}`, 'error');
        }
        continue;
      }

      if (!isSomCommand(cmd)) {
        this.logger?.(`[Orchestrator] ⚠ [${position}] Not a valid SoM command/verification, skipping`, 'warn');
        continue;
      }

      const somCommand = cmd as SomCommand;

      try {
        const result = await somHandler.runCommand(
          somCommand,
          this.config.somUseSomIdBasedCommands || false
        );

        if (result.status === CommandRunStatus.SUCCESS && result.successAttempt) {
          this.logger?.(`[Orchestrator] ✓ [${position}] SoM action succeeded`, 'log');
          executed.push(result.successAttempt.command!);
          record({
            action: `SoM Action ${position}: ${somCommand.action}`,
            code: result.successAttempt.command!,
            result: 'success',
            observation: 'Executed successfully'
          });

          // Small delay for form validation/animations
          if (!isLast) {
            await page.waitForTimeout(300);
          }
        } else {
          this.logger?.(`[Orchestrator] ✗ [${position}] SoM action failed: ${result.error}`, 'error');
          record({
            action: `SoM Action ${position}: ${somCommand.action} - FAILED`,
            code: JSON.stringify(somCommand),
            result: 'failure',
            observation: `Failed: ${result.error}`,
            error: result.error
          });

          await refreshSom();
          trimHistory();
          return { executed, allSucceeded: false };
        }
      } catch (error: any) {
        this.logger?.(`[Orchestrator] ✗ [${position}] SoM action exception: ${error.message}`, 'error');
        record({
          action: `SoM Action ${position} - EXCEPTION`,
          code: JSON.stringify(somCommand),
          result: 'failure',
          observation: `Exception: ${error.message}`,
          error: error.message
        });

        await refreshSom();
        trimHistory();
        return { executed, allSucceeded: false };
      }
    }

    // Always wait for the page to stabilize after the batch. This covers both
    // explicit navigation and clicks that trigger navigation/SPA routes.
    try {
      this.logger?.(`[Orchestrator] Waiting for page to stabilize...`, 'log');
      // Short networkidle timeout: catches navigation without blocking on SPAs
      // that issue continuous requests.
      await page.waitForLoadState('networkidle', { timeout: 3000 });
      this.logger?.(`[Orchestrator] Page stabilized (networkidle)`, 'log');
    } catch (error: any) {
      // If networkidle times out, fall back to domcontentloaded
      try {
        await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
        this.logger?.(`[Orchestrator] Page loaded (domcontentloaded)`, 'log');
      } catch (error2: any) {
        this.logger?.(`[Orchestrator] Page load wait timeout (continuing anyway)`, 'warn');
      }
    }

    // Refresh SoM after batch (DOM may have changed and page is now stable)
    await refreshSom();
    trimHistory();

    return { executed, allSucceeded: true };
  }

  // Standard mode: Execute all commands in sequence with small delay between them.
  // Delay helps with form validation, button enabling, and animations.
  const wrappedCode = (commands as string[]).map((cmd, i) => `
    // Command ${i + 1}/${commands.length}
    try {
      ${cmd}
      __results.push({ index: ${i}, success: true });
      ${i < commands.length - 1 ? 'await page.waitForTimeout(300);' : ''} // Small delay for form validation/animations
    } catch (error) {
      __results.push({ index: ${i}, success: false, error: error.message });
      throw error;
    }`).join('\n');

  // The results array is owned by this method and handed to the generated code,
  // so per-command outcomes survive when the generated code throws. Without
  // this, commands that already ran before a failure would be lost.
  const outcomes: Array<{ index: number; success: boolean; error?: string }> = [];
  let failed = false;
  let errorMessage = '';

  try {
    const func = new Function('page', 'expect', '__results', 'return (async () => { ' + wrappedCode + ' })()');
    await func(page, (global as any).expect, outcomes);
  } catch (error: any) {
    failed = true;
    errorMessage = error?.message || String(error);
  }

  for (const outcome of outcomes) {
    if (!outcome.success) {
      continue;
    }
    const cmd = commands[outcome.index];
    this.logger?.(`[Orchestrator] ✓ [${outcome.index + 1}/${commands.length}] Success`);
    record({
      action: `Command ${outcome.index + 1}/${commands.length}`,
      code: cmd,
      result: 'success',
      observation: 'Executed successfully'
    });
    executed.push(cmd);
  }

  if (failed) {
    this.logger?.(`[Orchestrator] ❌ Command execution failed: ${errorMessage}`, 'error');

    // Attribute the failure to the command that reported it. When nothing was
    // reported (e.g. the batch did not compile), fall back to the first
    // command that did not complete.
    const failedOutcome = outcomes.find(outcome => !outcome.success);
    const failedIndex = failedOutcome ? failedOutcome.index : executed.length;

    record({
      action: `Command - FAILED`,
      code: commands[failedIndex] || '',
      result: 'failure',
      observation: `Failed: ${errorMessage}`,
      error: errorMessage
    });

    trimHistory();
    return { executed, allSucceeded: false };
  }

  trimHistory();
  return { executed, allSucceeded: true };
}
|
|
959
|
-
|
|
960
|
-
/**
 * Report step progress to the progress reporter, if one is registered.
 *
 * The agent status is mapped to a step status: `complete` becomes SUCCESS,
 * `stuck` and `infeasible` become FAILURE, anything else is IN_PROGRESS.
 *
 * @param jobId - Job identifier forwarded to the reporter.
 * @param stepNumber - Step number forwarded to the reporter.
 * @param description - Step description forwarded to the reporter.
 * @param decision - Agent decision whose commands, reasoning and status are reported.
 * @param iteration - Agent iteration that produced the decision.
 */
private async reportStepProgress(
  jobId: string,
  stepNumber: number,
  description: string,
  decision: AgentDecision,
  iteration: number
): Promise<void> {
  if (!this.progressReporter?.onStepProgress) return;

  let status = StepExecutionStatus.IN_PROGRESS;
  if (decision.status === 'complete') {
    status = StepExecutionStatus.SUCCESS;
  } else if (decision.status === 'stuck' || decision.status === 'infeasible') {
    status = StepExecutionStatus.FAILURE;
  }

  // Commands may be objects rather than code strings (SoM mode); serialize
  // those explicitly so the reported code never degrades to "[object Object]".
  const code = decision.commands
    ?.map((command: unknown) => (typeof command === 'string' ? command : JSON.stringify(command)))
    .join('\n');

  await this.progressReporter.onStepProgress({
    jobId,
    stepNumber,
    description,
    status,
    code,
    // Include agent metadata for transparency
    agentIteration: iteration,
    agentReasoning: decision.reasoning,
    agentSelfReflection: decision.selfReflection,
    agentExperiences: decision.experiences,
    agentToolsUsed: decision.toolCalls?.map(t => t.name),
    agentStatus: decision.status
  });
}
|
|
989
|
-
|
|
990
|
-
/**
 * Execute exploration mode - the agent autonomously explores to achieve the journey goal.
 *
 * Each loop iteration is one exploration step: build context, ask the agent
 * for a decision, run requested tools (re-asking the agent when it needs the
 * tool results), clear any detected blocker, run the decided commands, then
 * fold experiences and the agent's note into memory. `onStepProgress` is fired
 * at the start and at the end of every step with the same `stepId`, so the two
 * reports can be correlated.
 *
 * @param page - Page the exploration runs against.
 * @param explorationConfig - Exploration goal, optional test data prompt and step budget
 *                            (`maxExplorationSteps`, default 50).
 * @param jobId - Job identifier forwarded to progress and token reports.
 * @returns Result with the executed commands, the number of steps taken, the journey memory
 *          and a termination reason: `complete`, `agent_stuck` (with `error`), or
 *          `system_limit` when the step budget ran out (reported as success).
 */
async executeExploration(
  page: any,
  explorationConfig: ExplorationMode,
  jobId: string
): Promise<OrchestratorStepResult> {
  this.logger?.(`\n[Orchestrator] ========== EXPLORATION MODE ==========`);
  this.logger?.(`[Orchestrator] 🎯 Journey Goal: ${explorationConfig.explorationPrompt}`);
  if (explorationConfig.testDataPrompt) {
    this.logger?.(`[Orchestrator] 📋 Test Data: ${explorationConfig.testDataPrompt}`);
  }

  const memory: JourneyMemory = {
    history: [],
    experiences: [],
    extractedData: {}
  };

  const maxSteps = explorationConfig.maxExplorationSteps || 50;
  let stepNumber = 0;
  const commandsExecuted: string[] = [];

  // Commands may be objects rather than code strings (SoM mode); serialize
  // those explicitly so reported code never degrades to "[object Object]".
  const formatCommands = (commands: readonly unknown[] | undefined): string =>
    (commands ?? [])
      .map(command => (typeof command === 'string' ? command : JSON.stringify(command)))
      .join('\n');

  while (stepNumber < maxSteps) {
    stepNumber++;

    this.logger?.(`\n[Orchestrator] === Exploration Step ${stepNumber}/${maxSteps} ===`);

    // Build exploratory context
    const context = await this.buildExploratoryContext(
      page,
      explorationConfig.explorationPrompt,
      explorationConfig.testDataPrompt,
      memory,
      stepNumber,
      maxSteps
    );

    // Call agent with exploratory prompt
    const decision = await this.callExploratoryAgent(context, jobId, stepNumber);

    this.decisionParser.log(decision, stepNumber);

    // One id per step, shared by the start and completion reports.
    const stepId = `exploration-${stepNumber}-${Date.now()}`;

    // Report step start (fires JourneyRunner's beforeStepStart callback)
    if (this.progressReporter?.onStepProgress) {
      const stepInfo = {
        jobId,
        stepNumber,
        stepId,
        description: decision.reasoning,
        code: '', // Will be filled after commands execute
        status: StepExecutionStatus.IN_PROGRESS,
        wasRepaired: false
      };
      await this.progressReporter.onStepProgress(stepInfo);
    }

    // Execute tools if requested
    if (decision.toolCalls && decision.toolCalls.length > 0) {
      const toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber);

      // If needs tool results, call agent again
      if (decision.needsToolResults) {
        const updatedContext = { ...context, toolResults };
        const continuedDecision = await this.callExploratoryAgent(updatedContext, jobId, stepNumber);

        decision.commands = continuedDecision.commands || decision.commands;
        decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
        decision.status = continuedDecision.status;
        // Keep the reasoning in step with the status it explains; otherwise a
        // termination would be logged/returned with the first call's reasoning.
        decision.statusReasoning = continuedDecision.statusReasoning || decision.statusReasoning;
      }
    }

    // Handle blocker clearing
    if (decision.blockerDetected && decision.blockerDetected.clearingCommands) {
      this.logger?.(`[Orchestrator] 🚧 Clearing blocker: ${decision.blockerDetected.description}`);
      const blockerResult = await this.executeCommands(
        decision.blockerDetected.clearingCommands,
        page,
        memory,
        stepNumber,
        1,
        jobId
      );
      commandsExecuted.push(...blockerResult.executed);
    }

    // Execute exploration commands
    let commandsSucceeded = true;
    if (decision.commands && decision.commands.length > 0) {
      const executeResult = await this.executeCommands(
        decision.commands,
        page,
        memory,
        stepNumber,
        1,
        jobId
      );
      commandsExecuted.push(...executeResult.executed);
      commandsSucceeded = executeResult.allSucceeded;
    }

    // Report step completion (fires JourneyRunner's onStepComplete callback)
    if (this.progressReporter?.onStepProgress) {
      const stepInfo = {
        jobId,
        stepNumber,
        stepId,
        description: decision.reasoning,
        code: formatCommands(decision.commands),
        status: commandsSucceeded ? StepExecutionStatus.SUCCESS : StepExecutionStatus.FAILURE,
        error: commandsSucceeded ? undefined : 'Command execution failed',
        wasRepaired: false
      };
      await this.progressReporter.onStepProgress(stepInfo);
    }

    // Add experiences (both app patterns AND exploration progress)
    if (decision.experiences) {
      memory.experiences.push(...decision.experiences);
      if (memory.experiences.length > this.config.maxExperiences) {
        memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
      }
    }

    // Store note for next iteration
    if (decision.noteToFutureSelf) {
      memory.latestNote = {
        fromIteration: stepNumber,
        content: decision.noteToFutureSelf
      };
    }

    // Check termination
    if (decision.status === 'complete') {
      this.logger?.(`[Orchestrator] ✅ Journey exploration complete: ${decision.statusReasoning}`);
      return {
        success: true,
        commands: commandsExecuted,
        iterations: stepNumber,
        terminationReason: 'complete',
        memory
      };
    } else if (decision.status === 'stuck') {
      this.logger?.(`[Orchestrator] ❌ Exploration stuck: ${decision.statusReasoning}`);
      return {
        success: false,
        commands: commandsExecuted,
        iterations: stepNumber,
        terminationReason: 'agent_stuck',
        memory,
        error: decision.statusReasoning
      };
    }
  }

  // Hit max steps - not necessarily a failure
  this.logger?.(`[Orchestrator] ⚠ Maximum exploration steps reached (budget limit)`);
  return {
    success: true, // Not a failure - just budget limit
    commands: commandsExecuted,
    iterations: stepNumber,
    terminationReason: 'system_limit',
    memory
  };
}
|
|
1161
|
-
|
|
1162
|
-
/**
 * Assemble the agent context for one exploration step.
 *
 * Reads the current page info and URL, takes the most recent history entries
 * (`config.recentStepsCount`), and, in SoM mode, refreshes the SoM markers and
 * captures a screenshot plus the element map. A failure while capturing SoM
 * data is logged and the context is built without it. A captured screenshot
 * is also stored on the instance as the "previous" screenshot.
 *
 * @param page - Page to inspect.
 * @param explorationPrompt - Journey goal; used as both overall and current step goal.
 * @param testDataPrompt - Optional test data instructions carried in the context.
 * @param memory - Journey memory supplying history, experiences, extracted data and latest note.
 * @param stepNumber - Current exploration step.
 * @param maxSteps - Step budget, reported as `totalSteps`.
 * @returns The context handed to the exploratory agent.
 */
private async buildExploratoryContext(
  page: any,
  explorationPrompt: string,
  testDataPrompt: string | undefined,
  memory: JourneyMemory,
  stepNumber: number,
  maxSteps: number
): Promise<AgentContext> {
  // Page info is fetched first: the retry helper waits for the page to be
  // ready and elements to appear (especially important after navigation).
  const pageInfo = await PageInfoRetry.getWithRetry(page);
  const url = page.url();
  const latestSteps = memory.history.slice(-this.config.recentStepsCount);

  // Stay undefined unless SoM mode is active and the capture succeeds.
  let screenshot: string | undefined;
  let elementMap: string | undefined;

  const somEnabled = this.config.useSoM && this.somHandler;
  if (somEnabled) {
    try {
      this.somHandler.setPage(page);

      // Brief stability wait (first iteration + safety net for fast SPAs).
      try {
        await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
      } catch (error: any) {
        // Already loaded or timed out - either way, carry on.
      }

      await this.somHandler.updateSom();
      // Viewport only - agent can scroll or request full page
      screenshot = await this.somHandler.getScreenshot(true, false, 60);
      // Element details let the agent disambiguate markers.
      elementMap = this.somHandler.getSomElementMap();

      this.logger?.(`[Orchestrator] SoM screenshot captured for exploratory agent`, 'log');
    } catch (error: any) {
      this.logger?.(`[Orchestrator] Failed to capture SoM screenshot: ${error.message}`, 'error');
    }
  }

  const context = {
    overallGoal: explorationPrompt,
    currentStepGoal: explorationPrompt, // Single journey: step goal equals overall goal
    stepNumber,
    totalSteps: maxSteps,
    completedSteps: [],
    remainingSteps: [],
    currentPageInfo: pageInfo,
    currentURL: url,
    recentSteps: latestSteps,
    experiences: memory.experiences,
    extractedData: memory.extractedData,
    noteFromPreviousIteration: memory.latestNote,
    testDataPrompt, // Read back by the exploratory agent call
    somScreenshot: screenshot, // Current SoM screenshot
    somElementMap: elementMap // SoM element details for disambiguation
  };

  // Remember this screenshot so tools can access it as the previous one.
  if (screenshot) {
    this.previousSomScreenshot = screenshot;
  }

  return context;
}
|
|
1227
|
-
|
|
1228
|
-
/**
 * Ask the LLM for the next exploration decision.
 *
 * Uses the SoM system prompt in SoM mode and the exploratory system prompt
 * otherwise, attaches the SoM screenshot when the context carries one, reports
 * token usage to the progress reporter, and parses the answer into a decision.
 *
 * If the LLM call, the usage report or the parsing throws, the error is logged
 * and a `stuck` decision is returned instead of propagating the exception.
 *
 * @param context - Agent context for the current exploration step.
 * @param jobId - Job identifier attached to the token usage report.
 * @param stepNumber - Step number attached to the prompt and the token usage report.
 * @returns The parsed agent decision, or a `stuck` fallback decision on failure.
 */
private async callExploratoryAgent(
  context: AgentContext,
  jobId: string,
  stepNumber: number
): Promise<AgentDecision> {
  const toolDescriptions = this.toolRegistry.generateToolDescriptions();

  // Use SoM system prompt if in SoM mode, otherwise use standard exploratory prompt
  const systemPrompt = this.config.useSoM
    ? OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates)
    : OrchestratorPrompts.buildExploratorySystemPrompt(toolDescriptions);

  const userPrompt = OrchestratorPrompts.buildExploratoryUserPrompt(
    context,
    context.overallGoal,
    context.testDataPrompt, // Pass testDataPrompt from context
    stepNumber,
    context.totalSteps
  );

  const llmRequest: any = {
    model: DEFAULT_MODEL,
    systemPrompt,
    userPrompt
  };

  // Include current SoM screenshot as image
  const includesImage = Boolean(context.somScreenshot);
  if (includesImage) {
    llmRequest.imageUrl = context.somScreenshot;
    this.logger?.(`[Orchestrator] Including SoM screenshot in exploratory LLM request`, 'log');
  }

  try {
    const response = await this.llmFacade.llmProvider.callLLM(llmRequest);

    // Report token usage
    if (response.usage && this.progressReporter?.onTokensUsed) {
      await this.progressReporter.onTokensUsed({
        jobId,
        stepNumber,
        iteration: 1,
        inputTokens: response.usage.inputTokens,
        outputTokens: response.usage.outputTokens,
        // Reflect whether an image was actually attached to this request.
        includesImage,
        model: DEFAULT_MODEL,
        timestamp: Date.now()
      });
    }

    // Parse response (same JSON format as regular mode)
    return this.decisionParser.parse(response.answer);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    this.logger?.(`[Orchestrator] ✗ Exploratory agent call failed: ${message}`, 'error');

    // Fallback decision: lets the exploration loop terminate as "stuck"
    // instead of rejecting with an unhandled exception.
    return {
      status: 'stuck',
      statusReasoning: `Agent call failed: ${message}`,
      reasoning: 'LLM call failed'
    };
  }
}
|
|
1280
|
-
}
|
|
1281
|
-
|
|
1282
|
-
|