testchimp-runner-core 0.0.34 → 0.0.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/execution-service.d.ts +1 -4
- package/dist/execution-service.d.ts.map +1 -1
- package/dist/execution-service.js +155 -468
- package/dist/execution-service.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -1
- package/dist/index.js.map +1 -1
- package/dist/orchestrator/decision-parser.d.ts +18 -0
- package/dist/orchestrator/decision-parser.d.ts.map +1 -0
- package/dist/orchestrator/decision-parser.js +127 -0
- package/dist/orchestrator/decision-parser.js.map +1 -0
- package/dist/orchestrator/index.d.ts +4 -2
- package/dist/orchestrator/index.d.ts.map +1 -1
- package/dist/orchestrator/index.js +14 -2
- package/dist/orchestrator/index.js.map +1 -1
- package/dist/orchestrator/orchestrator-agent.d.ts +17 -14
- package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-agent.js +534 -204
- package/dist/orchestrator/orchestrator-agent.js.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.d.ts +14 -2
- package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.js +529 -247
- package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
- package/dist/orchestrator/page-som-handler.d.ts +106 -0
- package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
- package/dist/orchestrator/page-som-handler.js +1353 -0
- package/dist/orchestrator/page-som-handler.js.map +1 -0
- package/dist/orchestrator/som-types.d.ts +149 -0
- package/dist/orchestrator/som-types.d.ts.map +1 -0
- package/dist/orchestrator/som-types.js +87 -0
- package/dist/orchestrator/som-types.js.map +1 -0
- package/dist/orchestrator/tool-registry.d.ts +2 -0
- package/dist/orchestrator/tool-registry.d.ts.map +1 -1
- package/dist/orchestrator/tool-registry.js.map +1 -1
- package/dist/orchestrator/tools/index.d.ts +4 -1
- package/dist/orchestrator/tools/index.d.ts.map +1 -1
- package/dist/orchestrator/tools/index.js +7 -2
- package/dist/orchestrator/tools/index.js.map +1 -1
- package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
- package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
- package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
- package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
- package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
- package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
- package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
- package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
- package/dist/orchestrator/types.d.ts +23 -1
- package/dist/orchestrator/types.d.ts.map +1 -1
- package/dist/orchestrator/types.js +11 -1
- package/dist/orchestrator/types.js.map +1 -1
- package/dist/scenario-service.d.ts +5 -0
- package/dist/scenario-service.d.ts.map +1 -1
- package/dist/scenario-service.js +17 -0
- package/dist/scenario-service.js.map +1 -1
- package/dist/scenario-worker-class.d.ts +4 -0
- package/dist/scenario-worker-class.d.ts.map +1 -1
- package/dist/scenario-worker-class.js +18 -3
- package/dist/scenario-worker-class.js.map +1 -1
- package/dist/testing/agent-tester.d.ts +35 -0
- package/dist/testing/agent-tester.d.ts.map +1 -0
- package/dist/testing/agent-tester.js +84 -0
- package/dist/testing/agent-tester.js.map +1 -0
- package/dist/testing/ref-translator-tester.d.ts +44 -0
- package/dist/testing/ref-translator-tester.d.ts.map +1 -0
- package/dist/testing/ref-translator-tester.js +104 -0
- package/dist/testing/ref-translator-tester.js.map +1 -0
- package/dist/utils/hierarchical-selector.d.ts +47 -0
- package/dist/utils/hierarchical-selector.d.ts.map +1 -0
- package/dist/utils/hierarchical-selector.js +212 -0
- package/dist/utils/hierarchical-selector.js.map +1 -0
- package/dist/utils/page-info-retry.d.ts +14 -0
- package/dist/utils/page-info-retry.d.ts.map +1 -0
- package/dist/utils/page-info-retry.js +60 -0
- package/dist/utils/page-info-retry.js.map +1 -0
- package/dist/utils/page-info-utils.d.ts +1 -0
- package/dist/utils/page-info-utils.d.ts.map +1 -1
- package/dist/utils/page-info-utils.js +46 -18
- package/dist/utils/page-info-utils.js.map +1 -1
- package/dist/utils/ref-attacher.d.ts +21 -0
- package/dist/utils/ref-attacher.d.ts.map +1 -0
- package/dist/utils/ref-attacher.js +149 -0
- package/dist/utils/ref-attacher.js.map +1 -0
- package/dist/utils/ref-translator.d.ts +49 -0
- package/dist/utils/ref-translator.d.ts.map +1 -0
- package/dist/utils/ref-translator.js +276 -0
- package/dist/utils/ref-translator.js.map +1 -0
- package/package.json +6 -1
- package/RELEASE_0.0.26.md +0 -165
- package/RELEASE_0.0.27.md +0 -236
- package/RELEASE_0.0.28.md +0 -286
- package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
- package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
- package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
- package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
- package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
- package/plandocs/INTEGRATION_COMPLETE.md +0 -322
- package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
- package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
- package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
- package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
- package/plandocs/PHASE_1_COMPLETE.md +0 -165
- package/plandocs/PHASE_1_SUMMARY.md +0 -184
- package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
- package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
- package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
- package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
- package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
- package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
- package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
- package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
- package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
- package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
- package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
- package/src/auth-config.ts +0 -84
- package/src/credit-usage-service.ts +0 -188
- package/src/env-loader.ts +0 -103
- package/src/execution-service.ts +0 -1413
- package/src/file-handler.ts +0 -104
- package/src/index.ts +0 -422
- package/src/llm-facade.ts +0 -821
- package/src/llm-provider.ts +0 -53
- package/src/model-constants.ts +0 -35
- package/src/orchestrator/index.ts +0 -34
- package/src/orchestrator/orchestrator-agent.ts +0 -862
- package/src/orchestrator/orchestrator-agent.ts.backup +0 -1386
- package/src/orchestrator/orchestrator-prompts.ts +0 -474
- package/src/orchestrator/tool-registry.ts +0 -182
- package/src/orchestrator/tools/check-page-ready.ts +0 -75
- package/src/orchestrator/tools/extract-data.ts +0 -92
- package/src/orchestrator/tools/index.ts +0 -12
- package/src/orchestrator/tools/inspect-page.ts +0 -42
- package/src/orchestrator/tools/recall-history.ts +0 -72
- package/src/orchestrator/tools/take-screenshot.ts +0 -128
- package/src/orchestrator/tools/verify-action-result.ts +0 -159
- package/src/orchestrator/types.ts +0 -248
- package/src/playwright-mcp-service.ts +0 -224
- package/src/progress-reporter.ts +0 -144
- package/src/prompts.ts +0 -842
- package/src/providers/backend-proxy-llm-provider.ts +0 -91
- package/src/providers/local-llm-provider.ts +0 -38
- package/src/scenario-service.ts +0 -232
- package/src/scenario-worker-class.ts +0 -1089
- package/src/script-utils.ts +0 -203
- package/src/types.ts +0 -239
- package/src/utils/browser-utils.ts +0 -348
- package/src/utils/coordinate-converter.ts +0 -162
- package/src/utils/page-info-utils.ts +0 -250
- package/testchimp-runner-core-0.0.33.tgz +0 -0
- package/tsconfig.json +0 -19
|
@@ -5,13 +5,16 @@
|
|
|
5
5
|
*/
|
|
6
6
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
7
|
exports.OrchestratorAgent = void 0;
|
|
8
|
-
const test_1 = require("@playwright/test");
|
|
9
8
|
const progress_reporter_1 = require("../progress-reporter");
|
|
10
9
|
const page_info_utils_1 = require("../utils/page-info-utils");
|
|
11
10
|
const coordinate_converter_1 = require("../utils/coordinate-converter");
|
|
12
11
|
const model_constants_1 = require("../model-constants");
|
|
13
12
|
const types_1 = require("./types");
|
|
14
13
|
const orchestrator_prompts_1 = require("./orchestrator-prompts");
|
|
14
|
+
const page_info_retry_1 = require("../utils/page-info-retry");
|
|
15
|
+
const decision_parser_1 = require("./decision-parser");
|
|
16
|
+
const page_som_handler_1 = require("./page-som-handler");
|
|
17
|
+
const som_types_1 = require("./som-types");
|
|
15
18
|
/**
|
|
16
19
|
* Orchestrator Agent - manages step execution with tool use and memory
|
|
17
20
|
*/
|
|
@@ -24,6 +27,11 @@ class OrchestratorAgent {
|
|
|
24
27
|
this.progressReporter = progressReporter;
|
|
25
28
|
this.logger = logger;
|
|
26
29
|
this.debugMode = debugMode || false;
|
|
30
|
+
this.decisionParser = new decision_parser_1.DecisionParser(logger);
|
|
31
|
+
// Initialize SoM handler if enabled
|
|
32
|
+
if (this.config.useSoM) {
|
|
33
|
+
this.somHandler = new page_som_handler_1.PageSoMHandler(null, this.logger);
|
|
34
|
+
}
|
|
27
35
|
}
|
|
28
36
|
setDebugMode(enabled) {
|
|
29
37
|
this.debugMode = enabled;
|
|
@@ -31,11 +39,12 @@ class OrchestratorAgent {
|
|
|
31
39
|
/**
|
|
32
40
|
* Execute a single step of the scenario
|
|
33
41
|
*/
|
|
34
|
-
async executeStep(page, stepDescription, stepNumber, totalSteps, scenarioSteps, memory, jobId)
|
|
42
|
+
async executeStep(page, stepDescription, stepNumber, totalSteps, scenarioSteps, memory, jobId, priorSteps, // NEW: For repair mode (undefined for script gen)
|
|
43
|
+
nextSteps // NEW: For repair mode (undefined for script gen)
|
|
44
|
+
) {
|
|
35
45
|
this.logger?.(`\n[Orchestrator] ========== STEP ${stepNumber}/${totalSteps} ==========`);
|
|
36
46
|
this.logger?.(`[Orchestrator] 🎯 Goal: ${stepDescription}`);
|
|
37
47
|
let iteration = 0;
|
|
38
|
-
let previousReflection = undefined;
|
|
39
48
|
let noteToSelf = memory.latestNote; // Start with note from previous step
|
|
40
49
|
const commandsExecuted = [];
|
|
41
50
|
let consecutiveFailures = 0; // Track consecutive iterations with failed commands
|
|
@@ -44,24 +53,45 @@ class OrchestratorAgent {
|
|
|
44
53
|
iteration++;
|
|
45
54
|
this.logger?.(`\n[Orchestrator] === Iteration ${iteration}/${this.config.maxIterationsPerStep} ===`);
|
|
46
55
|
// Build context for agent
|
|
47
|
-
const context = await this.buildAgentContext(page, stepDescription, stepNumber, totalSteps, scenarioSteps, memory,
|
|
56
|
+
const context = await this.buildAgentContext(page, stepDescription, stepNumber, totalSteps, scenarioSteps, memory, consecutiveFailures, noteToSelf, // Pass note from previous iteration
|
|
57
|
+
priorSteps, // NEW: Pass repair context
|
|
58
|
+
nextSteps // NEW: Pass repair context
|
|
48
59
|
);
|
|
49
60
|
// Call agent to make decision
|
|
50
61
|
const decision = await this.callAgent(context, jobId, stepNumber, iteration, consecutiveFailures);
|
|
51
62
|
// Log agent's reasoning
|
|
52
|
-
this.
|
|
63
|
+
this.decisionParser.log(decision, iteration);
|
|
53
64
|
// Report progress
|
|
54
65
|
await this.reportStepProgress(jobId, stepNumber, stepDescription, decision, iteration);
|
|
55
66
|
// Execute tools if requested (tools are READ-ONLY, they don't change state)
|
|
56
67
|
let toolResults = {};
|
|
57
|
-
// ANTI-LOOP: Detect
|
|
68
|
+
// ANTI-LOOP: Detect and BLOCK screenshot loops (PER STEP)
|
|
69
|
+
const screenshotsThisStep = memory.history.filter(s => s.stepNumber === stepNumber &&
|
|
70
|
+
(s.code.includes('take_screenshot') || s.action.includes('Screenshot')));
|
|
58
71
|
const recentScreenshots = memory.history.slice(-3).filter(s => s.code.includes('take_screenshot') || s.action.includes('Screenshot'));
|
|
59
|
-
if (
|
|
72
|
+
if (screenshotsThisStep.length >= 3) {
|
|
73
|
+
this.logger?.(`[Orchestrator] 🚨 SCREENSHOT LOOP - ${screenshotsThisStep.length} screenshots THIS STEP! BLOCKING further screenshots`, 'error');
|
|
74
|
+
}
|
|
75
|
+
else if (recentScreenshots.length >= 2 && iteration >= 3) {
|
|
60
76
|
this.logger?.(`[Orchestrator] ⚠️ WARNING: ${recentScreenshots.length} screenshots in last 3 iterations - agent may be looping`, 'warn');
|
|
61
|
-
this.logger?.(`[Orchestrator] 💭 System: Stop gathering info, START ACTING with available selectors`);
|
|
62
77
|
}
|
|
63
78
|
if (decision.toolCalls && decision.toolCalls.length > 0) {
|
|
64
|
-
|
|
79
|
+
// ENFORCE: Block screenshot tool calls if too many taken IN THIS STEP
|
|
80
|
+
if (screenshotsThisStep.length >= 3) {
|
|
81
|
+
decision.toolCalls = decision.toolCalls.filter(tc => tc.name !== 'take_screenshot');
|
|
82
|
+
if (decision.toolCalls.length === 0) {
|
|
83
|
+
this.logger?.(`[Orchestrator] 🚫 REJECTED screenshot tool call - loop detected. Agent must ACT.`, 'warn');
|
|
84
|
+
toolResults = [{
|
|
85
|
+
toolName: 'take_screenshot',
|
|
86
|
+
success: false,
|
|
87
|
+
error: 'SYSTEM BLOCKED: Too many screenshots taken. You must use existing DOM snapshots and execute commands now. Analysis paralysis detected.',
|
|
88
|
+
data: null
|
|
89
|
+
}];
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
if (decision.toolCalls.length > 0) {
|
|
93
|
+
toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber, context.currentPageInfo.refMap);
|
|
94
|
+
}
|
|
65
95
|
// If agent wants to wait for tool results before proceeding, call agent again
|
|
66
96
|
if (decision.needsToolResults) {
|
|
67
97
|
const updatedContext = { ...context, toolResults };
|
|
@@ -80,7 +110,7 @@ class OrchestratorAgent {
|
|
|
80
110
|
if (decision.blockerDetected && decision.blockerDetected.clearingCommands && decision.blockerDetected.clearingCommands.length > 0) {
|
|
81
111
|
this.logger?.(`[Orchestrator] 🚧 BLOCKER DETECTED: ${decision.blockerDetected.description}`);
|
|
82
112
|
this.logger?.(`[Orchestrator] 🧹 Clearing blocker with ${decision.blockerDetected.clearingCommands.length} command(s)...`);
|
|
83
|
-
const blockerResult = await this.
|
|
113
|
+
const blockerResult = await this.executeCommands(decision.blockerDetected.clearingCommands, page, memory, stepNumber, iteration, jobId);
|
|
84
114
|
// Add blocker commands with comment to output
|
|
85
115
|
if (blockerResult.executed.length > 0) {
|
|
86
116
|
commandsExecuted.push(`// Blocker: ${decision.blockerDetected.description}`);
|
|
@@ -94,8 +124,8 @@ class OrchestratorAgent {
|
|
|
94
124
|
}
|
|
95
125
|
}
|
|
96
126
|
// Execute main commands (only if no blocker failure)
|
|
97
|
-
if (decision.commands && decision.commands.length > 0
|
|
98
|
-
const executeResult = await this.
|
|
127
|
+
if (!iterationHadFailure && decision.commands && decision.commands.length > 0) {
|
|
128
|
+
const executeResult = await this.executeCommands(decision.commands, page, memory, stepNumber, iteration, jobId);
|
|
99
129
|
commandsExecuted.push(...executeResult.executed);
|
|
100
130
|
// Track failures
|
|
101
131
|
if (!executeResult.allSucceeded) {
|
|
@@ -107,8 +137,8 @@ class OrchestratorAgent {
|
|
|
107
137
|
consecutiveFailures = 0; // Reset on success
|
|
108
138
|
}
|
|
109
139
|
}
|
|
110
|
-
// Handle coordinate-based actions (NEW - fallback when selectors fail)
|
|
111
|
-
if (decision.coordinateAction && !iterationHadFailure) {
|
|
140
|
+
// Handle coordinate-based actions (NEW - fallback when selectors fail) - ONLY if enabled
|
|
141
|
+
if (this.config.enableCoordinateMode && decision.coordinateAction && !iterationHadFailure) {
|
|
112
142
|
coordinateAttempts++;
|
|
113
143
|
this.logger?.(`[Orchestrator] 🎯 Coordinate Action (attempt ${coordinateAttempts}/2): ${decision.coordinateAction.action} at (${decision.coordinateAction.xPercent}%, ${decision.coordinateAction.yPercent}%)`);
|
|
114
144
|
try {
|
|
@@ -120,7 +150,7 @@ class OrchestratorAgent {
|
|
|
120
150
|
this.logger?.(`[Orchestrator] Generated commands:`);
|
|
121
151
|
coordCommands.forEach(cmd => this.logger?.(` ${cmd}`));
|
|
122
152
|
// Execute coordinate commands
|
|
123
|
-
const coordResult = await this.
|
|
153
|
+
const coordResult = await this.executeCommands(coordCommands, page, memory, stepNumber, iteration, jobId);
|
|
124
154
|
commandsExecuted.push(...coordResult.executed);
|
|
125
155
|
if (!coordResult.allSucceeded) {
|
|
126
156
|
this.logger?.(`[Orchestrator] ❌ Coordinate action failed (Playwright error)`);
|
|
@@ -263,9 +293,7 @@ class OrchestratorAgent {
|
|
|
263
293
|
memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
|
|
264
294
|
}
|
|
265
295
|
}
|
|
266
|
-
// Store self
|
|
267
|
-
previousReflection = decision.selfReflection;
|
|
268
|
-
// Store note to future self (NEW - tactical memory across iterations AND steps)
|
|
296
|
+
// Store note to future self (tactical memory across iterations AND steps)
|
|
269
297
|
if (decision.noteToFutureSelf) {
|
|
270
298
|
noteToSelf = {
|
|
271
299
|
fromIteration: iteration,
|
|
@@ -312,15 +340,46 @@ class OrchestratorAgent {
|
|
|
312
340
|
/**
|
|
313
341
|
* Build context for agent
|
|
314
342
|
*/
|
|
315
|
-
async buildAgentContext(page, currentStepGoal, stepNumber, totalSteps, scenarioSteps, memory,
|
|
343
|
+
async buildAgentContext(page, currentStepGoal, stepNumber, totalSteps, scenarioSteps, memory, consecutiveFailures, noteFromPreviousIteration, priorSteps, // NEW: For repair mode
|
|
344
|
+
nextSteps // NEW: For repair mode
|
|
316
345
|
) {
|
|
317
346
|
// Get fresh DOM
|
|
318
347
|
const currentPageInfo = await (0, page_info_utils_1.getEnhancedPageInfo)(page);
|
|
319
348
|
const currentURL = page.url();
|
|
320
349
|
// Get recent steps
|
|
321
350
|
const recentSteps = memory.history.slice(-this.config.recentStepsCount);
|
|
351
|
+
// SoM integration: Update markers and capture screenshot with visual IDs
|
|
352
|
+
let somScreenshot = undefined;
|
|
353
|
+
let somElementMap = undefined;
|
|
354
|
+
if (this.config.useSoM && this.somHandler) {
|
|
355
|
+
try {
|
|
356
|
+
if (!this.somHandler) {
|
|
357
|
+
this.somHandler = new page_som_handler_1.PageSoMHandler(page, this.logger);
|
|
358
|
+
}
|
|
359
|
+
else {
|
|
360
|
+
this.somHandler.setPage(page);
|
|
361
|
+
}
|
|
362
|
+
// Wait briefly for page stability (handles first iteration + safety net for fast SPAs)
|
|
363
|
+
try {
|
|
364
|
+
await page.waitForLoadState('domcontentloaded', { timeout: 5000 });
|
|
365
|
+
}
|
|
366
|
+
catch (error) {
|
|
367
|
+
// Page already loaded or timeout - continue
|
|
368
|
+
}
|
|
369
|
+
// Update SoM markers
|
|
370
|
+
await this.somHandler.updateSom();
|
|
371
|
+
// Get screenshot WITH markers (viewport only - agent can scroll or use take_screenshot for full page)
|
|
372
|
+
somScreenshot = await this.somHandler.getScreenshot(true, false, 60);
|
|
373
|
+
// Get element map for disambiguation
|
|
374
|
+
somElementMap = this.somHandler.getSomElementMap();
|
|
375
|
+
this.logger?.(`[Orchestrator] SoM screenshot captured for agent decision-making`, 'log');
|
|
376
|
+
}
|
|
377
|
+
catch (error) {
|
|
378
|
+
this.logger?.(`[Orchestrator] Failed to capture SoM screenshot: ${error.message}`, 'error');
|
|
379
|
+
}
|
|
380
|
+
}
|
|
322
381
|
// Build context
|
|
323
|
-
|
|
382
|
+
const context = {
|
|
324
383
|
overallGoal: scenarioSteps.join('\n'),
|
|
325
384
|
currentStepGoal,
|
|
326
385
|
stepNumber,
|
|
@@ -332,9 +391,17 @@ class OrchestratorAgent {
|
|
|
332
391
|
recentSteps,
|
|
333
392
|
experiences: memory.experiences,
|
|
334
393
|
extractedData: memory.extractedData,
|
|
335
|
-
|
|
336
|
-
|
|
394
|
+
noteFromPreviousIteration, // Pass tactical note from previous iteration
|
|
395
|
+
somScreenshot, // SoM screenshot with visual markers (current)
|
|
396
|
+
somElementMap, // SoM element details for disambiguation
|
|
397
|
+
priorSteps, // NEW: Repair context (undefined for script gen)
|
|
398
|
+
nextSteps // NEW: Repair context (undefined for script gen)
|
|
337
399
|
};
|
|
400
|
+
// Save current screenshot as previous for next iteration (for tool access)
|
|
401
|
+
if (somScreenshot) {
|
|
402
|
+
this.previousSomScreenshot = somScreenshot;
|
|
403
|
+
}
|
|
404
|
+
return context;
|
|
338
405
|
}
|
|
339
406
|
/**
|
|
340
407
|
* Call agent to make decision
|
|
@@ -346,10 +413,20 @@ class OrchestratorAgent {
|
|
|
346
413
|
const useCoordinateMode = consecutiveFailures !== undefined && consecutiveFailures >= 3;
|
|
347
414
|
// Build appropriate system prompt based on mode
|
|
348
415
|
const toolDescriptions = this.toolRegistry.generateToolDescriptions();
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
:
|
|
352
|
-
|
|
416
|
+
let systemPrompt;
|
|
417
|
+
if (this.config.useSoM) {
|
|
418
|
+
// SoM mode: Use visual element identification
|
|
419
|
+
systemPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates);
|
|
420
|
+
}
|
|
421
|
+
else if (useCoordinateMode) {
|
|
422
|
+
// Coordinate mode: Fallback when selectors fail
|
|
423
|
+
systemPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildCoordinateSystemPrompt();
|
|
424
|
+
}
|
|
425
|
+
else {
|
|
426
|
+
// Standard mode: DOM-based selectors
|
|
427
|
+
systemPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildSystemPrompt(toolDescriptions, this.config.enableCoordinateMode);
|
|
428
|
+
}
|
|
429
|
+
const userPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures, this.config.enableCoordinateMode);
|
|
353
430
|
// Log prompt lengths for monitoring
|
|
354
431
|
const systemLength = systemPrompt.length;
|
|
355
432
|
const userLength = userPrompt.length;
|
|
@@ -363,6 +440,11 @@ class OrchestratorAgent {
|
|
|
363
440
|
systemPrompt,
|
|
364
441
|
userPrompt
|
|
365
442
|
};
|
|
443
|
+
// Include current SoM screenshot as image
|
|
444
|
+
if (context.somScreenshot) {
|
|
445
|
+
llmRequest.imageUrl = context.somScreenshot;
|
|
446
|
+
this.logger?.(`[Orchestrator] Including SoM screenshot in LLM request`, 'log');
|
|
447
|
+
}
|
|
366
448
|
const response = await this.llmFacade.llmProvider.callLLM(llmRequest);
|
|
367
449
|
// Report token usage
|
|
368
450
|
if (response.usage && this.progressReporter?.onTokensUsed) {
|
|
@@ -383,7 +465,7 @@ class OrchestratorAgent {
|
|
|
383
465
|
this.logger?.(`[Orchestrator] ⚠ No usage data in LLM response`, 'warn');
|
|
384
466
|
}
|
|
385
467
|
// Parse response
|
|
386
|
-
return this.
|
|
468
|
+
return this.decisionParser.parse(response.answer);
|
|
387
469
|
}
|
|
388
470
|
catch (error) {
|
|
389
471
|
this.logger?.(`[Orchestrator] ✗ Agent call failed: ${error.message}`, 'error');
|
|
@@ -395,59 +477,20 @@ class OrchestratorAgent {
|
|
|
395
477
|
};
|
|
396
478
|
}
|
|
397
479
|
}
|
|
398
|
-
/**
|
|
399
|
-
* Parse agent decision from LLM response
|
|
400
|
-
*/
|
|
401
|
-
parseAgentDecision(response) {
|
|
402
|
-
try {
|
|
403
|
-
// Extract JSON from response
|
|
404
|
-
const jsonMatch = response.match(/\{[\s\S]*\}/);
|
|
405
|
-
if (!jsonMatch) {
|
|
406
|
-
this.logger?.(`[Orchestrator] ✗ No JSON found in LLM response`, 'error');
|
|
407
|
-
this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
|
|
408
|
-
throw new Error('No JSON found in response');
|
|
409
|
-
}
|
|
410
|
-
const parsed = JSON.parse(jsonMatch[0]);
|
|
411
|
-
// Validate required fields
|
|
412
|
-
// Accept either "reasoning" or "statusReasoning" (LLMs sometimes only provide one)
|
|
413
|
-
if (!parsed.status || (!parsed.reasoning && !parsed.statusReasoning)) {
|
|
414
|
-
this.logger?.(`[Orchestrator] ✗ Missing required fields in parsed JSON`, 'error');
|
|
415
|
-
this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
|
|
416
|
-
this.logger?.(`[Orchestrator] 📄 PARSED JSON:\n${JSON.stringify(parsed, null, 2)}`, 'error');
|
|
417
|
-
this.logger?.(`[Orchestrator] ❌ Has status: ${!!parsed.status}, Has reasoning: ${!!parsed.reasoning}, Has statusReasoning: ${!!parsed.statusReasoning}`, 'error');
|
|
418
|
-
throw new Error('Missing required fields: status and (reasoning or statusReasoning)');
|
|
419
|
-
}
|
|
420
|
-
// Normalize: if reasoning is missing but statusReasoning exists, use statusReasoning as reasoning
|
|
421
|
-
if (!parsed.reasoning && parsed.statusReasoning) {
|
|
422
|
-
parsed.reasoning = parsed.statusReasoning;
|
|
423
|
-
}
|
|
424
|
-
return parsed;
|
|
425
|
-
}
|
|
426
|
-
catch (error) {
|
|
427
|
-
this.logger?.(`[Orchestrator] ✗ Failed to parse agent decision: ${error.message}`, 'error');
|
|
428
|
-
// Only log full response if not already logged above
|
|
429
|
-
if (!error.message.includes('Missing required fields') && !error.message.includes('No JSON found')) {
|
|
430
|
-
this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
|
|
431
|
-
}
|
|
432
|
-
// Return fallback
|
|
433
|
-
return {
|
|
434
|
-
status: 'stuck',
|
|
435
|
-
statusReasoning: 'Failed to parse agent response',
|
|
436
|
-
reasoning: `Parse error: ${error.message}`
|
|
437
|
-
};
|
|
438
|
-
}
|
|
439
|
-
}
|
|
440
480
|
/**
|
|
441
481
|
* Execute tools
|
|
442
482
|
*/
|
|
443
|
-
async executeTools(toolCalls, page, memory, stepNumber) {
|
|
483
|
+
async executeTools(toolCalls, page, memory, stepNumber, refMap) {
|
|
444
484
|
this.logger?.(`[Orchestrator] 🔧 Executing ${toolCalls.length} tool(s)`);
|
|
445
485
|
const results = {};
|
|
446
486
|
const toolContext = {
|
|
447
487
|
page,
|
|
448
488
|
memory,
|
|
449
489
|
stepNumber,
|
|
450
|
-
logger: this.logger
|
|
490
|
+
logger: this.logger,
|
|
491
|
+
refMap, // Pass refMap for interact_with_ref tool
|
|
492
|
+
previousSomScreenshot: this.previousSomScreenshot, // For view_previous_screenshot tool
|
|
493
|
+
somHandler: this.somHandler // For refresh_som_markers tool
|
|
451
494
|
};
|
|
452
495
|
for (const toolCall of toolCalls.slice(0, this.config.maxToolCallsPerIteration)) {
|
|
453
496
|
this.logger?.(`[Orchestrator] ▶ ${toolCall.name}(${JSON.stringify(toolCall.params).substring(0, 50)}...)`);
|
|
@@ -463,45 +506,214 @@ class OrchestratorAgent {
|
|
|
463
506
|
return results;
|
|
464
507
|
}
|
|
465
508
|
/**
|
|
466
|
-
*
|
|
509
|
+
* Parse SomCommand from command object
|
|
467
510
|
*/
|
|
468
|
-
|
|
469
|
-
|
|
511
|
+
parseSomCommand(cmd) {
|
|
512
|
+
if (typeof cmd === 'object' && cmd.action) {
|
|
513
|
+
// Valid if: has elementRef, OR has coord, OR is navigation action
|
|
514
|
+
const isNavigationAction = ['navigate', 'goBack', 'goForward', 'reload'].includes(cmd.action);
|
|
515
|
+
const hasValidTarget = cmd.elementRef || cmd.coord || isNavigationAction;
|
|
516
|
+
if (hasValidTarget) {
|
|
517
|
+
return {
|
|
518
|
+
elementRef: cmd.elementRef,
|
|
519
|
+
coord: cmd.coord,
|
|
520
|
+
action: cmd.action,
|
|
521
|
+
value: cmd.value,
|
|
522
|
+
fromCoord: cmd.fromCoord,
|
|
523
|
+
toCoord: cmd.toCoord,
|
|
524
|
+
force: cmd.force,
|
|
525
|
+
scrollAmount: cmd.scrollAmount,
|
|
526
|
+
scrollDirection: cmd.scrollDirection,
|
|
527
|
+
button: cmd.button,
|
|
528
|
+
clickCount: cmd.clickCount,
|
|
529
|
+
modifiers: cmd.modifiers,
|
|
530
|
+
delay: cmd.delay,
|
|
531
|
+
timeout: cmd.timeout
|
|
532
|
+
};
|
|
533
|
+
}
|
|
534
|
+
}
|
|
535
|
+
return null;
|
|
536
|
+
}
|
|
537
|
+
/**
|
|
538
|
+
* Execute commands (mix of ref and playwright commands)
|
|
539
|
+
*/
|
|
540
|
+
async executeCommands(commands, page, memory, stepNumber, iteration, jobId) {
|
|
541
|
+
this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s)`);
|
|
470
542
|
const executed = [];
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
543
|
+
if (commands.length === 0) {
|
|
544
|
+
return { executed: [], allSucceeded: true };
|
|
545
|
+
}
|
|
546
|
+
// SoM mode: Execute commands through PageSoMHandler
|
|
547
|
+
if (this.config.useSoM && this.somHandler) {
|
|
548
|
+
this.logger?.(`[Orchestrator] Using SoM mode for command execution`, 'log');
|
|
549
|
+
for (let i = 0; i < commands.length; i++) {
|
|
550
|
+
const cmd = commands[i];
|
|
551
|
+
// Check if verification or action command
|
|
552
|
+
if ((0, som_types_1.isSomVerification)(cmd)) {
|
|
553
|
+
// Handle verification command
|
|
554
|
+
try {
|
|
555
|
+
const result = await this.somHandler.executeVerification(cmd);
|
|
556
|
+
// Always add command to executed array (even if verification failed)
|
|
557
|
+
// Scripts should contain the expect even if it fails during generation
|
|
558
|
+
if (result.playwrightCommand) {
|
|
559
|
+
executed.push(result.playwrightCommand);
|
|
560
|
+
}
|
|
561
|
+
if (result.success) {
|
|
562
|
+
this.logger?.(`[Orchestrator] ✓ [${i + 1}/${commands.length}] Verification passed`, 'log');
|
|
563
|
+
memory.history.push({
|
|
564
|
+
stepNumber,
|
|
565
|
+
iteration,
|
|
566
|
+
action: `Verification ${i + 1}/${commands.length}: ${cmd.verificationType}`,
|
|
567
|
+
code: result.playwrightCommand,
|
|
568
|
+
result: 'success',
|
|
569
|
+
observation: `Verified: ${cmd.description || cmd.expected}`,
|
|
570
|
+
url: page.url(),
|
|
571
|
+
timestamp: Date.now()
|
|
572
|
+
});
|
|
573
|
+
}
|
|
574
|
+
else {
|
|
575
|
+
this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] Verification failed (non-fatal): ${result.error}`, 'warn');
|
|
576
|
+
memory.history.push({
|
|
577
|
+
stepNumber,
|
|
578
|
+
iteration,
|
|
579
|
+
action: `Verification ${i + 1}/${commands.length} - FAILED`,
|
|
580
|
+
code: result.playwrightCommand || JSON.stringify(cmd),
|
|
581
|
+
result: 'failure',
|
|
582
|
+
observation: `Failed: ${result.error}`,
|
|
583
|
+
error: result.error,
|
|
584
|
+
url: page.url(),
|
|
585
|
+
timestamp: Date.now()
|
|
586
|
+
});
|
|
587
|
+
// Continue anyway - verification failures are non-blocking for script generation
|
|
588
|
+
}
|
|
589
|
+
// Small delay between commands
|
|
590
|
+
if (i < commands.length - 1) {
|
|
591
|
+
await page.waitForTimeout(300);
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
catch (error) {
|
|
595
|
+
this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] Verification exception: ${error.message}`, 'error');
|
|
596
|
+
}
|
|
597
|
+
}
|
|
598
|
+
else if ((0, som_types_1.isSomCommand)(cmd)) {
|
|
599
|
+
// Handle action command (existing logic)
|
|
600
|
+
const somCommand = cmd;
|
|
601
|
+
try {
|
|
602
|
+
const result = await this.somHandler.runCommand(somCommand, this.config.somUseSomIdBasedCommands || false);
|
|
603
|
+
if (result.status === som_types_1.CommandRunStatus.SUCCESS && result.successAttempt) {
|
|
604
|
+
this.logger?.(`[Orchestrator] ✓ [${i + 1}/${commands.length}] SoM action succeeded`, 'log');
|
|
605
|
+
executed.push(result.successAttempt.command);
|
|
606
|
+
memory.history.push({
|
|
607
|
+
stepNumber,
|
|
608
|
+
iteration,
|
|
609
|
+
action: `SoM Action ${i + 1}/${commands.length}: ${somCommand.action}`,
|
|
610
|
+
code: result.successAttempt.command,
|
|
611
|
+
result: 'success',
|
|
612
|
+
observation: 'Executed successfully',
|
|
613
|
+
url: page.url(),
|
|
614
|
+
timestamp: Date.now()
|
|
615
|
+
});
|
|
616
|
+
// Small delay for form validation/animations
|
|
617
|
+
if (i < commands.length - 1) {
|
|
618
|
+
await page.waitForTimeout(300);
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
else {
|
|
622
|
+
this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] SoM action failed: ${result.error}`, 'error');
|
|
623
|
+
memory.history.push({
|
|
624
|
+
stepNumber,
|
|
625
|
+
iteration,
|
|
626
|
+
action: `SoM Action ${i + 1}/${commands.length}: ${somCommand.action} - FAILED`,
|
|
627
|
+
code: JSON.stringify(somCommand),
|
|
628
|
+
result: 'failure',
|
|
629
|
+
observation: `Failed: ${result.error}`,
|
|
630
|
+
error: result.error,
|
|
631
|
+
url: page.url(),
|
|
632
|
+
timestamp: Date.now()
|
|
633
|
+
});
|
|
634
|
+
// Refresh SoM after batch (DOM may have changed)
|
|
635
|
+
if (this.somHandler && page) {
|
|
636
|
+
this.somHandler.setPage(page);
|
|
637
|
+
await this.somHandler.updateSom();
|
|
638
|
+
}
|
|
639
|
+
return { executed, allSucceeded: false };
|
|
640
|
+
}
|
|
641
|
+
}
|
|
642
|
+
catch (error) {
|
|
643
|
+
this.logger?.(`[Orchestrator] ✗ [${i + 1}/${commands.length}] SoM action exception: ${error.message}`, 'error');
|
|
644
|
+
memory.history.push({
|
|
645
|
+
stepNumber,
|
|
646
|
+
iteration,
|
|
647
|
+
action: `SoM Action ${i + 1}/${commands.length} - EXCEPTION`,
|
|
648
|
+
code: JSON.stringify(somCommand),
|
|
649
|
+
result: 'failure',
|
|
650
|
+
observation: `Exception: ${error.message}`,
|
|
651
|
+
error: error.message,
|
|
652
|
+
url: page.url(),
|
|
653
|
+
timestamp: Date.now()
|
|
654
|
+
});
|
|
655
|
+
// Refresh SoM after batch (DOM may have changed)
|
|
656
|
+
if (this.somHandler && page) {
|
|
657
|
+
this.somHandler.setPage(page);
|
|
658
|
+
await this.somHandler.updateSom();
|
|
659
|
+
}
|
|
660
|
+
return { executed, allSucceeded: false };
|
|
661
|
+
}
|
|
662
|
+
}
|
|
663
|
+
else {
|
|
664
|
+
this.logger?.(`[Orchestrator] ⚠ [${i + 1}/${commands.length}] Not a valid SoM command/verification, skipping`, 'warn');
|
|
665
|
+
}
|
|
666
|
+
}
|
|
667
|
+
// Always wait for page to stabilize after command batch
|
|
668
|
+
// This handles both explicit navigation AND clicks that trigger navigation/SPA routes
|
|
669
|
+
try {
|
|
670
|
+
this.logger?.(`[Orchestrator] Waiting for page to stabilize...`, 'log');
|
|
671
|
+
// Use networkidle with short timeout to catch navigation without blocking on SPAs with continuous requests
|
|
672
|
+
await page.waitForLoadState('networkidle', { timeout: 3000 });
|
|
673
|
+
this.logger?.(`[Orchestrator] Page stabilized (networkidle)`, 'log');
|
|
674
|
+
}
|
|
675
|
+
catch (error) {
|
|
676
|
+
// If networkidle times out, fall back to domcontentloaded
|
|
677
|
+
try {
|
|
678
|
+
await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
|
|
679
|
+
this.logger?.(`[Orchestrator] Page loaded (domcontentloaded)`, 'log');
|
|
680
|
+
}
|
|
681
|
+
catch (error2) {
|
|
682
|
+
this.logger?.(`[Orchestrator] Page load wait timeout (continuing anyway)`, 'warn');
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
// Refresh SoM after batch (DOM may have changed and page is now stable)
|
|
686
|
+
if (this.somHandler && page) {
|
|
687
|
+
this.somHandler.setPage(page);
|
|
688
|
+
await this.somHandler.updateSom();
|
|
689
|
+
}
|
|
690
|
+
return { executed, allSucceeded: true };
|
|
691
|
+
}
|
|
692
|
+
// Standard mode: Execute all commands in sequence with small delay between them
|
|
693
|
+
// Delay helps with form validation, button enabling, and animations
|
|
694
|
+
const wrappedCode = commands.map((cmd, i) => `
|
|
695
|
+
// Command ${i + 1}/${commands.length}
|
|
476
696
|
try {
|
|
477
697
|
${cmd}
|
|
478
698
|
__results.push({ index: ${i}, success: true });
|
|
699
|
+
${i < commands.length - 1 ? 'await page.waitForTimeout(300);' : ''} // Small delay for form validation/animations
|
|
479
700
|
} catch (error) {
|
|
480
701
|
__results.push({ index: ${i}, success: false, error: error.message });
|
|
481
|
-
throw error;
|
|
482
|
-
}
|
|
483
|
-
}
|
|
484
|
-
const wrappedCode = `
|
|
485
|
-
const __results = [];
|
|
486
|
-
${commandsWithTracking}
|
|
487
|
-
return __results;
|
|
488
|
-
`;
|
|
702
|
+
throw error;
|
|
703
|
+
}`).join('\n');
|
|
704
|
+
const fullCode = `const __results = []; ${wrappedCode} return __results;`;
|
|
489
705
|
try {
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
// Record results for each command
|
|
495
|
-
for (let i = 0; i < limitedCommands.length; i++) {
|
|
496
|
-
const cmd = limitedCommands[i];
|
|
706
|
+
const func = new Function('page', 'expect', 'return (async () => { ' + fullCode + ' })()');
|
|
707
|
+
const results = await func(page, global.expect);
|
|
708
|
+
for (let i = 0; i < commands.length; i++) {
|
|
709
|
+
const cmd = commands[i];
|
|
497
710
|
const result = results[i];
|
|
498
711
|
if (result && result.success) {
|
|
499
|
-
this.logger?.(`[Orchestrator] ✓ [${i + 1}/${
|
|
500
|
-
// Record in history
|
|
712
|
+
this.logger?.(`[Orchestrator] ✓ [${i + 1}/${commands.length}] Success`);
|
|
501
713
|
memory.history.push({
|
|
502
714
|
stepNumber,
|
|
503
715
|
iteration,
|
|
504
|
-
action: `Command ${i + 1}/${
|
|
716
|
+
action: `Command ${i + 1}/${commands.length}`,
|
|
505
717
|
code: cmd,
|
|
506
718
|
result: 'success',
|
|
507
719
|
observation: 'Executed successfully',
|
|
@@ -511,122 +723,27 @@ return __results;
|
|
|
511
723
|
executed.push(cmd);
|
|
512
724
|
}
|
|
513
725
|
}
|
|
514
|
-
// Cap history
|
|
515
726
|
if (memory.history.length > this.config.maxHistorySize) {
|
|
516
727
|
memory.history = memory.history.slice(-this.config.maxHistorySize);
|
|
517
728
|
}
|
|
518
729
|
return { executed, allSucceeded: true };
|
|
519
730
|
}
|
|
520
731
|
catch (error) {
|
|
521
|
-
// One of the commands failed - find which one
|
|
522
732
|
const errorMessage = error.message || String(error);
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
${pageInfo.formattedElements}
|
|
535
|
-
|
|
536
|
-
ARIA SNAPSHOT:
|
|
537
|
-
${JSON.stringify(pageInfo.ariaSnapshot, null, 2)}
|
|
538
|
-
|
|
539
|
-
====================================`;
|
|
540
|
-
}
|
|
541
|
-
catch (debugError) {
|
|
542
|
-
pageStateDebug = `Failed to capture page state: ${debugError.message}`;
|
|
543
|
-
}
|
|
544
|
-
}
|
|
545
|
-
// Record all that succeeded, then the failure
|
|
546
|
-
for (let i = 0; i < limitedCommands.length; i++) {
|
|
547
|
-
const cmd = limitedCommands[i];
|
|
548
|
-
// This is a failed command (error happened here or earlier)
|
|
549
|
-
if (executed.length <= i) {
|
|
550
|
-
this.logger?.(`[Orchestrator] ✗ [${i + 1}/${limitedCommands.length}] Failed: ${errorMessage}`, 'error');
|
|
551
|
-
// Log detailed debug info
|
|
552
|
-
if (this.debugMode && pageStateDebug) {
|
|
553
|
-
this.logger?.(pageStateDebug, 'debug');
|
|
554
|
-
}
|
|
555
|
-
memory.history.push({
|
|
556
|
-
stepNumber,
|
|
557
|
-
iteration,
|
|
558
|
-
action: `Command ${i + 1}/${limitedCommands.length} - FAILED`,
|
|
559
|
-
code: cmd,
|
|
560
|
-
result: 'failure',
|
|
561
|
-
observation: `Failed with error: ${errorMessage}. This selector likely doesn't exist or is incorrect.`,
|
|
562
|
-
error: errorMessage,
|
|
563
|
-
url: page.url(),
|
|
564
|
-
timestamp: Date.now()
|
|
565
|
-
});
|
|
566
|
-
if (i < limitedCommands.length - 1) {
|
|
567
|
-
this.logger?.(`[Orchestrator] ⚠ Skipping remaining ${limitedCommands.length - i - 1} command(s)`, 'warn');
|
|
568
|
-
}
|
|
569
|
-
break;
|
|
570
|
-
}
|
|
571
|
-
}
|
|
572
|
-
return { executed, allSucceeded: false };
|
|
573
|
-
}
|
|
574
|
-
}
|
|
575
|
-
/**
|
|
576
|
-
* Execute a single command
|
|
577
|
-
*/
|
|
578
|
-
async executeCommand(cmd, page) {
|
|
579
|
-
// Wrap in async function and execute
|
|
580
|
-
const wrapped = `(async () => { ${cmd} })()`;
|
|
581
|
-
try {
|
|
582
|
-
await eval(wrapped);
|
|
583
|
-
}
|
|
584
|
-
catch (error) {
|
|
585
|
-
// If eval fails, try direct execution with page context
|
|
586
|
-
// Pass both page and expect to make Playwright assertions available
|
|
587
|
-
const func = new Function('page', 'expect', `return (async () => { ${cmd} })()`);
|
|
588
|
-
await func(page, test_1.expect);
|
|
589
|
-
}
|
|
590
|
-
}
|
|
591
|
-
/**
|
|
592
|
-
* Log agent decision
|
|
593
|
-
*/
|
|
594
|
-
logAgentDecision(decision, iteration) {
|
|
595
|
-
this.logger?.(`[Orchestrator] 💭 REASONING: ${decision.reasoning}`);
|
|
596
|
-
if (decision.selfReflection) {
|
|
597
|
-
this.logger?.(`[Orchestrator] 🧠 SELF-REFLECTION:`);
|
|
598
|
-
this.logger?.(`[Orchestrator] Next: ${decision.selfReflection.guidanceForNext}`);
|
|
599
|
-
if (decision.selfReflection.detectingLoop) {
|
|
600
|
-
this.logger?.(`[Orchestrator] 🔄 LOOP DETECTED: ${decision.selfReflection.loopReasoning}`, 'warn');
|
|
601
|
-
}
|
|
602
|
-
}
|
|
603
|
-
if (decision.toolCalls && decision.toolCalls.length > 0) {
|
|
604
|
-
this.logger?.(`[Orchestrator] 🔧 TOOLS: ${decision.toolCalls.map(t => t.name).join(', ')}`);
|
|
605
|
-
if (decision.toolReasoning) {
|
|
606
|
-
this.logger?.(`[Orchestrator] 📋 Why: ${decision.toolReasoning}`);
|
|
607
|
-
}
|
|
608
|
-
}
|
|
609
|
-
if (decision.blockerDetected) {
|
|
610
|
-
this.logger?.(`[Orchestrator] 🚧 BLOCKER: ${decision.blockerDetected.description}`, 'warn');
|
|
611
|
-
this.logger?.(`[Orchestrator] 🧹 Clearing with ${decision.blockerDetected.clearingCommands.length} command(s)`);
|
|
612
|
-
}
|
|
613
|
-
if (decision.stepReEvaluation?.detected) {
|
|
614
|
-
this.logger?.(`[Orchestrator] 🔍 STEP RE-EVALUATION: ${decision.stepReEvaluation.issue}`, 'warn');
|
|
615
|
-
this.logger?.(`[Orchestrator] 📝 Explanation: ${decision.stepReEvaluation.explanation}`);
|
|
616
|
-
}
|
|
617
|
-
if (decision.commands && decision.commands.length > 0) {
|
|
618
|
-
this.logger?.(`[Orchestrator] 📝 COMMANDS (${decision.commands.length}):`);
|
|
619
|
-
decision.commands.slice(0, 3).forEach((cmd, i) => {
|
|
620
|
-
this.logger?.(`[Orchestrator] ${i + 1}. ${cmd.substring(0, 80)}...`);
|
|
733
|
+
this.logger?.(`[Orchestrator] ❌ Command execution failed: ${errorMessage}`, 'error');
|
|
734
|
+
memory.history.push({
|
|
735
|
+
stepNumber,
|
|
736
|
+
iteration,
|
|
737
|
+
action: `Command - FAILED`,
|
|
738
|
+
code: commands[executed.length] || '',
|
|
739
|
+
result: 'failure',
|
|
740
|
+
observation: `Failed: ${errorMessage}`,
|
|
741
|
+
error: errorMessage,
|
|
742
|
+
url: page.url(),
|
|
743
|
+
timestamp: Date.now()
|
|
621
744
|
});
|
|
622
|
-
|
|
623
|
-
this.logger?.(`[Orchestrator] ... and ${decision.commands.length - 3} more`);
|
|
624
|
-
}
|
|
625
|
-
if (decision.commandReasoning) {
|
|
626
|
-
this.logger?.(`[Orchestrator] 💡 Why: ${decision.commandReasoning}`);
|
|
627
|
-
}
|
|
745
|
+
return { executed, allSucceeded: false };
|
|
628
746
|
}
|
|
629
|
-
// Experiences will be logged when added to memory, no need to log here
|
|
630
747
|
}
|
|
631
748
|
/**
|
|
632
749
|
* Report step progress
|
|
@@ -651,6 +768,219 @@ ${JSON.stringify(pageInfo.ariaSnapshot, null, 2)}
|
|
|
651
768
|
agentStatus: decision.status
|
|
652
769
|
});
|
|
653
770
|
}
|
|
771
|
+
/**
|
|
772
|
+
* Execute exploration mode - agent autonomously explores to achieve journey goal
|
|
773
|
+
* Fires onStepProgress callbacks for each autonomous action (transparent to caller)
|
|
774
|
+
*/
|
|
775
|
+
async executeExploration(page, explorationConfig, jobId) {
|
|
776
|
+
this.logger?.(`\n[Orchestrator] ========== EXPLORATION MODE ==========`);
|
|
777
|
+
this.logger?.(`[Orchestrator] 🎯 Journey Goal: ${explorationConfig.explorationPrompt}`);
|
|
778
|
+
if (explorationConfig.testDataPrompt) {
|
|
779
|
+
this.logger?.(`[Orchestrator] 📋 Test Data: ${explorationConfig.testDataPrompt}`);
|
|
780
|
+
}
|
|
781
|
+
const memory = {
|
|
782
|
+
history: [],
|
|
783
|
+
experiences: [],
|
|
784
|
+
extractedData: {}
|
|
785
|
+
};
|
|
786
|
+
const maxSteps = explorationConfig.maxExplorationSteps || 50;
|
|
787
|
+
let stepNumber = 0;
|
|
788
|
+
const commandsExecuted = [];
|
|
789
|
+
while (stepNumber < maxSteps) {
|
|
790
|
+
stepNumber++;
|
|
791
|
+
this.logger?.(`\n[Orchestrator] === Exploration Step ${stepNumber}/${maxSteps} ===`);
|
|
792
|
+
// Build exploratory context
|
|
793
|
+
const context = await this.buildExploratoryContext(page, explorationConfig.explorationPrompt, explorationConfig.testDataPrompt, memory, stepNumber, maxSteps);
|
|
794
|
+
// Call agent with exploratory prompt
|
|
795
|
+
const decision = await this.callExploratoryAgent(context, jobId, stepNumber);
|
|
796
|
+
this.decisionParser.log(decision, stepNumber);
|
|
797
|
+
// Report step start (fires JourneyRunner's beforeStepStart callback)
|
|
798
|
+
if (this.progressReporter?.onStepProgress) {
|
|
799
|
+
const stepInfo = {
|
|
800
|
+
jobId,
|
|
801
|
+
stepNumber,
|
|
802
|
+
stepId: `exploration-${stepNumber}-${Date.now()}`,
|
|
803
|
+
description: decision.reasoning,
|
|
804
|
+
code: '', // Will be filled after commands execute
|
|
805
|
+
status: progress_reporter_1.StepExecutionStatus.IN_PROGRESS,
|
|
806
|
+
wasRepaired: false
|
|
807
|
+
};
|
|
808
|
+
await this.progressReporter.onStepProgress(stepInfo);
|
|
809
|
+
}
|
|
810
|
+
// Execute tools if requested
|
|
811
|
+
if (decision.toolCalls && decision.toolCalls.length > 0) {
|
|
812
|
+
const toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber);
|
|
813
|
+
// If needs tool results, call agent again
|
|
814
|
+
if (decision.needsToolResults) {
|
|
815
|
+
const updatedContext = { ...context, toolResults };
|
|
816
|
+
const continuedDecision = await this.callExploratoryAgent(updatedContext, jobId, stepNumber);
|
|
817
|
+
decision.commands = continuedDecision.commands || decision.commands;
|
|
818
|
+
decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
|
|
819
|
+
decision.status = continuedDecision.status;
|
|
820
|
+
}
|
|
821
|
+
}
|
|
822
|
+
// Handle blocker clearing
|
|
823
|
+
if (decision.blockerDetected && decision.blockerDetected.clearingCommands) {
|
|
824
|
+
this.logger?.(`[Orchestrator] 🚧 Clearing blocker: ${decision.blockerDetected.description}`);
|
|
825
|
+
const blockerResult = await this.executeCommands(decision.blockerDetected.clearingCommands, page, memory, stepNumber, 1, jobId);
|
|
826
|
+
commandsExecuted.push(...blockerResult.executed);
|
|
827
|
+
}
|
|
828
|
+
// Execute exploration commands
|
|
829
|
+
let commandsSucceeded = true;
|
|
830
|
+
if (decision.commands && decision.commands.length > 0) {
|
|
831
|
+
const executeResult = await this.executeCommands(decision.commands, page, memory, stepNumber, 1, jobId);
|
|
832
|
+
commandsExecuted.push(...executeResult.executed);
|
|
833
|
+
commandsSucceeded = executeResult.allSucceeded;
|
|
834
|
+
}
|
|
835
|
+
// Report step completion (fires JourneyRunner's onStepComplete callback)
|
|
836
|
+
if (this.progressReporter?.onStepProgress) {
|
|
837
|
+
const stepInfo = {
|
|
838
|
+
jobId,
|
|
839
|
+
stepNumber,
|
|
840
|
+
stepId: `exploration-${stepNumber}-${Date.now()}`,
|
|
841
|
+
description: decision.reasoning,
|
|
842
|
+
code: decision.commands?.join('\n') || '',
|
|
843
|
+
status: commandsSucceeded ? progress_reporter_1.StepExecutionStatus.SUCCESS : progress_reporter_1.StepExecutionStatus.FAILURE,
|
|
844
|
+
error: commandsSucceeded ? undefined : 'Command execution failed',
|
|
845
|
+
wasRepaired: false
|
|
846
|
+
};
|
|
847
|
+
await this.progressReporter.onStepProgress(stepInfo);
|
|
848
|
+
}
|
|
849
|
+
// Add experiences (both app patterns AND exploration progress)
|
|
850
|
+
if (decision.experiences) {
|
|
851
|
+
memory.experiences.push(...decision.experiences);
|
|
852
|
+
if (memory.experiences.length > this.config.maxExperiences) {
|
|
853
|
+
memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
|
|
854
|
+
}
|
|
855
|
+
}
|
|
856
|
+
// Store note for next iteration
|
|
857
|
+
if (decision.noteToFutureSelf) {
|
|
858
|
+
memory.latestNote = {
|
|
859
|
+
fromIteration: stepNumber,
|
|
860
|
+
content: decision.noteToFutureSelf
|
|
861
|
+
};
|
|
862
|
+
}
|
|
863
|
+
// Check termination
|
|
864
|
+
if (decision.status === 'complete') {
|
|
865
|
+
this.logger?.(`[Orchestrator] ✅ Journey exploration complete: ${decision.statusReasoning}`);
|
|
866
|
+
return {
|
|
867
|
+
success: true,
|
|
868
|
+
commands: commandsExecuted,
|
|
869
|
+
iterations: stepNumber,
|
|
870
|
+
terminationReason: 'complete',
|
|
871
|
+
memory
|
|
872
|
+
};
|
|
873
|
+
}
|
|
874
|
+
else if (decision.status === 'stuck') {
|
|
875
|
+
this.logger?.(`[Orchestrator] ❌ Exploration stuck: ${decision.statusReasoning}`);
|
|
876
|
+
return {
|
|
877
|
+
success: false,
|
|
878
|
+
commands: commandsExecuted,
|
|
879
|
+
iterations: stepNumber,
|
|
880
|
+
terminationReason: 'agent_stuck',
|
|
881
|
+
memory,
|
|
882
|
+
error: decision.statusReasoning
|
|
883
|
+
};
|
|
884
|
+
}
|
|
885
|
+
}
|
|
886
|
+
// Hit max steps - not necessarily a failure
|
|
887
|
+
this.logger?.(`[Orchestrator] ⚠ Maximum exploration steps reached (budget limit)`);
|
|
888
|
+
return {
|
|
889
|
+
success: true, // Not a failure - just budget limit
|
|
890
|
+
commands: commandsExecuted,
|
|
891
|
+
iterations: stepNumber,
|
|
892
|
+
terminationReason: 'system_limit',
|
|
893
|
+
memory
|
|
894
|
+
};
|
|
895
|
+
}
|
|
896
|
+
async buildExploratoryContext(page, explorationPrompt, testDataPrompt, memory, stepNumber, maxSteps) {
|
|
897
|
+
// Wait for page to be ready and elements to appear (especially important after navigation)
|
|
898
|
+
const currentPageInfo = await page_info_retry_1.PageInfoRetry.getWithRetry(page);
|
|
899
|
+
const currentURL = page.url();
|
|
900
|
+
const recentSteps = memory.history.slice(-this.config.recentStepsCount);
|
|
901
|
+
// SoM integration for exploratory mode
|
|
902
|
+
let somScreenshot = undefined;
|
|
903
|
+
let somElementMap = undefined;
|
|
904
|
+
if (this.config.useSoM && this.somHandler) {
|
|
905
|
+
try {
|
|
906
|
+
this.somHandler.setPage(page);
|
|
907
|
+
// Wait briefly for page stability (handles first iteration + safety net for fast SPAs)
|
|
908
|
+
try {
|
|
909
|
+
await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
|
|
910
|
+
}
|
|
911
|
+
catch (error) {
|
|
912
|
+
// Page already loaded or timeout - continue
|
|
913
|
+
}
|
|
914
|
+
// Update SoM markers
|
|
915
|
+
await this.somHandler.updateSom();
|
|
916
|
+
somScreenshot = await this.somHandler.getScreenshot(true, false, 60); // Viewport only - agent can scroll or request full page
|
|
917
|
+
// Get element map for disambiguation
|
|
918
|
+
somElementMap = this.somHandler.getSomElementMap();
|
|
919
|
+
this.logger?.(`[Orchestrator] SoM screenshot captured for exploratory agent`, 'log');
|
|
920
|
+
}
|
|
921
|
+
catch (error) {
|
|
922
|
+
this.logger?.(`[Orchestrator] Failed to capture SoM screenshot: ${error.message}`, 'error');
|
|
923
|
+
}
|
|
924
|
+
}
|
|
925
|
+
const context = {
|
|
926
|
+
overallGoal: explorationPrompt,
|
|
927
|
+
currentStepGoal: explorationPrompt, // Same as overall for single journey
|
|
928
|
+
stepNumber,
|
|
929
|
+
totalSteps: maxSteps,
|
|
930
|
+
completedSteps: [],
|
|
931
|
+
remainingSteps: [],
|
|
932
|
+
currentPageInfo,
|
|
933
|
+
currentURL,
|
|
934
|
+
recentSteps,
|
|
935
|
+
experiences: memory.experiences,
|
|
936
|
+
extractedData: memory.extractedData,
|
|
937
|
+
noteFromPreviousIteration: memory.latestNote,
|
|
938
|
+
testDataPrompt, // CRITICAL: Store testDataPrompt in context
|
|
939
|
+
somScreenshot, // SoM screenshot for exploratory mode (current)
|
|
940
|
+
somElementMap // SoM element details for disambiguation
|
|
941
|
+
};
|
|
942
|
+
// Save current screenshot as previous for next iteration (for tool access)
|
|
943
|
+
if (somScreenshot) {
|
|
944
|
+
this.previousSomScreenshot = somScreenshot;
|
|
945
|
+
}
|
|
946
|
+
return context;
|
|
947
|
+
}
|
|
948
|
+
async callExploratoryAgent(context, jobId, stepNumber) {
|
|
949
|
+
const toolDescriptions = this.toolRegistry.generateToolDescriptions();
|
|
950
|
+
// Use SoM system prompt if in SoM mode, otherwise use standard exploratory prompt
|
|
951
|
+
const systemPrompt = this.config.useSoM
|
|
952
|
+
? orchestrator_prompts_1.OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates)
|
|
953
|
+
: orchestrator_prompts_1.OrchestratorPrompts.buildExploratorySystemPrompt(toolDescriptions);
|
|
954
|
+
const userPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildExploratoryUserPrompt(context, context.overallGoal, context.testDataPrompt, // Pass testDataPrompt from context
|
|
955
|
+
stepNumber, context.totalSteps);
|
|
956
|
+
const llmRequest = {
|
|
957
|
+
model: model_constants_1.DEFAULT_MODEL,
|
|
958
|
+
systemPrompt,
|
|
959
|
+
userPrompt
|
|
960
|
+
};
|
|
961
|
+
// Include current SoM screenshot as image
|
|
962
|
+
if (context.somScreenshot) {
|
|
963
|
+
llmRequest.imageUrl = context.somScreenshot;
|
|
964
|
+
this.logger?.(`[Orchestrator] Including SoM screenshot in exploratory LLM request`, 'log');
|
|
965
|
+
}
|
|
966
|
+
const response = await this.llmFacade.llmProvider.callLLM(llmRequest);
|
|
967
|
+
// Report token usage
|
|
968
|
+
if (response.usage && this.progressReporter?.onTokensUsed) {
|
|
969
|
+
await this.progressReporter.onTokensUsed({
|
|
970
|
+
jobId,
|
|
971
|
+
stepNumber,
|
|
972
|
+
iteration: 1,
|
|
973
|
+
inputTokens: response.usage.inputTokens,
|
|
974
|
+
outputTokens: response.usage.outputTokens,
|
|
975
|
+
includesImage: false,
|
|
976
|
+
model: model_constants_1.DEFAULT_MODEL,
|
|
977
|
+
timestamp: Date.now()
|
|
978
|
+
});
|
|
979
|
+
}
|
|
980
|
+
// Parse response (same JSON format as regular mode)
|
|
981
|
+
const decision = this.decisionParser.parse(response.answer);
|
|
982
|
+
return decision;
|
|
983
|
+
}
|
|
654
984
|
}
|
|
655
985
|
exports.OrchestratorAgent = OrchestratorAgent;
|
|
656
986
|
//# sourceMappingURL=orchestrator-agent.js.map
|