testchimp-runner-core 0.0.40 → 0.0.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/execution-service.d.ts.map +1 -1
- package/dist/execution-service.js +1 -3
- package/dist/execution-service.js.map +1 -1
- package/dist/index.d.ts +7 -6
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -11
- package/dist/index.js.map +1 -1
- package/dist/orchestrator/decision-parser.d.ts.map +1 -1
- package/dist/orchestrator/decision-parser.js +16 -0
- package/dist/orchestrator/decision-parser.js.map +1 -1
- package/dist/orchestrator/index.d.ts +4 -2
- package/dist/orchestrator/index.d.ts.map +1 -1
- package/dist/orchestrator/index.js +10 -8
- package/dist/orchestrator/index.js.map +1 -1
- package/dist/orchestrator/orchestrator-agent.d.ts +10 -4
- package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-agent.js +376 -118
- package/dist/orchestrator/orchestrator-agent.js.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.d.ts +2 -10
- package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.js +343 -452
- package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
- package/dist/orchestrator/page-loading-utils.d.ts +15 -0
- package/dist/orchestrator/page-loading-utils.d.ts.map +1 -0
- package/dist/orchestrator/page-loading-utils.js +115 -0
- package/dist/orchestrator/page-loading-utils.js.map +1 -0
- package/dist/orchestrator/page-som-handler.d.ts +2 -1
- package/dist/orchestrator/page-som-handler.d.ts.map +1 -1
- package/dist/orchestrator/page-som-handler.js +250 -33
- package/dist/orchestrator/page-som-handler.js.map +1 -1
- package/dist/orchestrator/site-learnings-utils.d.ts +31 -0
- package/dist/orchestrator/site-learnings-utils.d.ts.map +1 -0
- package/dist/orchestrator/site-learnings-utils.js +175 -0
- package/dist/orchestrator/site-learnings-utils.js.map +1 -0
- package/dist/orchestrator/som-types.d.ts +2 -0
- package/dist/orchestrator/som-types.d.ts.map +1 -1
- package/dist/orchestrator/som-types.js.map +1 -1
- package/dist/orchestrator/tools/index.d.ts +9 -8
- package/dist/orchestrator/tools/index.d.ts.map +1 -1
- package/dist/orchestrator/tools/index.js +10 -15
- package/dist/orchestrator/tools/index.js.map +1 -1
- package/dist/orchestrator/tools/take-screenshot.d.ts.map +1 -1
- package/dist/orchestrator/tools/take-screenshot.js +10 -1
- package/dist/orchestrator/tools/take-screenshot.js.map +1 -1
- package/dist/orchestrator/types.d.ts +54 -9
- package/dist/orchestrator/types.d.ts.map +1 -1
- package/dist/orchestrator/types.js.map +1 -1
- package/dist/progress-reporter.d.ts +23 -2
- package/dist/progress-reporter.d.ts.map +1 -1
- package/dist/progress-reporter.js.map +1 -1
- package/dist/prompts.d.ts.map +1 -1
- package/dist/prompts.js +14 -3
- package/dist/prompts.js.map +1 -1
- package/dist/scenario-service.d.ts +3 -3
- package/dist/scenario-service.d.ts.map +1 -1
- package/dist/scenario-service.js +6 -5
- package/dist/scenario-service.js.map +1 -1
- package/dist/scenario-worker-class.d.ts +7 -3
- package/dist/scenario-worker-class.d.ts.map +1 -1
- package/dist/scenario-worker-class.js +94 -21
- package/dist/scenario-worker-class.js.map +1 -1
- package/dist/types.d.ts +4 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
- package/dist/testing/agent-tester.d.ts +0 -35
- package/dist/testing/agent-tester.d.ts.map +0 -1
- package/dist/testing/agent-tester.js +0 -84
- package/dist/testing/agent-tester.js.map +0 -1
- package/dist/testing/ref-translator-tester.d.ts +0 -44
- package/dist/testing/ref-translator-tester.d.ts.map +0 -1
- package/dist/testing/ref-translator-tester.js +0 -104
- package/dist/testing/ref-translator-tester.js.map +0 -1
- package/dist/utils/hierarchical-selector.d.ts +0 -47
- package/dist/utils/hierarchical-selector.d.ts.map +0 -1
- package/dist/utils/hierarchical-selector.js +0 -212
- package/dist/utils/hierarchical-selector.js.map +0 -1
- package/dist/utils/ref-attacher.d.ts +0 -21
- package/dist/utils/ref-attacher.d.ts.map +0 -1
- package/dist/utils/ref-attacher.js +0 -149
- package/dist/utils/ref-attacher.js.map +0 -1
- package/dist/utils/ref-translator.d.ts +0 -49
- package/dist/utils/ref-translator.d.ts.map +0 -1
- package/dist/utils/ref-translator.js +0 -276
- package/dist/utils/ref-translator.js.map +0 -1
|
@@ -9,6 +9,10 @@ const progress_reporter_1 = require("../progress-reporter");
|
|
|
9
9
|
const page_info_utils_1 = require("../utils/page-info-utils");
|
|
10
10
|
const coordinate_converter_1 = require("../utils/coordinate-converter");
|
|
11
11
|
const model_constants_1 = require("../model-constants");
|
|
12
|
+
// @ts-ignore - package.json exists
|
|
13
|
+
const package_json_1 = require("../../package.json");
|
|
14
|
+
const site_learnings_utils_1 = require("./site-learnings-utils");
|
|
15
|
+
const page_loading_utils_1 = require("./page-loading-utils");
|
|
12
16
|
const types_1 = require("./types");
|
|
13
17
|
const orchestrator_prompts_1 = require("./orchestrator-prompts");
|
|
14
18
|
const page_info_retry_1 = require("../utils/page-info-retry");
|
|
@@ -21,6 +25,14 @@ const som_types_1 = require("./som-types");
|
|
|
21
25
|
class OrchestratorAgent {
|
|
22
26
|
constructor(llmFacade, toolRegistry, config, progressReporter, logger, debugMode) {
|
|
23
27
|
this.debugMode = false;
|
|
28
|
+
// Debug stats tracking
|
|
29
|
+
this.debugStats = {
|
|
30
|
+
tokensUsedIn: 0,
|
|
31
|
+
tokensUsedOut: 0,
|
|
32
|
+
imagesUsed: 0,
|
|
33
|
+
toolsUsed: {},
|
|
34
|
+
promptImproveSuggestions: []
|
|
35
|
+
};
|
|
24
36
|
this.llmFacade = llmFacade;
|
|
25
37
|
this.toolRegistry = toolRegistry;
|
|
26
38
|
this.config = { ...types_1.DEFAULT_AGENT_CONFIG, ...config };
|
|
@@ -43,10 +55,19 @@ class OrchestratorAgent {
|
|
|
43
55
|
nextSteps, // For repair mode: steps after this one
|
|
44
56
|
successfulCommandsInStep, // For repair mode: commands that succeeded within THIS step
|
|
45
57
|
failingCommand, // For repair mode: the specific command that failed
|
|
46
|
-
remainingCommandsInStep // For repair mode: commands after the failing one
|
|
58
|
+
remainingCommandsInStep, // For repair mode: commands after the failing one
|
|
59
|
+
existingSiteLearnings // Pre-existing site learnings from previous runs
|
|
47
60
|
) {
|
|
48
61
|
this.logger?.(`\n[Orchestrator] ========== STEP ${stepNumber}/${totalSteps} ==========`);
|
|
62
|
+
this.logger?.(`[Orchestrator] 🚀 runner-core v${package_json_1.version}`);
|
|
49
63
|
this.logger?.(`[Orchestrator] 🎯 Goal: ${stepDescription}`);
|
|
64
|
+
// Site learnings (persistent across journeys) - initialize with existing or empty
|
|
65
|
+
const siteLearnings = existingSiteLearnings || { screens: {}, uxPatterns: {} };
|
|
66
|
+
if (existingSiteLearnings) {
|
|
67
|
+
const screenCount = Object.keys(existingSiteLearnings.screens).length;
|
|
68
|
+
const patternCount = Object.keys(existingSiteLearnings.uxPatterns).length;
|
|
69
|
+
this.logger?.(`[Orchestrator] 📚 Loaded existing learnings: ${screenCount} screens, ${patternCount} UX patterns`);
|
|
70
|
+
}
|
|
50
71
|
let iteration = 0;
|
|
51
72
|
let noteToSelf = memory.latestNote; // Start with note from previous step
|
|
52
73
|
const commandsExecuted = [];
|
|
@@ -56,8 +77,8 @@ class OrchestratorAgent {
|
|
|
56
77
|
iteration++;
|
|
57
78
|
this.logger?.(`\n[Orchestrator] === Iteration ${iteration}/${this.config.maxIterationsPerStep} ===`);
|
|
58
79
|
// Build context for agent
|
|
59
|
-
const context = await this.buildAgentContext(page, stepDescription, stepNumber, totalSteps, scenarioSteps, memory,
|
|
60
|
-
priorSteps, // Repair context: prior completed steps
|
|
80
|
+
const context = await this.buildAgentContext(page, stepDescription, stepNumber, totalSteps, scenarioSteps, memory, siteLearnings, // Site learnings (persistent)
|
|
81
|
+
consecutiveFailures, priorSteps, // Repair context: prior completed steps
|
|
61
82
|
nextSteps, // Repair context: next steps
|
|
62
83
|
successfulCommandsInStep, // Repair context: successful commands in THIS step
|
|
63
84
|
failingCommand, // Repair context: the failing command
|
|
@@ -67,6 +88,30 @@ class OrchestratorAgent {
|
|
|
67
88
|
const decision = await this.callAgent(context, jobId, stepNumber, iteration, consecutiveFailures);
|
|
68
89
|
// Log agent's reasoning
|
|
69
90
|
this.decisionParser.log(decision, iteration);
|
|
91
|
+
// Handle debug info from agent
|
|
92
|
+
if (decision.debugInfo) {
|
|
93
|
+
// Collect prompt improvement suggestions
|
|
94
|
+
if (decision.debugInfo.suggestedPromptUpdates) {
|
|
95
|
+
this.debugStats.promptImproveSuggestions.push(decision.debugInfo.suggestedPromptUpdates);
|
|
96
|
+
this.logger?.(`[Orchestrator] 💡 Prompt suggestion collected: ${decision.debugInfo.suggestedPromptUpdates.substring(0, 80)}...`, 'log');
|
|
97
|
+
}
|
|
98
|
+
// Process tool usefulness feedback (for tools from PREVIOUS iteration)
|
|
99
|
+
if (decision.debugInfo.toolUsefulnessFeedback) {
|
|
100
|
+
for (const [toolName, rating] of Object.entries(decision.debugInfo.toolUsefulnessFeedback)) {
|
|
101
|
+
if (this.debugStats.toolsUsed[toolName]) {
|
|
102
|
+
const stats = this.debugStats.toolsUsed[toolName];
|
|
103
|
+
// Calculate running average: (oldAvg * oldCount + newRating) / newCount
|
|
104
|
+
const oldTotal = stats.averageUsefulnessScore * stats.numTimesRated;
|
|
105
|
+
stats.numTimesRated++;
|
|
106
|
+
stats.averageUsefulnessScore = (oldTotal + rating) / stats.numTimesRated;
|
|
107
|
+
this.logger?.(`[Orchestrator] ⭐ Tool feedback: ${toolName} rated ${rating}/5 (avg: ${stats.averageUsefulnessScore.toFixed(2)})`, 'log');
|
|
108
|
+
}
|
|
109
|
+
else {
|
|
110
|
+
this.logger?.(`[Orchestrator] ⚠️ Tool feedback for unknown tool: ${toolName}`, 'warn');
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
70
115
|
// Report progress
|
|
71
116
|
await this.reportStepProgress(jobId, stepNumber, stepDescription, decision, iteration);
|
|
72
117
|
// Execute tools if requested (tools are READ-ONLY, they don't change state)
|
|
@@ -82,6 +127,7 @@ class OrchestratorAgent {
|
|
|
82
127
|
this.logger?.(`[Orchestrator] ⚠️ WARNING: ${recentScreenshots.length} screenshots in last 3 iterations - agent may be looping`, 'warn');
|
|
83
128
|
}
|
|
84
129
|
if (decision.toolCalls && decision.toolCalls.length > 0) {
|
|
130
|
+
this.logger?.(`[Orchestrator] 🔧 Agent using TOOL CALLS: ${decision.toolCalls.map(tc => tc.name).join(', ')}`);
|
|
85
131
|
// ENFORCE: Block screenshot tool calls if too many taken IN THIS STEP
|
|
86
132
|
if (screenshotsThisStep.length >= 3) {
|
|
87
133
|
decision.toolCalls = decision.toolCalls.filter(tc => tc.name !== 'take_screenshot');
|
|
@@ -96,27 +142,28 @@ class OrchestratorAgent {
|
|
|
96
142
|
}
|
|
97
143
|
}
|
|
98
144
|
if (decision.toolCalls.length > 0) {
|
|
99
|
-
toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber
|
|
100
|
-
}
|
|
101
|
-
// If agent wants to wait for tool results before proceeding, call agent again
|
|
102
|
-
if (decision.needsToolResults) {
|
|
103
|
-
const updatedContext = { ...context, toolResults };
|
|
104
|
-
const continuedDecision = await this.callAgent(updatedContext, jobId, stepNumber, iteration, consecutiveFailures);
|
|
105
|
-
// Merge continued decision
|
|
106
|
-
decision.commands = continuedDecision.commands || decision.commands;
|
|
107
|
-
decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
|
|
108
|
-
decision.status = continuedDecision.status;
|
|
109
|
-
decision.statusReasoning = continuedDecision.statusReasoning;
|
|
110
|
-
decision.reasoning = continuedDecision.reasoning;
|
|
145
|
+
toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber);
|
|
111
146
|
}
|
|
112
147
|
}
|
|
148
|
+
// If agent wants to wait for tool results before proceeding, call agent again
|
|
149
|
+
if (decision.toolCalls && decision.toolCalls.length > 0 && decision.needsToolResults) {
|
|
150
|
+
const updatedContext = { ...context, toolResults };
|
|
151
|
+
const continuedDecision = await this.callAgent(updatedContext, jobId, stepNumber, iteration, consecutiveFailures);
|
|
152
|
+
// Merge continued decision
|
|
153
|
+
decision.commands = continuedDecision.commands || decision.commands;
|
|
154
|
+
decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
|
|
155
|
+
decision.status = continuedDecision.status;
|
|
156
|
+
decision.statusReasoning = continuedDecision.statusReasoning;
|
|
157
|
+
decision.reasoning = continuedDecision.reasoning;
|
|
158
|
+
}
|
|
113
159
|
// Execute commands sequentially
|
|
114
160
|
let iterationHadFailure = false;
|
|
115
161
|
// Handle blocker if detected (clear blocker FIRST, then proceed with main commands)
|
|
116
162
|
if (decision.blockerDetected && decision.blockerDetected.clearingCommands && decision.blockerDetected.clearingCommands.length > 0) {
|
|
117
163
|
this.logger?.(`[Orchestrator] 🚧 BLOCKER DETECTED: ${decision.blockerDetected.description}`);
|
|
118
164
|
this.logger?.(`[Orchestrator] 🧹 Clearing blocker with ${decision.blockerDetected.clearingCommands.length} command(s)...`);
|
|
119
|
-
const
|
|
165
|
+
const urlBeforeBlockerClear = page.url();
|
|
166
|
+
const blockerResult = await this.executeCommands(decision.blockerDetected.clearingCommands, page, memory, stepNumber, iteration, jobId, urlBeforeBlockerClear, decision.screenState);
|
|
120
167
|
// Add blocker commands with comment to output
|
|
121
168
|
if (blockerResult.executed.length > 0) {
|
|
122
169
|
commandsExecuted.push(`// Blocker: ${decision.blockerDetected.description}`);
|
|
@@ -131,7 +178,8 @@ class OrchestratorAgent {
|
|
|
131
178
|
}
|
|
132
179
|
// Execute main commands (only if no blocker failure)
|
|
133
180
|
if (!iterationHadFailure && decision.commands && decision.commands.length > 0) {
|
|
134
|
-
const
|
|
181
|
+
const urlBeforeCommands = page.url();
|
|
182
|
+
const executeResult = await this.executeCommands(decision.commands, page, memory, stepNumber, iteration, jobId, urlBeforeCommands, decision.screenState);
|
|
135
183
|
commandsExecuted.push(...executeResult.executed);
|
|
136
184
|
// Track failures
|
|
137
185
|
if (!executeResult.allSucceeded) {
|
|
@@ -156,7 +204,8 @@ class OrchestratorAgent {
|
|
|
156
204
|
this.logger?.(`[Orchestrator] Generated commands:`);
|
|
157
205
|
coordCommands.forEach(cmd => this.logger?.(` ${cmd}`));
|
|
158
206
|
// Execute coordinate commands
|
|
159
|
-
const
|
|
207
|
+
const urlBeforeCoord = page.url();
|
|
208
|
+
const coordResult = await this.executeCommands(coordCommands, page, memory, stepNumber, iteration, jobId, urlBeforeCoord, decision.screenState);
|
|
160
209
|
commandsExecuted.push(...coordResult.executed);
|
|
161
210
|
if (!coordResult.allSucceeded) {
|
|
162
211
|
this.logger?.(`[Orchestrator] ❌ Coordinate action failed (Playwright error)`);
|
|
@@ -171,6 +220,7 @@ class OrchestratorAgent {
|
|
|
171
220
|
iterations: iteration,
|
|
172
221
|
terminationReason: 'agent_stuck',
|
|
173
222
|
memory,
|
|
223
|
+
siteLearnings,
|
|
174
224
|
error: 'Coordinate fallback failed after 2 attempts - unable to proceed'
|
|
175
225
|
};
|
|
176
226
|
}
|
|
@@ -240,6 +290,7 @@ class OrchestratorAgent {
|
|
|
240
290
|
iterations: iteration,
|
|
241
291
|
terminationReason: 'agent_stuck',
|
|
242
292
|
memory,
|
|
293
|
+
siteLearnings,
|
|
243
294
|
error: `Coordinate actions clicked but didn't achieve goal: ${reasoning}`
|
|
244
295
|
};
|
|
245
296
|
}
|
|
@@ -265,6 +316,7 @@ class OrchestratorAgent {
|
|
|
265
316
|
iterations: iteration,
|
|
266
317
|
terminationReason: 'agent_stuck',
|
|
267
318
|
memory,
|
|
319
|
+
siteLearnings,
|
|
268
320
|
error: 'Coordinate fallback failed after 2 attempts - unable to proceed'
|
|
269
321
|
};
|
|
270
322
|
}
|
|
@@ -280,25 +332,39 @@ class OrchestratorAgent {
|
|
|
280
332
|
iterations: iteration,
|
|
281
333
|
terminationReason: 'agent_stuck',
|
|
282
334
|
memory,
|
|
335
|
+
siteLearnings,
|
|
283
336
|
error: `Failed ${consecutiveFailures} iterations in a row - unable to proceed`
|
|
284
337
|
};
|
|
285
338
|
}
|
|
286
|
-
//
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
339
|
+
// Auto-track visited screen (even without explicit learnings)
|
|
340
|
+
// Filter out transient screens and loading states
|
|
341
|
+
if (decision.screenState) {
|
|
342
|
+
const { screen, state } = decision.screenState;
|
|
343
|
+
// Skip about:blank and loading states (transient, not worth persisting)
|
|
344
|
+
const isTransientScreen = screen === 'about:blank' ||
|
|
345
|
+
screen.toLowerCase().includes('blank');
|
|
346
|
+
const isLoadingState = state.toLowerCase().includes('loading') ||
|
|
347
|
+
state.toLowerCase().includes('spinner') ||
|
|
348
|
+
state.toLowerCase().includes('initializing');
|
|
349
|
+
if (!isTransientScreen && !isLoadingState) {
|
|
350
|
+
if (!siteLearnings.screens[screen]) {
|
|
351
|
+
siteLearnings.screens[screen] = { states: {} };
|
|
352
|
+
this.logger?.(`[📍 Auto-tracked] Screen: ${screen}`);
|
|
353
|
+
}
|
|
354
|
+
if (!siteLearnings.screens[screen].states[state]) {
|
|
355
|
+
siteLearnings.screens[screen].states[state] = { observations: {} };
|
|
356
|
+
this.logger?.(`[📍 Auto-tracked] State: ${screen}[${state}]`);
|
|
295
357
|
}
|
|
296
358
|
}
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
|
|
359
|
+
else {
|
|
360
|
+
this.logger?.(`[⏭️ Skipped] Transient screen/state: ${screen}[${state}]`);
|
|
300
361
|
}
|
|
301
362
|
}
|
|
363
|
+
// Update site learnings
|
|
364
|
+
if (decision.siteLearningsUpdate) {
|
|
365
|
+
this.logger?.(`[🔍 DEBUG] siteLearningsUpdate from LLM:\n${JSON.stringify(decision.siteLearningsUpdate, null, 2)}`);
|
|
366
|
+
(0, site_learnings_utils_1.mergeSiteLearnings)(siteLearnings, decision.siteLearningsUpdate, this.logger);
|
|
367
|
+
}
|
|
302
368
|
// Store note to future self (tactical memory across iterations AND steps)
|
|
303
369
|
if (decision.noteToFutureSelf) {
|
|
304
370
|
noteToSelf = {
|
|
@@ -327,7 +393,8 @@ class OrchestratorAgent {
|
|
|
327
393
|
terminationReason: decision.status === 'complete' ? 'complete' :
|
|
328
394
|
decision.status === 'stuck' ? 'agent_stuck' :
|
|
329
395
|
'infeasible',
|
|
330
|
-
memory
|
|
396
|
+
memory,
|
|
397
|
+
siteLearnings
|
|
331
398
|
};
|
|
332
399
|
}
|
|
333
400
|
}
|
|
@@ -340,21 +407,58 @@ class OrchestratorAgent {
|
|
|
340
407
|
iterations: iteration,
|
|
341
408
|
terminationReason: 'system_limit',
|
|
342
409
|
memory,
|
|
410
|
+
siteLearnings,
|
|
343
411
|
error: 'Maximum iterations reached'
|
|
344
412
|
};
|
|
345
413
|
}
|
|
346
414
|
/**
|
|
347
415
|
* Build context for agent
|
|
348
416
|
*/
|
|
349
|
-
async buildAgentContext(page, currentStepGoal, stepNumber, totalSteps, scenarioSteps, memory,
|
|
417
|
+
async buildAgentContext(page, currentStepGoal, stepNumber, totalSteps, scenarioSteps, memory, siteLearnings, // Site learnings (persistent across journeys)
|
|
418
|
+
consecutiveFailures, priorSteps, // For repair mode: prior completed steps
|
|
350
419
|
nextSteps, // For repair mode: next steps
|
|
351
420
|
successfulCommandsInStep, // For repair mode: successful commands in THIS step
|
|
352
421
|
failingCommand, // For repair mode: the failing command
|
|
353
422
|
remainingCommandsInStep // For repair mode: remaining commands in THIS step
|
|
354
423
|
) {
|
|
355
|
-
// Get fresh DOM
|
|
424
|
+
// Get fresh DOM (for title only, not displayed in prompts - SoM mode uses visual markers)
|
|
356
425
|
const currentPageInfo = await (0, page_info_utils_1.getEnhancedPageInfo)(page);
|
|
357
426
|
const currentURL = page.url();
|
|
427
|
+
// Get page dimensions for scroll vs screenshot decisions
|
|
428
|
+
// IMPORTANT: Wait for page to stabilize and retry until dimensions stop changing (fixes lazy-loaded/dynamic content)
|
|
429
|
+
try {
|
|
430
|
+
await page.waitForLoadState('domcontentloaded', { timeout: 10000 });
|
|
431
|
+
}
|
|
432
|
+
catch (e) {
|
|
433
|
+
// Already loaded, continue
|
|
434
|
+
}
|
|
435
|
+
// Retry approach: Measure scrollHeight multiple times until it stabilizes
|
|
436
|
+
// This handles React/Vue/Angular apps that expand the DOM after initial render
|
|
437
|
+
// Check MULTIPLE sources and use the maximum (handles edge cases like overflow:hidden)
|
|
438
|
+
const measureHeight = `Math.max(
|
|
439
|
+
document.documentElement.scrollHeight || 0,
|
|
440
|
+
document.body.scrollHeight || 0,
|
|
441
|
+
document.documentElement.offsetHeight || 0,
|
|
442
|
+
document.body.offsetHeight || 0
|
|
443
|
+
)`;
|
|
444
|
+
let pageHeight = await page.evaluate(measureHeight).catch(() => 0);
|
|
445
|
+
let previousHeight = 0;
|
|
446
|
+
let attempts = 0;
|
|
447
|
+
while (pageHeight !== previousHeight && attempts < 5) {
|
|
448
|
+
previousHeight = pageHeight;
|
|
449
|
+
await page.waitForTimeout(200); // Wait for potential expansion
|
|
450
|
+
pageHeight = await page.evaluate(measureHeight).catch(() => 0);
|
|
451
|
+
attempts++;
|
|
452
|
+
}
|
|
453
|
+
const viewport = page.viewportSize();
|
|
454
|
+
// @ts-expect-error - document is available in browser context during page.evaluate()
|
|
455
|
+
const pageWidth = await page.evaluate(() => document.documentElement.scrollWidth).catch(() => 0);
|
|
456
|
+
// @ts-expect-error - window is available in browser context during page.evaluate()
|
|
457
|
+
const scrollX = await page.evaluate(() => window.scrollX || window.pageXOffset).catch(() => 0);
|
|
458
|
+
// @ts-expect-error - window is available in browser context during page.evaluate()
|
|
459
|
+
const scrollY = await page.evaluate(() => window.scrollY || window.pageYOffset).catch(() => 0);
|
|
460
|
+
const pageDimensions = { width: pageWidth, height: pageHeight };
|
|
461
|
+
this.logger?.(`[Orchestrator] Page dimensions: ${pageWidth}x${pageHeight}px (viewport: ${viewport?.width}x${viewport?.height}px) - stabilized after ${attempts} checks`, 'log');
|
|
358
462
|
// Get recent steps
|
|
359
463
|
const recentSteps = memory.history.slice(-this.config.recentStepsCount);
|
|
360
464
|
// SoM integration: Update markers and capture screenshot with visual IDs
|
|
@@ -375,10 +479,14 @@ class OrchestratorAgent {
|
|
|
375
479
|
catch (error) {
|
|
376
480
|
// Page already loaded or timeout - continue
|
|
377
481
|
}
|
|
378
|
-
// Update SoM markers
|
|
379
|
-
await this.somHandler.updateSom();
|
|
380
|
-
//
|
|
381
|
-
|
|
482
|
+
// Update SoM markers - include offscreen elements for full-page screenshots
|
|
483
|
+
await this.somHandler.updateSom(true);
|
|
484
|
+
// TEMPORARY: Always use full-page screenshot for debugging
|
|
485
|
+
// TODO: Re-enable heuristic once we verify full-page works correctly
|
|
486
|
+
const useFullPageSom = true;
|
|
487
|
+
this.logger?.(`[Orchestrator] SoM screenshot strategy: FULL PAGE (ALWAYS) - page: ${pageWidth}x${pageHeight}px, viewport: ${viewport?.width}x${viewport?.height}px`, 'log');
|
|
488
|
+
// Get screenshot WITH markers
|
|
489
|
+
somScreenshot = await this.somHandler.getScreenshot(true, useFullPageSom, 60);
|
|
382
490
|
// Get element map for disambiguation
|
|
383
491
|
somElementMap = this.somHandler.getSomElementMap();
|
|
384
492
|
this.logger?.(`[Orchestrator] SoM screenshot captured for agent decision-making`, 'log');
|
|
@@ -395,12 +503,16 @@ class OrchestratorAgent {
|
|
|
395
503
|
totalSteps,
|
|
396
504
|
completedSteps: scenarioSteps.slice(0, stepNumber - 1),
|
|
397
505
|
remainingSteps: scenarioSteps.slice(stepNumber),
|
|
398
|
-
currentPageInfo,
|
|
399
506
|
currentURL,
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
507
|
+
currentPageTitle: currentPageInfo.title,
|
|
508
|
+
viewportWidth: viewport?.width,
|
|
509
|
+
viewportHeight: viewport?.height,
|
|
510
|
+
pageWidth: pageDimensions.width,
|
|
511
|
+
pageHeight: pageDimensions.height,
|
|
512
|
+
scrollX,
|
|
513
|
+
scrollY,
|
|
514
|
+
journeyMemory: memory, // Journey-specific memory (includes history, extractedData, latestNote)
|
|
515
|
+
siteLearnings, // Site-level learnings (persistent across journeys)
|
|
404
516
|
somScreenshot, // SoM screenshot with visual markers (current)
|
|
405
517
|
somElementMap, // SoM element details for disambiguation
|
|
406
518
|
priorSteps, // Repair: prior completed steps
|
|
@@ -419,26 +531,10 @@ class OrchestratorAgent {
|
|
|
419
531
|
* Call agent to make decision
|
|
420
532
|
*/
|
|
421
533
|
async callAgent(context, jobId, stepNumber, iteration, consecutiveFailures) {
|
|
422
|
-
//
|
|
423
|
-
// Phase 1: Only 2 tiers (selectors → coordinates), so activate after 3 failures
|
|
424
|
-
// Phase 2: Will have 3 tiers (selectors → index → coordinates), threshold will be 5
|
|
425
|
-
const useCoordinateMode = consecutiveFailures !== undefined && consecutiveFailures >= 3;
|
|
426
|
-
// Build appropriate system prompt based on mode
|
|
534
|
+
// Build SoM system prompt (standard mode)
|
|
427
535
|
const toolDescriptions = this.toolRegistry.generateToolDescriptions();
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
// SoM mode: Use visual element identification
|
|
431
|
-
systemPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates);
|
|
432
|
-
}
|
|
433
|
-
else if (useCoordinateMode) {
|
|
434
|
-
// Coordinate mode: Fallback when selectors fail
|
|
435
|
-
systemPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildCoordinateSystemPrompt();
|
|
436
|
-
}
|
|
437
|
-
else {
|
|
438
|
-
// Standard mode: DOM-based selectors
|
|
439
|
-
systemPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildSystemPrompt(toolDescriptions, this.config.enableCoordinateMode);
|
|
440
|
-
}
|
|
441
|
-
const userPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures, this.config.enableCoordinateMode);
|
|
536
|
+
const systemPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates, toolDescriptions);
|
|
537
|
+
const userPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures);
|
|
442
538
|
// Log prompt lengths for monitoring
|
|
443
539
|
const systemLength = systemPrompt.length;
|
|
444
540
|
const userLength = userPrompt.length;
|
|
@@ -458,22 +554,32 @@ class OrchestratorAgent {
|
|
|
458
554
|
this.logger?.(`[Orchestrator] Including SoM screenshot in LLM request`, 'log');
|
|
459
555
|
}
|
|
460
556
|
const response = await this.llmFacade.llmProvider.callLLM(llmRequest);
|
|
461
|
-
//
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
557
|
+
// Track token usage and images
|
|
558
|
+
const includesImage = !!context.somScreenshot;
|
|
559
|
+
if (response.usage) {
|
|
560
|
+
// Accumulate in debug stats
|
|
561
|
+
this.debugStats.tokensUsedIn += response.usage.inputTokens;
|
|
562
|
+
this.debugStats.tokensUsedOut += response.usage.outputTokens;
|
|
563
|
+
if (includesImage) {
|
|
564
|
+
this.debugStats.imagesUsed++;
|
|
565
|
+
}
|
|
566
|
+
// Report to progress reporter
|
|
567
|
+
if (this.progressReporter?.onTokensUsed) {
|
|
568
|
+
const tokenUsage = {
|
|
569
|
+
jobId,
|
|
570
|
+
stepNumber,
|
|
571
|
+
iteration,
|
|
572
|
+
inputTokens: response.usage.inputTokens,
|
|
573
|
+
outputTokens: response.usage.outputTokens,
|
|
574
|
+
includesImage,
|
|
575
|
+
model: model_constants_1.DEFAULT_MODEL,
|
|
576
|
+
timestamp: Date.now()
|
|
577
|
+
};
|
|
578
|
+
this.logger?.(`[Orchestrator] 💰 Reporting token usage: ${tokenUsage.inputTokens} + ${tokenUsage.outputTokens}`, 'log');
|
|
579
|
+
await this.progressReporter.onTokensUsed(tokenUsage);
|
|
580
|
+
}
|
|
475
581
|
}
|
|
476
|
-
else
|
|
582
|
+
else {
|
|
477
583
|
this.logger?.(`[Orchestrator] ⚠ No usage data in LLM response`, 'warn');
|
|
478
584
|
}
|
|
479
585
|
// Parse response
|
|
@@ -492,7 +598,7 @@ class OrchestratorAgent {
|
|
|
492
598
|
/**
|
|
493
599
|
* Execute tools
|
|
494
600
|
*/
|
|
495
|
-
async executeTools(toolCalls, page, memory, stepNumber
|
|
601
|
+
async executeTools(toolCalls, page, memory, stepNumber) {
|
|
496
602
|
this.logger?.(`[Orchestrator] 🔧 Executing ${toolCalls.length} tool(s)`);
|
|
497
603
|
const results = {};
|
|
498
604
|
const toolContext = {
|
|
@@ -500,12 +606,23 @@ class OrchestratorAgent {
|
|
|
500
606
|
memory,
|
|
501
607
|
stepNumber,
|
|
502
608
|
logger: this.logger,
|
|
503
|
-
refMap, // Pass refMap for interact_with_ref tool
|
|
504
609
|
previousSomScreenshot: this.previousSomScreenshot, // For view_previous_screenshot tool
|
|
505
610
|
somHandler: this.somHandler // For refresh_som_markers tool
|
|
506
611
|
};
|
|
507
612
|
for (const toolCall of toolCalls.slice(0, this.config.maxToolCallsPerIteration)) {
|
|
508
|
-
|
|
613
|
+
// Log full parameters for debugging (especially for take_screenshot to see if isFullPage is set)
|
|
614
|
+
this.logger?.(`[Orchestrator] ▶ ${toolCall.name}(${JSON.stringify(toolCall.params)})`);
|
|
615
|
+
// Track tool usage in debug stats
|
|
616
|
+
if (!this.debugStats.toolsUsed[toolCall.name]) {
|
|
617
|
+
this.debugStats.toolsUsed[toolCall.name] = {
|
|
618
|
+
count: 0,
|
|
619
|
+
averageUsefulnessScore: 0,
|
|
620
|
+
numTimesRated: 0
|
|
621
|
+
};
|
|
622
|
+
this.logger?.(`[DebugStats] 📊 Tracking new tool: ${toolCall.name}`);
|
|
623
|
+
}
|
|
624
|
+
this.debugStats.toolsUsed[toolCall.name].count++;
|
|
625
|
+
this.logger?.(`[DebugStats] 📊 Tool '${toolCall.name}' used (count: ${this.debugStats.toolsUsed[toolCall.name].count})`);
|
|
509
626
|
const result = await this.toolRegistry.execute(toolCall, toolContext);
|
|
510
627
|
results[toolCall.name] = result;
|
|
511
628
|
if (result.success) {
|
|
@@ -549,7 +666,9 @@ class OrchestratorAgent {
|
|
|
549
666
|
/**
|
|
550
667
|
* Execute commands (mix of ref and playwright commands)
|
|
551
668
|
*/
|
|
552
|
-
async executeCommands(commands, page, memory, stepNumber, iteration, jobId
|
|
669
|
+
async executeCommands(commands, page, memory, stepNumber, iteration, jobId, urlBeforeAction, // URL before commands execute
|
|
670
|
+
screenState // Screen state for memory
|
|
671
|
+
) {
|
|
553
672
|
this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s)`);
|
|
554
673
|
const executed = [];
|
|
555
674
|
if (commands.length === 0) {
|
|
@@ -560,6 +679,11 @@ class OrchestratorAgent {
|
|
|
560
679
|
this.logger?.(`[Orchestrator] Using SoM mode for command execution`, 'log');
|
|
561
680
|
for (let i = 0; i < commands.length; i++) {
|
|
562
681
|
const cmd = commands[i];
|
|
682
|
+
// Skip if plain string (should not happen in SoM mode, but handle gracefully)
|
|
683
|
+
if (typeof cmd === 'string') {
|
|
684
|
+
this.logger?.(`[Orchestrator] ⚠️ Skipping plain string command in SoM mode: "${cmd}"`, 'warn');
|
|
685
|
+
continue;
|
|
686
|
+
}
|
|
563
687
|
// Check if verification or action command
|
|
564
688
|
if ((0, som_types_1.isSomVerification)(cmd)) {
|
|
565
689
|
// Handle verification command
|
|
@@ -580,6 +704,8 @@ class OrchestratorAgent {
|
|
|
580
704
|
result: 'success',
|
|
581
705
|
observation: `Verified: ${cmd.description || cmd.expected}`,
|
|
582
706
|
url: page.url(),
|
|
707
|
+
previousUrl: urlBeforeAction,
|
|
708
|
+
screenState,
|
|
583
709
|
timestamp: Date.now()
|
|
584
710
|
});
|
|
585
711
|
}
|
|
@@ -594,6 +720,8 @@ class OrchestratorAgent {
|
|
|
594
720
|
observation: `Failed: ${result.error}`,
|
|
595
721
|
error: result.error,
|
|
596
722
|
url: page.url(),
|
|
723
|
+
previousUrl: urlBeforeAction,
|
|
724
|
+
screenState,
|
|
597
725
|
timestamp: Date.now()
|
|
598
726
|
});
|
|
599
727
|
// Continue anyway - verification failures are non-blocking for script generation
|
|
@@ -624,6 +752,8 @@ class OrchestratorAgent {
|
|
|
624
752
|
result: 'success',
|
|
625
753
|
observation: 'Executed successfully',
|
|
626
754
|
url: page.url(),
|
|
755
|
+
previousUrl: urlBeforeAction,
|
|
756
|
+
screenState,
|
|
627
757
|
timestamp: Date.now()
|
|
628
758
|
});
|
|
629
759
|
// Small delay for form validation/animations
|
|
@@ -644,12 +774,14 @@ class OrchestratorAgent {
|
|
|
644
774
|
observation: `Failed: ${result.error}`,
|
|
645
775
|
error: result.error,
|
|
646
776
|
url: page.url(),
|
|
777
|
+
previousUrl: urlBeforeAction,
|
|
778
|
+
screenState,
|
|
647
779
|
timestamp: Date.now()
|
|
648
780
|
});
|
|
649
781
|
// Refresh SoM after batch (DOM may have changed)
|
|
650
782
|
if (this.somHandler && page) {
|
|
651
783
|
this.somHandler.setPage(page);
|
|
652
|
-
await this.somHandler.updateSom();
|
|
784
|
+
await this.somHandler.updateSom(true);
|
|
653
785
|
}
|
|
654
786
|
return { executed, allSucceeded: false };
|
|
655
787
|
}
|
|
@@ -665,12 +797,14 @@ class OrchestratorAgent {
|
|
|
665
797
|
observation: `Exception: ${error.message}`,
|
|
666
798
|
error: error.message,
|
|
667
799
|
url: page.url(),
|
|
800
|
+
previousUrl: urlBeforeAction,
|
|
801
|
+
screenState,
|
|
668
802
|
timestamp: Date.now()
|
|
669
803
|
});
|
|
670
804
|
// Refresh SoM after batch (DOM may have changed)
|
|
671
805
|
if (this.somHandler && page) {
|
|
672
806
|
this.somHandler.setPage(page);
|
|
673
|
-
await this.somHandler.updateSom();
|
|
807
|
+
await this.somHandler.updateSom(true);
|
|
674
808
|
}
|
|
675
809
|
return { executed, allSucceeded: false };
|
|
676
810
|
}
|
|
@@ -679,19 +813,33 @@ class OrchestratorAgent {
|
|
|
679
813
|
this.logger?.(`[Orchestrator] ⚠ [${i + 1}/${commands.length}] Not a valid SoM command/verification, skipping`, 'warn');
|
|
680
814
|
}
|
|
681
815
|
}
|
|
816
|
+
// Action-aware stabilization: Detect if commands likely triggered navigation
|
|
817
|
+
const isNavigationAction = (0, page_loading_utils_1.detectNavigationAction)(commands, executed);
|
|
682
818
|
// Always wait for page to stabilize after command batch
|
|
683
819
|
// This handles both explicit navigation AND clicks that trigger navigation/SPA routes
|
|
684
820
|
try {
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
821
|
+
if (isNavigationAction) {
|
|
822
|
+
this.logger?.(`[Orchestrator] Detected navigation action - using extended wait...`, 'log');
|
|
823
|
+
// Extended wait for form submissions and navigation clicks
|
|
824
|
+
await page.waitForLoadState('networkidle', { timeout: 15000 }); // Longer timeout for slow SPAs
|
|
825
|
+
await page.waitForTimeout(1000); // Initial buffer for SPA rendering
|
|
826
|
+
this.logger?.(`[Orchestrator] Page stabilized after navigation (networkidle + 1s buffer)`, 'log');
|
|
827
|
+
// Smart loading detection: Check if page still shows loading indicators
|
|
828
|
+
await (0, page_loading_utils_1.waitForLoadingToComplete)(page, this.logger);
|
|
829
|
+
}
|
|
830
|
+
else {
|
|
831
|
+
this.logger?.(`[Orchestrator] Waiting for page to stabilize...`, 'log');
|
|
832
|
+
// Use networkidle with short timeout for standard interactions
|
|
833
|
+
await page.waitForLoadState('networkidle', { timeout: 3000 });
|
|
834
|
+
this.logger?.(`[Orchestrator] Page stabilized (networkidle)`, 'log');
|
|
835
|
+
}
|
|
689
836
|
}
|
|
690
837
|
catch (error) {
|
|
691
|
-
// If networkidle times out, fall back to domcontentloaded
|
|
838
|
+
// If networkidle times out, fall back to domcontentloaded + buffer
|
|
692
839
|
try {
|
|
693
840
|
await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
|
|
694
|
-
|
|
841
|
+
await page.waitForTimeout(1000);
|
|
842
|
+
this.logger?.(`[Orchestrator] Page loaded (domcontentloaded + buffer)`, 'log');
|
|
695
843
|
}
|
|
696
844
|
catch (error2) {
|
|
697
845
|
this.logger?.(`[Orchestrator] Page load wait timeout (continuing anyway)`, 'warn');
|
|
@@ -700,7 +848,7 @@ class OrchestratorAgent {
|
|
|
700
848
|
// Refresh SoM after batch (DOM may have changed and page is now stable)
|
|
701
849
|
if (this.somHandler && page) {
|
|
702
850
|
this.somHandler.setPage(page);
|
|
703
|
-
await this.somHandler.updateSom();
|
|
851
|
+
await this.somHandler.updateSom(true);
|
|
704
852
|
}
|
|
705
853
|
return { executed, allSucceeded: true };
|
|
706
854
|
}
|
|
@@ -733,6 +881,8 @@ try {
|
|
|
733
881
|
result: 'success',
|
|
734
882
|
observation: 'Executed successfully',
|
|
735
883
|
url: page.url(),
|
|
884
|
+
previousUrl: urlBeforeAction,
|
|
885
|
+
screenState,
|
|
736
886
|
timestamp: Date.now()
|
|
737
887
|
});
|
|
738
888
|
executed.push(cmd);
|
|
@@ -755,6 +905,8 @@ try {
|
|
|
755
905
|
observation: `Failed: ${errorMessage}`,
|
|
756
906
|
error: errorMessage,
|
|
757
907
|
url: page.url(),
|
|
908
|
+
previousUrl: urlBeforeAction,
|
|
909
|
+
screenState,
|
|
758
910
|
timestamp: Date.now()
|
|
759
911
|
});
|
|
760
912
|
return { executed, allSucceeded: false };
|
|
@@ -766,6 +918,17 @@ try {
|
|
|
766
918
|
async reportStepProgress(jobId, stepNumber, description, decision, iteration) {
|
|
767
919
|
if (!this.progressReporter?.onStepProgress)
|
|
768
920
|
return;
|
|
921
|
+
// Convert commands to strings (handle both string[] and SomCommand[])
|
|
922
|
+
const commandStrings = decision.commands?.map(cmd => {
|
|
923
|
+
if (typeof cmd === 'string') {
|
|
924
|
+
return cmd;
|
|
925
|
+
}
|
|
926
|
+
else if (cmd && typeof cmd === 'object') {
|
|
927
|
+
// SomCommand object - convert to readable string
|
|
928
|
+
return JSON.stringify(cmd);
|
|
929
|
+
}
|
|
930
|
+
return String(cmd);
|
|
931
|
+
}) || [];
|
|
769
932
|
await this.progressReporter.onStepProgress({
|
|
770
933
|
jobId,
|
|
771
934
|
stepNumber,
|
|
@@ -773,12 +936,11 @@ try {
|
|
|
773
936
|
status: decision.status === 'complete' ? progress_reporter_1.StepExecutionStatus.SUCCESS :
|
|
774
937
|
decision.status === 'stuck' || decision.status === 'infeasible' ? progress_reporter_1.StepExecutionStatus.FAILURE :
|
|
775
938
|
progress_reporter_1.StepExecutionStatus.IN_PROGRESS,
|
|
776
|
-
code:
|
|
939
|
+
code: commandStrings.join('\n'),
|
|
777
940
|
// Include agent metadata for transparency
|
|
778
941
|
agentIteration: iteration,
|
|
779
942
|
agentReasoning: decision.reasoning,
|
|
780
943
|
agentSelfReflection: decision.selfReflection,
|
|
781
|
-
agentExperiences: decision.experiences,
|
|
782
944
|
agentToolsUsed: decision.toolCalls?.map(t => t.name),
|
|
783
945
|
agentStatus: decision.status
|
|
784
946
|
});
|
|
@@ -787,17 +949,25 @@ try {
|
|
|
787
949
|
* Execute exploration mode - agent autonomously explores to achieve journey goal
|
|
788
950
|
* Fires onStepProgress callbacks for each autonomous action (transparent to caller)
|
|
789
951
|
*/
|
|
790
|
-
async executeExploration(page, explorationConfig, jobId) {
|
|
952
|
+
async executeExploration(page, explorationConfig, jobId, existingSiteLearnings) {
|
|
791
953
|
this.logger?.(`\n[Orchestrator] ========== EXPLORATION MODE ==========`);
|
|
954
|
+
this.logger?.(`[Orchestrator] 🚀 runner-core v${package_json_1.version}`);
|
|
792
955
|
this.logger?.(`[Orchestrator] 🎯 Journey Goal: ${explorationConfig.explorationPrompt}`);
|
|
793
956
|
if (explorationConfig.testDataPrompt) {
|
|
794
957
|
this.logger?.(`[Orchestrator] 📋 Test Data: ${explorationConfig.testDataPrompt}`);
|
|
795
958
|
}
|
|
959
|
+
// Journey memory (temporal)
|
|
796
960
|
const memory = {
|
|
797
961
|
history: [],
|
|
798
|
-
experiences: [],
|
|
799
962
|
extractedData: {}
|
|
800
963
|
};
|
|
964
|
+
// Site learnings (persistent across journeys) - initialize with existing or empty
|
|
965
|
+
const siteLearnings = existingSiteLearnings || { screens: {}, uxPatterns: {} };
|
|
966
|
+
if (existingSiteLearnings) {
|
|
967
|
+
const screenCount = Object.keys(existingSiteLearnings.screens).length;
|
|
968
|
+
const patternCount = Object.keys(existingSiteLearnings.uxPatterns).length;
|
|
969
|
+
this.logger?.(`[Orchestrator] 📚 Loaded existing learnings: ${screenCount} screens, ${patternCount} UX patterns`);
|
|
970
|
+
}
|
|
801
971
|
const maxSteps = explorationConfig.maxExplorationSteps || 50;
|
|
802
972
|
let stepNumber = 0;
|
|
803
973
|
const commandsExecuted = [];
|
|
@@ -805,7 +975,7 @@ try {
|
|
|
805
975
|
stepNumber++;
|
|
806
976
|
this.logger?.(`\n[Orchestrator] === Exploration Step ${stepNumber}/${maxSteps} ===`);
|
|
807
977
|
// Build exploratory context
|
|
808
|
-
const context = await this.buildExploratoryContext(page, explorationConfig.explorationPrompt, explorationConfig.testDataPrompt, memory, stepNumber, maxSteps);
|
|
978
|
+
const context = await this.buildExploratoryContext(page, explorationConfig.explorationPrompt, explorationConfig.testDataPrompt, memory, siteLearnings, stepNumber, maxSteps);
|
|
809
979
|
// Call agent with exploratory prompt
|
|
810
980
|
const decision = await this.callExploratoryAgent(context, jobId, stepNumber);
|
|
811
981
|
this.decisionParser.log(decision, stepNumber);
|
|
@@ -837,37 +1007,72 @@ try {
|
|
|
837
1007
|
// Handle blocker clearing
|
|
838
1008
|
if (decision.blockerDetected && decision.blockerDetected.clearingCommands) {
|
|
839
1009
|
this.logger?.(`[Orchestrator] 🚧 Clearing blocker: ${decision.blockerDetected.description}`);
|
|
840
|
-
const
|
|
1010
|
+
const urlBeforeBlocker = page.url();
|
|
1011
|
+
const blockerResult = await this.executeCommands(decision.blockerDetected.clearingCommands, page, memory, stepNumber, 1, jobId, urlBeforeBlocker, decision.screenState);
|
|
841
1012
|
commandsExecuted.push(...blockerResult.executed);
|
|
842
1013
|
}
|
|
843
1014
|
// Execute exploration commands
|
|
844
1015
|
let commandsSucceeded = true;
|
|
845
1016
|
if (decision.commands && decision.commands.length > 0) {
|
|
846
|
-
const
|
|
1017
|
+
const urlBeforeExploration = page.url();
|
|
1018
|
+
const executeResult = await this.executeCommands(decision.commands, page, memory, stepNumber, 1, jobId, urlBeforeExploration, decision.screenState);
|
|
847
1019
|
commandsExecuted.push(...executeResult.executed);
|
|
848
1020
|
commandsSucceeded = executeResult.allSucceeded;
|
|
849
1021
|
}
|
|
850
1022
|
// Report step completion (fires JourneyRunner's onStepComplete callback)
|
|
851
1023
|
if (this.progressReporter?.onStepProgress) {
|
|
1024
|
+
// Convert commands to strings (handle both string[] and SomCommand[])
|
|
1025
|
+
const commandStrings = decision.commands?.map(cmd => {
|
|
1026
|
+
if (typeof cmd === 'string') {
|
|
1027
|
+
return cmd;
|
|
1028
|
+
}
|
|
1029
|
+
else if (cmd && typeof cmd === 'object') {
|
|
1030
|
+
// SomCommand object - convert to readable string
|
|
1031
|
+
return JSON.stringify(cmd);
|
|
1032
|
+
}
|
|
1033
|
+
return String(cmd);
|
|
1034
|
+
}) || [];
|
|
852
1035
|
const stepInfo = {
|
|
853
1036
|
jobId,
|
|
854
1037
|
stepNumber,
|
|
855
1038
|
stepId: `exploration-${stepNumber}-${Date.now()}`,
|
|
856
1039
|
description: decision.reasoning,
|
|
857
|
-
code:
|
|
1040
|
+
code: commandStrings.join('\n'),
|
|
858
1041
|
status: commandsSucceeded ? progress_reporter_1.StepExecutionStatus.SUCCESS : progress_reporter_1.StepExecutionStatus.FAILURE,
|
|
859
1042
|
error: commandsSucceeded ? undefined : 'Command execution failed',
|
|
860
1043
|
wasRepaired: false
|
|
861
1044
|
};
|
|
862
1045
|
await this.progressReporter.onStepProgress(stepInfo);
|
|
863
1046
|
}
|
|
864
|
-
//
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
1047
|
+
// Auto-track visited screen (even without explicit learnings)
|
|
1048
|
+
// Filter out transient screens and loading states
|
|
1049
|
+
if (decision.screenState) {
|
|
1050
|
+
const { screen, state } = decision.screenState;
|
|
1051
|
+
// Skip about:blank and loading states (transient, not worth persisting)
|
|
1052
|
+
const isTransientScreen = screen === 'about:blank' ||
|
|
1053
|
+
screen.toLowerCase().includes('blank');
|
|
1054
|
+
const isLoadingState = state.toLowerCase().includes('loading') ||
|
|
1055
|
+
state.toLowerCase().includes('spinner') ||
|
|
1056
|
+
state.toLowerCase().includes('initializing');
|
|
1057
|
+
if (!isTransientScreen && !isLoadingState) {
|
|
1058
|
+
if (!siteLearnings.screens[screen]) {
|
|
1059
|
+
siteLearnings.screens[screen] = { states: {} };
|
|
1060
|
+
this.logger?.(`[📍 Auto-tracked] Screen: ${screen}`);
|
|
1061
|
+
}
|
|
1062
|
+
if (!siteLearnings.screens[screen].states[state]) {
|
|
1063
|
+
siteLearnings.screens[screen].states[state] = { observations: {} };
|
|
1064
|
+
this.logger?.(`[📍 Auto-tracked] State: ${screen}[${state}]`);
|
|
1065
|
+
}
|
|
1066
|
+
}
|
|
1067
|
+
else {
|
|
1068
|
+
this.logger?.(`[⏭️ Skipped] Transient screen/state: ${screen}[${state}]`);
|
|
869
1069
|
}
|
|
870
1070
|
}
|
|
1071
|
+
// Update site learnings
|
|
1072
|
+
if (decision.siteLearningsUpdate) {
|
|
1073
|
+
this.logger?.(`[🔍 DEBUG] siteLearningsUpdate from LLM:\n${JSON.stringify(decision.siteLearningsUpdate, null, 2)}`);
|
|
1074
|
+
(0, site_learnings_utils_1.mergeSiteLearnings)(siteLearnings, decision.siteLearningsUpdate, this.logger);
|
|
1075
|
+
}
|
|
871
1076
|
// Store note for next iteration
|
|
872
1077
|
if (decision.noteToFutureSelf) {
|
|
873
1078
|
memory.latestNote = {
|
|
@@ -883,7 +1088,8 @@ try {
|
|
|
883
1088
|
commands: commandsExecuted,
|
|
884
1089
|
iterations: stepNumber,
|
|
885
1090
|
terminationReason: 'complete',
|
|
886
|
-
memory
|
|
1091
|
+
memory,
|
|
1092
|
+
siteLearnings
|
|
887
1093
|
};
|
|
888
1094
|
}
|
|
889
1095
|
else if (decision.status === 'stuck') {
|
|
@@ -894,6 +1100,7 @@ try {
|
|
|
894
1100
|
iterations: stepNumber,
|
|
895
1101
|
terminationReason: 'agent_stuck',
|
|
896
1102
|
memory,
|
|
1103
|
+
siteLearnings,
|
|
897
1104
|
error: decision.statusReasoning
|
|
898
1105
|
};
|
|
899
1106
|
}
|
|
@@ -905,14 +1112,47 @@ try {
|
|
|
905
1112
|
commands: commandsExecuted,
|
|
906
1113
|
iterations: stepNumber,
|
|
907
1114
|
terminationReason: 'system_limit',
|
|
908
|
-
memory
|
|
1115
|
+
memory,
|
|
1116
|
+
siteLearnings
|
|
909
1117
|
};
|
|
910
1118
|
}
|
|
911
|
-
async buildExploratoryContext(page, explorationPrompt, testDataPrompt, memory, stepNumber, maxSteps) {
|
|
1119
|
+
async buildExploratoryContext(page, explorationPrompt, testDataPrompt, memory, siteLearnings, stepNumber, maxSteps) {
|
|
912
1120
|
// Wait for page to be ready and elements to appear (especially important after navigation)
|
|
913
1121
|
const currentPageInfo = await page_info_retry_1.PageInfoRetry.getWithRetry(page);
|
|
914
1122
|
const currentURL = page.url();
|
|
915
|
-
|
|
1123
|
+
// Get page dimensions for scroll vs screenshot decisions
|
|
1124
|
+
// IMPORTANT: Wait for page to stabilize with retry (fixes lazy-loaded/dynamic content)
|
|
1125
|
+
try {
|
|
1126
|
+
await page.waitForLoadState('domcontentloaded', { timeout: 10000 });
|
|
1127
|
+
}
|
|
1128
|
+
catch (e) {
|
|
1129
|
+
// Already loaded, continue
|
|
1130
|
+
}
|
|
1131
|
+
// Retry approach: Measure scrollHeight multiple times until it stabilizes
|
|
1132
|
+
// Check MULTIPLE sources and use the maximum (handles edge cases like overflow:hidden)
|
|
1133
|
+
const measureHeight = `Math.max(
|
|
1134
|
+
document.documentElement.scrollHeight || 0,
|
|
1135
|
+
document.body.scrollHeight || 0,
|
|
1136
|
+
document.documentElement.offsetHeight || 0,
|
|
1137
|
+
document.body.offsetHeight || 0
|
|
1138
|
+
)`;
|
|
1139
|
+
let pageHeight = await page.evaluate(measureHeight).catch(() => 0);
|
|
1140
|
+
let previousHeight = 0;
|
|
1141
|
+
let attempts = 0;
|
|
1142
|
+
while (pageHeight !== previousHeight && attempts < 5) {
|
|
1143
|
+
previousHeight = pageHeight;
|
|
1144
|
+
await page.waitForTimeout(200); // Wait for potential expansion
|
|
1145
|
+
pageHeight = await page.evaluate(measureHeight).catch(() => 0);
|
|
1146
|
+
attempts++;
|
|
1147
|
+
}
|
|
1148
|
+
const viewport = page.viewportSize();
|
|
1149
|
+
// @ts-expect-error - document is available in browser context during page.evaluate()
|
|
1150
|
+
const pageWidth = await page.evaluate(() => document.documentElement.scrollWidth).catch(() => 0);
|
|
1151
|
+
// @ts-expect-error - window is available in browser context during page.evaluate()
|
|
1152
|
+
const scrollX = await page.evaluate(() => window.scrollX || window.pageXOffset).catch(() => 0);
|
|
1153
|
+
// @ts-expect-error - window is available in browser context during page.evaluate()
|
|
1154
|
+
const scrollY = await page.evaluate(() => window.scrollY || window.pageYOffset).catch(() => 0);
|
|
1155
|
+
this.logger?.(`[Orchestrator] Exploration page dimensions: ${pageWidth}x${pageHeight}px (viewport: ${viewport?.width}x${viewport?.height}px) - stabilized after ${attempts} checks`, 'log');
|
|
916
1156
|
// SoM integration for exploratory mode
|
|
917
1157
|
let somScreenshot = undefined;
|
|
918
1158
|
let somElementMap = undefined;
|
|
@@ -926,9 +1166,11 @@ try {
|
|
|
926
1166
|
catch (error) {
|
|
927
1167
|
// Page already loaded or timeout - continue
|
|
928
1168
|
}
|
|
929
|
-
// Update SoM markers
|
|
930
|
-
await this.somHandler.updateSom();
|
|
931
|
-
|
|
1169
|
+
// Update SoM markers after coordinate action
|
|
1170
|
+
await this.somHandler.updateSom(true);
|
|
1171
|
+
// TEMPORARY: Always use full-page screenshot for debugging
|
|
1172
|
+
const useFullPageSom = true;
|
|
1173
|
+
somScreenshot = await this.somHandler.getScreenshot(true, useFullPageSom, 60);
|
|
932
1174
|
// Get element map for disambiguation
|
|
933
1175
|
somElementMap = this.somHandler.getSomElementMap();
|
|
934
1176
|
this.logger?.(`[Orchestrator] SoM screenshot captured for exploratory agent`, 'log');
|
|
@@ -944,12 +1186,16 @@ try {
|
|
|
944
1186
|
totalSteps: maxSteps,
|
|
945
1187
|
completedSteps: [],
|
|
946
1188
|
remainingSteps: [],
|
|
947
|
-
currentPageInfo,
|
|
948
1189
|
currentURL,
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
1190
|
+
currentPageTitle: currentPageInfo.title,
|
|
1191
|
+
viewportWidth: viewport?.width,
|
|
1192
|
+
viewportHeight: viewport?.height,
|
|
1193
|
+
pageWidth,
|
|
1194
|
+
pageHeight,
|
|
1195
|
+
scrollX,
|
|
1196
|
+
scrollY,
|
|
1197
|
+
journeyMemory: memory, // Journey-specific memory
|
|
1198
|
+
siteLearnings, // Site-level learnings
|
|
953
1199
|
testDataPrompt, // CRITICAL: Store testDataPrompt in context
|
|
954
1200
|
somScreenshot, // SoM screenshot for exploratory mode (current)
|
|
955
1201
|
somElementMap // SoM element details for disambiguation
|
|
@@ -962,10 +1208,8 @@ try {
|
|
|
962
1208
|
}
|
|
963
1209
|
async callExploratoryAgent(context, jobId, stepNumber) {
|
|
964
1210
|
const toolDescriptions = this.toolRegistry.generateToolDescriptions();
|
|
965
|
-
// Use SoM system prompt
|
|
966
|
-
const systemPrompt = this.config.
|
|
967
|
-
? orchestrator_prompts_1.OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates)
|
|
968
|
-
: orchestrator_prompts_1.OrchestratorPrompts.buildExploratorySystemPrompt(toolDescriptions);
|
|
1211
|
+
// Use SoM system prompt (standard mode)
|
|
1212
|
+
const systemPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates, toolDescriptions);
|
|
969
1213
|
const userPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildExploratoryUserPrompt(context, context.overallGoal, context.testDataPrompt, // Pass testDataPrompt from context
|
|
970
1214
|
stepNumber, context.totalSteps);
|
|
971
1215
|
const llmRequest = {
|
|
@@ -996,6 +1240,20 @@ try {
|
|
|
996
1240
|
const decision = this.decisionParser.parse(response.answer);
|
|
997
1241
|
return decision;
|
|
998
1242
|
}
|
|
1243
|
+
/**
|
|
1244
|
+
* Get accumulated debug statistics
|
|
1245
|
+
*/
|
|
1246
|
+
getDebugStats() {
|
|
1247
|
+
const stats = { ...this.debugStats };
|
|
1248
|
+
// Log summary of collected debug stats
|
|
1249
|
+
this.logger?.(`\n========== DEBUG STATS SUMMARY ==========`);
|
|
1250
|
+
this.logger?.(`Tokens In: ${stats.tokensUsedIn}, Tokens Out: ${stats.tokensUsedOut}`);
|
|
1251
|
+
this.logger?.(`Images Used: ${stats.imagesUsed}`);
|
|
1252
|
+
this.logger?.(`Tools Used: ${Object.keys(stats.toolsUsed).length > 0 ? JSON.stringify(stats.toolsUsed, null, 2) : 'NONE'}`);
|
|
1253
|
+
this.logger?.(`Prompt Suggestions: ${stats.promptImproveSuggestions.length}`);
|
|
1254
|
+
this.logger?.(`=========================================\n`);
|
|
1255
|
+
return stats;
|
|
1256
|
+
}
|
|
999
1257
|
}
|
|
1000
1258
|
exports.OrchestratorAgent = OrchestratorAgent;
|
|
1001
1259
|
//# sourceMappingURL=orchestrator-agent.js.map
|