testchimp-runner-core 0.0.34 → 0.0.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/execution-service.d.ts +1 -4
- package/dist/execution-service.d.ts.map +1 -1
- package/dist/execution-service.js +155 -468
- package/dist/execution-service.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -1
- package/dist/index.js.map +1 -1
- package/dist/orchestrator/decision-parser.d.ts +18 -0
- package/dist/orchestrator/decision-parser.d.ts.map +1 -0
- package/dist/orchestrator/decision-parser.js +127 -0
- package/dist/orchestrator/decision-parser.js.map +1 -0
- package/dist/orchestrator/index.d.ts +4 -2
- package/dist/orchestrator/index.d.ts.map +1 -1
- package/dist/orchestrator/index.js +14 -2
- package/dist/orchestrator/index.js.map +1 -1
- package/dist/orchestrator/orchestrator-agent.d.ts +17 -14
- package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-agent.js +534 -204
- package/dist/orchestrator/orchestrator-agent.js.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.d.ts +14 -2
- package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.js +529 -247
- package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
- package/dist/orchestrator/page-som-handler.d.ts +106 -0
- package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
- package/dist/orchestrator/page-som-handler.js +1353 -0
- package/dist/orchestrator/page-som-handler.js.map +1 -0
- package/dist/orchestrator/som-types.d.ts +149 -0
- package/dist/orchestrator/som-types.d.ts.map +1 -0
- package/dist/orchestrator/som-types.js +87 -0
- package/dist/orchestrator/som-types.js.map +1 -0
- package/dist/orchestrator/tool-registry.d.ts +2 -0
- package/dist/orchestrator/tool-registry.d.ts.map +1 -1
- package/dist/orchestrator/tool-registry.js.map +1 -1
- package/dist/orchestrator/tools/index.d.ts +4 -1
- package/dist/orchestrator/tools/index.d.ts.map +1 -1
- package/dist/orchestrator/tools/index.js +7 -2
- package/dist/orchestrator/tools/index.js.map +1 -1
- package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
- package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
- package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
- package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
- package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
- package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
- package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
- package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
- package/dist/orchestrator/types.d.ts +23 -1
- package/dist/orchestrator/types.d.ts.map +1 -1
- package/dist/orchestrator/types.js +11 -1
- package/dist/orchestrator/types.js.map +1 -1
- package/dist/scenario-service.d.ts +5 -0
- package/dist/scenario-service.d.ts.map +1 -1
- package/dist/scenario-service.js +17 -0
- package/dist/scenario-service.js.map +1 -1
- package/dist/scenario-worker-class.d.ts +4 -0
- package/dist/scenario-worker-class.d.ts.map +1 -1
- package/dist/scenario-worker-class.js +18 -3
- package/dist/scenario-worker-class.js.map +1 -1
- package/dist/testing/agent-tester.d.ts +35 -0
- package/dist/testing/agent-tester.d.ts.map +1 -0
- package/dist/testing/agent-tester.js +84 -0
- package/dist/testing/agent-tester.js.map +1 -0
- package/dist/testing/ref-translator-tester.d.ts +44 -0
- package/dist/testing/ref-translator-tester.d.ts.map +1 -0
- package/dist/testing/ref-translator-tester.js +104 -0
- package/dist/testing/ref-translator-tester.js.map +1 -0
- package/dist/utils/hierarchical-selector.d.ts +47 -0
- package/dist/utils/hierarchical-selector.d.ts.map +1 -0
- package/dist/utils/hierarchical-selector.js +212 -0
- package/dist/utils/hierarchical-selector.js.map +1 -0
- package/dist/utils/page-info-retry.d.ts +14 -0
- package/dist/utils/page-info-retry.d.ts.map +1 -0
- package/dist/utils/page-info-retry.js +60 -0
- package/dist/utils/page-info-retry.js.map +1 -0
- package/dist/utils/page-info-utils.d.ts +1 -0
- package/dist/utils/page-info-utils.d.ts.map +1 -1
- package/dist/utils/page-info-utils.js +46 -18
- package/dist/utils/page-info-utils.js.map +1 -1
- package/dist/utils/ref-attacher.d.ts +21 -0
- package/dist/utils/ref-attacher.d.ts.map +1 -0
- package/dist/utils/ref-attacher.js +149 -0
- package/dist/utils/ref-attacher.js.map +1 -0
- package/dist/utils/ref-translator.d.ts +49 -0
- package/dist/utils/ref-translator.d.ts.map +1 -0
- package/dist/utils/ref-translator.js +276 -0
- package/dist/utils/ref-translator.js.map +1 -0
- package/package.json +6 -1
- package/RELEASE_0.0.26.md +0 -165
- package/RELEASE_0.0.27.md +0 -236
- package/RELEASE_0.0.28.md +0 -286
- package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
- package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
- package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
- package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
- package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
- package/plandocs/INTEGRATION_COMPLETE.md +0 -322
- package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
- package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
- package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
- package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
- package/plandocs/PHASE_1_COMPLETE.md +0 -165
- package/plandocs/PHASE_1_SUMMARY.md +0 -184
- package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
- package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
- package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
- package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
- package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
- package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
- package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
- package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
- package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
- package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
- package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
- package/src/auth-config.ts +0 -84
- package/src/credit-usage-service.ts +0 -188
- package/src/env-loader.ts +0 -103
- package/src/execution-service.ts +0 -1413
- package/src/file-handler.ts +0 -104
- package/src/index.ts +0 -422
- package/src/llm-facade.ts +0 -821
- package/src/llm-provider.ts +0 -53
- package/src/model-constants.ts +0 -35
- package/src/orchestrator/index.ts +0 -34
- package/src/orchestrator/orchestrator-agent.ts +0 -862
- package/src/orchestrator/orchestrator-agent.ts.backup +0 -1386
- package/src/orchestrator/orchestrator-prompts.ts +0 -474
- package/src/orchestrator/tool-registry.ts +0 -182
- package/src/orchestrator/tools/check-page-ready.ts +0 -75
- package/src/orchestrator/tools/extract-data.ts +0 -92
- package/src/orchestrator/tools/index.ts +0 -12
- package/src/orchestrator/tools/inspect-page.ts +0 -42
- package/src/orchestrator/tools/recall-history.ts +0 -72
- package/src/orchestrator/tools/take-screenshot.ts +0 -128
- package/src/orchestrator/tools/verify-action-result.ts +0 -159
- package/src/orchestrator/types.ts +0 -248
- package/src/playwright-mcp-service.ts +0 -224
- package/src/progress-reporter.ts +0 -144
- package/src/prompts.ts +0 -842
- package/src/providers/backend-proxy-llm-provider.ts +0 -91
- package/src/providers/local-llm-provider.ts +0 -38
- package/src/scenario-service.ts +0 -232
- package/src/scenario-worker-class.ts +0 -1089
- package/src/script-utils.ts +0 -203
- package/src/types.ts +0 -239
- package/src/utils/browser-utils.ts +0 -348
- package/src/utils/coordinate-converter.ts +0 -162
- package/src/utils/page-info-utils.ts +0 -250
- package/testchimp-runner-core-0.0.33.tgz +0 -0
- package/tsconfig.json +0 -19
|
@@ -1,862 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Orchestrator Agent
|
|
3
|
-
* Single agent with tool-use capabilities that maintains journey memory and executes scenarios
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import { expect } from '@playwright/test';
|
|
7
|
-
import { LLMFacade } from '../llm-facade';
|
|
8
|
-
import { ProgressReporter, TokenUsage, StepExecutionStatus } from '../progress-reporter';
|
|
9
|
-
import { getEnhancedPageInfo } from '../utils/page-info-utils';
|
|
10
|
-
import { CoordinateConverter } from '../utils/coordinate-converter';
|
|
11
|
-
import { ToolRegistry, ToolExecutionContext } from './tool-registry';
|
|
12
|
-
import { DEFAULT_MODEL } from '../model-constants';
|
|
13
|
-
import {
|
|
14
|
-
AgentConfig,
|
|
15
|
-
AgentContext,
|
|
16
|
-
AgentDecision,
|
|
17
|
-
JourneyMemory,
|
|
18
|
-
MemoryStep,
|
|
19
|
-
OrchestratorStepResult,
|
|
20
|
-
SelfReflection,
|
|
21
|
-
NoteToFutureSelf,
|
|
22
|
-
CoordinateAction,
|
|
23
|
-
DEFAULT_AGENT_CONFIG
|
|
24
|
-
} from './types';
|
|
25
|
-
import { OrchestratorPrompts } from './orchestrator-prompts';
|
|
26
|
-
|
|
27
|
-
/**
|
|
28
|
-
* Orchestrator Agent - manages step execution with tool use and memory
|
|
29
|
-
*/
|
|
30
|
-
export class OrchestratorAgent {
|
|
31
|
-
private llmFacade: LLMFacade;
|
|
32
|
-
private toolRegistry: ToolRegistry;
|
|
33
|
-
private progressReporter?: ProgressReporter;
|
|
34
|
-
private config: Required<AgentConfig>;
|
|
35
|
-
private logger?: (message: string, level?: 'log' | 'error' | 'warn' | 'debug') => void;
|
|
36
|
-
private debugMode: boolean = false;
|
|
37
|
-
|
|
38
|
-
/**
 * Wire up the agent's collaborators.
 *
 * @param llmFacade Facade used to reach the LLM provider.
 * @param toolRegistry Registry of the tools the agent may call.
 * @param config Optional overrides; any key left out keeps its value from DEFAULT_AGENT_CONFIG.
 * @param progressReporter Optional sink for progress and token-usage callbacks.
 * @param logger Optional log function taking a message and an optional level.
 * @param debugMode Initial debug flag; stored as false when omitted.
 */
constructor(
  llmFacade: LLMFacade,
  toolRegistry: ToolRegistry,
  config?: Partial<AgentConfig>,
  progressReporter?: ProgressReporter,
  logger?: (message: string, level?: 'log' | 'error' | 'warn' | 'debug') => void,
  debugMode?: boolean
) {
  // Caller overrides are layered on top of the defaults.
  const effectiveConfig = { ...DEFAULT_AGENT_CONFIG, ...config };

  this.llmFacade = llmFacade;
  this.toolRegistry = toolRegistry;
  this.config = effectiveConfig;
  this.progressReporter = progressReporter;
  this.logger = logger;
  this.debugMode = debugMode ? debugMode : false;
}
|
|
53
|
-
|
|
54
|
-
/**
 * Turn debug mode on or off after construction.
 *
 * @param enabled New value for the agent's debug flag.
 */
setDebugMode(enabled: boolean): void {
  const nextDebugMode = enabled;
  this.debugMode = nextDebugMode;
}
|
|
57
|
-
|
|
58
|
-
/**
 * Execute a single step of the scenario.
 *
 * Runs up to `config.maxIterationsPerStep` iterations. Each iteration:
 *  1. builds a fresh context and asks the agent for a decision,
 *  2. runs any requested tools (and re-asks the agent if it wants their results),
 *  3. runs blocker-clearing commands, then the main commands, then an optional
 *     coordinate action (which is verified visually with before/after screenshots),
 *  4. folds experiences, self-reflection and the note-to-self back into memory,
 *  5. returns when the agent reports a terminal status.
 *
 * The step is force-terminated as `agent_stuck` after 5 consecutive failing
 * iterations or after 2 failed coordinate attempts, and as `system_limit`
 * when the iteration budget runs out.
 *
 * @param page Browser page handle (untyped here; used for screenshots and load-state waits).
 * @param stepDescription Goal text of the step being executed.
 * @param stepNumber 1-based index of the step within the scenario.
 * @param totalSteps Total number of steps in the scenario.
 * @param scenarioSteps All step descriptions of the scenario.
 * @param memory Journey memory; mutated in place (experiences, latestNote).
 * @param jobId Identifier forwarded to progress/token reporting.
 * @returns The outcome of the step, including every command that was executed.
 */
async executeStep(
  page: any,
  stepDescription: string,
  stepNumber: number,
  totalSteps: number,
  scenarioSteps: string[],
  memory: JourneyMemory,
  jobId: string
): Promise<OrchestratorStepResult> {
  this.logger?.(`\n[Orchestrator] ========== STEP ${stepNumber}/${totalSteps} ==========`);
  this.logger?.(`[Orchestrator] 🎯 Goal: ${stepDescription}`);

  let iteration = 0;
  let previousReflection: SelfReflection | undefined = undefined;
  let noteToSelf: NoteToFutureSelf | undefined = memory.latestNote; // Start with note from previous step
  const commandsExecuted: string[] = [];
  let consecutiveFailures = 0; // Consecutive iterations with failed commands
  let coordinateAttempts = 0; // Coordinate mode attempts (max 2)

  while (iteration < this.config.maxIterationsPerStep) {
    iteration++;

    this.logger?.(`\n[Orchestrator] === Iteration ${iteration}/${this.config.maxIterationsPerStep} ===`);

    // Build context for agent
    const context = await this.buildAgentContext(
      page,
      stepDescription,
      stepNumber,
      totalSteps,
      scenarioSteps,
      memory,
      previousReflection,
      consecutiveFailures,
      noteToSelf
    );

    // Call agent to make decision
    const decision = await this.callAgent(context, jobId, stepNumber, iteration, consecutiveFailures);

    // Log agent's reasoning
    this.logAgentDecision(decision, iteration);

    // Report progress
    await this.reportStepProgress(jobId, stepNumber, stepDescription, decision, iteration);

    // Execute tools if requested (tools are READ-ONLY, they don't change state)
    let toolResults: Record<string, any> = {};

    // ANTI-LOOP: Detect if agent is taking screenshots repeatedly without acting.
    // This only warns; it does not alter the decision.
    const recentScreenshots = memory.history.slice(-3).filter(s =>
      s.code.includes('take_screenshot') || s.action.includes('Screenshot')
    );
    if (recentScreenshots.length >= 2 && iteration >= 3) {
      this.logger?.(`[Orchestrator] ⚠️ WARNING: ${recentScreenshots.length} screenshots in last 3 iterations - agent may be looping`, 'warn');
      this.logger?.(`[Orchestrator] 💭 System: Stop gathering info, START ACTING with available selectors`);
    }

    if (decision.toolCalls && decision.toolCalls.length > 0) {
      toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber);

      // If agent wants to wait for tool results before proceeding, call agent again
      if (decision.needsToolResults) {
        const updatedContext = { ...context, toolResults };
        const continuedDecision = await this.callAgent(updatedContext, jobId, stepNumber, iteration, consecutiveFailures);

        // Merge continued decision (commands fall back to the first decision's)
        decision.commands = continuedDecision.commands || decision.commands;
        decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
        decision.status = continuedDecision.status;
        decision.statusReasoning = continuedDecision.statusReasoning;
        decision.reasoning = continuedDecision.reasoning;
      }
    }

    // Execute commands sequentially
    let iterationHadFailure = false;

    // Handle blocker if detected (clear blocker FIRST, then proceed with main commands)
    if (decision.blockerDetected && decision.blockerDetected.clearingCommands && decision.blockerDetected.clearingCommands.length > 0) {
      this.logger?.(`[Orchestrator] 🚧 BLOCKER DETECTED: ${decision.blockerDetected.description}`);
      this.logger?.(`[Orchestrator] 🧹 Clearing blocker with ${decision.blockerDetected.clearingCommands.length} command(s)...`);

      const blockerResult = await this.executeCommandsSequentially(
        decision.blockerDetected.clearingCommands,
        page,
        memory,
        stepNumber,
        iteration,
        jobId
      );

      // Add blocker commands with comment to output
      if (blockerResult.executed.length > 0) {
        commandsExecuted.push(`// Blocker: ${decision.blockerDetected.description}`);
        commandsExecuted.push(...blockerResult.executed);
      }

      // If blocker clearing failed, track it
      if (!blockerResult.allSucceeded) {
        this.logger?.(`[Orchestrator] ❌ Failed to clear blocker - continuing anyway`);
        consecutiveFailures++;
        iterationHadFailure = true;
      }
    }

    // Execute main commands (only if no blocker failure)
    if (decision.commands && decision.commands.length > 0 && !iterationHadFailure) {
      const executeResult = await this.executeCommandsSequentially(
        decision.commands,
        page,
        memory,
        stepNumber,
        iteration,
        jobId
      );

      commandsExecuted.push(...executeResult.executed);

      // Track failures
      if (!executeResult.allSucceeded) {
        this.logger?.(`[Orchestrator] ⚠ Command execution stopped at failure`);
        consecutiveFailures++;
        iterationHadFailure = true;
      } else {
        consecutiveFailures = 0; // Reset on success
      }
    }

    // Handle coordinate-based actions (fallback when selectors fail)
    if (decision.coordinateAction && !iterationHadFailure) {
      coordinateAttempts++;

      this.logger?.(`[Orchestrator] 🎯 Coordinate Action (attempt ${coordinateAttempts}/2): ${decision.coordinateAction.action} at (${decision.coordinateAction.xPercent}%, ${decision.coordinateAction.yPercent}%)`);

      try {
        // BEFORE screenshot for visual verification
        const beforeDataUrl = await this.captureViewportJpegDataUrl(page);

        // Generate Playwright commands from coordinate action
        const coordCommands = await CoordinateConverter.generateCommands(decision.coordinateAction, page);

        this.logger?.(`[Orchestrator] Generated commands:`);
        coordCommands.forEach(cmd => this.logger?.(` ${cmd}`));

        // Execute coordinate commands
        const coordResult = await this.executeCommandsSequentially(
          coordCommands,
          page,
          memory,
          stepNumber,
          iteration,
          jobId
        );

        commandsExecuted.push(...coordResult.executed);

        if (!coordResult.allSucceeded) {
          this.logger?.(`[Orchestrator] ❌ Coordinate action failed (Playwright error)`);
          consecutiveFailures++;
          iterationHadFailure = true;

          // Give up after 2 coordinate attempts
          if (coordinateAttempts >= 2) {
            this.logger?.(`[Orchestrator] 🛑 Coordinate mode exhausted (2 attempts) - marking stuck`);
            return this.stuckStepResult(
              commandsExecuted,
              iteration,
              memory,
              'Coordinate fallback failed after 2 attempts - unable to proceed'
            );
          }
        } else {
          this.logger?.(`[Orchestrator] ✅ Coordinate action succeeded (no Playwright error)`);

          // CRITICAL: Verify visually if goal was achieved (coordinates might have clicked wrong place)
          // Wait for network idle (waits only as long as needed, max 10s)
          try {
            await page.waitForLoadState('networkidle', { timeout: 10000 });
          } catch (e) {
            // Network idle timeout - page may still be loading, but proceed with verification
            this.logger?.(`[Orchestrator] ⚠️ Network idle timeout after 10s, proceeding with verification`, 'warn');
          }

          const afterDataUrl = await this.captureViewportJpegDataUrl(page);

          this.logger?.(`[Orchestrator] 📸 Verifying coordinate action visually...`);

          // Fall back to the step description if the index is out of range,
          // so the verifier never sees "Goal: undefined".
          const verificationGoal = scenarioSteps[stepNumber - 1] ?? stepDescription;

          const verificationRequest = {
            model: 'gpt-5-mini',
            systemPrompt: 'You are a visual verification expert for web automation. Compare before/after screenshots to determine if an action achieved its goal.',
            userPrompt: `Goal: ${verificationGoal}\n\nA coordinate-based action was just executed. Compare the BEFORE and AFTER screenshots.\n\nDid the action achieve the goal? Respond with JSON:\n{\n "verified": boolean,\n "reasoning": "What changed (or didn't change) between screenshots",\n "visibleChanges": ["List of UI changes observed"],\n "clickedWrongPlace": boolean\n}\n\nBe strict: Only return verified=true if you clearly see the expected change matching the goal.`,
            images: [
              { label: 'BEFORE', dataUrl: beforeDataUrl },
              { label: 'AFTER', dataUrl: afterDataUrl }
            ]
          };

          const verificationResponse = await this.llmFacade.llmProvider.callLLM(verificationRequest);
          const jsonMatch = verificationResponse.answer.match(/\{[\s\S]*\}/);

          if (jsonMatch) {
            // A malformed JSON body throws here and is handled by the outer catch.
            const verificationResult = JSON.parse(jsonMatch[0]);
            const verified = verificationResult.verified === true;
            const reasoning = verificationResult.reasoning || 'No reasoning provided';
            const clickedWrongPlace = verificationResult.clickedWrongPlace === true;

            this.logger?.(`[Orchestrator] 📊 Visual verification: ${verified ? '✅ VERIFIED' : '❌ NOT VERIFIED'}`);
            this.logger?.(`[Orchestrator] 💭 Reasoning: ${reasoning}`);

            if (verified) {
              // Goal achieved! Can mark complete
              consecutiveFailures = 0;

              // Store note for context
              noteToSelf = {
                fromIteration: iteration,
                content: `Coordinate action verified successful: ${reasoning}`
              };
              memory.latestNote = noteToSelf;
            } else {
              // Goal NOT achieved despite no error
              this.logger?.(`[Orchestrator] ⚠️ Coordinate click succeeded but goal NOT achieved`, 'warn');
              consecutiveFailures++;
              iterationHadFailure = true;

              // Store diagnostic note for next attempt
              const diagnostic = clickedWrongPlace
                ? `Clicked wrong place. ${reasoning}. Try different coordinates.`
                : `Action executed but goal not achieved. ${reasoning}. May need different approach.`;

              noteToSelf = {
                fromIteration: iteration,
                content: diagnostic
              };
              memory.latestNote = noteToSelf;

              // Give up after 2 coordinate attempts
              if (coordinateAttempts >= 2) {
                this.logger?.(`[Orchestrator] 🛑 Coordinate mode exhausted (2 attempts, none achieved goal) - marking stuck`);
                return this.stuckStepResult(
                  commandsExecuted,
                  iteration,
                  memory,
                  `Coordinate actions clicked but didn't achieve goal: ${reasoning}`
                );
              }
            }
          } else {
            this.logger?.(`[Orchestrator] ⚠️ Could not parse verification response - treating as unverified`, 'warn');
            consecutiveFailures++;
            iterationHadFailure = true;
          }
        }
      } catch (error: unknown) {
        // Rejections are not guaranteed to be Error instances (or even objects),
        // so read the message defensively instead of dereferencing blindly.
        const detail = (error as { message?: unknown } | null | undefined)?.message;
        const errorMessage = detail !== undefined ? String(detail) : String(error);

        this.logger?.(`[Orchestrator] ❌ Coordinate action error: ${errorMessage}`, 'error');
        consecutiveFailures++;
        iterationHadFailure = true;

        // Give up after 2 coordinate attempts
        if (coordinateAttempts >= 2) {
          this.logger?.(`[Orchestrator] 🛑 Coordinate mode exhausted (2 attempts) - marking stuck`);
          return this.stuckStepResult(
            commandsExecuted,
            iteration,
            memory,
            'Coordinate fallback failed after 2 attempts - unable to proceed'
          );
        }
      }
    }

    // System-enforced stuck detection (agent might not detect it)
    // Allow 5 failures: 3 selector attempts + 2 coordinate attempts
    if (consecutiveFailures >= 5) {
      this.logger?.(`[Orchestrator] 🛑 SYSTEM: ${consecutiveFailures} consecutive failures detected - forcing stuck`, 'warn');
      return this.stuckStepResult(
        commandsExecuted,
        iteration,
        memory,
        `Failed ${consecutiveFailures} iterations in a row - unable to proceed`
      );
    }

    // Update memory with experiences
    if (decision.experiences && decision.experiences.length > 0) {
      for (const exp of decision.experiences) {
        // Deduplicate - skip when one text contains the other (case-insensitive)
        const candidate = exp.toLowerCase();
        const exists = memory.experiences.some(existing => {
          const known = existing.toLowerCase();
          return known.includes(candidate) || candidate.includes(known);
        });

        if (!exists) {
          memory.experiences.push(exp);
          this.logger?.(`[Orchestrator] 📚 Experience: ${exp}`);
        }
      }

      // Cap experiences, keeping the most recent ones.
      // slice(-0) would keep everything, so a non-positive cap is handled explicitly.
      const maxExperiences = this.config.maxExperiences;
      if (memory.experiences.length > maxExperiences) {
        memory.experiences = maxExperiences > 0 ? memory.experiences.slice(-maxExperiences) : [];
      }
    }

    // Store self-reflection for next iteration
    previousReflection = decision.selfReflection;

    // Store note to future self (tactical memory across iterations AND steps)
    if (decision.noteToFutureSelf) {
      noteToSelf = {
        fromIteration: iteration,
        content: decision.noteToFutureSelf
      };
      memory.latestNote = noteToSelf; // Persist in journey memory across steps
      this.logger?.(`[Orchestrator] 📝 Note to self: ${decision.noteToFutureSelf}`);
    }

    // Check termination
    if (decision.status !== 'continue') {
      this.logger?.(`[Orchestrator] 🎯 Status: ${decision.status}`);
      this.logger?.(`[Orchestrator] 💭 Reason: ${decision.statusReasoning}`);

      // SAFETY CHECK: Don't allow "complete" if commands failed this iteration
      if (decision.status === 'complete' && iterationHadFailure) {
        this.logger?.(`[Orchestrator] ⚠️ OVERRIDE: Agent said "complete" but commands FAILED - forcing "continue"`, 'warn');
        this.logger?.(`[Orchestrator] 💭 System: Commands must succeed before marking complete`);
        // Don't return - continue to next iteration
      } else {
        // Valid termination
        return {
          success: decision.status === 'complete',
          commands: commandsExecuted,
          iterations: iteration,
          terminationReason: decision.status === 'complete' ? 'complete' :
            decision.status === 'stuck' ? 'agent_stuck' :
            'infeasible',
          memory
        };
      }
    }
  }

  // Hit iteration limit
  this.logger?.(`[Orchestrator] ⚠ Maximum iterations reached (${this.config.maxIterationsPerStep})`);

  return {
    success: false,
    commands: commandsExecuted,
    iterations: iteration,
    terminationReason: 'system_limit',
    memory,
    error: 'Maximum iterations reached'
  };
}

/**
 * Capture the current viewport as a JPEG data URL.
 *
 * The screenshot result is normalised to base64 explicitly: a string result is
 * used as-is, any binary result is base64-encoded. Interpolating a binary
 * result directly into the URL would stringify its raw bytes instead.
 */
private async captureViewportJpegDataUrl(page: any): Promise<string> {
  const shot = await page.screenshot({ encoding: 'base64', fullPage: false, type: 'jpeg', quality: 60 });
  const base64 = typeof shot === 'string' ? shot : Buffer.from(shot).toString('base64');
  return `data:image/jpeg;base64,${base64}`;
}

/** Build the failed step result used whenever the system declares the agent stuck. */
private stuckStepResult(
  commands: string[],
  iterations: number,
  memory: JourneyMemory,
  error: string
): OrchestratorStepResult {
  return {
    success: false,
    commands,
    iterations,
    terminationReason: 'agent_stuck',
    memory,
    error
  };
}
|
|
433
|
-
|
|
434
|
-
/**
 * Build the context handed to the agent for one iteration.
 *
 * Reads fresh page info and the current URL from the page, then combines them
 * with the scenario layout and the relevant parts of journey memory.
 *
 * @param page Browser page handle (untyped here).
 * @param currentStepGoal Goal text of the step being executed.
 * @param stepNumber 1-based index of the current step.
 * @param totalSteps Total number of steps in the scenario.
 * @param scenarioSteps All step descriptions; joined with newlines to form the overall goal.
 * @param memory Journey memory supplying history, experiences and extracted data.
 * @param previousReflection Self-reflection from the previous iteration, if any.
 * @param consecutiveFailures Accepted for call-site compatibility; not used when building the context.
 * @param noteFromPreviousIteration Tactical note carried over from the previous iteration, if any.
 * @returns The assembled agent context.
 */
private async buildAgentContext(
  page: any,
  currentStepGoal: string,
  stepNumber: number,
  totalSteps: number,
  scenarioSteps: string[],
  memory: JourneyMemory,
  previousReflection?: SelfReflection,
  consecutiveFailures?: number,
  noteFromPreviousIteration?: NoteToFutureSelf
): Promise<AgentContext> {
  // Get fresh DOM
  const currentPageInfo = await getEnhancedPageInfo(page);
  const currentURL = page.url();

  // Get recent steps. slice(-0) is slice(0) and would return the WHOLE history,
  // so a non-positive count must be handled explicitly as "no recent steps".
  const recentStepsCount = this.config.recentStepsCount;
  const recentSteps = recentStepsCount > 0 ? memory.history.slice(-recentStepsCount) : [];

  // Build context
  return {
    overallGoal: scenarioSteps.join('\n'),
    currentStepGoal,
    stepNumber,
    totalSteps,
    // Steps before the current one (stepNumber is 1-based)
    completedSteps: scenarioSteps.slice(0, stepNumber - 1),
    // Steps after the current one
    remainingSteps: scenarioSteps.slice(stepNumber),
    currentPageInfo,
    currentURL,
    recentSteps,
    experiences: memory.experiences,
    extractedData: memory.extractedData,
    previousIterationGuidance: previousReflection,
    noteFromPreviousIteration
  };
}
|
|
472
|
-
|
|
473
|
-
/**
 * Call the agent (LLM) to make a decision for the current iteration.
 *
 * Chooses the coordinate-mode system prompt once 3 or more consecutive failures
 * have been recorded, otherwise the regular tool-aware system prompt. Token
 * usage is forwarded to the progress reporter when the response carries it.
 *
 * Never rejects because of an LLM/parse failure: any error raised inside the
 * call path is logged and converted into a `stuck` decision.
 *
 * @param context Context for this iteration.
 * @param jobId Identifier forwarded with the token-usage report.
 * @param stepNumber 1-based step index, forwarded with the token-usage report.
 * @param iteration Iteration number within the step.
 * @param consecutiveFailures Number of consecutive failing iterations so far, if known.
 * @returns The parsed agent decision, or a `stuck` fallback decision on failure.
 */
private async callAgent(
  context: AgentContext,
  jobId: string,
  stepNumber: number,
  iteration: number,
  consecutiveFailures?: number
): Promise<AgentDecision> {
  // Detect if coordinate mode should be activated
  // Phase 1: Only 2 tiers (selectors → coordinates), so activate after 3 failures
  // Phase 2: Will have 3 tiers (selectors → index → coordinates), threshold will be 5
  const useCoordinateMode = consecutiveFailures !== undefined && consecutiveFailures >= 3;

  // Build appropriate system prompt based on mode
  const toolDescriptions = this.toolRegistry.generateToolDescriptions();
  const systemPrompt = useCoordinateMode
    ? OrchestratorPrompts.buildCoordinateSystemPrompt()
    : OrchestratorPrompts.buildSystemPrompt(toolDescriptions);
  const userPrompt = OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures);

  // Log prompt lengths for monitoring
  const systemLength = systemPrompt.length;
  const userLength = userPrompt.length;
  const totalLength = systemLength + userLength;
  const estimatedTokens = Math.ceil(totalLength / 4); // Rough estimate: 4 chars per token

  this.logger?.(`[Orchestrator] 📊 Prompt lengths: system=${systemLength} chars, user=${userLength} chars, total=${totalLength} chars (~${estimatedTokens} tokens)`, 'log');

  try {
    // Call LLM directly via provider
    const llmRequest = {
      model: DEFAULT_MODEL,
      systemPrompt,
      userPrompt
    };

    const response = await this.llmFacade.llmProvider.callLLM(llmRequest);

    // Report token usage
    if (response.usage && this.progressReporter?.onTokensUsed) {
      const tokenUsage: TokenUsage = {
        jobId,
        stepNumber,
        iteration,
        inputTokens: response.usage.inputTokens,
        outputTokens: response.usage.outputTokens,
        includesImage: false,
        model: DEFAULT_MODEL,
        timestamp: Date.now()
      };
      this.logger?.(`[Orchestrator] 💰 Reporting token usage: ${tokenUsage.inputTokens} + ${tokenUsage.outputTokens}`, 'log');
      await this.progressReporter.onTokensUsed(tokenUsage);
    } else if (!response.usage) {
      this.logger?.(`[Orchestrator] ⚠ No usage data in LLM response`, 'warn');
    }

    // Parse response
    return this.parseAgentDecision(response.answer);

  } catch (error: unknown) {
    // A rejection value may be null/undefined or a non-Error; dereferencing
    // `.message` blindly would make this fallback path itself throw.
    const detail = (error as { message?: unknown } | null | undefined)?.message;
    const errorMessage = detail !== undefined ? String(detail) : String(error);

    this.logger?.(`[Orchestrator] ✗ Agent call failed: ${errorMessage}`, 'error');

    // Return fallback decision
    return {
      status: 'stuck',
      statusReasoning: `Agent call failed: ${errorMessage}`,
      reasoning: 'LLM call failed'
    };
  }
}
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
/**
|
|
549
|
-
* Parse agent decision from LLM response
|
|
550
|
-
*/
|
|
551
|
-
private parseAgentDecision(response: string): AgentDecision {
|
|
552
|
-
try {
|
|
553
|
-
// Extract JSON from response
|
|
554
|
-
const jsonMatch = response.match(/\{[\s\S]*\}/);
|
|
555
|
-
if (!jsonMatch) {
|
|
556
|
-
this.logger?.(`[Orchestrator] ✗ No JSON found in LLM response`, 'error');
|
|
557
|
-
this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
|
|
558
|
-
throw new Error('No JSON found in response');
|
|
559
|
-
}
|
|
560
|
-
|
|
561
|
-
const parsed = JSON.parse(jsonMatch[0]);
|
|
562
|
-
|
|
563
|
-
// Validate required fields
|
|
564
|
-
// Accept either "reasoning" or "statusReasoning" (LLMs sometimes only provide one)
|
|
565
|
-
if (!parsed.status || (!parsed.reasoning && !parsed.statusReasoning)) {
|
|
566
|
-
this.logger?.(`[Orchestrator] ✗ Missing required fields in parsed JSON`, 'error');
|
|
567
|
-
this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
|
|
568
|
-
this.logger?.(`[Orchestrator] 📄 PARSED JSON:\n${JSON.stringify(parsed, null, 2)}`, 'error');
|
|
569
|
-
this.logger?.(`[Orchestrator] ❌ Has status: ${!!parsed.status}, Has reasoning: ${!!parsed.reasoning}, Has statusReasoning: ${!!parsed.statusReasoning}`, 'error');
|
|
570
|
-
throw new Error('Missing required fields: status and (reasoning or statusReasoning)');
|
|
571
|
-
}
|
|
572
|
-
|
|
573
|
-
// Normalize: if reasoning is missing but statusReasoning exists, use statusReasoning as reasoning
|
|
574
|
-
if (!parsed.reasoning && parsed.statusReasoning) {
|
|
575
|
-
parsed.reasoning = parsed.statusReasoning;
|
|
576
|
-
}
|
|
577
|
-
|
|
578
|
-
return parsed as AgentDecision;
|
|
579
|
-
|
|
580
|
-
} catch (error: any) {
|
|
581
|
-
this.logger?.(`[Orchestrator] ✗ Failed to parse agent decision: ${error.message}`, 'error');
|
|
582
|
-
|
|
583
|
-
// Only log full response if not already logged above
|
|
584
|
-
if (!error.message.includes('Missing required fields') && !error.message.includes('No JSON found')) {
|
|
585
|
-
this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
|
|
586
|
-
}
|
|
587
|
-
|
|
588
|
-
// Return fallback
|
|
589
|
-
return {
|
|
590
|
-
status: 'stuck',
|
|
591
|
-
statusReasoning: 'Failed to parse agent response',
|
|
592
|
-
reasoning: `Parse error: ${error.message}`
|
|
593
|
-
};
|
|
594
|
-
}
|
|
595
|
-
}
|
|
596
|
-
|
|
597
|
-
/**
|
|
598
|
-
* Execute tools
|
|
599
|
-
*/
|
|
600
|
-
private async executeTools(
|
|
601
|
-
toolCalls: any[],
|
|
602
|
-
page: any,
|
|
603
|
-
memory: JourneyMemory,
|
|
604
|
-
stepNumber: number
|
|
605
|
-
): Promise<Record<string, any>> {
|
|
606
|
-
this.logger?.(`[Orchestrator] 🔧 Executing ${toolCalls.length} tool(s)`);
|
|
607
|
-
|
|
608
|
-
const results: Record<string, any> = {};
|
|
609
|
-
const toolContext: ToolExecutionContext = {
|
|
610
|
-
page,
|
|
611
|
-
memory,
|
|
612
|
-
stepNumber,
|
|
613
|
-
logger: this.logger
|
|
614
|
-
};
|
|
615
|
-
|
|
616
|
-
for (const toolCall of toolCalls.slice(0, this.config.maxToolCallsPerIteration)) {
|
|
617
|
-
this.logger?.(`[Orchestrator] ▶ ${toolCall.name}(${JSON.stringify(toolCall.params).substring(0, 50)}...)`);
|
|
618
|
-
|
|
619
|
-
const result = await this.toolRegistry.execute(toolCall, toolContext);
|
|
620
|
-
results[toolCall.name] = result;
|
|
621
|
-
|
|
622
|
-
if (result.success) {
|
|
623
|
-
this.logger?.(`[Orchestrator] ✓ ${toolCall.name} succeeded`);
|
|
624
|
-
} else {
|
|
625
|
-
this.logger?.(`[Orchestrator] ✗ ${toolCall.name} failed: ${result.error}`, 'error');
|
|
626
|
-
}
|
|
627
|
-
}
|
|
628
|
-
|
|
629
|
-
return results;
|
|
630
|
-
}
|
|
631
|
-
|
|
632
|
-
/**
|
|
633
|
-
* Execute commands sequentially with SHARED context (variables persist across commands)
|
|
634
|
-
*/
|
|
635
|
-
private async executeCommandsSequentially(
|
|
636
|
-
commands: string[],
|
|
637
|
-
page: any,
|
|
638
|
-
memory: JourneyMemory,
|
|
639
|
-
stepNumber: number,
|
|
640
|
-
iteration: number,
|
|
641
|
-
jobId: string
|
|
642
|
-
): Promise<{ executed: string[]; allSucceeded: boolean }> {
|
|
643
|
-
this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s) in shared context`);
|
|
644
|
-
|
|
645
|
-
const executed: string[] = [];
|
|
646
|
-
const limitedCommands = commands.slice(0, this.config.maxCommandsPerIteration);
|
|
647
|
-
|
|
648
|
-
// Build execution with shared context (all commands share scope - variables persist)
|
|
649
|
-
const commandsWithTracking = limitedCommands.map((cmd, i) => {
|
|
650
|
-
return `
|
|
651
|
-
// Command ${i + 1}/${limitedCommands.length}
|
|
652
|
-
try {
|
|
653
|
-
${cmd}
|
|
654
|
-
__results.push({ index: ${i}, success: true });
|
|
655
|
-
} catch (error) {
|
|
656
|
-
__results.push({ index: ${i}, success: false, error: error.message });
|
|
657
|
-
throw error; // Stop on first failure
|
|
658
|
-
}`;
|
|
659
|
-
}).join('\n');
|
|
660
|
-
|
|
661
|
-
const wrappedCode = `
|
|
662
|
-
const __results = [];
|
|
663
|
-
${commandsWithTracking}
|
|
664
|
-
return __results;
|
|
665
|
-
`;
|
|
666
|
-
|
|
667
|
-
try {
|
|
668
|
-
// Execute in shared context - variables declared here persist for entire scenario
|
|
669
|
-
// Pass both page and expect to make Playwright assertions available
|
|
670
|
-
const func = new Function('page', 'expect', 'return (async () => { ' + wrappedCode + ' })()');
|
|
671
|
-
const results = await func(page, expect);
|
|
672
|
-
|
|
673
|
-
// Record results for each command
|
|
674
|
-
for (let i = 0; i < limitedCommands.length; i++) {
|
|
675
|
-
const cmd = limitedCommands[i];
|
|
676
|
-
const result = results[i];
|
|
677
|
-
|
|
678
|
-
if (result && result.success) {
|
|
679
|
-
this.logger?.(`[Orchestrator] ✓ [${i + 1}/${limitedCommands.length}] Success`);
|
|
680
|
-
|
|
681
|
-
// Record in history
|
|
682
|
-
memory.history.push({
|
|
683
|
-
stepNumber,
|
|
684
|
-
iteration,
|
|
685
|
-
action: `Command ${i + 1}/${limitedCommands.length}`,
|
|
686
|
-
code: cmd,
|
|
687
|
-
result: 'success',
|
|
688
|
-
observation: 'Executed successfully',
|
|
689
|
-
url: page.url(),
|
|
690
|
-
timestamp: Date.now()
|
|
691
|
-
});
|
|
692
|
-
|
|
693
|
-
executed.push(cmd);
|
|
694
|
-
}
|
|
695
|
-
}
|
|
696
|
-
|
|
697
|
-
// Cap history
|
|
698
|
-
if (memory.history.length > this.config.maxHistorySize) {
|
|
699
|
-
memory.history = memory.history.slice(-this.config.maxHistorySize);
|
|
700
|
-
}
|
|
701
|
-
|
|
702
|
-
return { executed, allSucceeded: true };
|
|
703
|
-
|
|
704
|
-
} catch (error: any) {
|
|
705
|
-
// One of the commands failed - find which one
|
|
706
|
-
const errorMessage = error.message || String(error);
|
|
707
|
-
|
|
708
|
-
// Capture page state for debug logging
|
|
709
|
-
let pageStateDebug = '';
|
|
710
|
-
if (this.debugMode) {
|
|
711
|
-
try {
|
|
712
|
-
const pageInfo = await getEnhancedPageInfo(page);
|
|
713
|
-
pageStateDebug = `
|
|
714
|
-
=== DEBUG: PAGE STATE AT FAILURE ===
|
|
715
|
-
URL: ${page.url()}
|
|
716
|
-
Title: ${pageInfo.title}
|
|
717
|
-
|
|
718
|
-
INTERACTIVE ELEMENTS:
|
|
719
|
-
${pageInfo.formattedElements}
|
|
720
|
-
|
|
721
|
-
ARIA SNAPSHOT:
|
|
722
|
-
${JSON.stringify(pageInfo.ariaSnapshot, null, 2)}
|
|
723
|
-
|
|
724
|
-
====================================`;
|
|
725
|
-
} catch (debugError: any) {
|
|
726
|
-
pageStateDebug = `Failed to capture page state: ${debugError.message}`;
|
|
727
|
-
}
|
|
728
|
-
}
|
|
729
|
-
|
|
730
|
-
// Record all that succeeded, then the failure
|
|
731
|
-
for (let i = 0; i < limitedCommands.length; i++) {
|
|
732
|
-
const cmd = limitedCommands[i];
|
|
733
|
-
|
|
734
|
-
// This is a failed command (error happened here or earlier)
|
|
735
|
-
if (executed.length <= i) {
|
|
736
|
-
this.logger?.(`[Orchestrator] ✗ [${i + 1}/${limitedCommands.length}] Failed: ${errorMessage}`, 'error');
|
|
737
|
-
|
|
738
|
-
// Log detailed debug info
|
|
739
|
-
if (this.debugMode && pageStateDebug) {
|
|
740
|
-
this.logger?.(pageStateDebug, 'debug');
|
|
741
|
-
}
|
|
742
|
-
|
|
743
|
-
memory.history.push({
|
|
744
|
-
stepNumber,
|
|
745
|
-
iteration,
|
|
746
|
-
action: `Command ${i + 1}/${limitedCommands.length} - FAILED`,
|
|
747
|
-
code: cmd,
|
|
748
|
-
result: 'failure',
|
|
749
|
-
observation: `Failed with error: ${errorMessage}. This selector likely doesn't exist or is incorrect.`,
|
|
750
|
-
error: errorMessage,
|
|
751
|
-
url: page.url(),
|
|
752
|
-
timestamp: Date.now()
|
|
753
|
-
});
|
|
754
|
-
|
|
755
|
-
if (i < limitedCommands.length - 1) {
|
|
756
|
-
this.logger?.(`[Orchestrator] ⚠ Skipping remaining ${limitedCommands.length - i - 1} command(s)`, 'warn');
|
|
757
|
-
}
|
|
758
|
-
|
|
759
|
-
break;
|
|
760
|
-
}
|
|
761
|
-
}
|
|
762
|
-
|
|
763
|
-
return { executed, allSucceeded: false };
|
|
764
|
-
}
|
|
765
|
-
}
|
|
766
|
-
|
|
767
|
-
/**
|
|
768
|
-
* Execute a single command
|
|
769
|
-
*/
|
|
770
|
-
private async executeCommand(cmd: string, page: any): Promise<void> {
|
|
771
|
-
// Wrap in async function and execute
|
|
772
|
-
const wrapped = `(async () => { ${cmd} })()`;
|
|
773
|
-
|
|
774
|
-
try {
|
|
775
|
-
await eval(wrapped);
|
|
776
|
-
} catch (error: any) {
|
|
777
|
-
// If eval fails, try direct execution with page context
|
|
778
|
-
// Pass both page and expect to make Playwright assertions available
|
|
779
|
-
const func = new Function('page', 'expect', `return (async () => { ${cmd} })()`);
|
|
780
|
-
await func(page, expect);
|
|
781
|
-
}
|
|
782
|
-
}
|
|
783
|
-
|
|
784
|
-
/**
|
|
785
|
-
* Log agent decision
|
|
786
|
-
*/
|
|
787
|
-
private logAgentDecision(decision: AgentDecision, iteration: number): void {
|
|
788
|
-
this.logger?.(`[Orchestrator] 💭 REASONING: ${decision.reasoning}`);
|
|
789
|
-
|
|
790
|
-
if (decision.selfReflection) {
|
|
791
|
-
this.logger?.(`[Orchestrator] 🧠 SELF-REFLECTION:`);
|
|
792
|
-
this.logger?.(`[Orchestrator] Next: ${decision.selfReflection.guidanceForNext}`);
|
|
793
|
-
if (decision.selfReflection.detectingLoop) {
|
|
794
|
-
this.logger?.(`[Orchestrator] 🔄 LOOP DETECTED: ${decision.selfReflection.loopReasoning}`, 'warn');
|
|
795
|
-
}
|
|
796
|
-
}
|
|
797
|
-
|
|
798
|
-
if (decision.toolCalls && decision.toolCalls.length > 0) {
|
|
799
|
-
this.logger?.(`[Orchestrator] 🔧 TOOLS: ${decision.toolCalls.map(t => t.name).join(', ')}`);
|
|
800
|
-
if (decision.toolReasoning) {
|
|
801
|
-
this.logger?.(`[Orchestrator] 📋 Why: ${decision.toolReasoning}`);
|
|
802
|
-
}
|
|
803
|
-
}
|
|
804
|
-
|
|
805
|
-
if (decision.blockerDetected) {
|
|
806
|
-
this.logger?.(`[Orchestrator] 🚧 BLOCKER: ${decision.blockerDetected.description}`, 'warn');
|
|
807
|
-
this.logger?.(`[Orchestrator] 🧹 Clearing with ${decision.blockerDetected.clearingCommands.length} command(s)`);
|
|
808
|
-
}
|
|
809
|
-
|
|
810
|
-
if (decision.stepReEvaluation?.detected) {
|
|
811
|
-
this.logger?.(`[Orchestrator] 🔍 STEP RE-EVALUATION: ${decision.stepReEvaluation.issue}`, 'warn');
|
|
812
|
-
this.logger?.(`[Orchestrator] 📝 Explanation: ${decision.stepReEvaluation.explanation}`);
|
|
813
|
-
}
|
|
814
|
-
|
|
815
|
-
if (decision.commands && decision.commands.length > 0) {
|
|
816
|
-
this.logger?.(`[Orchestrator] 📝 COMMANDS (${decision.commands.length}):`);
|
|
817
|
-
decision.commands.slice(0, 3).forEach((cmd, i) => {
|
|
818
|
-
this.logger?.(`[Orchestrator] ${i + 1}. ${cmd.substring(0, 80)}...`);
|
|
819
|
-
});
|
|
820
|
-
if (decision.commands.length > 3) {
|
|
821
|
-
this.logger?.(`[Orchestrator] ... and ${decision.commands.length - 3} more`);
|
|
822
|
-
}
|
|
823
|
-
if (decision.commandReasoning) {
|
|
824
|
-
this.logger?.(`[Orchestrator] 💡 Why: ${decision.commandReasoning}`);
|
|
825
|
-
}
|
|
826
|
-
}
|
|
827
|
-
|
|
828
|
-
// Experiences will be logged when added to memory, no need to log here
|
|
829
|
-
}
|
|
830
|
-
|
|
831
|
-
/**
|
|
832
|
-
* Report step progress
|
|
833
|
-
*/
|
|
834
|
-
private async reportStepProgress(
|
|
835
|
-
jobId: string,
|
|
836
|
-
stepNumber: number,
|
|
837
|
-
description: string,
|
|
838
|
-
decision: AgentDecision,
|
|
839
|
-
iteration: number
|
|
840
|
-
): Promise<void> {
|
|
841
|
-
if (!this.progressReporter?.onStepProgress) return;
|
|
842
|
-
|
|
843
|
-
await this.progressReporter.onStepProgress({
|
|
844
|
-
jobId,
|
|
845
|
-
stepNumber,
|
|
846
|
-
description,
|
|
847
|
-
status: decision.status === 'complete' ? StepExecutionStatus.SUCCESS :
|
|
848
|
-
decision.status === 'stuck' || decision.status === 'infeasible' ? StepExecutionStatus.FAILURE :
|
|
849
|
-
StepExecutionStatus.IN_PROGRESS,
|
|
850
|
-
code: decision.commands?.join('\n'),
|
|
851
|
-
// Include agent metadata for transparency
|
|
852
|
-
agentIteration: iteration,
|
|
853
|
-
agentReasoning: decision.reasoning,
|
|
854
|
-
agentSelfReflection: decision.selfReflection,
|
|
855
|
-
agentExperiences: decision.experiences,
|
|
856
|
-
agentToolsUsed: decision.toolCalls?.map(t => t.name),
|
|
857
|
-
agentStatus: decision.status
|
|
858
|
-
});
|
|
859
|
-
}
|
|
860
|
-
}
|
|
861
|
-
|
|
862
|
-
|