testchimp-runner-core 0.0.43 → 0.0.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/execution-service.d.ts.map +1 -1
- package/dist/execution-service.js +15 -1
- package/dist/execution-service.js.map +1 -1
- package/dist/index.d.ts +1 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -2
- package/dist/index.js.map +1 -1
- package/dist/llm-facade.d.ts.map +1 -1
- package/dist/llm-facade.js +15 -13
- package/dist/llm-facade.js.map +1 -1
- package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-agent.js +103 -34
- package/dist/orchestrator/orchestrator-agent.js.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.js +28 -0
- package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
- package/dist/orchestrator/page-loading-utils.d.ts.map +1 -1
- package/dist/orchestrator/page-loading-utils.js +8 -0
- package/dist/orchestrator/page-loading-utils.js.map +1 -1
- package/dist/prompts.d.ts.map +1 -1
- package/dist/prompts.js +27 -10
- package/dist/prompts.js.map +1 -1
- package/dist/scenario-service.d.ts +2 -4
- package/dist/scenario-service.d.ts.map +1 -1
- package/dist/scenario-service.js +11 -8
- package/dist/scenario-service.js.map +1 -1
- package/dist/scenario-worker-class.d.ts +2 -13
- package/dist/scenario-worker-class.d.ts.map +1 -1
- package/dist/scenario-worker-class.js +119 -533
- package/dist/scenario-worker-class.js.map +1 -1
- package/dist/utils/page-info-retry.d.ts.map +1 -1
- package/dist/utils/page-info-retry.js +3 -7
- package/dist/utils/page-info-retry.js.map +1 -1
- package/dist/utils/page-info-utils.js +3 -8
- package/dist/utils/page-info-utils.js.map +1 -1
- package/package.json +1 -1
|
@@ -2,24 +2,17 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.ScenarioWorker = void 0;
|
|
4
4
|
const events_1 = require("events");
|
|
5
|
-
const page_info_utils_1 = require("./utils/page-info-utils");
|
|
6
5
|
const browser_utils_1 = require("./utils/browser-utils");
|
|
7
6
|
const llm_facade_1 = require("./llm-facade");
|
|
8
7
|
const script_utils_1 = require("./script-utils");
|
|
9
|
-
const model_constants_1 = require("./model-constants");
|
|
10
8
|
const progress_reporter_1 = require("./progress-reporter");
|
|
11
9
|
const backend_proxy_llm_provider_1 = require("./providers/backend-proxy-llm-provider");
|
|
12
10
|
const orchestrator_1 = require("./orchestrator");
|
|
13
|
-
const MAX_RETRIES_PER_STEP = 3; // 4 total attempts per sub-action: 3 DOM-only, then 1 potential vision attempt
|
|
14
|
-
const MAX_SUBACTIONS_PER_STEP = 5; // Maximum sub-actions to attempt for a single step (reduced from 10 to prevent excessive retries)
|
|
15
|
-
const MAX_FAILED_ATTEMPTS_PER_STEP = 12; // Hard limit on FAILED attempts per step across all sub-actions
|
|
16
11
|
class ScenarioWorker extends events_1.EventEmitter {
|
|
17
12
|
constructor(fileHandler, llmProvider, progressReporter, authConfig, backendUrl, options, outputChannel) {
|
|
18
13
|
super();
|
|
19
14
|
this.initialized = false;
|
|
20
15
|
this.sessionId = null;
|
|
21
|
-
// Orchestrator mode
|
|
22
|
-
this.useOrchestrator = false;
|
|
23
16
|
this.debugMode = false;
|
|
24
17
|
// Use provided LLM provider or default to backend proxy (backward compatible)
|
|
25
18
|
const actualLLMProvider = llmProvider || new backend_proxy_llm_provider_1.BackendProxyLLMProvider(authConfig, backendUrl);
|
|
@@ -27,13 +20,9 @@ class ScenarioWorker extends events_1.EventEmitter {
|
|
|
27
20
|
this.fileHandler = fileHandler;
|
|
28
21
|
this.progressReporter = progressReporter;
|
|
29
22
|
this.outputChannel = outputChannel; // Set outputChannel for log routing
|
|
30
|
-
// Orchestrator setup
|
|
31
|
-
this.useOrchestrator = options?.useOrchestrator || false;
|
|
32
23
|
this.orchestratorConfig = options?.orchestratorConfig;
|
|
33
24
|
this.debugMode = options?.debugMode || false;
|
|
34
|
-
|
|
35
|
-
this.initializeOrchestrator();
|
|
36
|
-
}
|
|
25
|
+
this.initializeOrchestrator();
|
|
37
26
|
}
|
|
38
27
|
/**
|
|
39
28
|
* Initialize orchestrator mode with tools
|
|
@@ -69,12 +58,20 @@ class ScenarioWorker extends events_1.EventEmitter {
|
|
|
69
58
|
);
|
|
70
59
|
// Minimal initialization logging - internal details not needed by consumer
|
|
71
60
|
}
|
|
61
|
+
setLogger(logger) {
|
|
62
|
+
this.logger = logger;
|
|
63
|
+
}
|
|
72
64
|
log(message) {
|
|
73
65
|
// Let consumer add timestamps - just report the raw message
|
|
74
66
|
const formattedMessage = `[ScenarioWorker] ${message}`;
|
|
75
|
-
//
|
|
76
|
-
|
|
77
|
-
|
|
67
|
+
// Use logger if provided, otherwise fall back to console
|
|
68
|
+
if (this.logger) {
|
|
69
|
+
this.logger(formattedMessage, 'log');
|
|
70
|
+
}
|
|
71
|
+
else {
|
|
72
|
+
console.log(formattedMessage);
|
|
73
|
+
}
|
|
74
|
+
// Also route to outputChannel if provided (for VS Code extension)
|
|
78
75
|
if (this.outputChannel) {
|
|
79
76
|
this.outputChannel.appendLine(formattedMessage);
|
|
80
77
|
}
|
|
@@ -82,26 +79,16 @@ class ScenarioWorker extends events_1.EventEmitter {
|
|
|
82
79
|
logError(message) {
|
|
83
80
|
// Let consumer add timestamps - just report the raw message
|
|
84
81
|
const formattedMessage = `[ScenarioWorker] ERROR: ${message}`;
|
|
85
|
-
//
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
if (this.outputChannel) {
|
|
89
|
-
this.outputChannel.appendLine(formattedMessage);
|
|
82
|
+
// Use logger if provided, otherwise fall back to console
|
|
83
|
+
if (this.logger) {
|
|
84
|
+
this.logger(formattedMessage, 'error');
|
|
90
85
|
}
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
* Capture screenshot as data URL
|
|
94
|
-
* Returns data:image/png;base64,... format
|
|
95
|
-
*/
|
|
96
|
-
async captureStepScreenshot(page) {
|
|
97
|
-
try {
|
|
98
|
-
const screenshot = await page.screenshot({ type: 'png' });
|
|
99
|
-
const base64 = screenshot.toString('base64');
|
|
100
|
-
return `data:image/png;base64,${base64}`;
|
|
86
|
+
else {
|
|
87
|
+
console.error(formattedMessage);
|
|
101
88
|
}
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
89
|
+
// Also route to outputChannel if provided (for VS Code extension)
|
|
90
|
+
if (this.outputChannel) {
|
|
91
|
+
this.outputChannel.appendLine(formattedMessage);
|
|
105
92
|
}
|
|
106
93
|
}
|
|
107
94
|
/**
|
|
@@ -126,36 +113,6 @@ class ScenarioWorker extends events_1.EventEmitter {
|
|
|
126
113
|
// Also log for visibility
|
|
127
114
|
this.progressReporter?.log?.(`Job ${progress.jobId}: ${progress.status}`);
|
|
128
115
|
}
|
|
129
|
-
/**
|
|
130
|
-
* Detect if a step is complex and benefits from proactive vision usage
|
|
131
|
-
* Complex steps: form filling, verification, navigation after actions
|
|
132
|
-
*/
|
|
133
|
-
isComplexStep(stepDescription) {
|
|
134
|
-
const description = stepDescription.toLowerCase();
|
|
135
|
-
// Verification steps - often need visual confirmation
|
|
136
|
-
if (description.includes('verify') || description.includes('check') ||
|
|
137
|
-
description.includes('confirm') || description.includes('ensure')) {
|
|
138
|
-
return true;
|
|
139
|
-
}
|
|
140
|
-
// Form-related steps - multiple fields, complex interactions
|
|
141
|
-
if (description.includes('fill') && (description.includes('form') || description.includes('field'))) {
|
|
142
|
-
return true;
|
|
143
|
-
}
|
|
144
|
-
if (description.includes('enter') && description.includes('information')) {
|
|
145
|
-
return true;
|
|
146
|
-
}
|
|
147
|
-
// Steps that typically follow navigation (page may still be loading)
|
|
148
|
-
if (description.includes('click') && (description.includes('menu') ||
|
|
149
|
-
description.includes('tab') ||
|
|
150
|
-
description.includes('link'))) {
|
|
151
|
-
return true;
|
|
152
|
-
}
|
|
153
|
-
// Multi-step actions indicated by "and" or commas
|
|
154
|
-
if (description.includes(' and ') || description.split(',').length > 1) {
|
|
155
|
-
return true;
|
|
156
|
-
}
|
|
157
|
-
return false;
|
|
158
|
-
}
|
|
159
116
|
async initialize() {
|
|
160
117
|
try {
|
|
161
118
|
this.sessionId = `scenario_worker_${Date.now()}`;
|
|
@@ -250,459 +207,116 @@ class ScenarioWorker extends events_1.EventEmitter {
|
|
|
250
207
|
this.log(`📚 Starting with existing learnings: ${screenCount} screens, ${patternCount} UX patterns`);
|
|
251
208
|
}
|
|
252
209
|
// 3a. ORCHESTRATOR MODE - Use orchestrator agent for execution
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
// Mark current and remaining steps as skipped
|
|
269
|
-
for (let j = i; j < steps.length; j++) {
|
|
270
|
-
steps[j].success = false;
|
|
271
|
-
steps[j].error = 'Cancelled by user';
|
|
272
|
-
steps[j].playwrightCommands = [];
|
|
273
|
-
}
|
|
274
|
-
break; // Exit loop
|
|
275
|
-
}
|
|
276
|
-
}
|
|
277
|
-
// Only stop if consecutive failures exceed limit AND continueOnFailure is false
|
|
278
|
-
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES && !CONTINUE_ON_FAILURE) {
|
|
279
|
-
this.log(`🛑 Stopping execution: ${consecutiveFailures} consecutive failures`);
|
|
280
|
-
// Mark remaining steps as skipped
|
|
210
|
+
this.log(`🤖 Using Orchestrator Mode (continueOnFailure: ${CONTINUE_ON_FAILURE})`);
|
|
211
|
+
// Initialize journey memory
|
|
212
|
+
const memory = {
|
|
213
|
+
history: [],
|
|
214
|
+
extractedData: {}
|
|
215
|
+
};
|
|
216
|
+
// Execute steps using orchestrator
|
|
217
|
+
for (let i = 0; i < steps.length; i++) {
|
|
218
|
+
// Check if job was cancelled by user
|
|
219
|
+
if (this.progressReporter?.shouldContinue) {
|
|
220
|
+
const shouldContinue = await this.progressReporter.shouldContinue(job.id);
|
|
221
|
+
if (!shouldContinue) {
|
|
222
|
+
this.log(`🛑 Job ${job.id} cancelled by user - aborting execution`);
|
|
223
|
+
overallSuccess = false;
|
|
224
|
+
// Mark current and remaining steps as skipped
|
|
281
225
|
for (let j = i; j < steps.length; j++) {
|
|
282
226
|
steps[j].success = false;
|
|
283
|
-
steps[j].error =
|
|
227
|
+
steps[j].error = 'Cancelled by user';
|
|
284
228
|
steps[j].playwrightCommands = [];
|
|
285
229
|
}
|
|
286
|
-
|
|
287
|
-
break;
|
|
288
|
-
}
|
|
289
|
-
// Warn if approaching limit (even with continueOnFailure)
|
|
290
|
-
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES && CONTINUE_ON_FAILURE) {
|
|
291
|
-
this.log(`⚠️ ${consecutiveFailures} consecutive failures - continuing but scenario may have issues`);
|
|
292
|
-
}
|
|
293
|
-
const step = steps[i];
|
|
294
|
-
step.stepNumber = i + 1;
|
|
295
|
-
try {
|
|
296
|
-
// LIFECYCLE: Call beforeStepStart if provided
|
|
297
|
-
if (this.progressReporter?.beforeStepStart) {
|
|
298
|
-
await this.progressReporter.beforeStepStart({
|
|
299
|
-
stepNumber: step.stepNumber,
|
|
300
|
-
description: step.description
|
|
301
|
-
}, page);
|
|
302
|
-
}
|
|
303
|
-
// Use orchestrator to execute this step
|
|
304
|
-
// Pass accumulated site learnings so agent can build upon them
|
|
305
|
-
const result = await this.orchestratorAgent.executeStep(page, step.description, step.stepNumber, steps.length, steps.map(s => s.description), memory, job.id, undefined, // priorSteps
|
|
306
|
-
undefined, // nextSteps
|
|
307
|
-
undefined, // successfulCommandsInStep
|
|
308
|
-
undefined, // failingCommand
|
|
309
|
-
undefined, // remainingCommandsInStep
|
|
310
|
-
siteLearnings // Pass accumulated learnings
|
|
311
|
-
);
|
|
312
|
-
// Update step with result
|
|
313
|
-
step.success = result.success;
|
|
314
|
-
step.playwrightCommands = result.commands;
|
|
315
|
-
step.error = result.error;
|
|
316
|
-
// Merge site learnings from this step
|
|
317
|
-
if (result.siteLearnings) {
|
|
318
|
-
siteLearnings = this.mergeSiteLearningsSimple(siteLearnings, result.siteLearnings);
|
|
319
|
-
}
|
|
320
|
-
if (result.success) {
|
|
321
|
-
this.log(`✓ Step ${step.stepNumber} completed via orchestrator (${result.iterations} iterations, ${result.commands.length} commands)`);
|
|
322
|
-
consecutiveFailures = 0;
|
|
323
|
-
}
|
|
324
|
-
else {
|
|
325
|
-
this.log(`✗ Step ${step.stepNumber} failed via orchestrator: ${result.terminationReason}`);
|
|
326
|
-
this.log(` Reason: ${result.error || 'No error message'}`);
|
|
327
|
-
this.log(` Commands executed: ${result.commands.length}`);
|
|
328
|
-
consecutiveFailures++;
|
|
329
|
-
overallSuccess = false;
|
|
330
|
-
// CRITICAL: Stop on agent_stuck or infeasible (explicit agent decision)
|
|
331
|
-
// continueOnStepFailure only applies to command failures, not agent decisions
|
|
332
|
-
if (result.terminationReason === 'agent_stuck' || result.terminationReason === 'infeasible') {
|
|
333
|
-
this.log(`🛑 Stopping: Agent declared step ${result.terminationReason} - cannot continue`);
|
|
334
|
-
// Mark remaining steps as skipped
|
|
335
|
-
for (let j = i + 1; j < steps.length; j++) {
|
|
336
|
-
steps[j].success = false;
|
|
337
|
-
steps[j].error = `Skipped: Previous step was ${result.terminationReason}`;
|
|
338
|
-
steps[j].playwrightCommands = [];
|
|
339
|
-
}
|
|
340
|
-
break; // Exit loop
|
|
341
|
-
}
|
|
342
|
-
}
|
|
343
|
-
// REPORT FINAL STEP RESULT (after orchestrator completes all iterations)
|
|
344
|
-
// This gives the complete accumulated commands, not just one iteration
|
|
345
|
-
await this.reportStepProgress({
|
|
346
|
-
jobId: job.id,
|
|
347
|
-
stepNumber: step.stepNumber,
|
|
348
|
-
description: step.description,
|
|
349
|
-
code: step.playwrightCommands?.join('\n') || '', // All accumulated commands
|
|
350
|
-
status: step.success ? progress_reporter_1.StepExecutionStatus.SUCCESS : progress_reporter_1.StepExecutionStatus.FAILURE,
|
|
351
|
-
error: step.error,
|
|
352
|
-
agentIteration: result.iterations
|
|
353
|
-
});
|
|
354
|
-
}
|
|
355
|
-
catch (error) {
|
|
356
|
-
this.logError(`Orchestrator execution failed for step ${step.stepNumber}: ${error.message}`);
|
|
357
|
-
step.success = false;
|
|
358
|
-
step.error = error.message;
|
|
359
|
-
consecutiveFailures++;
|
|
360
|
-
overallSuccess = false;
|
|
230
|
+
break; // Exit loop
|
|
361
231
|
}
|
|
362
|
-
previousSteps.push(step);
|
|
363
232
|
}
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
this.log(` Remaining ${steps.length - i} steps will be skipped to avoid wasting resources`);
|
|
373
|
-
// Emit log events about early termination
|
|
374
|
-
this.emit('log', job.id, `\n🛑 EARLY TERMINATION\n`);
|
|
375
|
-
this.emit('log', job.id, `Reason: ${consecutiveFailures} consecutive step failures\n`);
|
|
376
|
-
this.emit('log', job.id, `Steps attempted: ${i}\n`);
|
|
377
|
-
this.emit('log', job.id, `Steps skipped: ${steps.length - i}\n\n`);
|
|
378
|
-
// Mark remaining steps as skipped
|
|
379
|
-
for (let j = i; j < steps.length; j++) {
|
|
380
|
-
const skippedStep = steps[j];
|
|
381
|
-
skippedStep.stepNumber = j + 1;
|
|
382
|
-
skippedStep.success = false;
|
|
383
|
-
skippedStep.error = `Skipped due to ${consecutiveFailures} consecutive failures in previous steps`;
|
|
384
|
-
skippedStep.playwrightCommands = [];
|
|
385
|
-
previousSteps.push(skippedStep);
|
|
386
|
-
}
|
|
387
|
-
overallSuccess = false;
|
|
388
|
-
break; // Exit the loop
|
|
233
|
+
// Only stop if consecutive failures exceed limit AND continueOnFailure is false
|
|
234
|
+
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES && !CONTINUE_ON_FAILURE) {
|
|
235
|
+
this.log(`🛑 Stopping execution: ${consecutiveFailures} consecutive failures`);
|
|
236
|
+
// Mark remaining steps as skipped
|
|
237
|
+
for (let j = i; j < steps.length; j++) {
|
|
238
|
+
steps[j].success = false;
|
|
239
|
+
steps[j].error = `Skipped due to ${consecutiveFailures} consecutive failures`;
|
|
240
|
+
steps[j].playwrightCommands = [];
|
|
389
241
|
}
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
let subActionSuccess = false;
|
|
407
|
-
let subActionCommand;
|
|
408
|
-
let subActionError;
|
|
409
|
-
let subActionRetries = 0;
|
|
410
|
-
let usedVisionMode = false;
|
|
411
|
-
// Build context about what's been done so far in this step
|
|
412
|
-
const stepContext = step.subActions && step.subActions.length > 0
|
|
413
|
-
? `\nSub-actions completed so far for this step:\n${step.subActions.map((sa, idx) => ` ${idx + 1}. ${sa.command} - ${sa.success ? 'SUCCESS' : 'FAILED'}`).join('\n')}`
|
|
414
|
-
: '';
|
|
415
|
-
for (let attempt = 0; attempt <= MAX_RETRIES_PER_STEP; attempt++) {
|
|
416
|
-
// Check if we've exceeded failed attempts budget BEFORE attempting
|
|
417
|
-
if (totalFailedAttemptsForStep >= MAX_FAILED_ATTEMPTS_PER_STEP) {
|
|
418
|
-
this.log(` ⚠️ Exceeded failed attempts budget (${MAX_FAILED_ATTEMPTS_PER_STEP}) for this step`);
|
|
419
|
-
stepComplete = true;
|
|
420
|
-
stepSuccess = false;
|
|
421
|
-
stepError = `Exceeded maximum failed attempts (${MAX_FAILED_ATTEMPTS_PER_STEP}) for step`;
|
|
422
|
-
break;
|
|
423
|
-
}
|
|
424
|
-
let currentAttemptCommand;
|
|
425
|
-
let currentAttemptSuccess = false;
|
|
426
|
-
let currentAttemptError;
|
|
427
|
-
const attemptTimestamp = Date.now();
|
|
428
|
-
try {
|
|
429
|
-
this.log(`Step ${step.stepNumber} - Sub-action ${subActionCount + 1}, Attempt ${attempt + 1}: ${step.description}`);
|
|
430
|
-
// Get current page state - handle navigation in progress
|
|
431
|
-
let domSnapshot;
|
|
432
|
-
let pageInfo;
|
|
433
|
-
try {
|
|
434
|
-
domSnapshot = {
|
|
435
|
-
url: page.url(),
|
|
436
|
-
title: await page.title(),
|
|
437
|
-
accessibilityTree: await page.accessibility.snapshot()
|
|
438
|
-
};
|
|
439
|
-
pageInfo = await (0, page_info_utils_1.getEnhancedPageInfo)(domSnapshot);
|
|
440
|
-
}
|
|
441
|
-
catch (contextError) {
|
|
442
|
-
// If execution context was destroyed (navigation in progress), wait and retry
|
|
443
|
-
if (contextError.message && contextError.message.includes('Execution context was destroyed')) {
|
|
444
|
-
this.log(` ⏳ Navigation in progress, waiting for page to load...`);
|
|
445
|
-
await page.waitForLoadState('domcontentloaded', { timeout: 5000 }).catch(() => { });
|
|
446
|
-
// Retry page state capture
|
|
447
|
-
domSnapshot = {
|
|
448
|
-
url: page.url(),
|
|
449
|
-
title: await page.title(),
|
|
450
|
-
accessibilityTree: await page.accessibility.snapshot()
|
|
451
|
-
};
|
|
452
|
-
pageInfo = await (0, page_info_utils_1.getEnhancedPageInfo)(domSnapshot);
|
|
453
|
-
}
|
|
454
|
-
else {
|
|
455
|
-
throw contextError; // Re-throw if it's not a navigation issue
|
|
456
|
-
}
|
|
457
|
-
}
|
|
458
|
-
// Vision trigger: Liberal usage since gpt-5-mini vision is cost-effective
|
|
459
|
-
const modelToUse = job.model || model_constants_1.DEFAULT_MODEL;
|
|
460
|
-
let command;
|
|
461
|
-
// Enhanced logging for vision trigger logic
|
|
462
|
-
this.log(` 🔍 Vision trigger check: subAction=${subActionCount + 1}, attempt=${attempt}, totalFailed=${totalFailedAttemptsForStep}, usedVision=${usedVisionMode}`);
|
|
463
|
-
// Liberal vision strategy (gpt-5-mini is cost-effective):
|
|
464
|
-
// 1. After ANY failure (1+) → use vision
|
|
465
|
-
// 2. Complex steps → use vision from attempt 1
|
|
466
|
-
// 3. No LLM assessment gate → go directly to vision
|
|
467
|
-
const hasFailure = totalFailedAttemptsForStep >= 1 && lastError;
|
|
468
|
-
const shouldUseProactiveVision = isComplexStep && attempt === 0; // First attempt for complex steps
|
|
469
|
-
const shouldUseVision = (hasFailure || shouldUseProactiveVision) && !usedVisionMode;
|
|
470
|
-
if (shouldUseVision) {
|
|
471
|
-
if (shouldUseProactiveVision) {
|
|
472
|
-
this.log(` 🎯 PROACTIVE VISION: Complex step detected, using vision from first attempt`);
|
|
473
|
-
}
|
|
474
|
-
else {
|
|
475
|
-
this.log(` 🎯 VISION TRIGGER: ${totalFailedAttemptsForStep} failure(s) detected, using vision (no LLM gate)`);
|
|
476
|
-
}
|
|
477
|
-
// Two-step supervisor pattern:
|
|
478
|
-
// 1. Supervisor analyzes screenshot and provides instructions
|
|
479
|
-
// 2. Worker generates command based on those instructions
|
|
480
|
-
this.log(` 📸 Taking screenshot for supervisor analysis...`);
|
|
481
|
-
// Capture optimized screenshot using utility method
|
|
482
|
-
const imageDataUrl = await (0, browser_utils_1.captureOptimizedScreenshot)(page, { timeout: 10000 }, // Uses default quality 60
|
|
483
|
-
(msg) => this.log(msg));
|
|
484
|
-
this.log(` 👔 STEP 1: Supervisor analyzing screenshot (${model_constants_1.VISION_MODEL})...`);
|
|
485
|
-
const supervisorDiagnostics = await this.llmFacade.getVisionDiagnostics(step.description + stepContext, pageInfo, previousSteps, lastError, imageDataUrl, model_constants_1.VISION_MODEL);
|
|
486
|
-
// DEBUG: Log vision diagnostics
|
|
487
|
-
this.log(` 📸 Visual insights: ${supervisorDiagnostics.visualAnalysis}`);
|
|
488
|
-
this.log(` 🔍 Root cause: ${supervisorDiagnostics.rootCause}`);
|
|
489
|
-
this.log(` 💡 Recommended approach: ${supervisorDiagnostics.recommendedApproach}`);
|
|
490
|
-
if (supervisorDiagnostics.elementsFound.length > 0) {
|
|
491
|
-
this.log(` ✅ Elements found: ${supervisorDiagnostics.elementsFound.join(', ')}`);
|
|
492
|
-
}
|
|
493
|
-
if (supervisorDiagnostics.elementsNotFound.length > 0) {
|
|
494
|
-
this.log(` ❌ Elements not found: ${supervisorDiagnostics.elementsNotFound.join(', ')}`);
|
|
495
|
-
}
|
|
496
|
-
this.log(` 🔨 STEP 2: Worker generating command from supervisor instructions (${model_constants_1.DEFAULT_MODEL})...`);
|
|
497
|
-
command = await this.llmFacade.generateCommandFromSupervisorInstructions(step.description + stepContext, supervisorDiagnostics, pageInfo, modelToUse // Cheaper model for command generation
|
|
498
|
-
);
|
|
499
|
-
usedVisionMode = true;
|
|
500
|
-
}
|
|
501
|
-
else {
|
|
502
|
-
// Not using vision - use regular DOM-based approach
|
|
503
|
-
if (usedVisionMode) {
|
|
504
|
-
this.log(` 📝 Vision already used - using DOM-based approach`);
|
|
505
|
-
}
|
|
506
|
-
else if (isComplexStep) {
|
|
507
|
-
this.log(` 📝 Complex step, but first attempt - using DOM-based approach (vision on retry)`);
|
|
508
|
-
}
|
|
509
|
-
else {
|
|
510
|
-
this.log(` 📝 Using DOM-based approach (${totalFailedAttemptsForStep} failures so far)`);
|
|
511
|
-
}
|
|
512
|
-
const stepDescriptionWithContext = step.description + stepContext;
|
|
513
|
-
command = await this.llmFacade.generatePlaywrightCommand(stepDescriptionWithContext, pageInfo, previousSteps, lastError, step, modelToUse);
|
|
514
|
-
}
|
|
515
|
-
if (!command) {
|
|
516
|
-
throw new Error('LLM failed to generate a Playwright command.');
|
|
517
|
-
}
|
|
518
|
-
currentAttemptCommand = command;
|
|
519
|
-
this.log(` Command: ${command}`);
|
|
520
|
-
// Execute the command
|
|
521
|
-
await this.executePlaywrightCommand(page, browser, context, command);
|
|
522
|
-
// Success
|
|
523
|
-
subActionSuccess = true;
|
|
524
|
-
currentAttemptSuccess = true;
|
|
525
|
-
subActionCommand = command;
|
|
526
|
-
step.playwrightCommands.push(command);
|
|
527
|
-
this.log(` ✅ SUCCESS: ${command}${usedVisionMode ? ' (vision-aided)' : ''}`);
|
|
528
|
-
// Wait a bit for any navigation that might have been triggered
|
|
529
|
-
// This prevents "Execution context destroyed" errors when checking goal completion
|
|
530
|
-
await page.waitForLoadState('domcontentloaded', { timeout: 3000 }).catch(() => {
|
|
531
|
-
// Ignore timeout - page might not be navigating
|
|
532
|
-
});
|
|
533
|
-
break; // Sub-action successful, check if step is complete
|
|
534
|
-
}
|
|
535
|
-
catch (error) {
|
|
536
|
-
subActionError = error instanceof Error ? error.message : String(error);
|
|
537
|
-
currentAttemptError = subActionError;
|
|
538
|
-
// Get current URL for context (especially useful for navigation failures)
|
|
539
|
-
let currentUrl = 'unknown';
|
|
540
|
-
try {
|
|
541
|
-
currentUrl = page.url();
|
|
542
|
-
}
|
|
543
|
-
catch (e) {
|
|
544
|
-
// Ignore if we can't get URL
|
|
545
|
-
}
|
|
546
|
-
// Enhanced error message with current URL
|
|
547
|
-
const errorWithContext = `${subActionError} | Current URL: ${currentUrl}`;
|
|
548
|
-
this.logError(` ❌ FAILED (attempt ${attempt + 1}): ${subActionError}`);
|
|
549
|
-
this.logError(` Current URL: ${currentUrl}`);
|
|
550
|
-
this.logError(` Command attempted: ${currentAttemptCommand || 'N/A'}`);
|
|
551
|
-
subActionRetries++;
|
|
552
|
-
totalFailedAttemptsForStep++; // Increment failed attempts counter
|
|
553
|
-
// Only update lastError if this is the final attempt
|
|
554
|
-
if (attempt === MAX_RETRIES_PER_STEP) {
|
|
555
|
-
lastError = errorWithContext; // Include URL in error context for LLM
|
|
556
|
-
}
|
|
557
|
-
// If this is the last attempt, mark sub-action as failed
|
|
558
|
-
if (attempt === MAX_RETRIES_PER_STEP) {
|
|
559
|
-
subActionSuccess = false;
|
|
560
|
-
subActionCommand = currentAttemptCommand;
|
|
561
|
-
this.logError(` 🚫 SUB-ACTION FAILED after ${MAX_RETRIES_PER_STEP + 1} attempts (including vision mode if used)`);
|
|
562
|
-
break; // Exit retry loop
|
|
563
|
-
}
|
|
564
|
-
}
|
|
565
|
-
finally {
|
|
566
|
-
if (!step.attempts) {
|
|
567
|
-
step.attempts = [];
|
|
568
|
-
}
|
|
569
|
-
step.attempts.push({
|
|
570
|
-
attemptNumber: attempt + 1,
|
|
571
|
-
command: currentAttemptCommand,
|
|
572
|
-
success: currentAttemptSuccess,
|
|
573
|
-
error: currentAttemptError,
|
|
574
|
-
timestamp: attemptTimestamp
|
|
575
|
-
});
|
|
576
|
-
}
|
|
577
|
-
}
|
|
578
|
-
// Record the sub-action
|
|
579
|
-
if (subActionCommand) {
|
|
580
|
-
step.subActions.push({
|
|
581
|
-
command: subActionCommand,
|
|
582
|
-
success: subActionSuccess,
|
|
583
|
-
error: subActionError,
|
|
584
|
-
retryCount: subActionRetries
|
|
585
|
-
});
|
|
586
|
-
}
|
|
587
|
-
subActionCount++;
|
|
588
|
-
// Determine if step (goal) is complete
|
|
589
|
-
if (subActionSuccess) {
|
|
590
|
-
// After each successful sub-action, ask LLM if goal is complete
|
|
591
|
-
if (subActionCount >= MAX_SUBACTIONS_PER_STEP) {
|
|
592
|
-
// Safety limit - avoid infinite loops
|
|
593
|
-
stepComplete = true;
|
|
594
|
-
stepSuccess = true;
|
|
595
|
-
this.log(` ⚠️ Reached max sub-actions limit (${MAX_SUBACTIONS_PER_STEP}) with ${totalFailedAttemptsForStep} failed attempts, considering step complete`);
|
|
596
|
-
}
|
|
597
|
-
else {
|
|
598
|
-
// Ask LLM if goal is complete
|
|
599
|
-
try {
|
|
600
|
-
// Capture page state - if navigation is still happening, retry once
|
|
601
|
-
let domSnapshot;
|
|
602
|
-
let pageInfo;
|
|
603
|
-
try {
|
|
604
|
-
domSnapshot = {
|
|
605
|
-
url: page.url(),
|
|
606
|
-
title: await page.title(),
|
|
607
|
-
accessibilityTree: await page.accessibility.snapshot()
|
|
608
|
-
};
|
|
609
|
-
pageInfo = await (0, page_info_utils_1.getEnhancedPageInfo)(domSnapshot);
|
|
610
|
-
}
|
|
611
|
-
catch (contextError) {
|
|
612
|
-
// If execution context was destroyed (navigation in progress), wait and retry
|
|
613
|
-
if (contextError.message && contextError.message.includes('Execution context was destroyed')) {
|
|
614
|
-
this.log(` ⏳ Navigation detected, waiting for page to load...`);
|
|
615
|
-
await page.waitForLoadState('domcontentloaded', { timeout: 5000 }).catch(() => { });
|
|
616
|
-
// Retry page state capture
|
|
617
|
-
domSnapshot = {
|
|
618
|
-
url: page.url(),
|
|
619
|
-
title: await page.title(),
|
|
620
|
-
accessibilityTree: await page.accessibility.snapshot()
|
|
621
|
-
};
|
|
622
|
-
pageInfo = await (0, page_info_utils_1.getEnhancedPageInfo)(domSnapshot);
|
|
623
|
-
}
|
|
624
|
-
else {
|
|
625
|
-
throw contextError; // Re-throw if it's not a navigation issue
|
|
626
|
-
}
|
|
627
|
-
}
|
|
628
|
-
// Vision-backed goal completion for complex/verification steps
|
|
629
|
-
const shouldUseVisionForCompletion = isComplexStep && subActionCount >= 1; // At least one action done
|
|
630
|
-
let completionCheck;
|
|
631
|
-
if (shouldUseVisionForCompletion) {
|
|
632
|
-
this.log(` 🎯 Vision-backed goal completion check (complex step)`);
|
|
633
|
-
// Capture screenshot for visual verification
|
|
634
|
-
const imageDataUrl = await (0, browser_utils_1.captureOptimizedScreenshot)(page, { timeout: 10000 }, (msg) => this.log(msg));
|
|
635
|
-
// Use vision model to check goal completion with visual context
|
|
636
|
-
completionCheck = await this.llmFacade.checkGoalCompletionWithVision(step.description, step.playwrightCommands || [], pageInfo, imageDataUrl, model_constants_1.VISION_MODEL);
|
|
637
|
-
}
|
|
638
|
-
else {
|
|
639
|
-
// Regular DOM-based goal completion check
|
|
640
|
-
completionCheck = await this.llmFacade.checkGoalCompletion(step.description, step.playwrightCommands || [], pageInfo, job.model || model_constants_1.DEFAULT_MODEL);
|
|
641
|
-
}
|
|
642
|
-
this.log(` 🎯 Goal completion check: ${completionCheck.isComplete ? 'COMPLETE' : 'INCOMPLETE'} - ${completionCheck.reason}`);
|
|
643
|
-
if (completionCheck.isComplete) {
|
|
644
|
-
stepComplete = true;
|
|
645
|
-
stepSuccess = true;
|
|
646
|
-
}
|
|
647
|
-
else {
|
|
648
|
-
// Continue with next sub-action
|
|
649
|
-
if (completionCheck.nextSubGoal) {
|
|
650
|
-
this.log(` 📍 Next sub-goal: ${completionCheck.nextSubGoal}`);
|
|
651
|
-
}
|
|
652
|
-
// Continue looping to generate next command
|
|
653
|
-
}
|
|
654
|
-
}
|
|
655
|
-
catch (error) {
|
|
656
|
-
this.logError(`Error checking goal completion: ${error}`);
|
|
657
|
-
// Fallback: consider complete after 1 successful sub-action if we can't check
|
|
658
|
-
stepComplete = true;
|
|
659
|
-
stepSuccess = true;
|
|
660
|
-
}
|
|
661
|
-
}
|
|
662
|
-
}
|
|
663
|
-
else {
|
|
664
|
-
// Sub-action failed
|
|
665
|
-
stepComplete = true; // Move on after failure
|
|
666
|
-
stepSuccess = false;
|
|
667
|
-
stepError = subActionError;
|
|
668
|
-
overallSuccess = false;
|
|
669
|
-
}
|
|
242
|
+
overallSuccess = false;
|
|
243
|
+
break;
|
|
244
|
+
}
|
|
245
|
+
// Warn if approaching limit (even with continueOnFailure)
|
|
246
|
+
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES && CONTINUE_ON_FAILURE) {
|
|
247
|
+
this.log(`⚠️ ${consecutiveFailures} consecutive failures - continuing but scenario may have issues`);
|
|
248
|
+
}
|
|
249
|
+
const step = steps[i];
|
|
250
|
+
step.stepNumber = i + 1;
|
|
251
|
+
try {
|
|
252
|
+
// LIFECYCLE: Call beforeStepStart if provided
|
|
253
|
+
if (this.progressReporter?.beforeStepStart) {
|
|
254
|
+
await this.progressReporter.beforeStepStart({
|
|
255
|
+
stepNumber: step.stepNumber,
|
|
256
|
+
description: step.description
|
|
257
|
+
}, page);
|
|
670
258
|
}
|
|
671
|
-
//
|
|
672
|
-
|
|
673
|
-
|
|
259
|
+
// Use orchestrator to execute this step
|
|
260
|
+
// Pass accumulated site learnings so agent can build upon them
|
|
261
|
+
const result = await this.orchestratorAgent.executeStep(page, step.description, step.stepNumber, steps.length, steps.map(s => s.description), memory, job.id, undefined, // priorSteps
|
|
262
|
+
undefined, // nextSteps
|
|
263
|
+
undefined, // successfulCommandsInStep
|
|
264
|
+
undefined, // failingCommand
|
|
265
|
+
undefined, // remainingCommandsInStep
|
|
266
|
+
siteLearnings // Pass accumulated learnings
|
|
267
|
+
);
|
|
268
|
+
// Update step with result
|
|
269
|
+
step.success = result.success;
|
|
270
|
+
step.playwrightCommands = result.commands;
|
|
271
|
+
step.error = result.error;
|
|
272
|
+
// Merge site learnings from this step
|
|
273
|
+
if (result.siteLearnings) {
|
|
274
|
+
siteLearnings = this.mergeSiteLearningsSimple(siteLearnings, result.siteLearnings);
|
|
674
275
|
}
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
// Update consecutive failure counter
|
|
679
|
-
if (stepSuccess) {
|
|
680
|
-
consecutiveFailures = 0; // Reset on success
|
|
276
|
+
if (result.success) {
|
|
277
|
+
this.log(`✓ Step ${step.stepNumber} completed via orchestrator (${result.iterations} iterations, ${result.commands.length} commands)`);
|
|
278
|
+
consecutiveFailures = 0;
|
|
681
279
|
}
|
|
682
280
|
else {
|
|
281
|
+
this.log(`✗ Step ${step.stepNumber} failed via orchestrator: ${result.terminationReason}`);
|
|
282
|
+
this.log(` Reason: ${result.error || 'No error message'}`);
|
|
283
|
+
this.log(` Commands executed: ${result.commands.length}`);
|
|
683
284
|
consecutiveFailures++;
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
if (stepError) {
|
|
698
|
-
this.emit('log', job.id, `Error: ${stepError}\n`);
|
|
699
|
-
}
|
|
700
|
-
if (step.attempts && step.attempts.length > 0) {
|
|
701
|
-
this.emit('log', job.id, `Total attempts: ${step.attempts.length}\n`);
|
|
285
|
+
overallSuccess = false;
|
|
286
|
+
// CRITICAL: Stop on agent_stuck or infeasible (explicit agent decision)
|
|
287
|
+
// continueOnStepFailure only applies to command failures, not agent decisions
|
|
288
|
+
if (result.terminationReason === 'agent_stuck' || result.terminationReason === 'infeasible') {
|
|
289
|
+
this.log(`🛑 Stopping: Agent declared step ${result.terminationReason} - cannot continue`);
|
|
290
|
+
// Mark remaining steps as skipped
|
|
291
|
+
for (let j = i + 1; j < steps.length; j++) {
|
|
292
|
+
steps[j].success = false;
|
|
293
|
+
steps[j].error = `Skipped: Previous step was ${result.terminationReason}`;
|
|
294
|
+
steps[j].playwrightCommands = [];
|
|
295
|
+
}
|
|
296
|
+
break; // Exit loop
|
|
297
|
+
}
|
|
702
298
|
}
|
|
703
|
-
|
|
299
|
+
// REPORT FINAL STEP RESULT (after orchestrator completes all iterations)
|
|
300
|
+
// This gives the complete accumulated commands, not just one iteration
|
|
301
|
+
await this.reportStepProgress({
|
|
302
|
+
jobId: job.id,
|
|
303
|
+
stepNumber: step.stepNumber,
|
|
304
|
+
description: step.description,
|
|
305
|
+
code: step.playwrightCommands?.join('\n') || '', // All accumulated commands
|
|
306
|
+
status: step.success ? progress_reporter_1.StepExecutionStatus.SUCCESS : progress_reporter_1.StepExecutionStatus.FAILURE,
|
|
307
|
+
error: step.error,
|
|
308
|
+
agentIteration: result.iterations
|
|
309
|
+
});
|
|
310
|
+
}
|
|
311
|
+
catch (error) {
|
|
312
|
+
this.logError(`Orchestrator execution failed for step ${step.stepNumber}: ${error.message}`);
|
|
313
|
+
step.success = false;
|
|
314
|
+
step.error = error.message;
|
|
315
|
+
consecutiveFailures++;
|
|
316
|
+
overallSuccess = false;
|
|
704
317
|
}
|
|
705
|
-
|
|
318
|
+
previousSteps.push(step);
|
|
319
|
+
}
|
|
706
320
|
// Generate test name if not provided
|
|
707
321
|
const testName = job.testName || await this.llmFacade.generateTestName(job.scenario, job.model);
|
|
708
322
|
// Generate hashtags for semantic grouping
|
|
@@ -848,7 +462,7 @@ class ScenarioWorker extends events_1.EventEmitter {
|
|
|
848
462
|
executionTime: Date.now() - startTime,
|
|
849
463
|
testName,
|
|
850
464
|
preferredFileName,
|
|
851
|
-
siteLearnings:
|
|
465
|
+
siteLearnings: siteLearnings // Orchestrator always enabled - always return learnings
|
|
852
466
|
};
|
|
853
467
|
}
|
|
854
468
|
catch (error) {
|
|
@@ -877,8 +491,8 @@ class ScenarioWorker extends events_1.EventEmitter {
|
|
|
877
491
|
// LIFECYCLE: Call afterEndTest if provided
|
|
878
492
|
if (browser && this.progressReporter?.afterEndTest) {
|
|
879
493
|
try {
|
|
880
|
-
await this.progressReporter.afterEndTest(overallSuccess ? 'passed' : 'failed', overallSuccess ? undefined : 'Test execution had failures', page,
|
|
881
|
-
this.
|
|
494
|
+
await this.progressReporter.afterEndTest(overallSuccess ? 'passed' : 'failed', overallSuccess ? undefined : 'Test execution had failures', page, siteLearnings ?? undefined, // Pass siteLearnings here
|
|
495
|
+
this.orchestratorAgent.getDebugStats() ?? undefined // Pass debugStats here
|
|
882
496
|
);
|
|
883
497
|
}
|
|
884
498
|
catch (callbackError) {
|
|
@@ -892,31 +506,6 @@ class ScenarioWorker extends events_1.EventEmitter {
|
|
|
892
506
|
}
|
|
893
507
|
}
|
|
894
508
|
}
|
|
895
|
-
async executePlaywrightCommand(page, browser, context, command) {
|
|
896
|
-
// Detect if command contains navigation or load state operations that need longer timeout
|
|
897
|
-
const needsLongerTimeout = command.includes('waitForLoadState') ||
|
|
898
|
-
command.includes('goto(') ||
|
|
899
|
-
command.includes('waitForURL') ||
|
|
900
|
-
command.includes('waitForNavigation');
|
|
901
|
-
// Use appropriate timeout based on operation type
|
|
902
|
-
const timeout = needsLongerTimeout ? 30000 : 5000;
|
|
903
|
-
page.setDefaultTimeout(timeout);
|
|
904
|
-
try {
|
|
905
|
-
// Execute command directly without validation
|
|
906
|
-
const commandFunction = new Function('page', 'browser', 'context', 'expect', `
|
|
907
|
-
return (async () => {
|
|
908
|
-
${command}
|
|
909
|
-
})();
|
|
910
|
-
`);
|
|
911
|
-
// Dynamically import expect
|
|
912
|
-
const { expect } = require('@playwright/test');
|
|
913
|
-
await commandFunction(page, browser, context, expect);
|
|
914
|
-
}
|
|
915
|
-
finally {
|
|
916
|
-
// Reset to default timeout for element operations
|
|
917
|
-
page.setDefaultTimeout(5000);
|
|
918
|
-
}
|
|
919
|
-
}
|
|
920
509
|
/**
|
|
921
510
|
* Simple merge of site learnings (accumulate across steps)
|
|
922
511
|
*/
|
|
@@ -955,9 +544,6 @@ class ScenarioWorker extends events_1.EventEmitter {
|
|
|
955
544
|
* Execute exploration mode using orchestrator
|
|
956
545
|
*/
|
|
957
546
|
async executeExploration(page, explorationConfig, jobId, existingSiteLearnings) {
|
|
958
|
-
if (!this.useOrchestrator || !this.orchestratorAgent) {
|
|
959
|
-
throw new Error('Orchestrator not available - exploration mode requires orchestrator');
|
|
960
|
-
}
|
|
961
547
|
// Execute exploration via orchestrator
|
|
962
548
|
return this.orchestratorAgent.executeExploration(page, explorationConfig, jobId, existingSiteLearnings);
|
|
963
549
|
}
|