testchimp-runner-core 0.0.42 → 0.0.44

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (34):
  1. package/dist/execution-service.d.ts.map +1 -1
  2. package/dist/execution-service.js +2 -1
  3. package/dist/execution-service.js.map +1 -1
  4. package/dist/index.d.ts +0 -1
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js.map +1 -1
  7. package/dist/llm-facade.d.ts.map +1 -1
  8. package/dist/llm-facade.js +15 -13
  9. package/dist/llm-facade.js.map +1 -1
  10. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  11. package/dist/orchestrator/orchestrator-agent.js +54 -32
  12. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  13. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
  14. package/dist/orchestrator/orchestrator-prompts.js +3 -0
  15. package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
  16. package/dist/progress-reporter.d.ts +7 -0
  17. package/dist/progress-reporter.d.ts.map +1 -1
  18. package/dist/prompts.d.ts.map +1 -1
  19. package/dist/prompts.js +27 -10
  20. package/dist/prompts.js.map +1 -1
  21. package/dist/scenario-service.d.ts +1 -3
  22. package/dist/scenario-service.d.ts.map +1 -1
  23. package/dist/scenario-service.js +7 -6
  24. package/dist/scenario-service.js.map +1 -1
  25. package/dist/scenario-worker-class.d.ts +2 -13
  26. package/dist/scenario-worker-class.d.ts.map +1 -1
  27. package/dist/scenario-worker-class.js +119 -518
  28. package/dist/scenario-worker-class.js.map +1 -1
  29. package/dist/utils/page-info-retry.d.ts.map +1 -1
  30. package/dist/utils/page-info-retry.js +3 -7
  31. package/dist/utils/page-info-retry.js.map +1 -1
  32. package/dist/utils/page-info-utils.js +3 -8
  33. package/dist/utils/page-info-utils.js.map +1 -1
  34. package/package.json +1 -1
@@ -2,24 +2,17 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.ScenarioWorker = void 0;
4
4
  const events_1 = require("events");
5
- const page_info_utils_1 = require("./utils/page-info-utils");
6
5
  const browser_utils_1 = require("./utils/browser-utils");
7
6
  const llm_facade_1 = require("./llm-facade");
8
7
  const script_utils_1 = require("./script-utils");
9
- const model_constants_1 = require("./model-constants");
10
8
  const progress_reporter_1 = require("./progress-reporter");
11
9
  const backend_proxy_llm_provider_1 = require("./providers/backend-proxy-llm-provider");
12
10
  const orchestrator_1 = require("./orchestrator");
13
- const MAX_RETRIES_PER_STEP = 3; // 4 total attempts per sub-action: 3 DOM-only, then 1 potential vision attempt
14
- const MAX_SUBACTIONS_PER_STEP = 5; // Maximum sub-actions to attempt for a single step (reduced from 10 to prevent excessive retries)
15
- const MAX_FAILED_ATTEMPTS_PER_STEP = 12; // Hard limit on FAILED attempts per step across all sub-actions
16
11
  class ScenarioWorker extends events_1.EventEmitter {
17
12
  constructor(fileHandler, llmProvider, progressReporter, authConfig, backendUrl, options, outputChannel) {
18
13
  super();
19
14
  this.initialized = false;
20
15
  this.sessionId = null;
21
- // Orchestrator mode
22
- this.useOrchestrator = false;
23
16
  this.debugMode = false;
24
17
  // Use provided LLM provider or default to backend proxy (backward compatible)
25
18
  const actualLLMProvider = llmProvider || new backend_proxy_llm_provider_1.BackendProxyLLMProvider(authConfig, backendUrl);
@@ -27,13 +20,9 @@ class ScenarioWorker extends events_1.EventEmitter {
27
20
  this.fileHandler = fileHandler;
28
21
  this.progressReporter = progressReporter;
29
22
  this.outputChannel = outputChannel; // Set outputChannel for log routing
30
- // Orchestrator setup
31
- this.useOrchestrator = options?.useOrchestrator || false;
32
23
  this.orchestratorConfig = options?.orchestratorConfig;
33
24
  this.debugMode = options?.debugMode || false;
34
- if (this.useOrchestrator) {
35
- this.initializeOrchestrator();
36
- }
25
+ this.initializeOrchestrator();
37
26
  }
38
27
  /**
39
28
  * Initialize orchestrator mode with tools
@@ -69,12 +58,20 @@ class ScenarioWorker extends events_1.EventEmitter {
69
58
  );
70
59
  // Minimal initialization logging - internal details not needed by consumer
71
60
  }
61
+ setLogger(logger) {
62
+ this.logger = logger;
63
+ }
72
64
  log(message) {
73
65
  // Let consumer add timestamps - just report the raw message
74
66
  const formattedMessage = `[ScenarioWorker] ${message}`;
75
- // Always log to console for debug visibility
76
- console.log(formattedMessage);
77
- // Also route to outputChannel if provided
67
+ // Use logger if provided, otherwise fall back to console
68
+ if (this.logger) {
69
+ this.logger(formattedMessage, 'log');
70
+ }
71
+ else {
72
+ console.log(formattedMessage);
73
+ }
74
+ // Also route to outputChannel if provided (for VS Code extension)
78
75
  if (this.outputChannel) {
79
76
  this.outputChannel.appendLine(formattedMessage);
80
77
  }
@@ -82,26 +79,16 @@ class ScenarioWorker extends events_1.EventEmitter {
82
79
  logError(message) {
83
80
  // Let consumer add timestamps - just report the raw message
84
81
  const formattedMessage = `[ScenarioWorker] ERROR: ${message}`;
85
- // Always log to console for debug visibility
86
- console.error(formattedMessage);
87
- // Also route to outputChannel if provided
88
- if (this.outputChannel) {
89
- this.outputChannel.appendLine(formattedMessage);
82
+ // Use logger if provided, otherwise fall back to console
83
+ if (this.logger) {
84
+ this.logger(formattedMessage, 'error');
90
85
  }
91
- }
92
- /**
93
- * Capture screenshot as data URL
94
- * Returns data:image/png;base64,... format
95
- */
96
- async captureStepScreenshot(page) {
97
- try {
98
- const screenshot = await page.screenshot({ type: 'png' });
99
- const base64 = screenshot.toString('base64');
100
- return `data:image/png;base64,${base64}`;
86
+ else {
87
+ console.error(formattedMessage);
101
88
  }
102
- catch (error) {
103
- this.log(`Failed to capture screenshot: ${error}`);
104
- return undefined;
89
+ // Also route to outputChannel if provided (for VS Code extension)
90
+ if (this.outputChannel) {
91
+ this.outputChannel.appendLine(formattedMessage);
105
92
  }
106
93
  }
107
94
  /**
@@ -126,36 +113,6 @@ class ScenarioWorker extends events_1.EventEmitter {
126
113
  // Also log for visibility
127
114
  this.progressReporter?.log?.(`Job ${progress.jobId}: ${progress.status}`);
128
115
  }
129
- /**
130
- * Detect if a step is complex and benefits from proactive vision usage
131
- * Complex steps: form filling, verification, navigation after actions
132
- */
133
- isComplexStep(stepDescription) {
134
- const description = stepDescription.toLowerCase();
135
- // Verification steps - often need visual confirmation
136
- if (description.includes('verify') || description.includes('check') ||
137
- description.includes('confirm') || description.includes('ensure')) {
138
- return true;
139
- }
140
- // Form-related steps - multiple fields, complex interactions
141
- if (description.includes('fill') && (description.includes('form') || description.includes('field'))) {
142
- return true;
143
- }
144
- if (description.includes('enter') && description.includes('information')) {
145
- return true;
146
- }
147
- // Steps that typically follow navigation (page may still be loading)
148
- if (description.includes('click') && (description.includes('menu') ||
149
- description.includes('tab') ||
150
- description.includes('link'))) {
151
- return true;
152
- }
153
- // Multi-step actions indicated by "and" or commas
154
- if (description.includes(' and ') || description.split(',').length > 1) {
155
- return true;
156
- }
157
- return false;
158
- }
159
116
  async initialize() {
160
117
  try {
161
118
  this.sessionId = `scenario_worker_${Date.now()}`;
@@ -250,444 +207,116 @@ class ScenarioWorker extends events_1.EventEmitter {
250
207
  this.log(`📚 Starting with existing learnings: ${screenCount} screens, ${patternCount} UX patterns`);
251
208
  }
252
209
  // 3a. ORCHESTRATOR MODE - Use orchestrator agent for execution
253
- if (this.useOrchestrator && this.orchestratorAgent) {
254
- this.log(`🤖 Using Orchestrator Mode (continueOnFailure: ${CONTINUE_ON_FAILURE})`);
255
- // Initialize journey memory
256
- const memory = {
257
- history: [],
258
- extractedData: {}
259
- };
260
- // Execute steps using orchestrator
261
- for (let i = 0; i < steps.length; i++) {
262
- // Only stop if consecutive failures exceed limit AND continueOnFailure is false
263
- if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES && !CONTINUE_ON_FAILURE) {
264
- this.log(`🛑 Stopping execution: ${consecutiveFailures} consecutive failures`);
265
- // Mark remaining steps as skipped
210
+ this.log(`🤖 Using Orchestrator Mode (continueOnFailure: ${CONTINUE_ON_FAILURE})`);
211
+ // Initialize journey memory
212
+ const memory = {
213
+ history: [],
214
+ extractedData: {}
215
+ };
216
+ // Execute steps using orchestrator
217
+ for (let i = 0; i < steps.length; i++) {
218
+ // Check if job was cancelled by user
219
+ if (this.progressReporter?.shouldContinue) {
220
+ const shouldContinue = await this.progressReporter.shouldContinue(job.id);
221
+ if (!shouldContinue) {
222
+ this.log(`🛑 Job ${job.id} cancelled by user - aborting execution`);
223
+ overallSuccess = false;
224
+ // Mark current and remaining steps as skipped
266
225
  for (let j = i; j < steps.length; j++) {
267
226
  steps[j].success = false;
268
- steps[j].error = `Skipped due to ${consecutiveFailures} consecutive failures`;
227
+ steps[j].error = 'Cancelled by user';
269
228
  steps[j].playwrightCommands = [];
270
229
  }
271
- overallSuccess = false;
272
- break;
273
- }
274
- // Warn if approaching limit (even with continueOnFailure)
275
- if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES && CONTINUE_ON_FAILURE) {
276
- this.log(`⚠️ ${consecutiveFailures} consecutive failures - continuing but scenario may have issues`);
277
- }
278
- const step = steps[i];
279
- step.stepNumber = i + 1;
280
- try {
281
- // LIFECYCLE: Call beforeStepStart if provided
282
- if (this.progressReporter?.beforeStepStart) {
283
- await this.progressReporter.beforeStepStart({
284
- stepNumber: step.stepNumber,
285
- description: step.description
286
- }, page);
287
- }
288
- // Use orchestrator to execute this step
289
- // Pass accumulated site learnings so agent can build upon them
290
- const result = await this.orchestratorAgent.executeStep(page, step.description, step.stepNumber, steps.length, steps.map(s => s.description), memory, job.id, undefined, // priorSteps
291
- undefined, // nextSteps
292
- undefined, // successfulCommandsInStep
293
- undefined, // failingCommand
294
- undefined, // remainingCommandsInStep
295
- siteLearnings // Pass accumulated learnings
296
- );
297
- // Update step with result
298
- step.success = result.success;
299
- step.playwrightCommands = result.commands;
300
- step.error = result.error;
301
- // Merge site learnings from this step
302
- if (result.siteLearnings) {
303
- siteLearnings = this.mergeSiteLearningsSimple(siteLearnings, result.siteLearnings);
304
- }
305
- if (result.success) {
306
- this.log(`✓ Step ${step.stepNumber} completed via orchestrator (${result.iterations} iterations, ${result.commands.length} commands)`);
307
- consecutiveFailures = 0;
308
- }
309
- else {
310
- this.log(`✗ Step ${step.stepNumber} failed via orchestrator: ${result.terminationReason}`);
311
- this.log(` Reason: ${result.error || 'No error message'}`);
312
- this.log(` Commands executed: ${result.commands.length}`);
313
- consecutiveFailures++;
314
- overallSuccess = false;
315
- // CRITICAL: Stop on agent_stuck or infeasible (explicit agent decision)
316
- // continueOnStepFailure only applies to command failures, not agent decisions
317
- if (result.terminationReason === 'agent_stuck' || result.terminationReason === 'infeasible') {
318
- this.log(`🛑 Stopping: Agent declared step ${result.terminationReason} - cannot continue`);
319
- // Mark remaining steps as skipped
320
- for (let j = i + 1; j < steps.length; j++) {
321
- steps[j].success = false;
322
- steps[j].error = `Skipped: Previous step was ${result.terminationReason}`;
323
- steps[j].playwrightCommands = [];
324
- }
325
- break; // Exit loop
326
- }
327
- }
328
- // REPORT FINAL STEP RESULT (after orchestrator completes all iterations)
329
- // This gives the complete accumulated commands, not just one iteration
330
- await this.reportStepProgress({
331
- jobId: job.id,
332
- stepNumber: step.stepNumber,
333
- description: step.description,
334
- code: step.playwrightCommands?.join('\n') || '', // All accumulated commands
335
- status: step.success ? progress_reporter_1.StepExecutionStatus.SUCCESS : progress_reporter_1.StepExecutionStatus.FAILURE,
336
- error: step.error,
337
- agentIteration: result.iterations
338
- });
230
+ break; // Exit loop
339
231
  }
340
- catch (error) {
341
- this.logError(`Orchestrator execution failed for step ${step.stepNumber}: ${error.message}`);
342
- step.success = false;
343
- step.error = error.message;
344
- consecutiveFailures++;
345
- overallSuccess = false;
346
- }
347
- previousSteps.push(step);
348
232
  }
349
- }
350
- else {
351
- // 3b. LEGACY MODE - Use existing retry loop
352
- // Execute each step (steps may require multiple commands)
353
- for (let i = 0; i < steps.length; i++) {
354
- // Check if we should stop execution due to consecutive failures
355
- if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
356
- this.log(`🛑 Stopping execution: ${consecutiveFailures} consecutive failures detected`);
357
- this.log(` Remaining ${steps.length - i} steps will be skipped to avoid wasting resources`);
358
- // Emit log events about early termination
359
- this.emit('log', job.id, `\n🛑 EARLY TERMINATION\n`);
360
- this.emit('log', job.id, `Reason: ${consecutiveFailures} consecutive step failures\n`);
361
- this.emit('log', job.id, `Steps attempted: ${i}\n`);
362
- this.emit('log', job.id, `Steps skipped: ${steps.length - i}\n\n`);
363
- // Mark remaining steps as skipped
364
- for (let j = i; j < steps.length; j++) {
365
- const skippedStep = steps[j];
366
- skippedStep.stepNumber = j + 1;
367
- skippedStep.success = false;
368
- skippedStep.error = `Skipped due to ${consecutiveFailures} consecutive failures in previous steps`;
369
- skippedStep.playwrightCommands = [];
370
- previousSteps.push(skippedStep);
371
- }
372
- overallSuccess = false;
373
- break; // Exit the loop
233
+ // Only stop if consecutive failures exceed limit AND continueOnFailure is false
234
+ if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES && !CONTINUE_ON_FAILURE) {
235
+ this.log(`🛑 Stopping execution: ${consecutiveFailures} consecutive failures`);
236
+ // Mark remaining steps as skipped
237
+ for (let j = i; j < steps.length; j++) {
238
+ steps[j].success = false;
239
+ steps[j].error = `Skipped due to ${consecutiveFailures} consecutive failures`;
240
+ steps[j].playwrightCommands = [];
374
241
  }
375
- const step = steps[i];
376
- step.stepNumber = i + 1;
377
- step.retryCount = 0;
378
- step.subActions = [];
379
- // Force new array and clear any previous command data
380
- step.playwrightCommands = [];
381
- step.playwrightCommand = undefined;
382
- let stepSuccess = false;
383
- let stepError;
384
- let subActionCount = 0;
385
- let stepComplete = false;
386
- let totalFailedAttemptsForStep = 0; // Track FAILED attempts across all sub-actions
387
- // Detect if this is a complex step that benefits from proactive vision
388
- const isComplexStep = this.isComplexStep(step.description);
389
- // A step might need multiple commands (sub-actions) to complete
390
- while (!stepComplete && subActionCount < MAX_SUBACTIONS_PER_STEP && totalFailedAttemptsForStep < MAX_FAILED_ATTEMPTS_PER_STEP) {
391
- let subActionSuccess = false;
392
- let subActionCommand;
393
- let subActionError;
394
- let subActionRetries = 0;
395
- let usedVisionMode = false;
396
- // Build context about what's been done so far in this step
397
- const stepContext = step.subActions && step.subActions.length > 0
398
- ? `\nSub-actions completed so far for this step:\n${step.subActions.map((sa, idx) => ` ${idx + 1}. ${sa.command} - ${sa.success ? 'SUCCESS' : 'FAILED'}`).join('\n')}`
399
- : '';
400
- for (let attempt = 0; attempt <= MAX_RETRIES_PER_STEP; attempt++) {
401
- // Check if we've exceeded failed attempts budget BEFORE attempting
402
- if (totalFailedAttemptsForStep >= MAX_FAILED_ATTEMPTS_PER_STEP) {
403
- this.log(` ⚠️ Exceeded failed attempts budget (${MAX_FAILED_ATTEMPTS_PER_STEP}) for this step`);
404
- stepComplete = true;
405
- stepSuccess = false;
406
- stepError = `Exceeded maximum failed attempts (${MAX_FAILED_ATTEMPTS_PER_STEP}) for step`;
407
- break;
408
- }
409
- let currentAttemptCommand;
410
- let currentAttemptSuccess = false;
411
- let currentAttemptError;
412
- const attemptTimestamp = Date.now();
413
- try {
414
- this.log(`Step ${step.stepNumber} - Sub-action ${subActionCount + 1}, Attempt ${attempt + 1}: ${step.description}`);
415
- // Get current page state - handle navigation in progress
416
- let domSnapshot;
417
- let pageInfo;
418
- try {
419
- domSnapshot = {
420
- url: page.url(),
421
- title: await page.title(),
422
- accessibilityTree: await page.accessibility.snapshot()
423
- };
424
- pageInfo = await (0, page_info_utils_1.getEnhancedPageInfo)(domSnapshot);
425
- }
426
- catch (contextError) {
427
- // If execution context was destroyed (navigation in progress), wait and retry
428
- if (contextError.message && contextError.message.includes('Execution context was destroyed')) {
429
- this.log(` ⏳ Navigation in progress, waiting for page to load...`);
430
- await page.waitForLoadState('domcontentloaded', { timeout: 5000 }).catch(() => { });
431
- // Retry page state capture
432
- domSnapshot = {
433
- url: page.url(),
434
- title: await page.title(),
435
- accessibilityTree: await page.accessibility.snapshot()
436
- };
437
- pageInfo = await (0, page_info_utils_1.getEnhancedPageInfo)(domSnapshot);
438
- }
439
- else {
440
- throw contextError; // Re-throw if it's not a navigation issue
441
- }
442
- }
443
- // Vision trigger: Liberal usage since gpt-5-mini vision is cost-effective
444
- const modelToUse = job.model || model_constants_1.DEFAULT_MODEL;
445
- let command;
446
- // Enhanced logging for vision trigger logic
447
- this.log(` 🔍 Vision trigger check: subAction=${subActionCount + 1}, attempt=${attempt}, totalFailed=${totalFailedAttemptsForStep}, usedVision=${usedVisionMode}`);
448
- // Liberal vision strategy (gpt-5-mini is cost-effective):
449
- // 1. After ANY failure (1+) → use vision
450
- // 2. Complex steps → use vision from attempt 1
451
- // 3. No LLM assessment gate → go directly to vision
452
- const hasFailure = totalFailedAttemptsForStep >= 1 && lastError;
453
- const shouldUseProactiveVision = isComplexStep && attempt === 0; // First attempt for complex steps
454
- const shouldUseVision = (hasFailure || shouldUseProactiveVision) && !usedVisionMode;
455
- if (shouldUseVision) {
456
- if (shouldUseProactiveVision) {
457
- this.log(` 🎯 PROACTIVE VISION: Complex step detected, using vision from first attempt`);
458
- }
459
- else {
460
- this.log(` 🎯 VISION TRIGGER: ${totalFailedAttemptsForStep} failure(s) detected, using vision (no LLM gate)`);
461
- }
462
- // Two-step supervisor pattern:
463
- // 1. Supervisor analyzes screenshot and provides instructions
464
- // 2. Worker generates command based on those instructions
465
- this.log(` 📸 Taking screenshot for supervisor analysis...`);
466
- // Capture optimized screenshot using utility method
467
- const imageDataUrl = await (0, browser_utils_1.captureOptimizedScreenshot)(page, { timeout: 10000 }, // Uses default quality 60
468
- (msg) => this.log(msg));
469
- this.log(` 👔 STEP 1: Supervisor analyzing screenshot (${model_constants_1.VISION_MODEL})...`);
470
- const supervisorDiagnostics = await this.llmFacade.getVisionDiagnostics(step.description + stepContext, pageInfo, previousSteps, lastError, imageDataUrl, model_constants_1.VISION_MODEL);
471
- // DEBUG: Log vision diagnostics
472
- this.log(` 📸 Visual insights: ${supervisorDiagnostics.visualAnalysis}`);
473
- this.log(` 🔍 Root cause: ${supervisorDiagnostics.rootCause}`);
474
- this.log(` 💡 Recommended approach: ${supervisorDiagnostics.recommendedApproach}`);
475
- if (supervisorDiagnostics.elementsFound.length > 0) {
476
- this.log(` ✅ Elements found: ${supervisorDiagnostics.elementsFound.join(', ')}`);
477
- }
478
- if (supervisorDiagnostics.elementsNotFound.length > 0) {
479
- this.log(` ❌ Elements not found: ${supervisorDiagnostics.elementsNotFound.join(', ')}`);
480
- }
481
- this.log(` 🔨 STEP 2: Worker generating command from supervisor instructions (${model_constants_1.DEFAULT_MODEL})...`);
482
- command = await this.llmFacade.generateCommandFromSupervisorInstructions(step.description + stepContext, supervisorDiagnostics, pageInfo, modelToUse // Cheaper model for command generation
483
- );
484
- usedVisionMode = true;
485
- }
486
- else {
487
- // Not using vision - use regular DOM-based approach
488
- if (usedVisionMode) {
489
- this.log(` 📝 Vision already used - using DOM-based approach`);
490
- }
491
- else if (isComplexStep) {
492
- this.log(` 📝 Complex step, but first attempt - using DOM-based approach (vision on retry)`);
493
- }
494
- else {
495
- this.log(` 📝 Using DOM-based approach (${totalFailedAttemptsForStep} failures so far)`);
496
- }
497
- const stepDescriptionWithContext = step.description + stepContext;
498
- command = await this.llmFacade.generatePlaywrightCommand(stepDescriptionWithContext, pageInfo, previousSteps, lastError, step, modelToUse);
499
- }
500
- if (!command) {
501
- throw new Error('LLM failed to generate a Playwright command.');
502
- }
503
- currentAttemptCommand = command;
504
- this.log(` Command: ${command}`);
505
- // Execute the command
506
- await this.executePlaywrightCommand(page, browser, context, command);
507
- // Success
508
- subActionSuccess = true;
509
- currentAttemptSuccess = true;
510
- subActionCommand = command;
511
- step.playwrightCommands.push(command);
512
- this.log(` ✅ SUCCESS: ${command}${usedVisionMode ? ' (vision-aided)' : ''}`);
513
- // Wait a bit for any navigation that might have been triggered
514
- // This prevents "Execution context destroyed" errors when checking goal completion
515
- await page.waitForLoadState('domcontentloaded', { timeout: 3000 }).catch(() => {
516
- // Ignore timeout - page might not be navigating
517
- });
518
- break; // Sub-action successful, check if step is complete
519
- }
520
- catch (error) {
521
- subActionError = error instanceof Error ? error.message : String(error);
522
- currentAttemptError = subActionError;
523
- // Get current URL for context (especially useful for navigation failures)
524
- let currentUrl = 'unknown';
525
- try {
526
- currentUrl = page.url();
527
- }
528
- catch (e) {
529
- // Ignore if we can't get URL
530
- }
531
- // Enhanced error message with current URL
532
- const errorWithContext = `${subActionError} | Current URL: ${currentUrl}`;
533
- this.logError(` ❌ FAILED (attempt ${attempt + 1}): ${subActionError}`);
534
- this.logError(` Current URL: ${currentUrl}`);
535
- this.logError(` Command attempted: ${currentAttemptCommand || 'N/A'}`);
536
- subActionRetries++;
537
- totalFailedAttemptsForStep++; // Increment failed attempts counter
538
- // Only update lastError if this is the final attempt
539
- if (attempt === MAX_RETRIES_PER_STEP) {
540
- lastError = errorWithContext; // Include URL in error context for LLM
541
- }
542
- // If this is the last attempt, mark sub-action as failed
543
- if (attempt === MAX_RETRIES_PER_STEP) {
544
- subActionSuccess = false;
545
- subActionCommand = currentAttemptCommand;
546
- this.logError(` 🚫 SUB-ACTION FAILED after ${MAX_RETRIES_PER_STEP + 1} attempts (including vision mode if used)`);
547
- break; // Exit retry loop
548
- }
549
- }
550
- finally {
551
- if (!step.attempts) {
552
- step.attempts = [];
553
- }
554
- step.attempts.push({
555
- attemptNumber: attempt + 1,
556
- command: currentAttemptCommand,
557
- success: currentAttemptSuccess,
558
- error: currentAttemptError,
559
- timestamp: attemptTimestamp
560
- });
561
- }
562
- }
563
- // Record the sub-action
564
- if (subActionCommand) {
565
- step.subActions.push({
566
- command: subActionCommand,
567
- success: subActionSuccess,
568
- error: subActionError,
569
- retryCount: subActionRetries
570
- });
571
- }
572
- subActionCount++;
573
- // Determine if step (goal) is complete
574
- if (subActionSuccess) {
575
- // After each successful sub-action, ask LLM if goal is complete
576
- if (subActionCount >= MAX_SUBACTIONS_PER_STEP) {
577
- // Safety limit - avoid infinite loops
578
- stepComplete = true;
579
- stepSuccess = true;
580
- this.log(` ⚠️ Reached max sub-actions limit (${MAX_SUBACTIONS_PER_STEP}) with ${totalFailedAttemptsForStep} failed attempts, considering step complete`);
581
- }
582
- else {
583
- // Ask LLM if goal is complete
584
- try {
585
- // Capture page state - if navigation is still happening, retry once
586
- let domSnapshot;
587
- let pageInfo;
588
- try {
589
- domSnapshot = {
590
- url: page.url(),
591
- title: await page.title(),
592
- accessibilityTree: await page.accessibility.snapshot()
593
- };
594
- pageInfo = await (0, page_info_utils_1.getEnhancedPageInfo)(domSnapshot);
595
- }
596
- catch (contextError) {
597
- // If execution context was destroyed (navigation in progress), wait and retry
598
- if (contextError.message && contextError.message.includes('Execution context was destroyed')) {
599
- this.log(` ⏳ Navigation detected, waiting for page to load...`);
600
- await page.waitForLoadState('domcontentloaded', { timeout: 5000 }).catch(() => { });
601
- // Retry page state capture
602
- domSnapshot = {
603
- url: page.url(),
604
- title: await page.title(),
605
- accessibilityTree: await page.accessibility.snapshot()
606
- };
607
- pageInfo = await (0, page_info_utils_1.getEnhancedPageInfo)(domSnapshot);
608
- }
609
- else {
610
- throw contextError; // Re-throw if it's not a navigation issue
611
- }
612
- }
613
- // Vision-backed goal completion for complex/verification steps
614
- const shouldUseVisionForCompletion = isComplexStep && subActionCount >= 1; // At least one action done
615
- let completionCheck;
616
- if (shouldUseVisionForCompletion) {
617
- this.log(` 🎯 Vision-backed goal completion check (complex step)`);
618
- // Capture screenshot for visual verification
619
- const imageDataUrl = await (0, browser_utils_1.captureOptimizedScreenshot)(page, { timeout: 10000 }, (msg) => this.log(msg));
620
- // Use vision model to check goal completion with visual context
621
- completionCheck = await this.llmFacade.checkGoalCompletionWithVision(step.description, step.playwrightCommands || [], pageInfo, imageDataUrl, model_constants_1.VISION_MODEL);
622
- }
623
- else {
624
- // Regular DOM-based goal completion check
625
- completionCheck = await this.llmFacade.checkGoalCompletion(step.description, step.playwrightCommands || [], pageInfo, job.model || model_constants_1.DEFAULT_MODEL);
626
- }
627
- this.log(` 🎯 Goal completion check: ${completionCheck.isComplete ? 'COMPLETE' : 'INCOMPLETE'} - ${completionCheck.reason}`);
628
- if (completionCheck.isComplete) {
629
- stepComplete = true;
630
- stepSuccess = true;
631
- }
632
- else {
633
- // Continue with next sub-action
634
- if (completionCheck.nextSubGoal) {
635
- this.log(` 📍 Next sub-goal: ${completionCheck.nextSubGoal}`);
636
- }
637
- // Continue looping to generate next command
638
- }
639
- }
640
- catch (error) {
641
- this.logError(`Error checking goal completion: ${error}`);
642
- // Fallback: consider complete after 1 successful sub-action if we can't check
643
- stepComplete = true;
644
- stepSuccess = true;
645
- }
646
- }
647
- }
648
- else {
649
- // Sub-action failed
650
- stepComplete = true; // Move on after failure
651
- stepSuccess = false;
652
- stepError = subActionError;
653
- overallSuccess = false;
654
- }
242
+ overallSuccess = false;
243
+ break;
244
+ }
245
+ // Warn if approaching limit (even with continueOnFailure)
246
+ if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES && CONTINUE_ON_FAILURE) {
247
+ this.log(`⚠️ ${consecutiveFailures} consecutive failures - continuing but scenario may have issues`);
248
+ }
249
+ const step = steps[i];
250
+ step.stepNumber = i + 1;
251
+ try {
252
+ // LIFECYCLE: Call beforeStepStart if provided
253
+ if (this.progressReporter?.beforeStepStart) {
254
+ await this.progressReporter.beforeStepStart({
255
+ stepNumber: step.stepNumber,
256
+ description: step.description
257
+ }, page);
655
258
  }
656
- // Set the step's final command (last successful or aggregate)
657
- if (step.playwrightCommands && step.playwrightCommands.length > 0) {
658
- step.playwrightCommand = step.playwrightCommands[step.playwrightCommands.length - 1];
259
+ // Use orchestrator to execute this step
260
+ // Pass accumulated site learnings so agent can build upon them
261
+ const result = await this.orchestratorAgent.executeStep(page, step.description, step.stepNumber, steps.length, steps.map(s => s.description), memory, job.id, undefined, // priorSteps
262
+ undefined, // nextSteps
263
+ undefined, // successfulCommandsInStep
264
+ undefined, // failingCommand
265
+ undefined, // remainingCommandsInStep
266
+ siteLearnings // Pass accumulated learnings
267
+ );
268
+ // Update step with result
269
+ step.success = result.success;
270
+ step.playwrightCommands = result.commands;
271
+ step.error = result.error;
272
+ // Merge site learnings from this step
273
+ if (result.siteLearnings) {
274
+ siteLearnings = this.mergeSiteLearningsSimple(siteLearnings, result.siteLearnings);
659
275
  }
660
- step.success = stepSuccess;
661
- step.error = stepError;
662
- previousSteps.push(step);
663
- // Update consecutive failure counter
664
- if (stepSuccess) {
665
- consecutiveFailures = 0; // Reset on success
276
+ if (result.success) {
277
+ this.log(`✓ Step ${step.stepNumber} completed via orchestrator (${result.iterations} iterations, ${result.commands.length} commands)`);
278
+ consecutiveFailures = 0;
666
279
  }
667
280
  else {
281
+ this.log(`✗ Step ${step.stepNumber} failed via orchestrator: ${result.terminationReason}`);
282
+ this.log(` Reason: ${result.error || 'No error message'}`);
283
+ this.log(` Commands executed: ${result.commands.length}`);
668
284
  consecutiveFailures++;
669
- this.log(`⚠️ Consecutive failures: ${consecutiveFailures}/${MAX_CONSECUTIVE_FAILURES}`);
670
- }
671
- // Emit step result log events
672
- this.emit('log', job.id, `### Step ${step.stepNumber}: ${step.description}\n`);
673
- this.emit('log', job.id, `Status: ${stepSuccess ? '✅ SUCCESS' : '❌ FAILED'}\n`);
674
- this.emit('log', job.id, `Sub-actions: ${subActionCount}\n`);
675
- this.emit('log', job.id, `Failed attempts: ${totalFailedAttemptsForStep}\n`);
676
- if (step.playwrightCommands && step.playwrightCommands.length > 0) {
677
- this.emit('log', job.id, `Commands:\n`);
678
- step.playwrightCommands.forEach((cmd, idx) => {
679
- this.emit('log', job.id, ` ${idx + 1}. ${cmd}\n`);
680
- });
681
- }
682
- if (stepError) {
683
- this.emit('log', job.id, `Error: ${stepError}\n`);
684
- }
685
- if (step.attempts && step.attempts.length > 0) {
686
- this.emit('log', job.id, `Total attempts: ${step.attempts.length}\n`);
285
+ overallSuccess = false;
286
+ // CRITICAL: Stop on agent_stuck or infeasible (explicit agent decision)
287
+ // continueOnStepFailure only applies to command failures, not agent decisions
288
+ if (result.terminationReason === 'agent_stuck' || result.terminationReason === 'infeasible') {
289
+ this.log(`🛑 Stopping: Agent declared step ${result.terminationReason} - cannot continue`);
290
+ // Mark remaining steps as skipped
291
+ for (let j = i + 1; j < steps.length; j++) {
292
+ steps[j].success = false;
293
+ steps[j].error = `Skipped: Previous step was ${result.terminationReason}`;
294
+ steps[j].playwrightCommands = [];
295
+ }
296
+ break; // Exit loop
297
+ }
687
298
  }
688
- this.emit('log', job.id, `\n`);
299
+ // REPORT FINAL STEP RESULT (after orchestrator completes all iterations)
300
+ // This gives the complete accumulated commands, not just one iteration
301
+ await this.reportStepProgress({
302
+ jobId: job.id,
303
+ stepNumber: step.stepNumber,
304
+ description: step.description,
305
+ code: step.playwrightCommands?.join('\n') || '', // All accumulated commands
306
+ status: step.success ? progress_reporter_1.StepExecutionStatus.SUCCESS : progress_reporter_1.StepExecutionStatus.FAILURE,
307
+ error: step.error,
308
+ agentIteration: result.iterations
309
+ });
310
+ }
311
+ catch (error) {
312
+ this.logError(`Orchestrator execution failed for step ${step.stepNumber}: ${error.message}`);
313
+ step.success = false;
314
+ step.error = error.message;
315
+ consecutiveFailures++;
316
+ overallSuccess = false;
689
317
  }
690
- } // End of else block (legacy mode)
318
+ previousSteps.push(step);
319
+ }
691
320
  // Generate test name if not provided
692
321
  const testName = job.testName || await this.llmFacade.generateTestName(job.scenario, job.model);
693
322
  // Generate hashtags for semantic grouping
@@ -833,7 +462,7 @@ class ScenarioWorker extends events_1.EventEmitter {
833
462
  executionTime: Date.now() - startTime,
834
463
  testName,
835
464
  preferredFileName,
836
- siteLearnings: this.useOrchestrator ? siteLearnings : undefined
465
+ siteLearnings: siteLearnings // Orchestrator always enabled - always return learnings
837
466
  };
838
467
  }
839
468
  catch (error) {
@@ -862,8 +491,8 @@ class ScenarioWorker extends events_1.EventEmitter {
862
491
  // LIFECYCLE: Call afterEndTest if provided
863
492
  if (browser && this.progressReporter?.afterEndTest) {
864
493
  try {
865
- await this.progressReporter.afterEndTest(overallSuccess ? 'passed' : 'failed', overallSuccess ? undefined : 'Test execution had failures', page, this.useOrchestrator ? siteLearnings : undefined, // Pass siteLearnings here
866
- this.useOrchestrator && this.orchestratorAgent ? this.orchestratorAgent.getDebugStats() : undefined // Pass debugStats here
494
+ await this.progressReporter.afterEndTest(overallSuccess ? 'passed' : 'failed', overallSuccess ? undefined : 'Test execution had failures', page, siteLearnings ?? undefined, // Pass siteLearnings here
495
+ this.orchestratorAgent.getDebugStats() ?? undefined // Pass debugStats here
867
496
  );
868
497
  }
869
498
  catch (callbackError) {
@@ -877,31 +506,6 @@ class ScenarioWorker extends events_1.EventEmitter {
877
506
  }
878
507
  }
879
508
  }
880
- async executePlaywrightCommand(page, browser, context, command) {
881
- // Detect if command contains navigation or load state operations that need longer timeout
882
- const needsLongerTimeout = command.includes('waitForLoadState') ||
883
- command.includes('goto(') ||
884
- command.includes('waitForURL') ||
885
- command.includes('waitForNavigation');
886
- // Use appropriate timeout based on operation type
887
- const timeout = needsLongerTimeout ? 30000 : 5000;
888
- page.setDefaultTimeout(timeout);
889
- try {
890
- // Execute command directly without validation
891
- const commandFunction = new Function('page', 'browser', 'context', 'expect', `
892
- return (async () => {
893
- ${command}
894
- })();
895
- `);
896
- // Dynamically import expect
897
- const { expect } = require('@playwright/test');
898
- await commandFunction(page, browser, context, expect);
899
- }
900
- finally {
901
- // Reset to default timeout for element operations
902
- page.setDefaultTimeout(5000);
903
- }
904
- }
905
509
  /**
906
510
  * Simple merge of site learnings (accumulate across steps)
907
511
  */
@@ -940,9 +544,6 @@ class ScenarioWorker extends events_1.EventEmitter {
940
544
  * Execute exploration mode using orchestrator
941
545
  */
942
546
  async executeExploration(page, explorationConfig, jobId, existingSiteLearnings) {
943
- if (!this.useOrchestrator || !this.orchestratorAgent) {
944
- throw new Error('Orchestrator not available - exploration mode requires orchestrator');
945
- }
946
547
  // Execute exploration via orchestrator
947
548
  return this.orchestratorAgent.executeExploration(page, explorationConfig, jobId, existingSiteLearnings);
948
549
  }