testchimp-runner-core 0.0.21 → 0.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/VISION_DIAGNOSTICS_IMPROVEMENTS.md +336 -0
- package/dist/credit-usage-service.d.ts +9 -0
- package/dist/credit-usage-service.d.ts.map +1 -1
- package/dist/credit-usage-service.js +20 -5
- package/dist/credit-usage-service.js.map +1 -1
- package/dist/execution-service.d.ts +7 -2
- package/dist/execution-service.d.ts.map +1 -1
- package/dist/execution-service.js +91 -36
- package/dist/execution-service.js.map +1 -1
- package/dist/index.d.ts +30 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +91 -26
- package/dist/index.js.map +1 -1
- package/dist/llm-facade.d.ts +64 -8
- package/dist/llm-facade.d.ts.map +1 -1
- package/dist/llm-facade.js +361 -109
- package/dist/llm-facade.js.map +1 -1
- package/dist/llm-provider.d.ts +39 -0
- package/dist/llm-provider.d.ts.map +1 -0
- package/dist/llm-provider.js +7 -0
- package/dist/llm-provider.js.map +1 -0
- package/dist/model-constants.d.ts +21 -0
- package/dist/model-constants.d.ts.map +1 -0
- package/dist/model-constants.js +24 -0
- package/dist/model-constants.js.map +1 -0
- package/dist/orchestrator/index.d.ts +8 -0
- package/dist/orchestrator/index.d.ts.map +1 -0
- package/dist/orchestrator/index.js +23 -0
- package/dist/orchestrator/index.js.map +1 -0
- package/dist/orchestrator/orchestrator-agent.d.ts +66 -0
- package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -0
- package/dist/orchestrator/orchestrator-agent.js +855 -0
- package/dist/orchestrator/orchestrator-agent.js.map +1 -0
- package/dist/orchestrator/tool-registry.d.ts +74 -0
- package/dist/orchestrator/tool-registry.d.ts.map +1 -0
- package/dist/orchestrator/tool-registry.js +131 -0
- package/dist/orchestrator/tool-registry.js.map +1 -0
- package/dist/orchestrator/tools/check-page-ready.d.ts +13 -0
- package/dist/orchestrator/tools/check-page-ready.d.ts.map +1 -0
- package/dist/orchestrator/tools/check-page-ready.js +72 -0
- package/dist/orchestrator/tools/check-page-ready.js.map +1 -0
- package/dist/orchestrator/tools/extract-data.d.ts +13 -0
- package/dist/orchestrator/tools/extract-data.d.ts.map +1 -0
- package/dist/orchestrator/tools/extract-data.js +84 -0
- package/dist/orchestrator/tools/extract-data.js.map +1 -0
- package/dist/orchestrator/tools/index.d.ts +10 -0
- package/dist/orchestrator/tools/index.d.ts.map +1 -0
- package/dist/orchestrator/tools/index.js +18 -0
- package/dist/orchestrator/tools/index.js.map +1 -0
- package/dist/orchestrator/tools/inspect-page.d.ts +13 -0
- package/dist/orchestrator/tools/inspect-page.d.ts.map +1 -0
- package/dist/orchestrator/tools/inspect-page.js +39 -0
- package/dist/orchestrator/tools/inspect-page.js.map +1 -0
- package/dist/orchestrator/tools/recall-history.d.ts +13 -0
- package/dist/orchestrator/tools/recall-history.d.ts.map +1 -0
- package/dist/orchestrator/tools/recall-history.js +64 -0
- package/dist/orchestrator/tools/recall-history.js.map +1 -0
- package/dist/orchestrator/tools/take-screenshot.d.ts +15 -0
- package/dist/orchestrator/tools/take-screenshot.d.ts.map +1 -0
- package/dist/orchestrator/tools/take-screenshot.js +112 -0
- package/dist/orchestrator/tools/take-screenshot.js.map +1 -0
- package/dist/orchestrator/types.d.ts +133 -0
- package/dist/orchestrator/types.d.ts.map +1 -0
- package/dist/orchestrator/types.js +28 -0
- package/dist/orchestrator/types.js.map +1 -0
- package/dist/playwright-mcp-service.d.ts +9 -0
- package/dist/playwright-mcp-service.d.ts.map +1 -1
- package/dist/playwright-mcp-service.js +20 -5
- package/dist/playwright-mcp-service.js.map +1 -1
- package/dist/progress-reporter.d.ts +97 -0
- package/dist/progress-reporter.d.ts.map +1 -0
- package/dist/progress-reporter.js +18 -0
- package/dist/progress-reporter.js.map +1 -0
- package/dist/prompts.d.ts +24 -0
- package/dist/prompts.d.ts.map +1 -1
- package/dist/prompts.js +593 -68
- package/dist/prompts.js.map +1 -1
- package/dist/providers/backend-proxy-llm-provider.d.ts +25 -0
- package/dist/providers/backend-proxy-llm-provider.d.ts.map +1 -0
- package/dist/providers/backend-proxy-llm-provider.js +76 -0
- package/dist/providers/backend-proxy-llm-provider.js.map +1 -0
- package/dist/providers/local-llm-provider.d.ts +21 -0
- package/dist/providers/local-llm-provider.d.ts.map +1 -0
- package/dist/providers/local-llm-provider.js +35 -0
- package/dist/providers/local-llm-provider.js.map +1 -0
- package/dist/scenario-service.d.ts +27 -1
- package/dist/scenario-service.d.ts.map +1 -1
- package/dist/scenario-service.js +48 -12
- package/dist/scenario-service.js.map +1 -1
- package/dist/scenario-worker-class.d.ts +39 -2
- package/dist/scenario-worker-class.d.ts.map +1 -1
- package/dist/scenario-worker-class.js +614 -86
- package/dist/scenario-worker-class.js.map +1 -1
- package/dist/script-utils.d.ts +2 -0
- package/dist/script-utils.d.ts.map +1 -1
- package/dist/script-utils.js +44 -4
- package/dist/script-utils.js.map +1 -1
- package/dist/types.d.ts +11 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/browser-utils.d.ts +20 -1
- package/dist/utils/browser-utils.d.ts.map +1 -1
- package/dist/utils/browser-utils.js +102 -51
- package/dist/utils/browser-utils.js.map +1 -1
- package/dist/utils/page-info-utils.d.ts +23 -4
- package/dist/utils/page-info-utils.d.ts.map +1 -1
- package/dist/utils/page-info-utils.js +174 -43
- package/dist/utils/page-info-utils.js.map +1 -1
- package/package.json +1 -2
- package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +642 -0
- package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +844 -0
- package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +539 -0
- package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +241 -0
- package/plandocs/PHASE1_FINAL_STATUS.md +210 -0
- package/plandocs/PLANNING_SESSION_SUMMARY.md +372 -0
- package/plandocs/SCRIPT_CLEANUP_FEATURE.md +201 -0
- package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +364 -0
- package/plandocs/SELECTOR_IMPROVEMENTS.md +139 -0
- package/src/credit-usage-service.ts +23 -5
- package/src/execution-service.ts +152 -42
- package/src/index.ts +169 -26
- package/src/llm-facade.ts +500 -126
- package/src/llm-provider.ts +43 -0
- package/src/model-constants.ts +23 -0
- package/src/orchestrator/index.ts +33 -0
- package/src/orchestrator/orchestrator-agent.ts +1037 -0
- package/src/orchestrator/tool-registry.ts +182 -0
- package/src/orchestrator/tools/check-page-ready.ts +75 -0
- package/src/orchestrator/tools/extract-data.ts +92 -0
- package/src/orchestrator/tools/index.ts +11 -0
- package/src/orchestrator/tools/inspect-page.ts +42 -0
- package/src/orchestrator/tools/recall-history.ts +72 -0
- package/src/orchestrator/tools/take-screenshot.ts +128 -0
- package/src/orchestrator/types.ts +200 -0
- package/src/playwright-mcp-service.ts +23 -5
- package/src/progress-reporter.ts +109 -0
- package/src/prompts.ts +606 -69
- package/src/providers/backend-proxy-llm-provider.ts +91 -0
- package/src/providers/local-llm-provider.ts +38 -0
- package/src/scenario-service.ts +83 -13
- package/src/scenario-worker-class.ts +740 -72
- package/src/script-utils.ts +50 -5
- package/src/types.ts +13 -1
- package/src/utils/browser-utils.ts +123 -51
- package/src/utils/page-info-utils.ts +210 -53
- package/testchimp-runner-core-0.0.22.tgz +0 -0
|
@@ -1,12 +1,28 @@
|
|
|
1
1
|
import fs from 'fs';
|
|
2
2
|
import path from 'path';
|
|
3
|
+
import { EventEmitter } from 'events';
|
|
3
4
|
import { getEnhancedPageInfo } from './utils/page-info-utils';
|
|
4
|
-
import { initializeBrowser } from './utils/browser-utils';
|
|
5
|
+
import { initializeBrowser, captureOptimizedScreenshot } from './utils/browser-utils';
|
|
5
6
|
import { LLMFacade } from './llm-facade';
|
|
6
7
|
import { ScenarioRunJob, ScenarioResponse, ScenarioStep } from './types';
|
|
7
8
|
import { FileHandler } from './file-handler';
|
|
8
9
|
import { AuthConfig } from './auth-config';
|
|
9
10
|
import { generateTestScript } from './script-utils';
|
|
11
|
+
import { DEFAULT_MODEL, VISION_MODEL } from './model-constants';
|
|
12
|
+
import { LLMProvider } from './llm-provider';
|
|
13
|
+
import { ProgressReporter, StepProgress, JobProgress, StepExecutionStatus } from './progress-reporter';
|
|
14
|
+
import { BackendProxyLLMProvider } from './providers/backend-proxy-llm-provider';
|
|
15
|
+
import {
|
|
16
|
+
OrchestratorAgent,
|
|
17
|
+
ToolRegistry,
|
|
18
|
+
JourneyMemory,
|
|
19
|
+
AgentConfig,
|
|
20
|
+
TakeScreenshotTool,
|
|
21
|
+
RecallHistoryTool,
|
|
22
|
+
InspectPageTool,
|
|
23
|
+
CheckPageReadyTool,
|
|
24
|
+
ExtractDataTool
|
|
25
|
+
} from './orchestrator';
|
|
10
26
|
|
|
11
27
|
// Define a simple logging interface for compatibility
|
|
12
28
|
interface OutputChannel {
|
|
@@ -24,37 +40,216 @@ interface ScenarioJob {
|
|
|
24
40
|
}
|
|
25
41
|
|
|
26
42
|
|
|
27
|
-
const MAX_RETRIES_PER_STEP =
|
|
43
|
+
const MAX_RETRIES_PER_STEP = 3; // Retries per sub-action: the attempt loop runs attempt = 0..MAX_RETRIES_PER_STEP inclusive, i.e. up to 4 attempts per sub-action
|
|
44
|
+
const MAX_SUBACTIONS_PER_STEP = 5; // Upper bound on sub-actions tried for a single step (per the original note, reduced from 10 to prevent excessive retries)
|
|
45
|
+
const MAX_FAILED_ATTEMPTS_PER_STEP = 12; // Hard cap on FAILED attempts for a single step, counted across all of its sub-actions
|
|
28
46
|
|
|
29
|
-
export class ScenarioWorker {
|
|
47
|
+
export class ScenarioWorker extends EventEmitter {
|
|
30
48
|
private initialized = false;
|
|
31
49
|
private sessionId: string | null = null;
|
|
32
50
|
private llmFacade: LLMFacade;
|
|
33
51
|
private fileHandler?: FileHandler;
|
|
34
52
|
private outputChannel?: OutputChannel;
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
53
|
+
private progressReporter?: ProgressReporter;
|
|
54
|
+
private currentJobId?: string;
|
|
55
|
+
|
|
56
|
+
// Orchestrator mode
|
|
57
|
+
private useOrchestrator: boolean = false;
|
|
58
|
+
private orchestratorAgent?: OrchestratorAgent;
|
|
59
|
+
private toolRegistry?: ToolRegistry;
|
|
60
|
+
private orchestratorConfig?: Partial<AgentConfig>;
|
|
61
|
+
private debugMode: boolean = false;
|
|
62
|
+
|
|
63
|
+
/**
 * Build a scenario worker.
 *
 * @param fileHandler Optional file handler stored on the instance.
 * @param llmProvider LLM provider to use; when omitted a BackendProxyLLMProvider
 *   is created from `authConfig` and `backendUrl`.
 * @param progressReporter Optional sink for step/job progress callbacks.
 * @param authConfig Passed to the fallback backend-proxy provider only.
 * @param backendUrl Passed to the fallback backend-proxy provider only.
 * @param options Orchestrator switches: `useOrchestrator`, `orchestratorConfig`, `debugMode`.
 * @param outputChannel Optional channel that receives every log line in addition to the console.
 */
constructor(
  fileHandler?: FileHandler,
  llmProvider?: LLMProvider,
  progressReporter?: ProgressReporter,
  authConfig?: AuthConfig,
  backendUrl?: string,
  options?: {
    useOrchestrator?: boolean;
    orchestratorConfig?: Partial<AgentConfig>;
    debugMode?: boolean;
  },
  outputChannel?: OutputChannel
) {
  super();

  // Fall back to the backend proxy when the caller supplies no provider (backward compatible).
  const provider = llmProvider
    ? llmProvider
    : new BackendProxyLLMProvider(authConfig, backendUrl);
  this.llmFacade = new LLMFacade(provider);

  this.fileHandler = fileHandler;
  this.progressReporter = progressReporter;
  // Must be set before initializeOrchestrator() runs, because that method logs.
  this.outputChannel = outputChannel;

  // Orchestrator settings; both flags default to false.
  const { useOrchestrator, orchestratorConfig, debugMode } = options ?? {};
  this.useOrchestrator = useOrchestrator || false;
  this.orchestratorConfig = orchestratorConfig;
  this.debugMode = debugMode || false;

  if (!this.useOrchestrator) {
    return;
  }
  this.initializeOrchestrator();
}
|
|
95
|
+
|
|
96
|
+
/**
 * Set up orchestrator mode: create the tool registry, register the five
 * information-gathering tools, and construct the orchestrator agent with a
 * logger callback that routes messages through this worker's log methods.
 */
private initializeOrchestrator(): void {
  this.log('🤖 Initializing Orchestrator Mode');

  const registry = new ToolRegistry();
  this.toolRegistry = registry;

  // The screenshot tool gets the LLM facade injected for vision analysis.
  const screenshotTool = new TakeScreenshotTool();
  screenshotTool.setLLMFacade(this.llmFacade);

  // Tools only gather information; state changes happen via Playwright commands.
  registry.register(screenshotTool);
  registry.register(new RecallHistoryTool());
  registry.register(new InspectPageTool());
  registry.register(new CheckPageReadyTool());
  registry.register(new ExtractDataTool());

  this.orchestratorAgent = new OrchestratorAgent(
    this.llmFacade,
    registry,
    this.orchestratorConfig,
    this.progressReporter,
    (message, level) => {
      switch (level) {
        case 'error':
          this.logError(message);
          break;
        case 'warn':
          this.log(`⚠️ ${message}`);
          break;
        case 'debug':
          // Debug messages are dropped unless debug mode is on.
          if (this.debugMode) {
            this.log(`🐛 ${message}`);
          }
          break;
        default:
          this.log(message);
      }
    },
    this.debugMode
  );

  const debugSuffix = this.debugMode ? ' (DEBUG MODE)' : '';
  this.log(`✓ Orchestrator initialized with 5 tools${debugSuffix} (information-gathering only)`);
}
|
|
41
140
|
|
|
42
141
|
/**
 * Write an informational line, prefixed with a HH:MM:SS.mmm timestamp and the
 * worker tag, to the console and to the output channel when one is set.
 */
private log(message: string): void {
  // Characters 11..22 of the ISO string are the time-of-day part (HH:MM:SS.mmm).
  const clock = new Date().toISOString().slice(11, 23);
  const line = `[${clock}] [ScenarioWorker] ${message}`;

  console.log(line);
  this.outputChannel?.appendLine(line);
}
|
|
48
151
|
|
|
49
152
|
/**
 * Write an error line, prefixed with a HH:MM:SS.mmm timestamp, the worker tag
 * and "ERROR:", to console.error and to the output channel when one is set.
 */
private logError(message: string): void {
  // Characters 11..22 of the ISO string are the time-of-day part (HH:MM:SS.mmm).
  const clock = new Date().toISOString().slice(11, 23);
  const line = `[${clock}] [ScenarioWorker] ERROR: ${message}`;

  console.error(line);
  this.outputChannel?.appendLine(line);
}
|
|
162
|
+
|
|
163
|
+
/**
 * Take a PNG screenshot of the page and encode it as a data URL.
 *
 * @param page Object exposing `screenshot({ type })` whose result supports
 *   `toString('base64')` — presumably a Playwright Page; confirm against callers.
 * @returns A `data:image/png;base64,...` string, or `undefined` when the
 *   capture throws (the failure is logged, not rethrown).
 */
private async captureStepScreenshot(page: any): Promise<string | undefined> {
  let encoded: string;
  try {
    const pngBytes = await page.screenshot({ type: 'png' });
    encoded = pngBytes.toString('base64');
  } catch (error) {
    // Best effort: a missing screenshot must not fail the step.
    this.log(`Failed to capture screenshot: ${error}`);
    return undefined;
  }
  return `data:image/png;base64,${encoded}`;
}
|
|
177
|
+
|
|
178
|
+
/**
 * Publish a step progress update: await the reporter's `onStepProgress`
 * callback if present, emit a `stepProgress` event, then write a summary line
 * through the reporter's `log` if present.
 */
private async reportStepProgress(progress: StepProgress): Promise<void> {
  // Always awaited, even when no reporter is set, so the emit below is never synchronous.
  await this.progressReporter?.onStepProgress?.(progress);

  // Event listeners are still supported for backward compatibility.
  this.emit('stepProgress', progress);

  const reporter = this.progressReporter;
  if (reporter && reporter.log) {
    const severity = progress.status === StepExecutionStatus.FAILURE ? 'error' : 'log';
    reporter.log(
      `Step ${progress.stepNumber} [${progress.status}]: ${progress.description}`,
      severity
    );
  }
}
|
|
194
|
+
|
|
195
|
+
/**
 * Publish a job progress update: await the reporter's `onJobProgress`
 * callback if present, emit a `jobProgress` event, then write a summary line
 * through the reporter's `log` if present.
 */
private async reportJobProgress(progress: JobProgress): Promise<void> {
  // Always awaited, even when no reporter is set, so the emit below is never synchronous.
  await this.progressReporter?.onJobProgress?.(progress);

  // Event listeners are still supported for backward compatibility.
  this.emit('jobProgress', progress);

  const reporter = this.progressReporter;
  if (reporter && reporter.log) {
    reporter.log(`Job ${progress.jobId}: ${progress.status}`);
  }
}
|
|
208
|
+
|
|
209
|
+
/**
 * Heuristically decide whether a step description is "complex", i.e. likely to
 * benefit from proactive vision usage.
 *
 * A step counts as complex when its lower-cased description:
 *  - reads as a verification (verify / check / confirm / ensure),
 *  - describes form filling (fill + form/field, or enter + information),
 *  - clicks a menu, tab or link (the page may still be loading afterwards), or
 *  - bundles several actions (contains " and " or a comma).
 *
 * "check" and "tab" are matched as whole words (with simple inflections) so
 * that unrelated words such as "checkbox", "checkout", "table" or "database"
 * no longer trigger the heuristic.
 *
 * @param stepDescription Natural-language description of the step.
 * @returns `true` when any of the rules above matches, otherwise `false`.
 */
private isComplexStep(stepDescription: string): boolean {
  const description = stepDescription.toLowerCase();
  const mentionsAny = (...terms: string[]): boolean =>
    terms.some((term) => description.includes(term));

  // Verification steps - often need visual confirmation.
  const isVerification =
    mentionsAny('verify', 'confirm', 'ensure') ||
    /\bcheck(?:s|ed|ing)?\b/.test(description);
  if (isVerification) {
    return true;
  }

  // Form-related steps - multiple fields, complex interactions.
  if (description.includes('fill') && mentionsAny('form', 'field')) {
    return true;
  }
  if (description.includes('enter') && description.includes('information')) {
    return true;
  }

  // Clicks that typically trigger navigation or a view change.
  const clicksNavigationTarget =
    description.includes('click') &&
    (mentionsAny('menu', 'link') || /\btabs?\b/.test(description));
  if (clicksNavigationTarget) {
    return true;
  }

  // Multi-part actions, indicated by " and " or a comma.
  return description.includes(' and ') || description.includes(',');
}
|
|
55
246
|
|
|
56
247
|
async initialize(): Promise<void> {
|
|
57
248
|
try {
|
|
249
|
+
const RUNNER_CORE_VERSION = "v1.5.0-vision-preserve-values";
|
|
250
|
+
this.log('═══════════════════════════════════════════════════════');
|
|
251
|
+
this.log(`🚀 RUNNER-CORE VERSION: ${RUNNER_CORE_VERSION}`);
|
|
252
|
+
this.log('═══════════════════════════════════════════════════════');
|
|
58
253
|
this.log('Initializing Scenario worker...');
|
|
59
254
|
this.sessionId = `scenario_worker_${Date.now()}`;
|
|
60
255
|
this.initialized = true;
|
|
@@ -70,6 +265,14 @@ export class ScenarioWorker {
|
|
|
70
265
|
throw new Error('Scenario worker not initialized');
|
|
71
266
|
}
|
|
72
267
|
|
|
268
|
+
// Set current job ID for progress reporting
|
|
269
|
+
this.currentJobId = job.id;
|
|
270
|
+
|
|
271
|
+
// VERSION MARKER - increment this number with each significant change
|
|
272
|
+
const RUNNER_CORE_VERSION = "v1.5.0-vision-preserve-values";
|
|
273
|
+
this.log(`🚀 RUNNER-CORE VERSION: ${RUNNER_CORE_VERSION}`);
|
|
274
|
+
this.log(`📋 Processing scenario: ${job.scenario}`);
|
|
275
|
+
|
|
73
276
|
const startTime = Date.now();
|
|
74
277
|
const steps: ScenarioStep[] = [];
|
|
75
278
|
let generatedScript = '';
|
|
@@ -80,88 +283,381 @@ export class ScenarioWorker {
|
|
|
80
283
|
let overallSuccess = true;
|
|
81
284
|
|
|
82
285
|
try {
|
|
286
|
+
// Report job started
|
|
287
|
+
await this.reportJobProgress({
|
|
288
|
+
jobId: job.id,
|
|
289
|
+
status: 'started',
|
|
290
|
+
testName: job.testName
|
|
291
|
+
});
|
|
292
|
+
|
|
83
293
|
// 1. Break down scenario into steps using LLM
|
|
84
294
|
const scenarioSteps = await this.llmFacade.breakdownScenario(job.scenario, job.model);
|
|
85
295
|
steps.push(...scenarioSteps);
|
|
296
|
+
|
|
297
|
+
// Report total steps
|
|
298
|
+
await this.reportJobProgress({
|
|
299
|
+
jobId: job.id,
|
|
300
|
+
status: 'in_progress',
|
|
301
|
+
testName: job.testName,
|
|
302
|
+
totalSteps: steps.length,
|
|
303
|
+
currentStep: 0
|
|
304
|
+
});
|
|
305
|
+
|
|
306
|
+
// Emit log events for steps breakdown
|
|
307
|
+
this.emit('log', job.id, `\n## Steps Identified (${steps.length} total)\n`);
|
|
308
|
+
for (const step of steps) {
|
|
309
|
+
this.emit('log', job.id, `${step.stepNumber}. ${step.description}\n`);
|
|
310
|
+
}
|
|
311
|
+
this.emit('log', job.id, `\n## Execution Progress\n\n`);
|
|
86
312
|
|
|
87
313
|
// 2. Start a new browser session using centralized utility
|
|
88
314
|
// Default to headed mode (headless: false) for better debugging
|
|
89
|
-
|
|
315
|
+
// Create logger function from outputChannel for browser initialization
|
|
316
|
+
const logger = this.outputChannel ? (message: string, level?: 'log' | 'error' | 'warn') => {
|
|
317
|
+
this.outputChannel!.appendLine(`[Browser] ${message}`);
|
|
318
|
+
} : undefined;
|
|
319
|
+
const browserInstance = await initializeBrowser(job.playwrightConfig, false, undefined, logger);
|
|
90
320
|
browser = browserInstance.browser;
|
|
91
321
|
context = browserInstance.context;
|
|
92
322
|
page = browserInstance.page;
|
|
93
323
|
|
|
94
|
-
// Set reasonable timeout for
|
|
95
|
-
|
|
324
|
+
// Set reasonable timeout for most operations
|
|
325
|
+
// 5 seconds for element interactions (fast feedback on wrong selectors)
|
|
326
|
+
// Navigation operations should use explicit longer timeouts
|
|
327
|
+
page.setDefaultTimeout(5000);
|
|
96
328
|
|
|
97
329
|
let previousSteps: ScenarioStep[] = [];
|
|
98
330
|
let lastError: string | undefined;
|
|
331
|
+
let consecutiveFailures = 0;
|
|
332
|
+
const MAX_CONSECUTIVE_FAILURES = this.orchestratorConfig?.maxConsecutiveStepFailures || 3;
|
|
333
|
+
const CONTINUE_ON_FAILURE = this.orchestratorConfig?.continueOnStepFailure !== false; // Default true
|
|
99
334
|
|
|
100
|
-
//
|
|
101
|
-
|
|
335
|
+
// 3a. ORCHESTRATOR MODE - Use orchestrator agent for execution
|
|
336
|
+
if (this.useOrchestrator && this.orchestratorAgent) {
|
|
337
|
+
this.log(`🤖 Using Orchestrator Mode (continueOnFailure: ${CONTINUE_ON_FAILURE})`);
|
|
338
|
+
|
|
339
|
+
// Initialize journey memory
|
|
340
|
+
const memory: JourneyMemory = {
|
|
341
|
+
history: [],
|
|
342
|
+
experiences: [],
|
|
343
|
+
extractedData: {}
|
|
344
|
+
};
|
|
345
|
+
|
|
346
|
+
// Execute steps using orchestrator
|
|
347
|
+
for (let i = 0; i < steps.length; i++) {
|
|
348
|
+
// Only stop if consecutive failures exceed limit AND continueOnFailure is false
|
|
349
|
+
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES && !CONTINUE_ON_FAILURE) {
|
|
350
|
+
this.log(`🛑 Stopping execution: ${consecutiveFailures} consecutive failures`);
|
|
351
|
+
// Mark remaining steps as skipped
|
|
352
|
+
for (let j = i; j < steps.length; j++) {
|
|
353
|
+
steps[j].success = false;
|
|
354
|
+
steps[j].error = `Skipped due to ${consecutiveFailures} consecutive failures`;
|
|
355
|
+
steps[j].playwrightCommands = [];
|
|
356
|
+
}
|
|
357
|
+
overallSuccess = false;
|
|
358
|
+
break;
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
// Warn if approaching limit (even with continueOnFailure)
|
|
362
|
+
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES && CONTINUE_ON_FAILURE) {
|
|
363
|
+
this.log(`⚠️ ${consecutiveFailures} consecutive failures - continuing but scenario may have issues`);
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
const step = steps[i];
|
|
367
|
+
step.stepNumber = i + 1;
|
|
368
|
+
|
|
369
|
+
try {
|
|
370
|
+
// Use orchestrator to execute this step
|
|
371
|
+
const result = await this.orchestratorAgent.executeStep(
|
|
372
|
+
page,
|
|
373
|
+
step.description,
|
|
374
|
+
step.stepNumber,
|
|
375
|
+
steps.length,
|
|
376
|
+
steps.map(s => s.description),
|
|
377
|
+
memory,
|
|
378
|
+
job.id
|
|
379
|
+
);
|
|
380
|
+
|
|
381
|
+
// Update step with result
|
|
382
|
+
step.success = result.success;
|
|
383
|
+
step.playwrightCommands = result.commands;
|
|
384
|
+
step.error = result.error;
|
|
385
|
+
|
|
386
|
+
if (result.success) {
|
|
387
|
+
this.log(`✓ Step ${step.stepNumber} completed via orchestrator (${result.iterations} iterations)`);
|
|
388
|
+
consecutiveFailures = 0;
|
|
389
|
+
} else {
|
|
390
|
+
this.log(`✗ Step ${step.stepNumber} failed via orchestrator: ${result.terminationReason}`);
|
|
391
|
+
consecutiveFailures++;
|
|
392
|
+
overallSuccess = false;
|
|
393
|
+
|
|
394
|
+
// CRITICAL: Stop on agent_stuck or infeasible (explicit agent decision)
|
|
395
|
+
// continueOnStepFailure only applies to command failures, not agent decisions
|
|
396
|
+
if (result.terminationReason === 'agent_stuck' || result.terminationReason === 'infeasible') {
|
|
397
|
+
this.log(`🛑 Stopping: Agent declared step ${result.terminationReason} - cannot continue`);
|
|
398
|
+
// Mark remaining steps as skipped
|
|
399
|
+
for (let j = i + 1; j < steps.length; j++) {
|
|
400
|
+
steps[j].success = false;
|
|
401
|
+
steps[j].error = `Skipped: Previous step was ${result.terminationReason}`;
|
|
402
|
+
steps[j].playwrightCommands = [];
|
|
403
|
+
}
|
|
404
|
+
break; // Exit loop
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
} catch (error: any) {
|
|
409
|
+
this.logError(`Orchestrator execution failed for step ${step.stepNumber}: ${error.message}`);
|
|
410
|
+
step.success = false;
|
|
411
|
+
step.error = error.message;
|
|
412
|
+
consecutiveFailures++;
|
|
413
|
+
overallSuccess = false;
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
previousSteps.push(step);
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
} else {
|
|
420
|
+
// 3b. LEGACY MODE - Use existing retry loop
|
|
421
|
+
// Execute each step (steps may require multiple commands)
|
|
422
|
+
for (let i = 0; i < steps.length; i++) {
|
|
423
|
+
// Check if we should stop execution due to consecutive failures
|
|
424
|
+
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
|
425
|
+
this.log(`🛑 Stopping execution: ${consecutiveFailures} consecutive failures detected`);
|
|
426
|
+
this.log(` Remaining ${steps.length - i} steps will be skipped to avoid wasting resources`);
|
|
427
|
+
|
|
428
|
+
// Emit log events about early termination
|
|
429
|
+
this.emit('log', job.id, `\n🛑 EARLY TERMINATION\n`);
|
|
430
|
+
this.emit('log', job.id, `Reason: ${consecutiveFailures} consecutive step failures\n`);
|
|
431
|
+
this.emit('log', job.id, `Steps attempted: ${i}\n`);
|
|
432
|
+
this.emit('log', job.id, `Steps skipped: ${steps.length - i}\n\n`);
|
|
433
|
+
|
|
434
|
+
// Mark remaining steps as skipped
|
|
435
|
+
for (let j = i; j < steps.length; j++) {
|
|
436
|
+
const skippedStep = steps[j];
|
|
437
|
+
skippedStep.stepNumber = j + 1;
|
|
438
|
+
skippedStep.success = false;
|
|
439
|
+
skippedStep.error = `Skipped due to ${consecutiveFailures} consecutive failures in previous steps`;
|
|
440
|
+
skippedStep.playwrightCommands = [];
|
|
441
|
+
previousSteps.push(skippedStep);
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
overallSuccess = false;
|
|
445
|
+
break; // Exit the loop
|
|
446
|
+
}
|
|
102
447
|
const step = steps[i];
|
|
103
448
|
step.stepNumber = i + 1;
|
|
104
449
|
step.retryCount = 0;
|
|
450
|
+
step.subActions = [];
|
|
451
|
+
// Force new array and clear any previous command data
|
|
452
|
+
step.playwrightCommands = [];
|
|
453
|
+
step.playwrightCommand = undefined;
|
|
105
454
|
|
|
106
455
|
let stepSuccess = false;
|
|
107
|
-
let stepOutput = '';
|
|
108
456
|
let stepError: string | undefined;
|
|
457
|
+
let subActionCount = 0;
|
|
458
|
+
let stepComplete = false;
|
|
459
|
+
let totalFailedAttemptsForStep = 0; // Track FAILED attempts across all sub-actions
|
|
460
|
+
|
|
461
|
+
// Detect if this is a complex step that benefits from proactive vision
|
|
462
|
+
const isComplexStep = this.isComplexStep(step.description);
|
|
463
|
+
|
|
464
|
+
// A step might need multiple commands (sub-actions) to complete
|
|
465
|
+
while (!stepComplete && subActionCount < MAX_SUBACTIONS_PER_STEP && totalFailedAttemptsForStep < MAX_FAILED_ATTEMPTS_PER_STEP) {
|
|
466
|
+
let subActionSuccess = false;
|
|
467
|
+
let subActionCommand: string | undefined;
|
|
468
|
+
let subActionError: string | undefined;
|
|
469
|
+
let subActionRetries = 0;
|
|
470
|
+
let usedVisionMode = false;
|
|
471
|
+
|
|
472
|
+
// Build context about what's been done so far in this step
|
|
473
|
+
const stepContext = step.subActions && step.subActions.length > 0
|
|
474
|
+
? `\nSub-actions completed so far for this step:\n${step.subActions.map((sa, idx) => ` ${idx + 1}. ${sa.command} - ${sa.success ? 'SUCCESS' : 'FAILED'}`).join('\n')}`
|
|
475
|
+
: '';
|
|
476
|
+
|
|
477
|
+
for (let attempt = 0; attempt <= MAX_RETRIES_PER_STEP; attempt++) {
|
|
478
|
+
// Check if we've exceeded failed attempts budget BEFORE attempting
|
|
479
|
+
if (totalFailedAttemptsForStep >= MAX_FAILED_ATTEMPTS_PER_STEP) {
|
|
480
|
+
this.log(` ⚠️ Exceeded failed attempts budget (${MAX_FAILED_ATTEMPTS_PER_STEP}) for this step`);
|
|
481
|
+
stepComplete = true;
|
|
482
|
+
stepSuccess = false;
|
|
483
|
+
stepError = `Exceeded maximum failed attempts (${MAX_FAILED_ATTEMPTS_PER_STEP}) for step`;
|
|
484
|
+
break;
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
let currentAttemptCommand: string | undefined;
|
|
488
|
+
let currentAttemptSuccess = false;
|
|
489
|
+
let currentAttemptError: string | undefined;
|
|
490
|
+
const attemptTimestamp = Date.now();
|
|
491
|
+
|
|
492
|
+
try {
|
|
493
|
+
this.log(`Step ${step.stepNumber} - Sub-action ${subActionCount + 1}, Attempt ${attempt + 1}: ${step.description}`);
|
|
494
|
+
|
|
495
|
+
// Get current page state - handle navigation in progress
|
|
496
|
+
let domSnapshot;
|
|
497
|
+
let pageInfo;
|
|
498
|
+
try {
|
|
499
|
+
domSnapshot = {
|
|
500
|
+
url: page.url(),
|
|
501
|
+
title: await page.title(),
|
|
502
|
+
accessibilityTree: await page.accessibility.snapshot()
|
|
503
|
+
};
|
|
504
|
+
pageInfo = await getEnhancedPageInfo(domSnapshot);
|
|
505
|
+
} catch (contextError: any) {
|
|
506
|
+
// If execution context was destroyed (navigation in progress), wait and retry
|
|
507
|
+
if (contextError.message && contextError.message.includes('Execution context was destroyed')) {
|
|
508
|
+
this.log(` ⏳ Navigation in progress, waiting for page to load...`);
|
|
509
|
+
await page.waitForLoadState('domcontentloaded', { timeout: 5000 }).catch(() => {});
|
|
510
|
+
// Retry page state capture
|
|
511
|
+
domSnapshot = {
|
|
512
|
+
url: page.url(),
|
|
513
|
+
title: await page.title(),
|
|
514
|
+
accessibilityTree: await page.accessibility.snapshot()
|
|
515
|
+
};
|
|
516
|
+
pageInfo = await getEnhancedPageInfo(domSnapshot);
|
|
517
|
+
} else {
|
|
518
|
+
throw contextError; // Re-throw if it's not a navigation issue
|
|
519
|
+
}
|
|
520
|
+
}
|
|
109
521
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
522
|
+
// Vision trigger: Liberal usage since gpt-5-mini vision is cost-effective
|
|
523
|
+
const modelToUse = job.model || DEFAULT_MODEL;
|
|
524
|
+
let command: string | null;
|
|
525
|
+
|
|
526
|
+
// Enhanced logging for vision trigger logic
|
|
527
|
+
this.log(` 🔍 Vision trigger check: subAction=${subActionCount + 1}, attempt=${attempt}, totalFailed=${totalFailedAttemptsForStep}, usedVision=${usedVisionMode}`);
|
|
528
|
+
|
|
529
|
+
// Liberal vision strategy (gpt-5-mini is cost-effective):
|
|
530
|
+
// 1. After ANY failure (1+) → use vision
|
|
531
|
+
// 2. Complex steps → use vision from attempt 1
|
|
532
|
+
// 3. No LLM assessment gate → go directly to vision
|
|
533
|
+
const hasFailure = totalFailedAttemptsForStep >= 1 && lastError;
|
|
534
|
+
const shouldUseProactiveVision = isComplexStep && attempt === 0; // First attempt for complex steps
|
|
535
|
+
const shouldUseVision = (hasFailure || shouldUseProactiveVision) && !usedVisionMode;
|
|
536
|
+
|
|
537
|
+
if (shouldUseVision) {
|
|
538
|
+
if (shouldUseProactiveVision) {
|
|
539
|
+
this.log(` 🎯 PROACTIVE VISION: Complex step detected, using vision from first attempt`);
|
|
540
|
+
} else {
|
|
541
|
+
this.log(` 🎯 VISION TRIGGER: ${totalFailedAttemptsForStep} failure(s) detected, using vision (no LLM gate)`);
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
// Two-step supervisor pattern:
|
|
545
|
+
// 1. Supervisor analyzes screenshot and provides instructions
|
|
546
|
+
// 2. Worker generates command based on those instructions
|
|
547
|
+
|
|
548
|
+
this.log(` 📸 Taking screenshot for supervisor analysis...`);
|
|
549
|
+
|
|
550
|
+
// Capture optimized screenshot using utility method
|
|
551
|
+
const imageDataUrl = await captureOptimizedScreenshot(
|
|
552
|
+
page,
|
|
553
|
+
{ timeout: 10000 }, // Uses default quality 60
|
|
554
|
+
(msg) => this.log(msg)
|
|
555
|
+
);
|
|
556
|
+
|
|
557
|
+
this.log(` 👔 STEP 1: Supervisor analyzing screenshot (${VISION_MODEL})...`);
|
|
558
|
+
const supervisorDiagnostics = await this.llmFacade.getVisionDiagnostics(
|
|
559
|
+
step.description + stepContext,
|
|
560
|
+
pageInfo,
|
|
561
|
+
previousSteps,
|
|
562
|
+
lastError,
|
|
563
|
+
imageDataUrl,
|
|
564
|
+
VISION_MODEL
|
|
565
|
+
);
|
|
566
|
+
|
|
567
|
+
// DEBUG: Log vision diagnostics
|
|
568
|
+
this.log(` 📸 Visual insights: ${supervisorDiagnostics.visualAnalysis}`);
|
|
569
|
+
this.log(` 🔍 Root cause: ${supervisorDiagnostics.rootCause}`);
|
|
570
|
+
this.log(` 💡 Recommended approach: ${supervisorDiagnostics.recommendedApproach}`);
|
|
571
|
+
if (supervisorDiagnostics.elementsFound.length > 0) {
|
|
572
|
+
this.log(` ✅ Elements found: ${supervisorDiagnostics.elementsFound.join(', ')}`);
|
|
573
|
+
}
|
|
574
|
+
if (supervisorDiagnostics.elementsNotFound.length > 0) {
|
|
575
|
+
this.log(` ❌ Elements not found: ${supervisorDiagnostics.elementsNotFound.join(', ')}`);
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
this.log(` 🔨 STEP 2: Worker generating command from supervisor instructions (${DEFAULT_MODEL})...`);
|
|
579
|
+
command = await this.llmFacade.generateCommandFromSupervisorInstructions(
|
|
580
|
+
step.description + stepContext,
|
|
581
|
+
supervisorDiagnostics,
|
|
582
|
+
pageInfo,
|
|
583
|
+
modelToUse // Cheaper model for command generation
|
|
584
|
+
);
|
|
585
|
+
usedVisionMode = true;
|
|
586
|
+
} else {
|
|
587
|
+
// Not using vision - use regular DOM-based approach
|
|
588
|
+
if (usedVisionMode) {
|
|
589
|
+
this.log(` 📝 Vision already used - using DOM-based approach`);
|
|
590
|
+
} else if (isComplexStep) {
|
|
591
|
+
this.log(` 📝 Complex step, but first attempt - using DOM-based approach (vision on retry)`);
|
|
592
|
+
} else {
|
|
593
|
+
this.log(` 📝 Using DOM-based approach (${totalFailedAttemptsForStep} failures so far)`);
|
|
594
|
+
}
|
|
595
|
+
const stepDescriptionWithContext = step.description + stepContext;
|
|
596
|
+
command = await this.llmFacade.generatePlaywrightCommand(
|
|
597
|
+
stepDescriptionWithContext,
|
|
598
|
+
pageInfo,
|
|
599
|
+
previousSteps,
|
|
600
|
+
lastError,
|
|
601
|
+
step,
|
|
602
|
+
modelToUse
|
|
603
|
+
);
|
|
604
|
+
}
|
|
129
605
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
606
|
+
if (!command) {
|
|
607
|
+
throw new Error('LLM failed to generate a Playwright command.');
|
|
608
|
+
}
|
|
133
609
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
this.log(` Command: ${command}`);
|
|
610
|
+
currentAttemptCommand = command;
|
|
611
|
+
this.log(` Command: ${command}`);
|
|
137
612
|
|
|
138
|
-
|
|
139
|
-
|
|
613
|
+
// Execute the command
|
|
614
|
+
await this.executePlaywrightCommand(page, browser, context, command);
|
|
140
615
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
616
|
+
// Success
|
|
617
|
+
subActionSuccess = true;
|
|
618
|
+
currentAttemptSuccess = true;
|
|
619
|
+
subActionCommand = command;
|
|
620
|
+
step.playwrightCommands!.push(command);
|
|
621
|
+
this.log(` ✅ SUCCESS: ${command}${usedVisionMode ? ' (vision-aided)' : ''}`);
|
|
622
|
+
|
|
623
|
+
// Wait a bit for any navigation that might have been triggered
|
|
624
|
+
// This prevents "Execution context destroyed" errors when checking goal completion
|
|
625
|
+
await page.waitForLoadState('domcontentloaded', { timeout: 3000 }).catch(() => {
|
|
626
|
+
// Ignore timeout - page might not be navigating
|
|
627
|
+
});
|
|
628
|
+
|
|
629
|
+
break; // Sub-action successful, check if step is complete
|
|
147
630
|
} catch (error: any) {
|
|
148
|
-
|
|
149
|
-
currentAttemptError =
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
631
|
+
subActionError = error instanceof Error ? error.message : String(error);
|
|
632
|
+
currentAttemptError = subActionError;
|
|
633
|
+
|
|
634
|
+
// Get current URL for context (especially useful for navigation failures)
|
|
635
|
+
let currentUrl = 'unknown';
|
|
636
|
+
try {
|
|
637
|
+
currentUrl = page.url();
|
|
638
|
+
} catch (e) {
|
|
639
|
+
// Ignore if we can't get URL
|
|
640
|
+
}
|
|
641
|
+
|
|
642
|
+
// Enhanced error message with current URL
|
|
643
|
+
const errorWithContext = `${subActionError} | Current URL: ${currentUrl}`;
|
|
644
|
+
|
|
645
|
+
this.logError(` ❌ FAILED (attempt ${attempt + 1}): ${subActionError}`);
|
|
646
|
+
this.logError(` Current URL: ${currentUrl}`);
|
|
647
|
+
this.logError(` Command attempted: ${currentAttemptCommand || 'N/A'}`);
|
|
648
|
+
subActionRetries++;
|
|
649
|
+
totalFailedAttemptsForStep++; // Increment failed attempts counter
|
|
153
650
|
|
|
154
651
|
// Only update lastError if this is the final attempt
|
|
155
652
|
if (attempt === MAX_RETRIES_PER_STEP) {
|
|
156
|
-
lastError =
|
|
653
|
+
lastError = errorWithContext; // Include URL in error context for LLM
|
|
157
654
|
}
|
|
158
655
|
|
|
159
|
-
// If this is the last attempt, mark as failed
|
|
656
|
+
// If this is the last attempt, mark sub-action as failed
|
|
160
657
|
if (attempt === MAX_RETRIES_PER_STEP) {
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
console.error(` 🚫 STEP FAILED after ${MAX_RETRIES_PER_STEP + 1} attempts`);
|
|
658
|
+
subActionSuccess = false;
|
|
659
|
+
subActionCommand = currentAttemptCommand;
|
|
660
|
+
this.logError(` 🚫 SUB-ACTION FAILED after ${MAX_RETRIES_PER_STEP + 1} attempts (including vision mode if used)`);
|
|
165
661
|
break; // Exit retry loop
|
|
166
662
|
}
|
|
167
663
|
} finally {
|
|
@@ -178,10 +674,158 @@ export class ScenarioWorker {
|
|
|
178
674
|
}
|
|
179
675
|
}
|
|
180
676
|
|
|
677
|
+
// Record the sub-action
|
|
678
|
+
if (subActionCommand) {
|
|
679
|
+
step.subActions!.push({
|
|
680
|
+
command: subActionCommand,
|
|
681
|
+
success: subActionSuccess,
|
|
682
|
+
error: subActionError,
|
|
683
|
+
retryCount: subActionRetries
|
|
684
|
+
});
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
subActionCount++;
|
|
688
|
+
|
|
689
|
+
// Determine if step (goal) is complete
|
|
690
|
+
if (subActionSuccess) {
|
|
691
|
+
// After each successful sub-action, ask LLM if goal is complete
|
|
692
|
+
if (subActionCount >= MAX_SUBACTIONS_PER_STEP) {
|
|
693
|
+
// Safety limit - avoid infinite loops
|
|
694
|
+
stepComplete = true;
|
|
695
|
+
stepSuccess = true;
|
|
696
|
+
this.log(` ⚠️ Reached max sub-actions limit (${MAX_SUBACTIONS_PER_STEP}) with ${totalFailedAttemptsForStep} failed attempts, considering step complete`);
|
|
697
|
+
} else {
|
|
698
|
+
// Ask LLM if goal is complete
|
|
699
|
+
try {
|
|
700
|
+
// Capture page state - if navigation is still happening, retry once
|
|
701
|
+
let domSnapshot;
|
|
702
|
+
let pageInfo;
|
|
703
|
+
try {
|
|
704
|
+
domSnapshot = {
|
|
705
|
+
url: page.url(),
|
|
706
|
+
title: await page.title(),
|
|
707
|
+
accessibilityTree: await page.accessibility.snapshot()
|
|
708
|
+
};
|
|
709
|
+
pageInfo = await getEnhancedPageInfo(domSnapshot);
|
|
710
|
+
} catch (contextError: any) {
|
|
711
|
+
// If execution context was destroyed (navigation in progress), wait and retry
|
|
712
|
+
if (contextError.message && contextError.message.includes('Execution context was destroyed')) {
|
|
713
|
+
this.log(` ⏳ Navigation detected, waiting for page to load...`);
|
|
714
|
+
await page.waitForLoadState('domcontentloaded', { timeout: 5000 }).catch(() => {});
|
|
715
|
+
// Retry page state capture
|
|
716
|
+
domSnapshot = {
|
|
717
|
+
url: page.url(),
|
|
718
|
+
title: await page.title(),
|
|
719
|
+
accessibilityTree: await page.accessibility.snapshot()
|
|
720
|
+
};
|
|
721
|
+
pageInfo = await getEnhancedPageInfo(domSnapshot);
|
|
722
|
+
} else {
|
|
723
|
+
throw contextError; // Re-throw if it's not a navigation issue
|
|
724
|
+
}
|
|
725
|
+
}
|
|
726
|
+
|
|
727
|
+
// Vision-backed goal completion for complex/verification steps
|
|
728
|
+
const shouldUseVisionForCompletion = isComplexStep && subActionCount >= 1; // At least one action done
|
|
729
|
+
let completionCheck;
|
|
730
|
+
|
|
731
|
+
if (shouldUseVisionForCompletion) {
|
|
732
|
+
this.log(` 🎯 Vision-backed goal completion check (complex step)`);
|
|
733
|
+
|
|
734
|
+
// Capture screenshot for visual verification
|
|
735
|
+
const imageDataUrl = await captureOptimizedScreenshot(
|
|
736
|
+
page,
|
|
737
|
+
{ timeout: 10000 },
|
|
738
|
+
(msg) => this.log(msg)
|
|
739
|
+
);
|
|
740
|
+
|
|
741
|
+
// Use vision model to check goal completion with visual context
|
|
742
|
+
completionCheck = await this.llmFacade.checkGoalCompletionWithVision(
|
|
743
|
+
step.description,
|
|
744
|
+
step.playwrightCommands || [],
|
|
745
|
+
pageInfo,
|
|
746
|
+
imageDataUrl,
|
|
747
|
+
VISION_MODEL
|
|
748
|
+
);
|
|
749
|
+
} else {
|
|
750
|
+
// Regular DOM-based goal completion check
|
|
751
|
+
completionCheck = await this.llmFacade.checkGoalCompletion(
|
|
752
|
+
step.description,
|
|
753
|
+
step.playwrightCommands || [],
|
|
754
|
+
pageInfo,
|
|
755
|
+
job.model || DEFAULT_MODEL
|
|
756
|
+
);
|
|
757
|
+
}
|
|
758
|
+
|
|
759
|
+
this.log(` 🎯 Goal completion check: ${completionCheck.isComplete ? 'COMPLETE' : 'INCOMPLETE'} - ${completionCheck.reason}`);
|
|
760
|
+
|
|
761
|
+
if (completionCheck.isComplete) {
|
|
762
|
+
stepComplete = true;
|
|
763
|
+
stepSuccess = true;
|
|
764
|
+
} else {
|
|
765
|
+
// Continue with next sub-action
|
|
766
|
+
if (completionCheck.nextSubGoal) {
|
|
767
|
+
this.log(` 📍 Next sub-goal: ${completionCheck.nextSubGoal}`);
|
|
768
|
+
}
|
|
769
|
+
// Continue looping to generate next command
|
|
770
|
+
}
|
|
771
|
+
} catch (error) {
|
|
772
|
+
this.logError(`Error checking goal completion: ${error}`);
|
|
773
|
+
// Fallback: consider complete after 1 successful sub-action if we can't check
|
|
774
|
+
stepComplete = true;
|
|
775
|
+
stepSuccess = true;
|
|
776
|
+
}
|
|
777
|
+
}
|
|
778
|
+
} else {
|
|
779
|
+
// Sub-action failed
|
|
780
|
+
stepComplete = true; // Move on after failure
|
|
781
|
+
stepSuccess = false;
|
|
782
|
+
stepError = subActionError;
|
|
783
|
+
overallSuccess = false;
|
|
784
|
+
}
|
|
785
|
+
}
|
|
786
|
+
|
|
787
|
+
// Set the step's final command (last successful or aggregate)
|
|
788
|
+
if (step.playwrightCommands && step.playwrightCommands.length > 0) {
|
|
789
|
+
step.playwrightCommand = step.playwrightCommands[step.playwrightCommands.length - 1];
|
|
790
|
+
}
|
|
791
|
+
|
|
181
792
|
step.success = stepSuccess;
|
|
182
793
|
step.error = stepError;
|
|
183
794
|
previousSteps.push(step);
|
|
795
|
+
|
|
796
|
+
// Update consecutive failure counter
|
|
797
|
+
if (stepSuccess) {
|
|
798
|
+
consecutiveFailures = 0; // Reset on success
|
|
799
|
+
} else {
|
|
800
|
+
consecutiveFailures++;
|
|
801
|
+
this.log(`⚠️ Consecutive failures: ${consecutiveFailures}/${MAX_CONSECUTIVE_FAILURES}`);
|
|
802
|
+
}
|
|
803
|
+
|
|
804
|
+
// Emit step result log events
|
|
805
|
+
this.emit('log', job.id, `### Step ${step.stepNumber}: ${step.description}\n`);
|
|
806
|
+
this.emit('log', job.id, `Status: ${stepSuccess ? '✅ SUCCESS' : '❌ FAILED'}\n`);
|
|
807
|
+
this.emit('log', job.id, `Sub-actions: ${subActionCount}\n`);
|
|
808
|
+
this.emit('log', job.id, `Failed attempts: ${totalFailedAttemptsForStep}\n`);
|
|
809
|
+
|
|
810
|
+
if (step.playwrightCommands && step.playwrightCommands.length > 0) {
|
|
811
|
+
this.emit('log', job.id, `Commands:\n`);
|
|
812
|
+
step.playwrightCommands.forEach((cmd, idx) => {
|
|
813
|
+
this.emit('log', job.id, ` ${idx + 1}. ${cmd}\n`);
|
|
814
|
+
});
|
|
815
|
+
}
|
|
816
|
+
|
|
817
|
+
if (stepError) {
|
|
818
|
+
this.emit('log', job.id, `Error: ${stepError}\n`);
|
|
819
|
+
}
|
|
820
|
+
|
|
821
|
+
if (step.attempts && step.attempts.length > 0) {
|
|
822
|
+
this.emit('log', job.id, `Total attempts: ${step.attempts.length}\n`);
|
|
823
|
+
}
|
|
824
|
+
|
|
825
|
+
this.emit('log', job.id, `\n`);
|
|
184
826
|
}
|
|
827
|
+
|
|
828
|
+
} // End of else block (legacy mode)
|
|
185
829
|
|
|
186
830
|
// Generate test name if not provided
|
|
187
831
|
const testName = job.testName || await this.llmFacade.generateTestName(job.scenario, job.model);
|
|
@@ -216,6 +860,27 @@ export class ScenarioWorker {
|
|
|
216
860
|
// Generate clean script with TestChimp comment and code
|
|
217
861
|
generatedScript = generateTestScript(testName, steps, undefined, hashtags);
|
|
218
862
|
|
|
863
|
+
// Perform final cleanup pass to remove redundancies and make minor adjustments
|
|
864
|
+
this.log(`[ScenarioWorker] Performing final script cleanup...`);
|
|
865
|
+
try {
|
|
866
|
+
const cleanupResult = await this.llmFacade.cleanupScript(generatedScript, job.model);
|
|
867
|
+
|
|
868
|
+
if (cleanupResult.changes && cleanupResult.changes.length > 0) {
|
|
869
|
+
this.log(`[ScenarioWorker] Cleanup made ${cleanupResult.changes.length} improvement(s):`);
|
|
870
|
+
cleanupResult.changes.forEach((change, i) => {
|
|
871
|
+
this.log(`[ScenarioWorker] ${i + 1}. ${change}`);
|
|
872
|
+
});
|
|
873
|
+
generatedScript = cleanupResult.script;
|
|
874
|
+
} else if (cleanupResult.skipped) {
|
|
875
|
+
this.log(`[ScenarioWorker] Cleanup skipped: ${cleanupResult.skipped}`);
|
|
876
|
+
} else {
|
|
877
|
+
this.log(`[ScenarioWorker] Cleanup completed - no changes needed`);
|
|
878
|
+
}
|
|
879
|
+
} catch (error: any) {
|
|
880
|
+
this.log(`[ScenarioWorker] Cleanup failed, using original script: ${error.message}`);
|
|
881
|
+
// Continue with original script on error
|
|
882
|
+
}
|
|
883
|
+
|
|
219
884
|
// Generate detailed execution log
|
|
220
885
|
const logLines: string[] = [];
|
|
221
886
|
logLines.push(`# Scenario Execution Log`);
|
|
@@ -225,6 +890,14 @@ export class ScenarioWorker {
|
|
|
225
890
|
logLines.push(`End Time: ${new Date().toISOString()}`);
|
|
226
891
|
logLines.push(`Total Execution Time: ${Date.now() - startTime}ms`);
|
|
227
892
|
logLines.push(`Overall Success: ${overallSuccess ? 'YES' : 'NO'}`);
|
|
893
|
+
|
|
894
|
+
// Add early termination info if applicable
|
|
895
|
+
const skippedSteps = steps.filter(s => s.error?.includes('Skipped due to'));
|
|
896
|
+
if (skippedSteps.length > 0) {
|
|
897
|
+
logLines.push(`Early Termination: YES (${consecutiveFailures} consecutive failures)`);
|
|
898
|
+
logLines.push(`Steps Skipped: ${skippedSteps.length}`);
|
|
899
|
+
}
|
|
900
|
+
|
|
228
901
|
logLines.push(``);
|
|
229
902
|
|
|
230
903
|
for (const step of steps) {
|
|
@@ -270,7 +943,7 @@ export class ScenarioWorker {
|
|
|
270
943
|
|
|
271
944
|
} catch (error: any) {
|
|
272
945
|
overallSuccess = false;
|
|
273
|
-
|
|
946
|
+
this.logError(`Overall scenario processing error: ${error}`);
|
|
274
947
|
return {
|
|
275
948
|
success: false,
|
|
276
949
|
steps,
|
|
@@ -299,19 +972,14 @@ export class ScenarioWorker {
|
|
|
299
972
|
context: any,
|
|
300
973
|
command: string
|
|
301
974
|
): Promise<void> {
|
|
302
|
-
//
|
|
303
|
-
page.setDefaultTimeout(5000);
|
|
975
|
+
// Keep default timeout for element operations
|
|
976
|
+
page.setDefaultTimeout(5000);
|
|
304
977
|
|
|
305
978
|
try {
|
|
306
979
|
// Execute command directly without validation
|
|
307
980
|
const commandFunction = new Function('page', 'browser', 'context', 'expect', `
|
|
308
981
|
return (async () => {
|
|
309
|
-
|
|
310
|
-
${command}
|
|
311
|
-
} catch (error) {
|
|
312
|
-
console.error('Command execution error:', error);
|
|
313
|
-
throw error;
|
|
314
|
-
}
|
|
982
|
+
${command}
|
|
315
983
|
})();
|
|
316
984
|
`);
|
|
317
985
|
|
|
@@ -320,8 +988,8 @@ export class ScenarioWorker {
|
|
|
320
988
|
await commandFunction(page, browser, context, expect);
|
|
321
989
|
|
|
322
990
|
} finally {
|
|
323
|
-
//
|
|
324
|
-
page.setDefaultTimeout(
|
|
991
|
+
// Ensure timeout is consistent
|
|
992
|
+
page.setDefaultTimeout(5000);
|
|
325
993
|
}
|
|
326
994
|
}
|
|
327
995
|
|