testchimp-runner-core 0.0.35 → 0.0.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -1
- package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
- package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
- package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
- package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
- package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
- package/plandocs/INTEGRATION_COMPLETE.md +0 -322
- package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
- package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
- package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
- package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
- package/plandocs/PHASE_1_COMPLETE.md +0 -165
- package/plandocs/PHASE_1_SUMMARY.md +0 -184
- package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
- package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
- package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
- package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
- package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
- package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
- package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
- package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
- package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
- package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
- package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
- package/plandocs/exploratory-mode-support-v2.plan.md +0 -953
- package/plandocs/exploratory-mode-support.plan.md +0 -928
- package/plandocs/journey-id-tracking-addendum.md +0 -227
- package/releasenotes/RELEASE_0.0.26.md +0 -165
- package/releasenotes/RELEASE_0.0.27.md +0 -236
- package/releasenotes/RELEASE_0.0.28.md +0 -286
- package/src/auth-config.ts +0 -84
- package/src/credit-usage-service.ts +0 -188
- package/src/env-loader.ts +0 -103
- package/src/execution-service.ts +0 -996
- package/src/file-handler.ts +0 -104
- package/src/index.ts +0 -432
- package/src/llm-facade.ts +0 -821
- package/src/llm-provider.ts +0 -53
- package/src/model-constants.ts +0 -35
- package/src/orchestrator/decision-parser.ts +0 -139
- package/src/orchestrator/index.ts +0 -58
- package/src/orchestrator/orchestrator-agent.ts +0 -1282
- package/src/orchestrator/orchestrator-prompts.ts +0 -786
- package/src/orchestrator/page-som-handler.ts +0 -1565
- package/src/orchestrator/som-types.ts +0 -188
- package/src/orchestrator/tool-registry.ts +0 -184
- package/src/orchestrator/tools/check-page-ready.ts +0 -75
- package/src/orchestrator/tools/extract-data.ts +0 -92
- package/src/orchestrator/tools/index.ts +0 -15
- package/src/orchestrator/tools/inspect-page.ts +0 -42
- package/src/orchestrator/tools/recall-history.ts +0 -72
- package/src/orchestrator/tools/refresh-som-markers.ts +0 -69
- package/src/orchestrator/tools/take-screenshot.ts +0 -128
- package/src/orchestrator/tools/verify-action-result.ts +0 -159
- package/src/orchestrator/tools/view-previous-screenshot.ts +0 -103
- package/src/orchestrator/types.ts +0 -291
- package/src/playwright-mcp-service.ts +0 -224
- package/src/progress-reporter.ts +0 -144
- package/src/prompts.ts +0 -842
- package/src/providers/backend-proxy-llm-provider.ts +0 -91
- package/src/providers/local-llm-provider.ts +0 -38
- package/src/scenario-service.ts +0 -252
- package/src/scenario-worker-class.ts +0 -1110
- package/src/script-utils.ts +0 -203
- package/src/types.ts +0 -239
- package/src/utils/browser-utils.ts +0 -348
- package/src/utils/coordinate-converter.ts +0 -162
- package/src/utils/page-info-retry.ts +0 -65
- package/src/utils/page-info-utils.ts +0 -285
- package/testchimp-runner-core-0.0.35.tgz +0 -0
- package/tsconfig.json +0 -19
package/src/execution-service.ts
DELETED
|
@@ -1,996 +0,0 @@
|
|
|
1
|
-
import { PlaywrightMCPService as PlaywrightService } from './playwright-mcp-service';
|
|
2
|
-
import {
|
|
3
|
-
PlaywrightExecutionRequest,
|
|
4
|
-
PlaywrightExecutionResponse,
|
|
5
|
-
ScriptResult,
|
|
6
|
-
ScriptExecutionRequest,
|
|
7
|
-
ScriptExecutionResponse,
|
|
8
|
-
ScriptStep,
|
|
9
|
-
ExecutionMode,
|
|
10
|
-
StepOperation,
|
|
11
|
-
StepRepairAction
|
|
12
|
-
} from './types';
|
|
13
|
-
import { RepairSuggestionResponse, RepairConfidenceResponse } from './llm-facade';
|
|
14
|
-
import { getEnhancedPageInfo, PageInfo } from './utils/page-info-utils';
|
|
15
|
-
import { initializeBrowser, captureOptimizedScreenshot } from './utils/browser-utils';
|
|
16
|
-
import { LLMFacade } from './llm-facade';
|
|
17
|
-
import { AuthConfig } from './auth-config';
|
|
18
|
-
import { addTestChimpComment } from './script-utils';
|
|
19
|
-
import { CreditUsageService } from './credit-usage-service';
|
|
20
|
-
import { DEFAULT_MODEL, VISION_MODEL } from './model-constants';
|
|
21
|
-
import { LLMProvider } from './llm-provider';
|
|
22
|
-
import { ProgressReporter } from './progress-reporter';
|
|
23
|
-
import { BackendProxyLLMProvider } from './providers/backend-proxy-llm-provider';
|
|
24
|
-
import { OrchestratorAgent, ToolRegistry, DEFAULT_AGENT_CONFIG } from './orchestrator';
|
|
25
|
-
import type { AgentConfig, JourneyMemory } from './orchestrator';
|
|
26
|
-
|
|
27
|
-
/**
|
|
28
|
-
* Service for orchestrating Playwright script execution
|
|
29
|
-
*/
|
|
30
|
-
export class ExecutionService {
|
|
31
|
-
private playwrightService: PlaywrightService;
|
|
32
|
-
private llmFacade: LLMFacade;
|
|
33
|
-
private llmProvider: LLMProvider;
|
|
34
|
-
private progressReporter?: ProgressReporter;
|
|
35
|
-
private creditUsageService: CreditUsageService;
|
|
36
|
-
private maxConcurrentExecutions: number;
|
|
37
|
-
private activeExecutions: Set<Promise<any>> = new Set();
|
|
38
|
-
private logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
|
|
39
|
-
private orchestratorAgent: OrchestratorAgent;
|
|
40
|
-
|
|
41
|
-
/**
 * Builds the service together with all of its collaborators.
 *
 * @param authConfig Passed to the default backend-proxy LLM provider and to the credit usage service.
 * @param backendUrl Passed to the same two collaborators.
 * @param maxConcurrentExecutions Cap on concurrently running `executeScript` calls; defaults to 10.
 * @param llmProvider Provider to use instead of the default `BackendProxyLLMProvider`.
 * @param progressReporter Reporter kept on the service and handed to the orchestrator agent.
 */
constructor(
  authConfig?: AuthConfig,
  backendUrl?: string,
  maxConcurrentExecutions: number = 10,
  llmProvider?: LLMProvider,
  progressReporter?: ProgressReporter
) {
  this.playwrightService = new PlaywrightService();

  // Use provided LLM provider or default to backend proxy (backward compatible)
  this.llmProvider = llmProvider || new BackendProxyLLMProvider(authConfig, backendUrl);
  this.llmFacade = new LLMFacade(this.llmProvider);

  this.progressReporter = progressReporter;
  this.creditUsageService = new CreditUsageService(authConfig, backendUrl);
  this.maxConcurrentExecutions = maxConcurrentExecutions;

  // Initialize orchestrator for repair mode (reuses all SoM infrastructure)
  const toolRegistry = new ToolRegistry();
  const repairConfig: Partial<AgentConfig> = {
    useSoM: true,
    somRestrictCoordinates: true // Prefer SoM markers for repairs
  };

  this.orchestratorAgent = new OrchestratorAgent(
    this.llmFacade,
    toolRegistry,
    repairConfig,
    progressReporter,
    // Forward the severity as well as the text; anything that is not an
    // explicit 'error' or 'warn' is logged at the default 'log' level.
    (msg, level) => this.log(msg, level === 'error' || level === 'warn' ? level : 'log')
  );
}
|
|
73
|
-
|
|
74
|
-
/**
 * Registers the callback that receives the messages this service logs.
 *
 * @param logger Sink invoked with the message text and an optional severity.
 */
setLogger(logger: (message: string, level?: 'log' | 'error' | 'warn') => void): void {
  this.logger = logger;
}
|
|
80
|
-
|
|
81
|
-
/**
 * Forwards a message to the registered logger, if there is one.
 *
 * @param message Text to emit.
 * @param level Severity, 'log' when omitted.
 */
private log(message: string, level: 'log' | 'error' | 'warn' = 'log'): void {
  // Deliberately no console fallback: without a logger the message is dropped.
  this.logger?.(message, level);
}
|
|
90
|
-
|
|
91
|
-
/**
 * Prepares the service for use by initializing the underlying Playwright service.
 */
async initialize(): Promise<void> {
  await this.playwrightService.initialize();
}
|
|
97
|
-
|
|
98
|
-
/**
 * Swaps in new credentials.
 *
 * A fresh `BackendProxyLLMProvider` and `LLMFacade` replace the current ones
 * and the credit usage service is given the new auth config.
 *
 * NOTE(review): the provider is rebuilt with an `undefined` backend URL and
 * replaces whatever provider the constructor was given; the orchestrator agent
 * created in the constructor is not rebuilt here - confirm this is intended.
 *
 * @param authConfig Credentials to use from now on.
 */
setAuthConfig(authConfig: AuthConfig): void {
  const provider = new BackendProxyLLMProvider(authConfig, undefined);
  this.llmProvider = provider;
  this.llmFacade = new LLMFacade(provider);
  this.creditUsageService.setAuthConfig(authConfig);
}
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
/**
 * Execute a script with optional AI repair capabilities.
 *
 * Calls are throttled: while the number of in-flight executions has reached
 * `maxConcurrentExecutions`, the call waits for one of them to settle before
 * starting its own run.
 *
 * @param request The script execution request.
 * @returns The response produced by the internal execution.
 */
async executeScript(request: ScriptExecutionRequest): Promise<ScriptExecutionResponse> {
  // `Promise.race` over an empty collection never settles, so only wait while
  // something is actually in flight (otherwise a limit <= 0 would hang forever).
  while (
    this.activeExecutions.size > 0 &&
    this.activeExecutions.size >= this.maxConcurrentExecutions
  ) {
    try {
      await Promise.race(this.activeExecutions);
    } catch {
      // A sibling execution failing still frees its slot; its error belongs to
      // the caller that started it, not to this waiting call.
    }
  }

  // Track the execution for as long as it is running
  const executionPromise = this.executeScriptInternal(request);
  this.activeExecutions.add(executionPromise);

  try {
    return await executionPromise;
  } finally {
    this.activeExecutions.delete(executionPromise);
  }
}
|
|
130
|
-
|
|
131
|
-
/**
 * Dispatches a request to the matching execution strategy.
 *
 * `ExecutionMode.RUN_EXACTLY` runs the script verbatim; every other mode goes
 * through AI repair using `request.model` or `DEFAULT_MODEL`. Any error thrown
 * by either strategy is converted into a failed response instead of propagating.
 *
 * @param request The script execution request.
 * @returns The strategy's response, or a failed response carrying the error message.
 */
private async executeScriptInternal(request: ScriptExecutionRequest): Promise<ScriptExecutionResponse> {
  const startedAt = Date.now();
  const selectedModel = request.model || DEFAULT_MODEL;

  try {
    const runVerbatim = request.mode === ExecutionMode.RUN_EXACTLY;
    return runVerbatim
      ? await this.runExactly(request, startedAt)
      : await this.runWithAIRepair(request, startedAt, selectedModel);
  } catch (failure) {
    const elapsed = Date.now() - startedAt;
    const message = failure instanceof Error ? failure.message : 'Unknown error';
    return { runStatus: 'failed', executionTime: elapsed, error: message };
  }
}
|
|
152
|
-
|
|
153
|
-
/**
 * Runs prescript, script and postscript as one Playwright job.
 *
 * The configuration string is parsed first, then the whole job is handed to
 * the Playwright service. Errors never escape: they are turned into an
 * unsuccessful response with an empty script result.
 *
 * @param request Scripts and Playwright configuration for the job.
 * @returns The job outcome, or a failure response carrying the error message.
 */
async executeTestSuite(request: PlaywrightExecutionRequest): Promise<PlaywrightExecutionResponse> {
  try {
    const launchConfig = this.parsePlaywrightConfig(request.playwrightConfig);

    // The three scripts are executed as one unit by the Playwright service
    const outcome = await this.playwrightService.executeJob(
      request.prescript,
      request.script,
      request.postscript,
      launchConfig
    );

    return {
      success: outcome.success,
      results: outcome.results,
      executionTime: outcome.executionTime,
      error: outcome.error
    };
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : 'Unknown error occurred';
    const emptyScriptResult = { success: false, output: '', error: '', executionTime: 0 };
    return {
      success: false,
      results: { script: emptyScriptResult },
      executionTime: 0,
      error: message
    };
  }
}
|
|
187
|
-
|
|
188
|
-
/**
 * Turns a Playwright configuration string into launch settings.
 *
 * Three strategies are tried in order:
 *  1. parse the string as JSON and read `browserType`, `headless`, `viewport`, `options`;
 *  2. otherwise pull `headless`, `viewport` and `browserType` out of JavaScript-like text with regexes;
 *  3. otherwise return the built-in defaults.
 *
 * NOTE(review): the `headless` default differs per path - `false` for JSON and
 * for the final defaults, `true` for the regex path. Confirm whether that is intended.
 *
 * @param configString JSON or JavaScript-like configuration text.
 * @returns An object with `browserType`, `headless`, `viewport` and `options`.
 */
private parsePlaywrightConfig(configString: string): any {
  try {
    // Strategy 1: JSON
    const parsed = JSON.parse(configString);
    const browserType = parsed.browserType || 'chromium';
    const headless = parsed.headless === true;
    const viewport = parsed.viewport || { width: 1280, height: 720 };
    const options = parsed.options || {};
    return { browserType, headless, viewport, options };
  } catch {
    try {
      // Strategy 2: regex extraction of the common config patterns
      const headlessHit = configString.match(/headless:\s*(true|false)/);
      const viewportHit = configString.match(/viewport:\s*\{\s*width:\s*(\d+),\s*height:\s*(\d+)\s*\}/);
      const browserHit = configString.match(/browserType:\s*['"`](chromium|firefox|webkit)['"`]/);

      let browserType = 'chromium';
      if (browserHit) {
        browserType = browserHit[1];
      }

      let headless = true;
      if (headlessHit) {
        headless = headlessHit[1] === 'true';
      }

      let viewport = { width: 1280, height: 720 };
      if (viewportHit) {
        // The captures are digit-only, so base 10 is what the original parse yields
        viewport = { width: parseInt(viewportHit[1], 10), height: parseInt(viewportHit[2], 10) };
      }

      return { browserType, headless, viewport, options: {} };
    } catch {
      // Strategy 3: nothing usable, fall back to defaults
      return {
        browserType: 'chromium',
        headless: false,
        viewport: { width: 1280, height: 720 },
        options: {}
      };
    }
  }
}
|
|
228
|
-
|
|
229
|
-
/**
 * Shuts the service down by closing the underlying Playwright service.
 */
async close(): Promise<void> {
  await this.playwrightService.close();
}
|
|
235
|
-
|
|
236
|
-
/**
 * Reports readiness, as stated by the underlying Playwright service.
 *
 * @returns Whatever `playwrightService.isReady()` returns.
 */
isReady(): boolean {
  return this.playwrightService.isReady();
}
|
|
242
|
-
|
|
243
|
-
/**
 * Runs the script verbatim, optionally retrying ("deflaking") on failure.
 *
 * With a caller-provided browser/context/page the script runs exactly once and
 * the browser is left open, because the caller owns it. Otherwise a fresh
 * browser is launched per attempt and always closed afterwards; a failure while
 * closing is logged and never changes the verdict of the run.
 *
 * @param request Execution request; `script` is mandatory.
 * @param startTime Epoch millis used to compute `executionTime`.
 * @param model Accepted for signature compatibility; not used by this method.
 * @returns 'success' as soon as one attempt passes, otherwise 'failed' with the last error message.
 * @throws Error when `request.script` is missing.
 */
private async runExactly(request: ScriptExecutionRequest, startTime: number, model?: string): Promise<ScriptExecutionResponse> {
  // deflakeRunCount: number of retries after the original run (0 = single attempt).
  // Negative values are treated as 0 so the script is always run at least once.
  const deflakeRunCount = Math.max(0, request.deflakeRunCount ?? 0);
  const totalAttempts = deflakeRunCount + 1; // Original run + deflake attempts
  let lastError: Error | null = null;

  this.log(`runExactly: deflakeRunCount = ${deflakeRunCount}, totalAttempts = ${totalAttempts}`);

  // Script content should be provided by the caller (TestChimpService)
  // The TestChimpService handles file reading through the appropriate FileHandler
  if (!request.script) {
    throw new Error('Script content is required for execution. The TestChimpService should read the file and provide script content.');
  }

  // Check if we should use existing browser or create new one
  const useExistingBrowser = !!(request.existingBrowser && request.existingContext && request.existingPage);

  if (useExistingBrowser) {
    this.log('Using existing browser/page provided by caller - single attempt only (no internal deflaking)');
    // Single attempt with existing browser (caller handles deflaking by creating fresh browsers)
    const browser = request.existingBrowser;
    const context = request.existingContext;
    const page = request.existingPage;

    try {
      // LIFECYCLE: Call beforeStartTest if provided
      if (this.progressReporter?.beforeStartTest) {
        await this.progressReporter.beforeStartTest(page, browser, context);
      }

      // Execute the script as-is
      await this.executeStepCode(request.script, page);

      // LIFECYCLE: Call afterEndTest on success
      if (this.progressReporter?.afterEndTest) {
        await this.progressReporter.afterEndTest('passed', undefined, page);
      }

      // Don't close browser - caller owns it
      return {
        runStatus: 'success',
        numDeflakeRuns: 0,
        executionTime: Date.now() - startTime
      };
    } catch (error) {
      const failure = error instanceof Error ? error : new Error('Script execution failed');
      lastError = failure;
      this.log(`Execution failed: ${failure.message}`);

      // LIFECYCLE: Call afterEndTest on failure
      if (this.progressReporter?.afterEndTest) {
        try {
          await this.progressReporter.afterEndTest('failed', failure.message, page);
        } catch (callbackError) {
          this.log(`afterEndTest callback failed: ${callbackError}`, 'warn');
        }
      }

      return {
        runStatus: 'failed',
        numDeflakeRuns: 0,
        executionTime: Date.now() - startTime,
        error: failure.message
      };
    }
  }

  // Create our own browser (original behavior)
  for (let attempt = 1; attempt <= totalAttempts; attempt++) {
    this.log(`Attempting deflake run ${attempt}/${totalAttempts}`);
    const { browser, context, page } = await this.initializeBrowser(request.playwrightConfig, request.headless, request.playwrightConfigFilePath);

    let passed = false;
    try {
      // LIFECYCLE: Call beforeStartTest if provided
      if (this.progressReporter?.beforeStartTest) {
        await this.progressReporter.beforeStartTest(page, browser, context);
      }

      // Execute the script as-is
      await this.executeStepCode(request.script, page);

      // LIFECYCLE: Call afterEndTest on success
      if (this.progressReporter?.afterEndTest) {
        await this.progressReporter.afterEndTest('passed', undefined, page);
      }

      passed = true;
    } catch (error) {
      const failure = error instanceof Error ? error : new Error('Script execution failed');
      lastError = failure;
      this.log(`Initial run failed: ${failure.message}`);

      // LIFECYCLE: Call afterEndTest on failure
      if (this.progressReporter?.afterEndTest) {
        try {
          await this.progressReporter.afterEndTest('failed', failure.message, page);
        } catch (callbackError) {
          this.log(`afterEndTest callback failed: ${callbackError}`, 'warn');
        }
      }
    } finally {
      // Cleanup is best-effort: a close failure (e.g. browser already closed)
      // must neither turn a passing run into a failure nor abort the retries.
      try {
        await browser.close();
      } catch (closeError) {
        this.log(`Error closing browser: ${closeError}`, 'warn');
      }
    }

    if (passed) {
      // Success! Return immediately
      return {
        runStatus: 'success',
        numDeflakeRuns: attempt - 1, // Count only deflaking runs (exclude original run)
        executionTime: Date.now() - startTime
      };
    }

    // If this is not the last attempt, continue to next attempt
    if (attempt < totalAttempts) {
      this.log(`Deflaking attempt ${attempt} failed, trying again... (${attempt + 1}/${totalAttempts})`);
    }
  }

  // All attempts failed
  return {
    runStatus: 'failed',
    numDeflakeRuns: deflakeRunCount, // Count only deflaking runs (exclude original run)
    executionTime: Date.now() - startTime,
    error: lastError?.message || 'All deflaking attempts failed'
  };
}
|
|
372
|
-
|
|
373
|
-
/**
 * Repairs a script step by step with AI and returns a drop-in replacement
 * script when at least one step could be made to pass.
 *
 * Flow: optionally try a verbatim run first; resolve the steps (pre-parsed, or
 * parsed from the script); obtain a browser (caller-provided, or freshly
 * launched and closed again at the end); run the repair; then assess and
 * polish the repaired script.
 *
 * The `afterEndTest` lifecycle hook is invoked exactly once for every run on
 * which `beforeStartTest` was reached - including runs that end with an
 * exception.
 *
 * NOTE(review): `request.script` may be absent when pre-parsed steps are
 * supplied, yet it is still passed (non-null asserted) to the confidence
 * assessment and returned as `updatedScript` on total failure - confirm how
 * callers expect that case to behave.
 *
 * @param request Execution request; needs `script` unless non-empty `steps` are supplied.
 * @param startTime Epoch millis used to compute `executionTime`.
 * @param model Model identifier forwarded to the LLM-backed calls.
 * @returns The verbatim-run result when that run passes; otherwise a response with
 *          `runStatus: 'failed'` and a `repairStatus` of 'success', 'partial' or 'failed'.
 * @throws Error when neither script nor steps are provided; errors from the optional
 *         verbatim run also propagate.
 */
private async runWithAIRepair(request: ScriptExecutionRequest, startTime: number, model: string): Promise<ScriptExecutionResponse> {
  const repairFlexibility = request.repairFlexibility || 3;
  const attemptRunExactlyFirst = request.attemptRunExactlyFirst || false;
  const preParsedSteps = request.steps && request.steps.length > 0 ? request.steps : undefined;

  // Script content is required UNLESS pre-parsed steps are provided
  if (!request.script && !preParsedSteps) {
    throw new Error('Script content is required for AI repair. The TestChimpService should read the file and provide script content.');
  }

  // Check if we should use existing browser
  const useExistingBrowser = !!(request.existingBrowser && request.existingContext && request.existingPage);

  // Optionally try runExactly first (with deflaking if specified)
  if (attemptRunExactlyFirst) {
    const deflakeCount = request.deflakeRunCount || 0;
    this.log(`Attempting runExactly first with ${deflakeCount} deflake attempts...`);
    const runExactlyResult = await this.runExactly({
      ...request,
      mode: ExecutionMode.RUN_EXACTLY,
      deflakeRunCount: deflakeCount
    }, startTime, model);

    if (runExactlyResult.runStatus === 'success') {
      this.log('runExactly succeeded, returning without AI repair');
      return runExactlyResult;
    }

    this.log('runExactly failed, proceeding with AI repair...');
  }

  // Start AI repair process
  this.log('Starting AI repair process...');

  let repairBrowser: any = null;
  let repairContext: any = null;
  let repairPage: any = null;

  // Lifecycle bookkeeping so afterEndTest fires once per started test
  let testStarted = false;
  let testEnded = false;

  // Invokes afterEndTest at most once and only after the test was started;
  // a failing callback is logged and never affects the returned result.
  const endTest = async (status: 'passed' | 'failed', message?: string): Promise<void> => {
    if (!testStarted || testEnded) {
      return;
    }
    testEnded = true;
    if (this.progressReporter?.afterEndTest) {
      try {
        await this.progressReporter.afterEndTest(status, message, repairPage);
      } catch (callbackError) {
        this.log(`afterEndTest callback failed: ${callbackError}`, 'warn');
      }
    }
  };

  try {
    let steps: (ScriptStep & { success?: boolean; error?: string })[];

    if (useExistingBrowser) {
      // Use existing browser
      this.log('Using existing browser for AI repair...');
      repairBrowser = request.existingBrowser;
      repairContext = request.existingContext;
      repairPage = request.existingPage;

      // Use pre-parsed steps if provided (preserves step IDs from canonical tree),
      // otherwise parse script into steps
      if (preParsedSteps) {
        this.log(`Using ${preParsedSteps.length} pre-parsed steps (IDs preserved)`);
        steps = preParsedSteps;
      } else {
        this.log('Parsing script into steps...');
        if (!request.script) {
          throw new Error('Script is required when steps are not provided');
        }
        steps = await this.parseScriptIntoSteps(request.script, model);
      }
    } else if (preParsedSteps) {
      // Pre-parsed steps: only the browser has to be started
      this.log(`Using ${preParsedSteps.length} pre-parsed steps (IDs preserved)`);
      this.log('Initializing repair browser...');
      steps = preParsedSteps;
      const browserInstance = await this.initializeBrowser(request.playwrightConfig, request.headless, request.playwrightConfigFilePath);
      repairBrowser = browserInstance.browser;
      repairContext = browserInstance.context;
      repairPage = browserInstance.page;
    } else {
      // Start browser initialization and script parsing in parallel for faster startup
      this.log('Initializing repair browser and parsing script...');
      if (!request.script) {
        throw new Error('Script is required when steps are not provided');
      }
      const [parsedSteps, browserInstance] = await Promise.all([
        this.parseScriptIntoSteps(request.script, model),
        this.initializeBrowser(request.playwrightConfig, request.headless, request.playwrightConfigFilePath) // Use request.headless (defaults to false/headed)
      ]);

      steps = parsedSteps;
      repairBrowser = browserInstance.browser;
      repairContext = browserInstance.context;
      repairPage = browserInstance.page;
    }

    // LIFECYCLE: Call beforeStartTest if provided
    testStarted = true;
    if (this.progressReporter?.beforeStartTest) {
      await this.progressReporter.beforeStartTest(repairPage, repairBrowser, repairContext);
    }

    this.log('Starting AI repair with parsed steps...');
    const updatedSteps = await this.repairStepsWithAI(steps, repairPage, repairFlexibility, model, request.jobId);

    // Always generate the updated script (preserve original test name)
    const updatedScript = this.generateUpdatedScript(updatedSteps, undefined, request.script);

    // Check if repair was successful by seeing if we completed all steps
    const allStepsSuccessful = updatedSteps.length > 0 && updatedSteps.every(step => step.success);

    // Check if we have any successful repairs (partial success)
    const hasSuccessfulRepairs = updatedSteps.some(step => step.success);

    // Debug: Log step success status
    this.log('Step success status: ' + updatedSteps.map((step, index) => `Step ${index + 1}: ${step.success ? 'SUCCESS' : 'FAILED'}`).join(', '));
    this.log(`All steps successful: ${allStepsSuccessful}`);
    this.log(`Has successful repairs: ${hasSuccessfulRepairs}`);

    // Debug: Log individual step details
    updatedSteps.forEach((step, index) => {
      this.log(`Step ${index + 1} details: success=${step.success}, description="${step.description}"`);
    });

    if (!hasSuccessfulRepairs) {
      // No successful repairs at all
      await endTest('failed', 'AI repair could not fix any steps');

      return {
        runStatus: 'failed', // Original script failed
        repairStatus: 'failed',
        repairConfidence: 0,
        repairAdvice: 'AI repair could not fix any steps',
        updatedScript: request.script!, // Return original script since no repairs were successful
        numDeflakeRuns: 0, // Deflaking is handled by caller before calling runWithAIRepair
        executionTime: Date.now() - startTime,
        error: 'AI repair could not fix any steps'
      };
    }

    // At least one step was repaired (partial or complete success).
    // IMPORTANT: Use the orchestrator-generated script directly (already has proper Playwright commands)
    // Don't regenerate via LLM as it loses the actual repairs
    this.log('Using orchestrator-generated script (skipping LLM regeneration to preserve repairs)');

    // For repair advice, compare original vs repaired
    const confidenceResponse = await this.llmFacade.assessRepairConfidence(request.script!, updatedScript, model);

    // Add TestChimp comment with repair advice
    const scriptWithAdvice = addTestChimpComment(updatedScript, confidenceResponse.advice);

    // Polish the script with minor LLM cleanup (removes redundancies, fixes formatting)
    this.log('Applying final LLM polish to repaired script (minor cleanup only)...');
    const cleanupResult = await this.llmFacade.cleanupScript(scriptWithAdvice, model);

    if (cleanupResult.changes.length > 0) {
      this.log(`Script cleanup made ${cleanupResult.changes.length} minor improvements:`);
      cleanupResult.changes.forEach((change, i) => {
        this.log(`  ${i + 1}. ${change}`);
      });
    } else if (cleanupResult.skipped) {
      this.log(`Script cleanup skipped: ${cleanupResult.skipped}`);
    } else {
      this.log('Script cleanup: no changes needed');
    }

    const scriptWithRepairAdvice = cleanupResult.script;

    // Report credit usage for the repair; fire-and-forget, failures are only logged
    this.creditUsageService.reportAIRepairCredit().catch(error => {
      this.log(`Failed to report credit usage for AI repair: ${error}`, 'warn');
    });

    // LIFECYCLE: Call afterEndTest (partial or complete success)
    await endTest(
      allStepsSuccessful ? 'passed' : 'failed',
      allStepsSuccessful ? undefined : 'Partial repair success'
    );

    return {
      runStatus: 'failed', // Original script failed
      repairStatus: allStepsSuccessful ? 'success' : 'partial', // Complete or partial repair success
      repairConfidence: confidenceResponse.confidence,
      repairAdvice: confidenceResponse.advice,
      updatedScript: scriptWithRepairAdvice, // Return the drop-in replacement script with proper TestChimp comment
      numDeflakeRuns: 0, // Deflaking is handled by caller before calling runWithAIRepair
      executionTime: Date.now() - startTime
    };
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Script execution failed';

    // LIFECYCLE: keep before/after hooks balanced even when the repair blew up
    await endTest('failed', message);

    return {
      runStatus: 'failed',
      repairStatus: 'failed',
      numDeflakeRuns: 0, // Deflaking is handled by caller before calling runWithAIRepair
      executionTime: Date.now() - startTime,
      error: message
    };
  } finally {
    // Clean up browser resources if we created them (not provided by caller)
    if (!useExistingBrowser && repairBrowser) {
      try {
        await repairBrowser.close();
        this.log('AI repair browser closed');
      } catch (closeError) {
        this.log(`Error closing AI repair browser: ${closeError}`, 'warn');
      }
    }
  }
}
|
|
594
|
-
|
|
595
|
-
/**
 * Splits a script into steps, preferring the LLM parser.
 *
 * If the LLM-based attempt throws for any reason, the comment-driven fallback
 * parser is used instead; both outcomes are logged.
 *
 * @param script Full script text.
 * @param model Model identifier for the LLM parser.
 * @returns The parsed steps from whichever parser succeeded.
 */
private async parseScriptIntoSteps(script: string, model: string): Promise<(ScriptStep & { success?: boolean; error?: string })[]> {
  try {
    this.log('Attempting LLM-based script parsing...');
    const llmSteps = await this.llmFacade.parseScriptIntoSteps(script, model);
    this.log(`LLM parsing successful, got ${llmSteps.length} steps`);
    return llmSteps;
  } catch (error) {
    // LLM path failed: fall back to parsing the `// Step` comments in code
    this.log(`LLM parsing failed, falling back to code parsing: ${error}`);
    const codeSteps = this.parseScriptIntoStepsFallback(script);
    this.log(`Fallback parsing successful, got ${codeSteps.length} steps`);
    return codeSteps;
  }
}
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
/**
 * Comment-driven parser used when LLM parsing is unavailable.
 *
 * Every line starting with `// Step ` opens a new step whose description is the
 * comment text without the `// Step N:` prefix and without a trailing
 * `[FAILED]` marker. The following non-blank lines become the step's code,
 * except lines whose trimmed text starts with `import`, `test(` or `});`.
 * Lines before the first step comment are ignored, and a step whose cleaned
 * code is empty is dropped.
 *
 * @param script Full script text.
 * @returns The steps found, in source order.
 */
private parseScriptIntoStepsFallback(script: string): (ScriptStep & { success?: boolean; error?: string })[] {
  const collected: (ScriptStep & { success?: boolean; error?: string })[] = [];
  const stepPrefix = /^\/\/\s*Step\s*\d+:\s*/;
  const failedSuffix = /\s*\[FAILED\]\s*$/;
  const skippedPrefixes = ['import', 'test(', '});'];

  // Finalizes a step: cleans its gathered code and keeps it only if code remains
  const commit = (step: ScriptStep | null, codeLines: string[]): void => {
    if (!step) {
      return;
    }
    const cleaned = this.cleanStepCode(codeLines.join('\n').trim());
    if (cleaned) {
      step.code = cleaned;
      collected.push(step);
    }
  };

  let openStep: ScriptStep | null = null;
  let pendingLines: string[] = [];

  for (const rawLine of script.split('\n')) {
    const text = rawLine.trim();

    if (text.startsWith('// Step ')) {
      // A new step comment closes the previous step and opens the next one
      commit(openStep, pendingLines);
      const description = text.replace(stepPrefix, '').replace(failedSuffix, '').trim();
      openStep = { description, code: '' };
      pendingLines = [];
      continue;
    }

    if (!openStep || !text) {
      continue;
    }
    if (skippedPrefixes.some(prefix => text.startsWith(prefix))) {
      continue;
    }

    // Keep the untrimmed line so the original indentation survives
    pendingLines.push(rawLine);
  }

  // The final step has no following comment to close it
  commit(openStep, pendingLines);

  return collected;
}
|
|
656
|
-
|
|
657
|
-
/**
 * Execute steps in order against `page`, asking the orchestrator agent to
 * repair any step that throws.
 *
 * For each failing step the orchestrator is invoked with the step description,
 * the descriptions of what has actually been executed so far, and the
 * descriptions of the remaining steps. Three outcomes are handled:
 *  - success with commands: the step's code is replaced by the joined commands
 *    and execution continues with the next step;
 *  - success with zero commands: the step is removed from the list and the
 *    step that moved into its position is processed next;
 *  - anything else (including the orchestrator throwing): execution stops.
 *
 * Step objects from `steps` are mutated in place (`success` / `error`); the
 * returned array is a separate array that may have had entries replaced or removed.
 *
 * @param steps Steps to execute, in order.
 * @param page Page object the step code and the orchestrator operate on.
 * @param repairFlexibility Not read by this implementation; retained for signature compatibility.
 * @param model Not read by this implementation; retained for signature compatibility.
 * @param jobId Optional job id; progress callbacks are only fired when it is set.
 * @returns The (possibly modified) list of steps, including any not reached after a failed repair.
 */
private async repairStepsWithAI(
  steps: (ScriptStep & { success?: boolean; error?: string })[],
  page: any,
  repairFlexibility: number,
  model: string,
  jobId?: string
): Promise<(ScriptStep & { success?: boolean; error?: string })[]> {
  const updatedSteps = [...steps];

  // Track actual executed steps (including agent repairs) for proper history
  const executedStepDescriptions: string[] = [];

  let i = 0;
  while (i < updatedSteps.length) {
    const step = updatedSteps[i];
    this.log(`Loop iteration: i=${i}, step description="${step.description}", total steps=${updatedSteps.length}`);

    try {
      // NOTE(review): the reporter callbacks below run inside this try, so an
      // exception thrown by a reporter is handled exactly like a failing step
      // and sends the step to repair — confirm that is intended.

      // LIFECYCLE: Call beforeStepStart if provided
      if (this.progressReporter?.beforeStepStart) {
        await this.progressReporter.beforeStepStart(
          {
            stepId: step.id, // Preserve original step ID if provided
            stepNumber: i + 1,
            description: step.description,
            code: step.code
          },
          page
        );
      }

      // Try to execute the step directly without context replay
      this.log(`Attempting Step ${i + 1}: ${step.description}`);
      this.log(` Code: ${step.code}`);
      await this.executeStepCode(step.code, page);
      step.success = true;
      this.log(`Step ${i + 1} executed successfully: ${step.description}`);
      this.log(`Step ${i + 1} success status set to: ${step.success}`);

      // Track executed step description for agent context
      executedStepDescriptions.push(step.description);

      // Report successful step execution
      this.log(`DEBUG: About to check callback - progressReporter=${!!this.progressReporter}, onStepProgress=${!!this.progressReporter?.onStepProgress}, jobId=${jobId}`);
      if (this.progressReporter?.onStepProgress && jobId) {
        this.log(`DEBUG: Firing onStepProgress callback for step ${i + 1}, stepId=${step.id}`);
        await this.progressReporter.onStepProgress({
          jobId,
          stepId: step.id, // Preserve original step ID if provided
          stepNumber: i + 1,
          description: step.description,
          code: step.code,
          status: 'SUCCESS_STEP_EXECUTION' as any,
          wasRepaired: false
        });
        this.log(`DEBUG: onStepProgress callback completed for step ${i + 1}`);
      } else {
        this.log(`DEBUG: Skipping callback - conditions not met`);
      }

      i++; // Move to next step
    } catch (error) {
      this.log(`Step ${i + 1} failed: ${step.description}`);
      this.log(` Failed code: ${step.code}`);
      this.log(` Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
      if (error instanceof Error && error.stack) {
        this.log(` Stack trace: ${error.stack}`);
      }
      step.success = false;
      step.error = this.safeSerializeError(error);

      // Use orchestrator for repair (reuses all SoM infrastructure)
      this.log(`Calling orchestrator in REPAIR mode for step ${i + 1}`);

      // Prepare repair context - use executedStepDescriptions (includes agent repairs)
      const priorSteps = executedStepDescriptions; // What was ACTUALLY executed (scripted + agent)
      const nextSteps = updatedSteps.slice(i + 1).map(s => s.description);

      this.log(` Prior steps executed: ${priorSteps.length}, Next steps: ${nextSteps.length}`);
      this.log(` Prior steps context:\n ${priorSteps.map((s, idx) => `${idx + 1}. ${s}`).join('\n ')}`);

      // Create minimal memory for repair
      const memory: JourneyMemory = {
        experiences: [],
        extractedData: {},
        history: [],
        latestNote: undefined
      };

      let repairSuccess = false;

      try {
        // Call orchestrator with repair context (page object persisted)
        const repairResult = await this.orchestratorAgent.executeStep(
          page, // Same page object (persisted state)
          step.description, // Goal with testdata embedded
          i + 1, // Current step number
          updatedSteps.length, // Total steps
          updatedSteps.map(s => s.description), // All step descriptions
          memory, // Memory (empty for repair)
          jobId || 'repair',
          priorSteps, // What was already completed
          nextSteps // What comes after this
        );

        if (repairResult.success && repairResult.commands.length > 0) {
          // MODIFY: Orchestrator fixed the step - replace with new code
          const repairedCode = repairResult.commands.join('\n');

          updatedSteps[i] = {
            ...step,
            code: repairedCode,
            success: true,
            error: undefined
          };

          this.log(`✓ Step ${i + 1} MODIFIED by orchestrator (repair successful)`);
          this.log(` Original code: ${step.code}`);
          this.log(` New code (${repairResult.commands.length} commands):\n ${repairResult.commands.join('\n ')}`);

          // Track what agent actually did in history (for future repair context)
          const agentActionSummary = `${step.description} [AI-repaired: ${repairResult.commands.length} commands]`;
          executedStepDescriptions.push(agentActionSummary);

          // Report repaired step
          if (this.progressReporter?.onStepProgress && jobId) {
            await this.progressReporter.onStepProgress({
              jobId,
              stepId: step.id,
              stepNumber: i + 1,
              description: updatedSteps[i].description,
              code: updatedSteps[i].code,
              status: 'SUCCESS_STEP_EXECUTION' as any,
              wasRepaired: true
            });
          }

          // Ensure page is stable after agent repairs before returning control to script.
          // Both waits are best-effort: a timeout only produces a warning.
          this.log(`Waiting for page stability after agent repair...`);
          try {
            await page.waitForLoadState('networkidle', { timeout: 5000 });
            this.log(`Page stabilized (networkidle) after agent repair`);
          } catch (stabilityError) {
            try {
              await page.waitForLoadState('domcontentloaded', { timeout: 3000 });
              this.log(`Page loaded (domcontentloaded) after agent repair`);
            } catch (fallbackError) {
              this.log(`Page stability wait timed out (continuing anyway)`, 'warn');
            }
          }

          repairSuccess = true;
          i++; // Continue to NEXT step (hand control back to script)

        } else if (repairResult.success && repairResult.commands.length === 0) {
          // DELETE: Step goal already achieved or no longer needed (e.g., modal already dismissed)
          this.log(`✓ Step ${i + 1} DELETED by orchestrator (goal already achieved, step obsolete)`);
          this.log(` Reason: Orchestrator completed with 0 commands - step no longer needed`);

          // Track deletion in history (helps agent understand what was skipped)
          executedStepDescriptions.push(`${step.description} [AI-deleted: step obsolete/already done]`);

          // Remove the step from array
          updatedSteps.splice(i, 1);

          repairSuccess = true;
          // Don't increment i - next step moved to current position

        } else {
          this.log(`✗ Step ${i + 1} could not be repaired by orchestrator (reason: ${repairResult.terminationReason})`);
        }
      } catch (repairError) {
        // The thrown value may be anything (including null), so never dereference it directly.
        this.log(`✗ Orchestrator repair failed: ${this.safeSerializeError(repairError)}`);
      }

      if (!repairSuccess) {
        this.log(`Step ${i + 1} could not be repaired - stopping execution`);
        break;
      }
    }
  }

  return updatedSteps;
}
|
|
858
|
-
|
|
859
|
-
/**
 * Run one step's code against `page`.
 *
 * The code is compiled into an async function body with `page` and `expect`
 * (from `@playwright/test`) in scope, then awaited. Any error thrown by the
 * step propagates to the caller.
 *
 * SECURITY: the step code is compiled with `new Function` and runs with the
 * full privileges of this process. Only pass code from a trusted source.
 *
 * @param code Source text of the step.
 * @param page Page object exposed to the step code as `page`.
 * @throws Error if `cleanStepCode` finds nothing executable in `code`.
 */
private async executeStepCode(code: string, page: any): Promise<void> {
  // Keep the default timeout short (5 seconds) for fast feedback on wrong selectors.
  // Navigation operations should use explicit longer timeouts in generated code.
  const defaultTimeoutMs = 5000;
  page.setDefaultTimeout(defaultTimeoutMs);

  try {
    // Validate the code before execution
    const cleanedCode = this.cleanStepCode(code);

    if (!cleanedCode || cleanedCode.trim().length === 0) {
      throw new Error('Step code is empty or contains only comments');
    }

    // Loaded lazily so the dependency is only resolved when a step actually runs.
    const { expect } = require('@playwright/test');

    // The step code is placed on its own lines: if it ends with a `//` comment,
    // a same-line wrapper would have its closing `})()` commented out and fail
    // to compile with a SyntaxError.
    const executeCode = new Function('page', 'expect', `return (async () => {\n${cleanedCode}\n})()`);
    await executeCode(page, expect);
  } finally {
    // Restore the timeout in case the step code changed it
    page.setDefaultTimeout(defaultTimeoutMs);
  }
}
|
|
884
|
-
|
|
885
|
-
/**
 * Decide whether step code contains anything that looks executable.
 *
 * Despite the name, nothing is stripped: the input is returned unchanged
 * (comments included) when it passes the check, and `''` is returned when it
 * is blank or matches none of the executable-looking patterns.
 *
 * NOTE(review): the pattern is applied to the raw text, so a call-like token
 * inside a comment (e.g. `// foo()`) also counts as executable — confirm intended.
 *
 * @param code Source text of a step.
 * @returns `code` itself, or `''` when nothing executable was detected.
 */
private cleanStepCode(code: string): string {
  // Heuristic: a call expression, `await`/`return`, or a control-flow keyword.
  const executablePattern = /[a-zA-Z_$][a-zA-Z0-9_$]*\s*\(|await\s+|return\s+|if\s*\(|for\s*\(|while\s*\(|switch\s*\(|try\s*\{|catch\s*\(/;

  const isBlank = !code || code.trim().length === 0;
  if (isBlank || !executablePattern.test(code)) {
    return '';
  }

  // Hand back the original text untouched.
  return code;
}
|
|
902
|
-
|
|
903
|
-
// Legacy repair helpers. They are inert stubs, retained only so that any
// remaining references keep compiling.

/** Legacy stub: always yields an empty string. */
private buildFailureHistory(): string {
  return '';
}

/** Legacy stub: always yields an empty string. */
private buildRecentRepairsContext(): string {
  return '';
}

/** Legacy stub: always resolves to an unsuccessful result with no error text. */
private async applyRepairActionInContext(): Promise<{ success: boolean; error?: string }> {
  const notApplied = { success: false };
  return notApplied;
}
|
|
909
|
-
|
|
910
|
-
/**
 * Rebuild a Playwright test script from a list of steps.
 *
 * Each step with non-blank code is emitted as a `// description` comment
 * followed by its code lines, inside a single `test(...)` wrapper. The result
 * is passed through `addTestChimpComment` together with the repair advice and
 * any hashtags found in the original script.
 *
 * @param steps Steps to emit, in order; steps without code are skipped.
 * @param repairAdvice Optional advice text forwarded to `addTestChimpComment`.
 * @param originalScript Optional original script, used to recover the test name and hashtags.
 * @returns The regenerated script text.
 */
private generateUpdatedScript(steps: (ScriptStep & { success?: boolean; error?: string })[], repairAdvice?: string, originalScript?: string): string {
  // Extract test name and hashtags from original script if provided
  let testName = 'repairedTest';
  let nameQuote = "'";
  let hashtags: string[] = [];

  if (originalScript) {
    // Match the opening quote, then everything up to the same unescaped quote,
    // so names such as "user's login" or 'it\'s fine' are captured whole.
    const testNameMatch = originalScript.match(/test\((['"])((?:\\.|(?!\1)[^\\])+)\1/);
    if (testNameMatch) {
      nameQuote = testNameMatch[1];
      testName = testNameMatch[2];
    }

    // Extract hashtags from TestChimp comment.
    // NOTE(review): this takes the first `#word` run anywhere in the script, so
    // an id selector such as '#submit' can be picked up — confirm against the
    // TestChimp comment format.
    const hashtagMatch = originalScript.match(/#\w+(?:\s+#\w+)*/);
    if (hashtagMatch) {
      hashtags = hashtagMatch[0].split(/\s+/).filter(tag => tag.startsWith('#'));
    }
  }

  // The name is re-emitted as raw source text. Single quotes are used as before,
  // except when a double-quoted name contains a bare apostrophe that would
  // terminate a single-quoted literal early.
  const emitQuote = nameQuote === '"' && testName.includes("'") ? '"' : "'";

  const scriptLines = [
    "import { test, expect } from '@playwright/test';",
    `test(${emitQuote}${testName}${emitQuote}, async ({ page, browser, context }) => {`
  ];

  for (const step of steps) {
    // Only add step if it has code to execute
    if (!step.code || step.code.trim().length === 0) {
      continue;
    }

    // Keep the description on a single line so it cannot spill out of the comment.
    const description = step.description.replace(/\s*\r?\n\s*/g, ' ');
    scriptLines.push(` // ${description}`);
    for (const line of step.code.split('\n')) {
      scriptLines.push(` ${line}`);
    }
  }

  scriptLines.push('});');
  const script = scriptLines.join('\n');

  // Add TestChimp comment with hashtags and repair advice
  return addTestChimpComment(script, repairAdvice, hashtags);
}
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
/**
 * Create a browser, context and page by delegating to the module-level
 * `initializeBrowser` utility, passing this instance's logger along.
 *
 * @param playwrightConfig Optional configuration, forwarded unchanged.
 * @param headless Optional headless flag, forwarded unchanged.
 * @param playwrightConfigFilePath Optional config file path, forwarded unchanged.
 * @returns The browser, context and page produced by the utility.
 */
private async initializeBrowser(playwrightConfig?: string, headless?: boolean, playwrightConfigFilePath?: string): Promise<{ browser: any; context: any; page: any }> {
  const session = await initializeBrowser(
    playwrightConfig,
    headless,
    playwrightConfigFilePath,
    this.logger
  );
  return session;
}
|
|
958
|
-
|
|
959
|
-
/**
 * Turn an arbitrary thrown value into a string without ever throwing.
 *
 * - `Error` instances yield their `message`.
 * - Strings are returned as-is.
 * - Other objects yield a JSON object holding only `message`, `name`, `code`,
 *   `status` (when present, including falsy values such as `0`) and a
 *   non-empty string `stack`.
 * - Everything else is converted with `String(...)`.
 *
 * @param error The caught value.
 * @returns A string description of `error`.
 */
private safeSerializeError(error: unknown): string {
  try {
    if (error instanceof Error) {
      return error.message;
    }

    if (typeof error === 'string') {
      return error;
    }

    if (typeof error === 'object' && error !== null) {
      // Copy a fixed set of fields instead of serializing the whole object,
      // which may be large, circular, or hold non-serializable handles.
      const source = error as Record<string, unknown>;
      const safeError: Record<string, unknown> = {};

      for (const key of ['message', 'name', 'code', 'status']) {
        const value = source[key];
        // Presence check rather than truthiness: `code: 0` or `status: 0` are meaningful.
        if (value !== undefined && value !== null) {
          safeError[key] = value;
        }
      }

      const stack = source['stack'];
      if (typeof stack === 'string' && stack) {
        safeError['stack'] = stack;
      }

      return JSON.stringify(safeError);
    }

    return String(error);
  } catch (serializationError) {
    // Reached when a getter or JSON.stringify throws. String() can throw too
    // (e.g. a throwing toString), so the last resort is guarded as well.
    try {
      return `Error: ${String(error)}`;
    } catch (stringifyError) {
      return 'Error: [unserializable error]';
    }
  }
}
|
|
996
|
-
}
|