testchimp-runner-core 0.0.35 → 0.0.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/package.json +6 -1
  2. package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
  3. package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
  4. package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
  5. package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
  6. package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
  7. package/plandocs/INTEGRATION_COMPLETE.md +0 -322
  8. package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
  9. package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
  10. package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
  11. package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
  12. package/plandocs/PHASE_1_COMPLETE.md +0 -165
  13. package/plandocs/PHASE_1_SUMMARY.md +0 -184
  14. package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
  15. package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
  16. package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
  17. package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
  18. package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
  19. package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
  20. package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
  21. package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
  22. package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
  23. package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
  24. package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
  25. package/plandocs/exploratory-mode-support-v2.plan.md +0 -953
  26. package/plandocs/exploratory-mode-support.plan.md +0 -928
  27. package/plandocs/journey-id-tracking-addendum.md +0 -227
  28. package/releasenotes/RELEASE_0.0.26.md +0 -165
  29. package/releasenotes/RELEASE_0.0.27.md +0 -236
  30. package/releasenotes/RELEASE_0.0.28.md +0 -286
  31. package/src/auth-config.ts +0 -84
  32. package/src/credit-usage-service.ts +0 -188
  33. package/src/env-loader.ts +0 -103
  34. package/src/execution-service.ts +0 -996
  35. package/src/file-handler.ts +0 -104
  36. package/src/index.ts +0 -432
  37. package/src/llm-facade.ts +0 -821
  38. package/src/llm-provider.ts +0 -53
  39. package/src/model-constants.ts +0 -35
  40. package/src/orchestrator/decision-parser.ts +0 -139
  41. package/src/orchestrator/index.ts +0 -58
  42. package/src/orchestrator/orchestrator-agent.ts +0 -1282
  43. package/src/orchestrator/orchestrator-prompts.ts +0 -786
  44. package/src/orchestrator/page-som-handler.ts +0 -1565
  45. package/src/orchestrator/som-types.ts +0 -188
  46. package/src/orchestrator/tool-registry.ts +0 -184
  47. package/src/orchestrator/tools/check-page-ready.ts +0 -75
  48. package/src/orchestrator/tools/extract-data.ts +0 -92
  49. package/src/orchestrator/tools/index.ts +0 -15
  50. package/src/orchestrator/tools/inspect-page.ts +0 -42
  51. package/src/orchestrator/tools/recall-history.ts +0 -72
  52. package/src/orchestrator/tools/refresh-som-markers.ts +0 -69
  53. package/src/orchestrator/tools/take-screenshot.ts +0 -128
  54. package/src/orchestrator/tools/verify-action-result.ts +0 -159
  55. package/src/orchestrator/tools/view-previous-screenshot.ts +0 -103
  56. package/src/orchestrator/types.ts +0 -291
  57. package/src/playwright-mcp-service.ts +0 -224
  58. package/src/progress-reporter.ts +0 -144
  59. package/src/prompts.ts +0 -842
  60. package/src/providers/backend-proxy-llm-provider.ts +0 -91
  61. package/src/providers/local-llm-provider.ts +0 -38
  62. package/src/scenario-service.ts +0 -252
  63. package/src/scenario-worker-class.ts +0 -1110
  64. package/src/script-utils.ts +0 -203
  65. package/src/types.ts +0 -239
  66. package/src/utils/browser-utils.ts +0 -348
  67. package/src/utils/coordinate-converter.ts +0 -162
  68. package/src/utils/page-info-retry.ts +0 -65
  69. package/src/utils/page-info-utils.ts +0 -285
  70. package/testchimp-runner-core-0.0.35.tgz +0 -0
  71. package/tsconfig.json +0 -19
@@ -1,996 +0,0 @@
1
- import { PlaywrightMCPService as PlaywrightService } from './playwright-mcp-service';
2
- import {
3
- PlaywrightExecutionRequest,
4
- PlaywrightExecutionResponse,
5
- ScriptResult,
6
- ScriptExecutionRequest,
7
- ScriptExecutionResponse,
8
- ScriptStep,
9
- ExecutionMode,
10
- StepOperation,
11
- StepRepairAction
12
- } from './types';
13
- import { RepairSuggestionResponse, RepairConfidenceResponse } from './llm-facade';
14
- import { getEnhancedPageInfo, PageInfo } from './utils/page-info-utils';
15
- import { initializeBrowser, captureOptimizedScreenshot } from './utils/browser-utils';
16
- import { LLMFacade } from './llm-facade';
17
- import { AuthConfig } from './auth-config';
18
- import { addTestChimpComment } from './script-utils';
19
- import { CreditUsageService } from './credit-usage-service';
20
- import { DEFAULT_MODEL, VISION_MODEL } from './model-constants';
21
- import { LLMProvider } from './llm-provider';
22
- import { ProgressReporter } from './progress-reporter';
23
- import { BackendProxyLLMProvider } from './providers/backend-proxy-llm-provider';
24
- import { OrchestratorAgent, ToolRegistry, DEFAULT_AGENT_CONFIG } from './orchestrator';
25
- import type { AgentConfig, JourneyMemory } from './orchestrator';
26
-
27
- /**
28
- * Service for orchestrating Playwright script execution
29
- */
30
- export class ExecutionService {
31
- private playwrightService: PlaywrightService;
32
- private llmFacade: LLMFacade;
33
- private llmProvider: LLMProvider;
34
- private progressReporter?: ProgressReporter;
35
- private creditUsageService: CreditUsageService;
36
- private maxConcurrentExecutions: number;
37
- private activeExecutions: Set<Promise<any>> = new Set();
38
- private logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
39
- private orchestratorAgent: OrchestratorAgent;
40
-
41
/**
 * Build an execution service and wire up its collaborators.
 *
 * @param authConfig Optional auth configuration, forwarded to the default
 *   backend-proxy LLM provider and to the credit usage service.
 * @param backendUrl Optional backend URL, forwarded to the same two collaborators.
 * @param maxConcurrentExecutions Upper bound on scripts executing at once
 *   (default 10). Values below 1 (or NaN) are coerced to 1, because
 *   `executeScript` would otherwise wait forever for a slot that can never open.
 * @param llmProvider Optional custom LLM provider; when omitted a
 *   `BackendProxyLLMProvider` is created from `authConfig`/`backendUrl`.
 * @param progressReporter Optional lifecycle/progress callbacks.
 */
constructor(
  authConfig?: AuthConfig,
  backendUrl?: string,
  maxConcurrentExecutions: number = 10,
  llmProvider?: LLMProvider,
  progressReporter?: ProgressReporter
) {
  this.playwrightService = new PlaywrightService();

  // Use the provided LLM provider or default to the backend proxy (backward compatible).
  this.llmProvider = llmProvider || new BackendProxyLLMProvider(authConfig, backendUrl);
  this.llmFacade = new LLMFacade(this.llmProvider);

  this.progressReporter = progressReporter;
  this.creditUsageService = new CreditUsageService(authConfig, backendUrl);

  // A limit of 0, a negative number or NaN would make the slot-wait loop in
  // executeScript race an empty set of promises, which never settles.
  this.maxConcurrentExecutions = maxConcurrentExecutions >= 1 ? maxConcurrentExecutions : 1;

  // Orchestrator used for repair mode; prefers SoM markers over raw coordinates.
  const toolRegistry = new ToolRegistry();
  const repairConfig: Partial<AgentConfig> = {
    useSoM: true,
    somRestrictCoordinates: true
  };

  this.orchestratorAgent = new OrchestratorAgent(
    this.llmFacade,
    toolRegistry,
    repairConfig,
    progressReporter,
    // Forward the severity too; previously every orchestrator message was
    // downgraded to the default 'log' level.
    (msg, level) => this.log(msg, level)
  );
}
73
-
74
/**
 * Register the callback that receives this service's log output.
 *
 * @param logger Receives each message together with an optional severity
 *   (`'log'`, `'error'` or `'warn'`). Replaces any previously set logger.
 */
setLogger(logger: (message: string, level?: 'log' | 'error' | 'warn') => void): void {
  this.logger = logger;
}
80
-
81
/**
 * Forward a message to the configured logger, if there is one.
 *
 * @param message Text to log.
 * @param level Severity; defaults to `'log'`.
 */
private log(message: string, level: 'log' | 'error' | 'warn' = 'log'): void {
  // Deliberately no console fallback: without a logger the message is dropped.
  this.logger?.(message, level);
}
90
-
91
/**
 * Prepare the service for use by initializing the underlying Playwright service.
 */
async initialize(): Promise<void> {
  const service = this.playwrightService;
  await service.initialize();
}
97
-
98
/**
 * Swap in new authentication configuration.
 *
 * Rebuilds the LLM provider (always as a `BackendProxyLLMProvider`, even if a
 * custom provider was passed to the constructor) and the LLM facade on top of
 * it, then hands the new config to the credit usage service.
 *
 * @param authConfig The new authentication configuration.
 */
setAuthConfig(authConfig: AuthConfig): void {
  // NOTE(review): the backend URL given to the constructor is not retained, so
  // the provider is rebuilt with `undefined` here — confirm that is intended.
  const refreshedProvider = new BackendProxyLLMProvider(authConfig, undefined);
  this.llmProvider = refreshedProvider;
  this.llmFacade = new LLMFacade(refreshedProvider);

  // NOTE(review): the orchestrator agent was constructed with the previous
  // facade and is not updated here — verify it does not keep using stale auth.
  this.creditUsageService.setAuthConfig(authConfig);
}
108
-
109
-
110
/**
 * Run a script, honouring the service-wide concurrency limit.
 *
 * If the limit is already reached, waits until one of the in-flight
 * executions settles before starting this one.
 *
 * @param request The script execution request.
 * @returns The execution response produced for this request.
 */
async executeScript(request: ScriptExecutionRequest): Promise<ScriptExecutionResponse> {
  // Block until a slot frees up. The size check and the add below run
  // synchronously after each wake-up, so the limit is not overshot.
  while (this.activeExecutions.size >= this.maxConcurrentExecutions) {
    await Promise.race(this.activeExecutions);
  }

  const pending = this.executeScriptInternal(request);
  this.activeExecutions.add(pending);

  try {
    return await pending;
  } finally {
    // Always release the slot, whether the execution resolved or rejected.
    this.activeExecutions.delete(pending);
  }
}
130
-
131
/**
 * Dispatch a request to the execution strategy selected by `request.mode`.
 *
 * Any error thrown by the chosen strategy is converted into a `'failed'`
 * response rather than propagated.
 *
 * @param request The script execution request.
 * @returns The strategy's response, or a failure response carrying the error message.
 */
private async executeScriptInternal(request: ScriptExecutionRequest): Promise<ScriptExecutionResponse> {
  const startedAt = Date.now();
  const selectedModel = request.model || DEFAULT_MODEL;

  try {
    if (request.mode !== ExecutionMode.RUN_EXACTLY) {
      return await this.runWithAIRepair(request, startedAt, selectedModel);
    }
    return await this.runExactly(request, startedAt);
  } catch (error) {
    const elapsed = Date.now() - startedAt;
    const message = error instanceof Error ? error.message : 'Unknown error';
    return {
      runStatus: 'failed',
      executionTime: elapsed,
      error: message
    };
  }
}
152
-
153
/**
 * Run a prescript/script/postscript bundle as one Playwright job.
 *
 * @param request Carries the three scripts and the Playwright config string.
 * @returns The job's outcome; if config parsing or the job itself throws, a
 *   failure response with an empty script result and zero execution time.
 */
async executeTestSuite(request: PlaywrightExecutionRequest): Promise<PlaywrightExecutionResponse> {
  try {
    const config = this.parsePlaywrightConfig(request.playwrightConfig);

    // Prescript, script and postscript are executed together as a single unit.
    const job = await this.playwrightService.executeJob(
      request.prescript,
      request.script,
      request.postscript,
      config
    );

    return {
      success: job.success,
      results: job.results,
      executionTime: job.executionTime,
      error: job.error
    };
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : 'Unknown error occurred';
    const emptyScriptResult = { success: false, output: '', error: '', executionTime: 0 };
    return {
      success: false,
      results: {
        script: emptyScriptResult
      },
      executionTime: 0,
      error: message
    };
  }
}
187
-
188
/**
 * Turn a Playwright config string into a plain settings object.
 *
 * Three strategies are tried in order:
 *  1. parse the string as JSON and read `browserType`, `headless`,
 *     `viewport` and `options` from it;
 *  2. otherwise pull `headless`, `viewport` and `browserType` out of
 *     JavaScript-looking text with regular expressions;
 *  3. if that also throws (e.g. the input is not a string), return defaults.
 *
 * NOTE(review): the `headless` default differs per strategy (false for JSON
 * and for the final fallback, true for the regex path) — confirm this is intended.
 *
 * @param configString Raw configuration text.
 * @returns An object with `browserType`, `headless`, `viewport` and `options`.
 */
private parsePlaywrightConfig(configString: string): any {
  // Strategy 1: JSON. Anything that throws here (bad syntax, or property
  // access on a parsed `null`) falls through to the regex strategy.
  try {
    const parsed = JSON.parse(configString);
    const browserType = parsed.browserType || 'chromium';
    const headless = parsed.headless === true;
    const viewport = parsed.viewport || { width: 1280, height: 720 };
    const options = parsed.options || {};
    return { browserType, headless, viewport, options };
  } catch {
    // Not usable as JSON; try the text-based extraction below.
  }

  // Strategy 2: regex extraction of common config patterns.
  try {
    const headlessMatch = configString.match(/headless:\s*(true|false)/);
    const viewportMatch = configString.match(/viewport:\s*\{\s*width:\s*(\d+),\s*height:\s*(\d+)\s*\}/);
    const browserMatch = configString.match(/browserType:\s*['"`](chromium|firefox|webkit)['"`]/);

    let viewport = { width: 1280, height: 720 };
    if (viewportMatch) {
      viewport = { width: parseInt(viewportMatch[1]), height: parseInt(viewportMatch[2]) };
    }

    return {
      browserType: browserMatch ? browserMatch[1] : 'chromium',
      headless: headlessMatch ? headlessMatch[1] === 'true' : true,
      viewport,
      options: {}
    };
  } catch {
    // Strategy 3: nothing could be extracted; use defaults.
    return {
      browserType: 'chromium',
      headless: false,
      viewport: { width: 1280, height: 720 },
      options: {}
    };
  }
}
228
-
229
/**
 * Shut the service down by closing the underlying Playwright service.
 */
async close(): Promise<void> {
  const service = this.playwrightService;
  await service.close();
}
235
-
236
/**
 * Report readiness, as determined by the underlying Playwright service.
 *
 * @returns Whatever the Playwright service's own `isReady()` reports.
 */
isReady(): boolean {
  const ready = this.playwrightService.isReady();
  return ready;
}
242
-
243
/**
 * Execute the script exactly as provided, optionally re-running it to deflake.
 *
 * Two paths:
 *  - Caller-supplied browser/context/page: a single attempt is made and the
 *    browser is left open (the caller owns it).
 *  - Otherwise: up to `deflakeRunCount + 1` attempts, each in a fresh browser
 *    that is always closed after the attempt.
 *
 * Changes relative to the previous implementation:
 *  - A throwing `afterEndTest('passed')` callback or a failing
 *    `browser.close()` no longer turns a passing run into a failure (which
 *    also used to fire `afterEndTest('failed')` and trigger a retry). The
 *    callback is now guarded on both outcomes, matching `runWithAIRepair`.
 *  - A missing, null or negative `deflakeRunCount` is treated as 0, so at
 *    least one attempt always runs.
 *
 * @param request Execution request; `request.script` is required.
 * @param startTime Epoch millis used to compute `executionTime`.
 * @param model Accepted for signature compatibility; not used by this method.
 * @returns A success/failure response with `numDeflakeRuns` and `executionTime`.
 * @throws Error if `request.script` is missing, or if browser initialization fails.
 */
private async runExactly(request: ScriptExecutionRequest, startTime: number, model?: string): Promise<ScriptExecutionResponse> {
  // deflakeRunCount: number of deflake attempts (0 means just the original run).
  const deflakeRunCount = Math.max(0, request.deflakeRunCount ?? 0);
  const totalAttempts = deflakeRunCount + 1; // Original run + deflake attempts

  this.log(`runExactly: deflakeRunCount = ${deflakeRunCount}, totalAttempts = ${totalAttempts}`);

  if (!request.script) {
    throw new Error('Script content is required for execution. The TestChimpService should read the file and provide script content.');
  }
  const script = request.script;

  // Fire afterEndTest without letting a misbehaving reporter affect the outcome.
  const notifyTestEnd = async (status: 'passed' | 'failed', errorMessage: string | undefined, page: any): Promise<void> => {
    if (!this.progressReporter?.afterEndTest) {
      return;
    }
    try {
      await this.progressReporter.afterEndTest(status, errorMessage, page);
    } catch (callbackError) {
      this.log(`afterEndTest callback failed: ${callbackError}`, 'warn');
    }
  };

  // One attempt: beforeStartTest -> script -> afterEndTest.
  // Resolves to null on success or to the failure; never rejects.
  const runOnce = async (browser: any, context: any, page: any, failureLabel: string): Promise<Error | null> => {
    try {
      if (this.progressReporter?.beforeStartTest) {
        await this.progressReporter.beforeStartTest(page, browser, context);
      }
      await this.executeStepCode(script, page);
    } catch (error) {
      const failure = error instanceof Error ? error : new Error('Script execution failed');
      this.log(`${failureLabel}: ${failure.message}`);
      await notifyTestEnd('failed', failure.message, page);
      return failure;
    }
    await notifyTestEnd('passed', undefined, page);
    return null;
  };

  if (request.existingBrowser && request.existingContext && request.existingPage) {
    this.log('Using existing browser/page provided by caller - single attempt only (no internal deflaking)');
    const failure = await runOnce(request.existingBrowser, request.existingContext, request.existingPage, 'Execution failed');

    // The browser is intentionally left open: the caller owns it.
    if (failure) {
      return {
        runStatus: 'failed',
        numDeflakeRuns: 0,
        executionTime: Date.now() - startTime,
        error: failure.message
      };
    }
    return {
      runStatus: 'success',
      numDeflakeRuns: 0,
      executionTime: Date.now() - startTime
    };
  }

  // No browser supplied: create a fresh one per attempt.
  let lastError: Error | null = null;
  for (let attempt = 1; attempt <= totalAttempts; attempt++) {
    this.log(`Attempting deflake run ${attempt}/${totalAttempts}`);
    const { browser, context, page } = await this.initializeBrowser(request.playwrightConfig, request.headless, request.playwrightConfigFilePath);

    try {
      lastError = await runOnce(browser, context, page, 'Initial run failed');
    } finally {
      // Best-effort cleanup; a close failure must not change the attempt's result.
      try {
        await browser.close();
      } catch (closeError) {
        this.log(`Error closing browser: ${closeError}`, 'warn');
      }
    }

    if (!lastError) {
      return {
        runStatus: 'success',
        numDeflakeRuns: attempt - 1, // Count only deflaking runs (exclude original run)
        executionTime: Date.now() - startTime
      };
    }

    if (attempt < totalAttempts) {
      this.log(`Deflaking attempt ${attempt} failed, trying again... (${attempt + 1}/${totalAttempts})`);
    }
  }

  // All attempts failed.
  return {
    runStatus: 'failed',
    numDeflakeRuns: deflakeRunCount, // Count only deflaking runs (exclude original run)
    executionTime: Date.now() - startTime,
    error: lastError?.message || 'All deflaking attempts failed'
  };
}
372
-
373
/**
 * Execute a script step by step, letting the AI repair steps that fail.
 *
 * Flow:
 *  1. Optionally try `runExactly` first and return early if it passes.
 *  2. Obtain steps (pre-parsed from the request, or parsed from the script)
 *     and a browser (caller-supplied, or freshly created).
 *  3. Run `repairStepsWithAI`, then build the updated script.
 *  4. If at least one step succeeded, assess repair confidence, annotate and
 *     clean up the script, report credit usage, and return a
 *     `'success'`/`'partial'` repair; otherwise return a `'failed'` repair.
 *
 * Changes relative to the previous implementation:
 *  - Steps-only requests (no `request.script`) are explicitly allowed, but the
 *    old code used `request.script!` for the confidence check and the
 *    fallback `updatedScript`, passing `undefined` at runtime. The original
 *    script is now reconstructed from the step code when no script was given.
 *  - With `attemptRunExactlyFirst` and no script, `runExactly` used to throw
 *    and abort the whole repair; that pre-run is now skipped instead.
 *  - The duplicated browser/steps setup branches are merged.
 *
 * A browser created here is always closed in `finally`; a caller-supplied
 * browser is left open.
 *
 * @param request Execution request (script and/or pre-parsed steps).
 * @param startTime Epoch millis used to compute `executionTime`.
 * @param model Model identifier passed to the LLM-backed helpers.
 * @returns Response with `runStatus: 'failed'` plus repair status/details, or
 *   the `runExactly` result when the pre-run passes.
 * @throws Error if neither script content nor steps are provided.
 */
private async runWithAIRepair(request: ScriptExecutionRequest, startTime: number, model: string): Promise<ScriptExecutionResponse> {
  const repairFlexibility = request.repairFlexibility || 3;
  const attemptRunExactlyFirst = request.attemptRunExactlyFirst || false;
  const preParsedSteps = request.steps && request.steps.length > 0 ? request.steps : undefined;

  // Script content is required UNLESS pre-parsed steps are provided.
  if (!request.script && !preParsedSteps) {
    throw new Error('Script content is required for AI repair. The TestChimpService should read the file and provide script content.');
  }

  const useExistingBrowser = !!(request.existingBrowser && request.existingContext && request.existingPage);

  // Optionally try runExactly first (with deflaking if specified).
  if (attemptRunExactlyFirst) {
    if (request.script) {
      const deflakeCount = request.deflakeRunCount || 0;
      this.log(`Attempting runExactly first with ${deflakeCount} deflake attempts...`);
      const runExactlyResult = await this.runExactly({
        ...request,
        mode: ExecutionMode.RUN_EXACTLY,
        deflakeRunCount: deflakeCount
      }, startTime, model);

      if (runExactlyResult.runStatus === 'success') {
        this.log('runExactly succeeded, returning without AI repair');
        return runExactlyResult;
      }

      this.log('runExactly failed, proceeding with AI repair...');
    } else {
      // runExactly needs script text; with steps only, go straight to repair.
      this.log('Skipping runExactly first: no script content provided (pre-parsed steps only)', 'warn');
    }
  }

  this.log('Starting AI repair process...');

  let repairBrowser: any = null;
  let repairContext: any = null;
  let repairPage: any = null;

  // Fire afterEndTest without letting a misbehaving reporter affect the result.
  const notifyTestEnd = async (status: 'passed' | 'failed', errorMessage: string | undefined, page: any): Promise<void> => {
    if (!this.progressReporter?.afterEndTest) {
      return;
    }
    try {
      await this.progressReporter.afterEndTest(status, errorMessage, page);
    } catch (callbackError) {
      this.log(`afterEndTest callback failed: ${callbackError}`, 'warn');
    }
  };

  try {
    let steps: (ScriptStep & { success?: boolean; error?: string })[];

    if (useExistingBrowser) {
      this.log('Using existing browser for AI repair...');
      repairBrowser = request.existingBrowser;
      repairContext = request.existingContext;
      repairPage = request.existingPage;

      // Pre-parsed steps preserve step IDs from the caller; otherwise parse the script.
      if (preParsedSteps) {
        this.log(`Using ${preParsedSteps.length} pre-parsed steps (IDs preserved)`);
        steps = preParsedSteps;
      } else {
        this.log('Parsing script into steps...');
        if (!request.script) {
          throw new Error('Script is required when steps are not provided');
        }
        steps = await this.parseScriptIntoSteps(request.script, model);
      }
    } else if (preParsedSteps) {
      this.log(`Using ${preParsedSteps.length} pre-parsed steps (IDs preserved)`);
      this.log('Initializing repair browser...');
      steps = preParsedSteps;
      const browserInstance = await this.initializeBrowser(request.playwrightConfig, request.headless, request.playwrightConfigFilePath);
      repairBrowser = browserInstance.browser;
      repairContext = browserInstance.context;
      repairPage = browserInstance.page;
    } else {
      // Browser startup and script parsing are independent, so run them in parallel.
      this.log('Initializing repair browser and parsing script...');
      if (!request.script) {
        throw new Error('Script is required when steps are not provided');
      }
      const [parsedSteps, browserInstance] = await Promise.all([
        this.parseScriptIntoSteps(request.script, model),
        this.initializeBrowser(request.playwrightConfig, request.headless, request.playwrightConfigFilePath)
      ]);

      steps = parsedSteps;
      repairBrowser = browserInstance.browser;
      repairContext = browserInstance.context;
      repairPage = browserInstance.page;
    }

    // Capture the "before" script now, while the step code is still untouched.
    // For steps-only requests it is rebuilt from the step code.
    const originalScript = request.script || steps.map(step => step.code).join('\n');

    if (this.progressReporter?.beforeStartTest) {
      await this.progressReporter.beforeStartTest(repairPage, repairBrowser, repairContext);
    }

    this.log('Starting AI repair with parsed steps...');
    const updatedSteps = await this.repairStepsWithAI(steps, repairPage, repairFlexibility, model, request.jobId);

    // Always generate the updated script (the original script is passed along
    // so the generator can preserve the test name).
    const updatedScript = this.generateUpdatedScript(updatedSteps, undefined, request.script);

    // Complete success = every step succeeded; partial = at least one did.
    const allStepsSuccessful = updatedSteps.length > 0 && updatedSteps.every(step => step.success);
    const hasSuccessfulRepairs = updatedSteps.some(step => step.success);

    this.log('Step success status: ' + updatedSteps.map((step, index) => `Step ${index + 1}: ${step.success ? 'SUCCESS' : 'FAILED'}`).join(', '));
    this.log(`All steps successful: ${allStepsSuccessful}`);
    this.log(`Has successful repairs: ${hasSuccessfulRepairs}`);

    updatedSteps.forEach((step, index) => {
      this.log(`Step ${index + 1} details: success=${step.success}, description="${step.description}"`);
    });

    if (!hasSuccessfulRepairs) {
      // Nothing could be repaired: hand back the original script unchanged.
      await notifyTestEnd('failed', 'AI repair could not fix any steps', repairPage);

      return {
        runStatus: 'failed', // Original script failed
        repairStatus: 'failed',
        repairConfidence: 0,
        repairAdvice: 'AI repair could not fix any steps',
        updatedScript: originalScript,
        numDeflakeRuns: 0, // Deflaking is handled by caller before calling runWithAIRepair
        executionTime: Date.now() - startTime,
        error: 'AI repair could not fix any steps'
      };
    }

    // Use the orchestrator-generated script directly; regenerating it via the
    // LLM would lose the actual repairs.
    this.log('Using orchestrator-generated script (skipping LLM regeneration to preserve repairs)');

    // For repair advice, compare original vs repaired.
    const confidenceResponse = await this.llmFacade.assessRepairConfidence(originalScript, updatedScript, model);

    // Add TestChimp comment with repair advice.
    const scriptWithAdvice = addTestChimpComment(updatedScript, confidenceResponse.advice);

    this.log('Applying final LLM polish to repaired script (minor cleanup only)...');
    const cleanupResult = await this.llmFacade.cleanupScript(scriptWithAdvice, model);

    if (cleanupResult.changes.length > 0) {
      this.log(`Script cleanup made ${cleanupResult.changes.length} minor improvements:`);
      cleanupResult.changes.forEach((change, i) => {
        this.log(` ${i + 1}. ${change}`);
      });
    } else if (cleanupResult.skipped) {
      this.log(`Script cleanup skipped: ${cleanupResult.skipped}`);
    } else {
      this.log('Script cleanup: no changes needed');
    }

    const scriptWithRepairAdvice = cleanupResult.script;

    // Fire-and-forget: a credit-reporting failure must not fail the repair.
    this.creditUsageService.reportAIRepairCredit().catch(error => {
      this.log(`Failed to report credit usage for AI repair: ${error}`, 'warn');
    });

    await notifyTestEnd(
      allStepsSuccessful ? 'passed' : 'failed',
      allStepsSuccessful ? undefined : 'Partial repair success',
      repairPage
    );

    return {
      runStatus: 'failed', // Original script failed
      repairStatus: allStepsSuccessful ? 'success' : 'partial',
      repairConfidence: confidenceResponse.confidence,
      repairAdvice: confidenceResponse.advice,
      updatedScript: scriptWithRepairAdvice, // Drop-in replacement script with TestChimp comment
      numDeflakeRuns: 0, // Deflaking is handled by caller before calling runWithAIRepair
      executionTime: Date.now() - startTime
    };
  } catch (error) {
    return {
      runStatus: 'failed',
      repairStatus: 'failed',
      numDeflakeRuns: 0, // Deflaking is handled by caller before calling runWithAIRepair
      executionTime: Date.now() - startTime,
      error: error instanceof Error ? error.message : 'Script execution failed'
    };
  } finally {
    // Close the browser only if it was created here (not provided by the caller).
    if (!useExistingBrowser && repairBrowser) {
      try {
        await repairBrowser.close();
        this.log('AI repair browser closed');
      } catch (closeError) {
        this.log(`Error closing AI repair browser: ${closeError}`, 'warn');
      }
    }
  }
}
594
-
595
/**
 * Split a script into steps, preferring the LLM parser.
 *
 * If the LLM-based parse throws for any reason, the line-based fallback
 * parser is used instead.
 *
 * @param script Full script source.
 * @param model Model identifier passed to the LLM facade.
 * @returns The parsed steps.
 */
private async parseScriptIntoSteps(script: string, model: string): Promise<(ScriptStep & { success?: boolean; error?: string })[]> {
  try {
    this.log('Attempting LLM-based script parsing...');
    const llmSteps = await this.llmFacade.parseScriptIntoSteps(script, model);
    this.log(`LLM parsing successful, got ${llmSteps.length} steps`);
    return llmSteps;
  } catch (parseFailure) {
    // Any LLM failure degrades to the local, comment-driven parser.
    this.log(`LLM parsing failed, falling back to code parsing: ${parseFailure}`);
    const localSteps = this.parseScriptIntoStepsFallback(script);
    this.log(`Fallback parsing successful, got ${localSteps.length} steps`);
    return localSteps;
  }
}
609
-
610
-
611
/**
 * Line-based step parser used when LLM parsing is unavailable.
 *
 * A line beginning with `// Step ` (after trimming) opens a new step; its
 * description is the comment text with a leading `// Step <n>:` prefix and a
 * trailing `[FAILED]` marker removed. Subsequent non-blank lines are collected
 * as that step's code, except lines starting with `import`, `test(` or `});`.
 * Lines before the first step comment are ignored, and a step is kept only if
 * `cleanStepCode` returns a non-empty result for its collected code.
 *
 * @param script Full script source.
 * @returns The steps found, in source order.
 */
private parseScriptIntoStepsFallback(script: string): (ScriptStep & { success?: boolean; error?: string })[] {
  const parsedSteps: (ScriptStep & { success?: boolean; error?: string })[] = [];
  let openStep: ScriptStep | null = null;
  let bufferedLines: string[] = [];

  // Finalize the step being collected (if any), keeping it only when it has code.
  const flushOpenStep = (): void => {
    if (!openStep) {
      return;
    }
    const cleaned = this.cleanStepCode(bufferedLines.join('\n').trim());
    if (cleaned) {
      openStep.code = cleaned;
      parsedSteps.push(openStep);
    }
  };

  for (const rawLine of script.split('\n')) {
    const text = rawLine.trim();

    if (text.startsWith('// Step ')) {
      flushOpenStep();

      const description = text.replace(/^\/\/\s*Step\s*\d+:\s*/, '').replace(/\s*\[FAILED\]\s*$/, '').trim();
      openStep = { description, code: '' };
      bufferedLines = [];
      continue;
    }

    // Skip blank lines and test scaffolding; keep everything else verbatim
    // (untrimmed) once a step has been opened.
    const isScaffolding = text.startsWith('import') || text.startsWith('test(') || text.startsWith('});');
    if (text && !isScaffolding && openStep) {
      bufferedLines.push(rawLine);
    }
  }

  flushOpenStep();

  return parsedSteps;
}
656
-
657
- private async repairStepsWithAI(
658
- steps: (ScriptStep & { success?: boolean; error?: string })[],
659
- page: any,
660
- repairFlexibility: number,
661
- model: string,
662
- jobId?: string
663
- ): Promise<(ScriptStep & { success?: boolean; error?: string })[]> {
664
- let updatedSteps = [...steps];
665
- const maxTries = 3;
666
- const recentRepairs: Array<{
667
- stepNumber: number;
668
- operation: string;
669
- originalDescription?: string;
670
- newDescription?: string;
671
- originalCode?: string;
672
- newCode?: string;
673
- }> = [];
674
-
675
- // Track actual executed steps (including agent repairs) for proper history
676
- const executedStepDescriptions: string[] = [];
677
-
678
- // Create a shared execution context that accumulates all executed code for variable tracking
679
- let executionContext = '';
680
- const contextVariables = new Map<string, any>();
681
-
682
- let i = 0;
683
- while (i < updatedSteps.length) {
684
- const step = updatedSteps[i];
685
- this.log(`Loop iteration: i=${i}, step description="${step.description}", total steps=${updatedSteps.length}`);
686
-
687
- try {
688
- // LIFECYCLE: Call beforeStepStart if provided
689
- if (this.progressReporter?.beforeStepStart) {
690
- await this.progressReporter.beforeStepStart(
691
- {
692
- stepId: step.id, // Preserve original step ID if provided
693
- stepNumber: i + 1,
694
- description: step.description,
695
- code: step.code
696
- },
697
- page
698
- );
699
- }
700
-
701
- // Try to execute the step directly without context replay
702
- this.log(`Attempting Step ${i + 1}: ${step.description}`);
703
- this.log(` Code: ${step.code}`);
704
- await this.executeStepCode(step.code, page);
705
- step.success = true;
706
- this.log(`Step ${i + 1} executed successfully: ${step.description}`);
707
- this.log(`Step ${i + 1} success status set to: ${step.success}`);
708
-
709
- // Track executed step description for agent context
710
- executedStepDescriptions.push(step.description);
711
-
712
- // Report successful step execution
713
- this.log(`DEBUG: About to check callback - progressReporter=${!!this.progressReporter}, onStepProgress=${!!this.progressReporter?.onStepProgress}, jobId=${jobId}`);
714
- if (this.progressReporter?.onStepProgress && jobId) {
715
- this.log(`DEBUG: Firing onStepProgress callback for step ${i + 1}, stepId=${step.id}`);
716
- await this.progressReporter.onStepProgress({
717
- jobId,
718
- stepId: step.id, // Preserve original step ID if provided
719
- stepNumber: i + 1,
720
- description: step.description,
721
- code: step.code,
722
- status: 'SUCCESS_STEP_EXECUTION' as any,
723
- wasRepaired: false
724
- });
725
- this.log(`DEBUG: onStepProgress callback completed for step ${i + 1}`);
726
- } else {
727
- this.log(`DEBUG: Skipping callback - conditions not met`);
728
- }
729
-
730
- // Add this step's code to the execution context for future steps (for variable tracking)
731
- executionContext += step.code + '\n';
732
- i++; // Move to next step
733
- } catch (error) {
734
- this.log(`Step ${i + 1} failed: ${step.description}`);
735
- this.log(` Failed code: ${step.code}`);
736
- this.log(` Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
737
- if (error instanceof Error && error.stack) {
738
- this.log(` Stack trace: ${error.stack}`);
739
- }
740
- step.success = false;
741
- step.error = this.safeSerializeError(error);
742
-
743
- // Use orchestrator for repair (reuses all SoM infrastructure)
744
- this.log(`Calling orchestrator in REPAIR mode for step ${i + 1}`);
745
-
746
- // Prepare repair context - use executedStepDescriptions (includes agent repairs)
747
- const priorSteps = executedStepDescriptions; // What was ACTUALLY executed (scripted + agent)
748
- const nextSteps = updatedSteps.slice(i + 1).map(s => s.description);
749
-
750
- this.log(` Prior steps executed: ${priorSteps.length}, Next steps: ${nextSteps.length}`);
751
- this.log(` Prior steps context:\n ${priorSteps.map((s, idx) => `${idx + 1}. ${s}`).join('\n ')}`);
752
-
753
- // Create minimal memory for repair
754
- const memory: JourneyMemory = {
755
- experiences: [],
756
- extractedData: {},
757
- history: [],
758
- latestNote: undefined
759
- };
760
-
761
- let repairSuccess = false;
762
-
763
- try {
764
- // Call orchestrator with repair context (page object persisted)
765
- const repairResult = await this.orchestratorAgent.executeStep(
766
- page, // Same page object (persisted state)
767
- step.description, // Goal with testdata embedded
768
- i + 1, // Current step number
769
- updatedSteps.length, // Total steps
770
- updatedSteps.map(s => s.description), // All step descriptions
771
- memory, // Memory (empty for repair)
772
- jobId || 'repair',
773
- priorSteps, // NEW: What was already completed
774
- nextSteps // NEW: What comes after this
775
- );
776
-
777
- if (repairResult.success && repairResult.commands.length > 0) {
778
- // MODIFY: Orchestrator fixed the step - replace with new code
779
- const repairedCode = repairResult.commands.join('\n');
780
-
781
- updatedSteps[i] = {
782
- ...step,
783
- code: repairedCode,
784
- success: true,
785
- error: undefined
786
- };
787
-
788
- this.log(`✓ Step ${i + 1} MODIFIED by orchestrator (repair successful)`);
789
- this.log(` Original code: ${step.code}`);
790
- this.log(` New code (${repairResult.commands.length} commands):\n ${repairResult.commands.join('\n ')}`);
791
-
792
- // Track what agent actually did in history (for future repair context)
793
- const agentActionSummary = `${step.description} [AI-repaired: ${repairResult.commands.length} commands]`;
794
- executedStepDescriptions.push(agentActionSummary);
795
-
796
- // Report repaired step
797
- if (this.progressReporter?.onStepProgress && jobId) {
798
- await this.progressReporter.onStepProgress({
799
- jobId,
800
- stepId: step.id,
801
- stepNumber: i + 1,
802
- description: updatedSteps[i].description,
803
- code: updatedSteps[i].code,
804
- status: 'SUCCESS_STEP_EXECUTION' as any,
805
- wasRepaired: true
806
- });
807
- }
808
-
809
- // Ensure page is stable after agent repairs before returning control to script
810
- this.log(`Waiting for page stability after agent repair...`);
811
- try {
812
- await page.waitForLoadState('networkidle', { timeout: 5000 });
813
- this.log(`Page stabilized (networkidle) after agent repair`);
814
- } catch (stabilityError) {
815
- try {
816
- await page.waitForLoadState('domcontentloaded', { timeout: 3000 });
817
- this.log(`Page loaded (domcontentloaded) after agent repair`);
818
- } catch (fallbackError) {
819
- this.log(`Page stability wait timed out (continuing anyway)`, 'warn');
820
- }
821
- }
822
-
823
- repairSuccess = true;
824
- i++; // Continue to NEXT step (hand control back to script)
825
-
826
- } else if (repairResult.success && repairResult.commands.length === 0) {
827
- // DELETE: Step goal already achieved or no longer needed (e.g., modal already dismissed)
828
- this.log(`✓ Step ${i + 1} DELETED by orchestrator (goal already achieved, step obsolete)`);
829
- this.log(` Reason: Orchestrator completed with 0 commands - step no longer needed`);
830
-
831
- // Track deletion in history (helps agent understand what was skipped)
832
- executedStepDescriptions.push(`${step.description} [AI-deleted: step obsolete/already done]`);
833
-
834
- // Remove the step from array
835
- updatedSteps.splice(i, 1);
836
-
837
- repairSuccess = true;
838
- // Don't increment i - next step moved to current position
839
-
840
- } else {
841
- this.log(`✗ Step ${i + 1} could not be repaired by orchestrator (reason: ${repairResult.terminationReason})`);
842
- }
843
- } catch (repairError: any) {
844
- this.log(`✗ Orchestrator repair failed: ${repairError.message}`);
845
- }
846
-
847
- // Legacy repair code removed - now using orchestrator
848
-
849
- if (!repairSuccess) {
850
- this.log(`Step ${i + 1} could not be repaired - stopping execution`);
851
- break;
852
- }
853
- }
854
- }
855
-
856
- return updatedSteps;
857
- }
858
-
859
/**
 * Runs the code of a single step against `page`.
 *
 * The code is wrapped in an async arrow function created through
 * `new Function`, so it may use `await` and is handed exactly two bindings:
 * `page` and `expect` (taken from `@playwright/test`).
 *
 * @param code Source text of the step.
 * @param page Object handed to the step code as `page`; `setDefaultTimeout` is called on it.
 * @throws Error when `cleanStepCode` returns nothing runnable; any error thrown
 *         (or rejection produced) by the step code itself also propagates.
 */
private async executeStepCode(code: string, page: any): Promise<void> {
  // 5 second default so a wrong selector fails quickly; navigation operations
  // are expected to carry their own, longer, explicit timeouts in generated code.
  page.setDefaultTimeout(5000);

  try {
    const runnable = this.cleanStepCode(code);
    if (!runnable || runnable.trim().length === 0) {
      throw new Error('Step code is empty or contains only comments');
    }

    // Loaded only once there is something to run.
    const { expect } = require('@playwright/test');

    // Build the wrapper and wait for the step to settle.
    const stepFn = new Function('page', 'expect', `return (async () => { ${runnable} })()`);
    await stepFn(page, expect);
  } finally {
    // Re-apply the 5 second default no matter how the step ended.
    page.setDefaultTimeout(5000);
  }
}
884
-
885
/**
 * Checks that step code contains something that looks executable.
 *
 * The code itself is never modified: comments are preserved and the input is
 * returned unchanged when it passes the check.
 *
 * NOTE: the check is purely textual. It looks for an identifier followed by
 * `(`, or for `await`, `return`, `if (`, `for (`, `while (`, `switch (`,
 * `try {` or `catch (` anywhere in the text, including inside comments.
 *
 * @param code Source text of a step.
 * @returns `code` as given when the pattern matches; `''` when `code` is
 *          empty, whitespace-only, or has no match.
 */
private cleanStepCode(code: string): string {
  const executablePattern = /[a-zA-Z_$][a-zA-Z0-9_$]*\s*\(|await\s+|return\s+|if\s*\(|for\s*\(|while\s*\(|switch\s*\(|try\s*\{|catch\s*\(/;

  const isBlank = !code || code.trim().length === 0;

  return !isBlank && executablePattern.test(code) ? code : '';
}
902
-
903
// Legacy repair helper methods (now unused but kept for compilation)

/** Legacy stub: always returns an empty string. */
private buildFailureHistory(): string {
  return '';
}
905
/** Legacy stub: always returns an empty string. */
private buildRecentRepairsContext(): string {
  return '';
}
906
/** Legacy stub: always resolves to `{ success: false }` with no `error` set. */
private async applyRepairActionInContext(): Promise<{ success: boolean; error?: string }> {
  const notApplied = { success: false };
  return notApplied;
}
909
-
910
/**
 * Rebuilds a Playwright test script from a list of steps.
 *
 * The output has one `import` line, one `test('<name>', ...)` wrapper, and for
 * every step with non-blank code a `// <description>` line followed by the
 * step's code lines. The assembled text is then passed to
 * `addTestChimpComment` together with `repairAdvice` and the extracted
 * hashtags, and that call's result is returned.
 *
 * @param steps Steps to emit; steps whose `code` is missing or blank are skipped.
 * @param repairAdvice Forwarded unchanged to `addTestChimpComment`.
 * @param originalScript Optional earlier script. The test name is taken from its
 *        first `test('...')` / `test("...")` call and the hashtags from its first
 *        run of `#word` tokens. Without it the name is `repairedTest` and there
 *        are no hashtags.
 * @returns Whatever `addTestChimpComment` returns for the assembled script.
 */
private generateUpdatedScript(steps: (ScriptStep & { success?: boolean; error?: string })[], repairAdvice?: string, originalScript?: string): string {
  // Holds the body of the single-quoted name literal emitted below.
  let testName = 'repairedTest';
  let hashtags: string[] = [];

  if (originalScript) {
    // The closing quote must be the same character as the opening one, and
    // backslash escapes are stepped over, so an apostrophe inside a
    // double-quoted name or an escaped quote no longer cuts the name short.
    const testNameMatch = originalScript.match(/test\((['"])((?:\\[\s\S]|(?!\1)[^\\\r\n])+)\1/);
    if (testNameMatch) {
      const quote = testNameMatch[1];
      const rawName = testNameMatch[2];
      if (quote === "'") {
        // Already valid inside single quotes; keep the text verbatim.
        testName = rawName;
      } else {
        // Re-encode a double-quoted body for single quotes: `\"` becomes `"`,
        // a bare `'` becomes `\'`, every other escape is kept as written.
        testName = rawName.replace(/\\[\s\S]|'/g, token => {
          if (token === '\\"') {
            return '"';
          }
          return token === "'" ? "\\'" : token;
        });
      }
    }

    // Extract hashtags from TestChimp comment.
    // NOTE(review): this takes the first `#word` run anywhere in the script, so
    // it presumably relies on the TestChimp comment preceding any `#id`
    // selectors. Confirm against addTestChimpComment's output format.
    const hashtagMatch = originalScript.match(/#\w+(?:\s+#\w+)*/);
    if (hashtagMatch) {
      hashtags = hashtagMatch[0].split(/\s+/).filter(tag => tag.startsWith('#'));
    }
  }

  const scriptLines = [
    "import { test, expect } from '@playwright/test';",
    `test('${testName}', async ({ page, browser, context }) => {`
  ];

  for (const step of steps) {
    // Only add the step if it has code to execute.
    if (!step.code || step.code.trim().length === 0) {
      continue;
    }
    scriptLines.push(` // ${step.description}`);
    for (const line of step.code.split('\n')) {
      scriptLines.push(` ${line}`);
    }
  }

  scriptLines.push('});');
  const script = scriptLines.join('\n');

  // Add TestChimp comment with hashtags and repair advice.
  return addTestChimpComment(script, repairAdvice, hashtags);
}
950
-
951
-
952
/**
 * Initializes a browser by delegating to the module-level `initializeBrowser`
 * utility, passing this instance's `logger` as the last argument.
 *
 * @param playwrightConfig Forwarded unchanged to the utility.
 * @param headless Forwarded unchanged to the utility.
 * @param playwrightConfigFilePath Forwarded unchanged to the utility.
 * @returns The `{ browser, context, page }` value produced by the utility.
 */
private async initializeBrowser(playwrightConfig?: string, headless?: boolean, playwrightConfigFilePath?: string): Promise<{ browser: any; context: any; page: any }> {
  const session = await initializeBrowser(playwrightConfig, headless, playwrightConfigFilePath, this.logger);
  return session;
}
958
-
959
/**
 * Turns an arbitrary thrown value into a string without serializing the whole
 * value.
 *
 * - `Error` instances: their `message`.
 * - Strings: returned as they are.
 * - Other non-null objects: JSON of the truthy `message`, `name`, `code` and
 *   `status` properties, plus `stack` when it is a non-empty string.
 * - Anything else: `String(error)`.
 *
 * @param error The value to describe.
 * @returns The description; if building it throws, `Error: ` followed by
 *          `String(error)`.
 */
private safeSerializeError(error: any): string {
  try {
    if (error instanceof Error) {
      return error.message;
    }
    if (typeof error === 'string') {
      return error;
    }
    if (typeof error !== 'object' || error === null) {
      return String(error);
    }

    // Copy only a fixed set of fields instead of serializing the entire object.
    const picked: any = {};
    for (const key of ['message', 'name', 'code', 'status']) {
      if (error[key]) picked[key] = error[key];
    }

    // The stack is copied only when it is a non-empty string.
    if (error.stack && typeof error.stack === 'string') {
      picked.stack = error.stack;
    }

    return JSON.stringify(picked);
  } catch (serializationError) {
    // Even the selective copy failed; fall back to a basic string form.
    return `Error: ${String(error)}`;
  }
}
996
- }