testchimp-runner-core 0.0.35 → 0.0.36

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (71)
  1. package/package.json +6 -1
  2. package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
  3. package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
  4. package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
  5. package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
  6. package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
  7. package/plandocs/INTEGRATION_COMPLETE.md +0 -322
  8. package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
  9. package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
  10. package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
  11. package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
  12. package/plandocs/PHASE_1_COMPLETE.md +0 -165
  13. package/plandocs/PHASE_1_SUMMARY.md +0 -184
  14. package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
  15. package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
  16. package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
  17. package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
  18. package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
  19. package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
  20. package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
  21. package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
  22. package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
  23. package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
  24. package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
  25. package/plandocs/exploratory-mode-support-v2.plan.md +0 -953
  26. package/plandocs/exploratory-mode-support.plan.md +0 -928
  27. package/plandocs/journey-id-tracking-addendum.md +0 -227
  28. package/releasenotes/RELEASE_0.0.26.md +0 -165
  29. package/releasenotes/RELEASE_0.0.27.md +0 -236
  30. package/releasenotes/RELEASE_0.0.28.md +0 -286
  31. package/src/auth-config.ts +0 -84
  32. package/src/credit-usage-service.ts +0 -188
  33. package/src/env-loader.ts +0 -103
  34. package/src/execution-service.ts +0 -996
  35. package/src/file-handler.ts +0 -104
  36. package/src/index.ts +0 -432
  37. package/src/llm-facade.ts +0 -821
  38. package/src/llm-provider.ts +0 -53
  39. package/src/model-constants.ts +0 -35
  40. package/src/orchestrator/decision-parser.ts +0 -139
  41. package/src/orchestrator/index.ts +0 -58
  42. package/src/orchestrator/orchestrator-agent.ts +0 -1282
  43. package/src/orchestrator/orchestrator-prompts.ts +0 -786
  44. package/src/orchestrator/page-som-handler.ts +0 -1565
  45. package/src/orchestrator/som-types.ts +0 -188
  46. package/src/orchestrator/tool-registry.ts +0 -184
  47. package/src/orchestrator/tools/check-page-ready.ts +0 -75
  48. package/src/orchestrator/tools/extract-data.ts +0 -92
  49. package/src/orchestrator/tools/index.ts +0 -15
  50. package/src/orchestrator/tools/inspect-page.ts +0 -42
  51. package/src/orchestrator/tools/recall-history.ts +0 -72
  52. package/src/orchestrator/tools/refresh-som-markers.ts +0 -69
  53. package/src/orchestrator/tools/take-screenshot.ts +0 -128
  54. package/src/orchestrator/tools/verify-action-result.ts +0 -159
  55. package/src/orchestrator/tools/view-previous-screenshot.ts +0 -103
  56. package/src/orchestrator/types.ts +0 -291
  57. package/src/playwright-mcp-service.ts +0 -224
  58. package/src/progress-reporter.ts +0 -144
  59. package/src/prompts.ts +0 -842
  60. package/src/providers/backend-proxy-llm-provider.ts +0 -91
  61. package/src/providers/local-llm-provider.ts +0 -38
  62. package/src/scenario-service.ts +0 -252
  63. package/src/scenario-worker-class.ts +0 -1110
  64. package/src/script-utils.ts +0 -203
  65. package/src/types.ts +0 -239
  66. package/src/utils/browser-utils.ts +0 -348
  67. package/src/utils/coordinate-converter.ts +0 -162
  68. package/src/utils/page-info-retry.ts +0 -65
  69. package/src/utils/page-info-utils.ts +0 -285
  70. package/testchimp-runner-core-0.0.35.tgz +0 -0
  71. package/tsconfig.json +0 -19
package/src/llm-facade.ts DELETED
@@ -1,821 +0,0 @@
- import { PROMPTS } from './prompts';
- import { PageInfo } from './utils/page-info-utils';
- import { StepOperation } from './types';
- import { DEFAULT_MODEL, DEFAULT_SIMPLER_MODEL, VISION_MODEL } from './model-constants';
- import { LLMProvider, LLMRequest, LLMResponse } from './llm-provider';
-
- // LLM Response interfaces
- export interface LLMScenarioBreakdownResponse {
-   steps: string[];
- }
-
- export interface LLMPlaywrightCommandResponse {
-   command: string;
-   reasoning?: string;
- }
-
- export interface LLMTestNameResponse {
-   testName: string;
- }
-
- export interface RepairSuggestionResponse {
-   shouldContinue: boolean;
-   reason: string;
-   action: {
-     operation: StepOperation;
-     stepIndex?: number;
-     newStep?: {
-       description: string;
-       code: string;
-     };
-     insertAfterIndex?: number;
-   };
- }
-
- export interface RepairConfidenceResponse {
-   confidence: number;
-   advice: string;
- }
-
- export interface GoalCompletionResponse {
-   isComplete: boolean;
-   reason: string;
-   nextSubGoal?: string;
- }
-
- export interface ScreenshotNeedResponse {
-   needsScreenshot: boolean;
-   reason: string;
-   alternativeApproach?: string;
- }
-
- export interface VisionDiagnosticResponse {
-   visualAnalysis: string; // What the supervisor sees in the screenshot
-   rootCause: string; // Why previous attempts failed
-   specificInstructions: string; // Exact instructions for the worker agent
-   recommendedApproach: string; // What strategy to use (selector-based, state-based, etc.)
-   elementsFound: string[]; // What elements are actually visible
-   elementsNotFound: string[]; // What elements were expected but not visible
- }
-
- export interface ScenarioStep {
-   stepNumber: number;
-   description: string;
-   playwrightCommand?: string;
-   success?: boolean;
-   error?: string;
-   retryCount?: number;
-   attempts?: Array<{
-     attemptNumber: number;
-     command?: string;
-     success: boolean;
-     error?: string;
-     timestamp: number;
-   }>;
- }
-
- export class LLMFacade {
-   public llmProvider: LLMProvider; // Expose for orchestrator direct access
-   private logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
-   private tokenUsageCallback?: (inputTokens: number, outputTokens: number, includesImage: boolean) => void;
-
-   constructor(llmProvider: LLMProvider) {
-     this.llmProvider = llmProvider;
-     this.log('LLMFacade initialized with pluggable LLM provider');
-   }
-
-   /**
-    * Set token usage callback for tracking
-    */
-   setTokenUsageCallback(callback: (inputTokens: number, outputTokens: number, includesImage: boolean) => void): void {
-     this.tokenUsageCallback = callback;
-   }
-
-   /**
-    * Set a logger callback for capturing execution logs
-    */
-   setLogger(logger: (message: string, level?: 'log' | 'error' | 'warn') => void): void {
-     this.logger = logger;
-     this.llmProvider.setLogger?.(logger);
-   }
-
-   /**
-    * Log a message using the configured logger
-    */
-   private log(message: string, level: 'log' | 'error' | 'warn' = 'log'): void {
-     if (this.logger) {
-       this.logger(message, level);
-     }
-     // Console fallback for debug visibility
-     if (level === 'error') {
-       console.error(message);
-     } else if (level === 'warn') {
-       console.warn(message);
-     } else {
-       console.log(message);
-     }
-   }
-
-   private async callLLM(request: LLMRequest): Promise<LLMResponse> {
-     try {
-       const response = await this.llmProvider.callLLM(request);
-
-       // Report token usage if callback is set
-       if (response.usage && this.tokenUsageCallback) {
-         this.tokenUsageCallback(
-           response.usage.inputTokens,
-           response.usage.outputTokens,
-           !!request.imageUrl
-         );
-       }
-
-       return response;
-     } catch (error: any) {
-       // Let provider handle its own error messages, just re-throw
-       this.log(`LLM call failed: ${error}`, 'error');
-       throw error;
-     }
-   }
-
-   /**
-    * Generate a test name from scenario description
-    */
-   async generateTestName(scenario: string, model: string = DEFAULT_SIMPLER_MODEL): Promise<string> {
-     this.log('Generating test name with LLM...');
-
-     const request: LLMRequest = {
-       model,
-       systemPrompt: PROMPTS.TEST_NAME_GENERATION.SYSTEM,
-       userPrompt: PROMPTS.TEST_NAME_GENERATION.USER(scenario)
-     };
-
-     try {
-       const response = await this.callLLM(request);
-       const testNameResponse = JSON.parse(response.answer) as LLMTestNameResponse;
-       return testNameResponse.testName;
-     } catch (error) {
-       this.log(`Failed to generate test name: ${error}`, 'error');
-       // Fallback to a simple generated name
-       return `Test: ${scenario.substring(0, 50)}...`;
-     }
-   }
-
-   /**
-    * Generate hashtags for semantic grouping
-    */
-   async generateHashtags(scenario: string, model: string = DEFAULT_SIMPLER_MODEL): Promise<string[]> {
-     this.log('Generating hashtags with LLM...');
-
-     const request: LLMRequest = {
-       model,
-       systemPrompt: PROMPTS.HASHTAG_GENERATION.SYSTEM,
-       userPrompt: PROMPTS.HASHTAG_GENERATION.USER(scenario)
-     };
-
-     try {
-       const response = await this.callLLM(request);
-       const hashtagResponse = JSON.parse(response.answer) as { hashtags: string[] };
-       return hashtagResponse.hashtags || [];
-     } catch (error) {
-       this.log(`Failed to generate hashtags: ${error}`, 'error');
-       // Fallback to empty array
-       return [];
-     }
-   }
-
-   /**
-    * Check if a goal has been completed based on actions taken and current page state
-    */
-   async checkGoalCompletion(
-     goalDescription: string,
-     completedActions: string[],
-     pageInfo: any,
-     model: string = DEFAULT_MODEL
-   ): Promise<GoalCompletionResponse> {
-     this.log('Checking goal completion with LLM...');
-
-     const request: LLMRequest = {
-       model,
-       systemPrompt: PROMPTS.GOAL_COMPLETION_CHECK.SYSTEM,
-       userPrompt: PROMPTS.GOAL_COMPLETION_CHECK.USER(goalDescription, completedActions, pageInfo)
-     };
-
-     try {
-       const response = await this.callLLM(request);
-       return JSON.parse(response.answer) as GoalCompletionResponse;
-     } catch (error) {
-       this.log(`Failed to check goal completion: ${error}`, 'error');
-       // Conservative fallback - assume not complete if we can't determine
-       return {
-         isComplete: false,
-         reason: 'Error checking completion status'
-       };
-     }
-   }
-
-   /**
-    * Check goal completion with visual verification (uses vision model)
-    */
-   async checkGoalCompletionWithVision(
-     goalDescription: string,
-     completedActions: string[],
-     pageInfo: any,
-     imageDataUrl: string,
-     model: string = VISION_MODEL
-   ): Promise<GoalCompletionResponse> {
-     this.log(`👔 Checking goal completion with vision (${model})...`);
-
-     const request: LLMRequest = {
-       model,
-       systemPrompt: `You are checking if a test automation goal has been completed by analyzing both DOM state and visual appearance.
-
- CRITICAL: For action goals (login, submit, click, navigate), check if the PRIMARY ACTION and its SIDE EFFECTS are complete:
- - "Login" = Fill fields AND click button AND verify navigation/page change
- - "Submit form" = Fill fields AND click submit AND verify submission (success message/page change)
- - "Click X" = Click X AND verify expected page change or UI update
-
- For verification goals (verify, check, confirm), verify the VISUAL PRESENCE of expected elements.`,
-       userPrompt: `GOAL: ${goalDescription}
-
- ACTIONS COMPLETED:
- ${completedActions.map((action, i) => `${i + 1}. ${action}`).join('\n')}
-
- CURRENT PAGE STATE:
- URL: ${pageInfo.url}
- Title: ${pageInfo.title}
- Interactive Elements:
- ${pageInfo.formattedElements}
-
- Based on the screenshot AND page state, is this goal COMPLETE?
-
- Respond ONLY with valid JSON:
- {
- "isComplete": true/false,
- "reason": "Brief explanation based on what you SEE in the screenshot and DOM",
- "nextSubGoal": "If incomplete, what specific next action is needed?"
- }`,
-       imageUrl: imageDataUrl
-     };
-
-     try {
-       const response = await this.callLLM(request);
-       const parsed = JSON.parse(response.answer) as GoalCompletionResponse;
-       this.log(`👔 Vision goal check result: ${parsed.isComplete ? 'COMPLETE ✅' : 'INCOMPLETE ❌'}`);
-       return parsed;
-     } catch (error) {
-       this.log(`Failed to check goal completion with vision: ${error}`, 'error');
-       // Conservative fallback - assume not complete if we can't determine
-       return {
-         isComplete: false,
-         reason: 'Error checking completion status with vision'
-       };
-     }
-   }
-
-   /**
-    * Ask LLM if a screenshot would help debug the current failure
-    */
-   async assessScreenshotNeed(
-     stepDescription: string,
-     errorMessage: string,
-     attemptCount: number,
-     pageInfo: any,
-     model: string = DEFAULT_SIMPLER_MODEL
-   ): Promise<ScreenshotNeedResponse> {
-     this.log('Assessing screenshot need with LLM...');
-
-     const request: LLMRequest = {
-       model,
-       systemPrompt: PROMPTS.SCREENSHOT_NEED_ASSESSMENT.SYSTEM,
-       userPrompt: PROMPTS.SCREENSHOT_NEED_ASSESSMENT.USER(stepDescription, errorMessage, attemptCount, pageInfo)
-     };
-
-     try {
-       const response = await this.callLLM(request);
-       return JSON.parse(response.answer) as ScreenshotNeedResponse;
-     } catch (error) {
-       this.log(`Failed to assess screenshot need: ${error}`, 'error');
-       // Conservative fallback - don't use expensive screenshot unless we're sure
-       return {
-         needsScreenshot: false,
-         reason: 'Error assessing need, defaulting to no screenshot'
-       };
-     }
-   }
-
-   /**
-    * Get diagnostic analysis from screenshot (supervisor role)
-    */
-   async getVisionDiagnostics(
-     stepDescription: string,
-     pageInfo: any,
-     previousSteps: any[],
-     lastError: string | undefined,
-     imageDataUrl: string,
-     model: string = VISION_MODEL
-   ): Promise<VisionDiagnosticResponse> {
-     this.log('👔 SUPERVISOR: Analyzing screenshot for diagnostic insights...');
-
-     const previousCommands = previousSteps
-       .map(s => s.playwrightCommand)
-       .filter(Boolean)
-       .join('\n');
-
-     const attemptHistory = previousSteps.length > 0
-       ? `Previous attempts context: ${previousSteps.length} commands executed`
-       : '';
-
-     const errorContext = lastError
-       ? `Last Error: ${lastError}`
-       : '';
-
-     const request: LLMRequest = {
-       model,
-       systemPrompt: PROMPTS.VISION_DIAGNOSTIC_ANALYSIS.SYSTEM,
-       userPrompt: PROMPTS.VISION_DIAGNOSTIC_ANALYSIS.USER(
-         stepDescription,
-         pageInfo,
-         previousCommands,
-         attemptHistory,
-         errorContext
-       ),
-       imageUrl: imageDataUrl
-     };
-
-     try {
-       const response = await this.callLLM(request);
-       const diagnostics = JSON.parse(response.answer) as VisionDiagnosticResponse;
-
-       // Log supervisor's findings
-       this.log(`👔 SUPERVISOR ANALYSIS:`);
-       this.log(` 📸 Visual: ${diagnostics.visualAnalysis}`);
-       this.log(` 🔍 Root cause: ${diagnostics.rootCause}`);
-       this.log(` 📋 Instructions: ${diagnostics.specificInstructions}`);
-       this.log(` 💡 Approach: ${diagnostics.recommendedApproach}`);
-       if (diagnostics.elementsFound?.length > 0) {
-         this.log(` ✅ Elements found: ${diagnostics.elementsFound.join(', ')}`);
-       }
-       if (diagnostics.elementsNotFound?.length > 0) {
-         this.log(` ❌ Elements NOT found: ${diagnostics.elementsNotFound.join(', ')}`);
-       }
-
-       return diagnostics;
-     } catch (error) {
-       this.log(`Failed to get vision diagnostics: ${error}`, 'error');
-       throw new Error(`Vision diagnostic analysis failed: ${error}`);
-     }
-   }
-
-   /**
-    * Generate command based on supervisor's instructions
-    */
-   async generateCommandFromSupervisorInstructions(
-     stepDescription: string,
-     supervisorDiagnostics: VisionDiagnosticResponse,
-     pageInfo: any,
-     model: string = DEFAULT_MODEL
-   ): Promise<string> {
-     this.log('🔨 WORKER: Generating command based on supervisor instructions...');
-
-     const request: LLMRequest = {
-       model,
-       systemPrompt: PROMPTS.PLAYWRIGHT_COMMAND_WITH_SUPERVISOR.SYSTEM,
-       userPrompt: PROMPTS.PLAYWRIGHT_COMMAND_WITH_SUPERVISOR.USER(
-         stepDescription,
-         supervisorDiagnostics.specificInstructions,
-         supervisorDiagnostics.visualAnalysis,
-         supervisorDiagnostics.elementsFound || [],
-         supervisorDiagnostics.elementsNotFound || [],
-         pageInfo
-       )
-     };
-
-     try {
-       const response = await this.callLLM(request);
-       const commandResponse = JSON.parse(response.answer) as LLMPlaywrightCommandResponse;
-
-       if (commandResponse.reasoning) {
-         this.log(`🔨 WORKER reasoning: ${commandResponse.reasoning}`);
-       }
-
-       return commandResponse.command;
-     } catch (error) {
-       this.log(`Failed to generate command from supervisor instructions: ${error}`, 'error');
-       throw new Error(`Command generation from supervisor instructions failed: ${error}`);
-     }
-   }
-
-   /**
-    * Generate Playwright command with vision (uses vision model)
-    */
-   async generatePlaywrightCommandWithVision(
-     stepDescription: string,
-     pageInfo: any,
-     previousSteps: any[],
-     lastError: string | undefined,
-     imageDataUrl: string, // Full data URL: data:image/png;base64,...
-     model: string = VISION_MODEL
-   ): Promise<string> {
-     this.log(`⚠️ USING VISION MODE (${model})...`);
-
-     const previousCommands = previousSteps
-       .map(s => s.playwrightCommand)
-       .filter(Boolean)
-       .join('\n');
-
-     const attemptHistory = previousSteps.length > 0
-       ? `Previous attempts context: ${previousSteps.length} commands executed`
-       : '';
-
-     const errorContext = lastError
-       ? `Last Error: ${lastError}`
-       : '';
-
-     const request: LLMRequest = {
-       model,
-       systemPrompt: PROMPTS.PLAYWRIGHT_COMMAND_WITH_VISION.SYSTEM,
-       userPrompt: PROMPTS.PLAYWRIGHT_COMMAND_WITH_VISION.USER(
-         stepDescription,
-         pageInfo,
-         previousCommands,
-         attemptHistory,
-         errorContext
-       ),
-       imageUrl: imageDataUrl // Full data URL constructed by client
-     };
-
-     try {
-       const response = await this.callLLM(request);
-       const commandResponse = JSON.parse(response.answer) as LLMPlaywrightCommandResponse & {
-         visualInsights?: string;
-         failureRootCause?: string;
-         recommendedAlternative?: string;
-       };
-
-       // Log diagnostic insights from vision analysis
-       if (commandResponse.visualInsights) {
-         this.log(`📸 Visual insights: ${commandResponse.visualInsights}`);
-       }
-
-       if (commandResponse.failureRootCause) {
-         this.log(`🔍 Root cause analysis: ${commandResponse.failureRootCause}`);
-       }
-
-       if (commandResponse.recommendedAlternative) {
-         this.log(`💡 Recommended alternative: ${commandResponse.recommendedAlternative}`);
-       }
-
-       if (commandResponse.reasoning) {
-         this.log(`🧠 Vision-based reasoning: ${commandResponse.reasoning}`);
-       }
-
-       return commandResponse.command;
-     } catch (error) {
-       this.log(`Failed to generate command with vision: ${error}`, 'error');
-       throw new Error(`Vision-enhanced command generation failed: ${error}`);
-     }
-   }
-
-   /**
-    * Break down scenario into steps
-    */
-   async breakdownScenario(scenario: string, model: string = DEFAULT_SIMPLER_MODEL): Promise<ScenarioStep[]> {
-     this.log('Breaking down scenario with LLM...');
-     this.log(`📝 INPUT SCENARIO: ${scenario}`);
-
-     const request: LLMRequest = {
-       model,
-       systemPrompt: PROMPTS.SCENARIO_BREAKDOWN.SYSTEM,
-       userPrompt: PROMPTS.SCENARIO_BREAKDOWN.USER(scenario)
-     };
-
-     try {
-       const response = await this.callLLM(request);
-       this.log(`🤖 RAW LLM RESPONSE: ${response.answer}`);
-
-       const breakdownResponse = JSON.parse(response.answer) as LLMScenarioBreakdownResponse;
-       this.log(`📋 PARSED BREAKDOWN: ${JSON.stringify(breakdownResponse, null, 2)}`);
-
-       // Validate and clean up steps
-       const cleanedSteps = breakdownResponse.steps
-         .map(step => step.trim())
-         .filter(step => step.length > 0)
-         .slice(0, 10); // Limit to 10 steps max
-
-       this.log(`✅ CLEANED STEPS: ${JSON.stringify(cleanedSteps, null, 2)}`);
-
-       return cleanedSteps.map((desc, index) => ({
-         stepNumber: index + 1,
-         description: desc,
-       }));
-     } catch (error) {
-       this.log(`❌ Failed to breakdown scenario: ${error}`, 'error');
-       // Fallback to simple breakdown by newlines (preserves URLs)
-       const stepDescriptions = scenario.split('\n').map(s => s.trim()).filter(s => s.length > 0);
-       this.log(`🔄 FALLBACK STEPS: ${JSON.stringify(stepDescriptions, null, 2)}`);
-       return stepDescriptions.map((desc, index) => ({
-         stepNumber: index + 1,
-         description: desc,
-       }));
-     }
-   }
-
-   /**
-    * Generate Playwright command for a step
-    */
-   async generatePlaywrightCommand(
-     stepDescription: string,
-     pageInfo: PageInfo,
-     previousSteps: ScenarioStep[],
-     lastError?: string,
-     currentStep?: ScenarioStep,
-     model: string = DEFAULT_MODEL
-   ): Promise<string | null> {
-     this.log('Generating Playwright command with LLM...');
-
-     const previousCommands = previousSteps
-       .filter(s => s.playwrightCommand && s.success)
-       .map(s => `// Step ${s.stepNumber}: ${s.description}\n${s.playwrightCommand}`)
-       .join('\n');
-
-     // Build comprehensive attempt history for current step
-     const attemptHistory = this.buildAttemptHistory(currentStep);
-
-     // Provide raw error context for LLM analysis
-     const errorContext = this.buildErrorContext(lastError, currentStep);
-
-     const prompt = PROMPTS.PLAYWRIGHT_COMMAND.USER(
-       stepDescription,
-       pageInfo,
-       previousCommands,
-       attemptHistory,
-       errorContext
-     );
-
-     const request: LLMRequest = {
-       model,
-       systemPrompt: PROMPTS.PLAYWRIGHT_COMMAND.SYSTEM,
-       userPrompt: prompt
-     };
-
-     try {
-       const response = await this.callLLM(request);
-       const commandResponse = JSON.parse(response.answer) as LLMPlaywrightCommandResponse;
-       return commandResponse.command;
-     } catch (error) {
-       this.log(`Failed to generate Playwright command: ${error}`, 'error');
-       return null;
-     }
-   }
-
-   /**
-    * Parse script into steps for AI repair
-    */
-   async parseScriptIntoSteps(script: string, model: string = DEFAULT_SIMPLER_MODEL): Promise<Array<{ description: string; code: string; success?: boolean; error?: string }>> {
-     this.log('Parsing script into steps with LLM...');
-
-     const request: LLMRequest = {
-       model,
-       systemPrompt: PROMPTS.SCRIPT_PARSING.SYSTEM,
-       userPrompt: PROMPTS.SCRIPT_PARSING.USER(script)
-     };
-
-     try {
-       const response = await this.callLLM(request);
-       this.log(`Raw LLM parsing response (first 500 chars): ${response.answer.substring(0, 500)}`);
-
-       const parsed = JSON.parse(response.answer);
-       this.log(`Parsed JSON structure: ${JSON.stringify(parsed, null, 2).substring(0, 1000)}`);
-
-       // Expect JSON object with steps array
-       if (parsed.steps && Array.isArray(parsed.steps)) {
-         this.log(`LLM parsing successful, got ${parsed.steps.length} steps`);
-         return parsed.steps;
-       } else {
-         this.log(`Unexpected LLM response format - expected {steps: [...]}: ${JSON.stringify(parsed)}`, 'error');
-         return [];
-       }
-     } catch (error) {
-       this.log(`Failed to parse LLM response as JSON: ${error}`, 'error');
-       return [];
-     }
-   }
-
-   /**
-    * Get repair suggestion for a failing step
-    */
-   async getRepairSuggestion(
-     stepDescription: string,
-     stepCode: string,
-     errorMessage: string,
-     pageInfo: PageInfo,
-     failureHistory: string,
-     recentRepairs: string,
-     model: string = DEFAULT_MODEL
-   ): Promise<RepairSuggestionResponse> {
-     const request: LLMRequest = {
-       model,
-       systemPrompt: PROMPTS.REPAIR_SUGGESTION.SYSTEM,
-       userPrompt: PROMPTS.REPAIR_SUGGESTION.USER(
-         stepDescription,
-         stepCode,
-         errorMessage,
-         pageInfo,
-         failureHistory,
-         recentRepairs,
-       )
-     };
-
-     const response = await this.callLLM(request);
-     this.log(`🤖 LLM Repair Response: ${response.answer}`);
-     const parsed = JSON.parse(response.answer) as any;
-     this.log(`🤖 Parsed Repair Action: ${JSON.stringify(parsed)}`);
-
-     // Convert string operation to enum
-     if (parsed.action && parsed.action.operation) {
-       switch (parsed.action.operation) {
-         case 'MODIFY':
-           parsed.action.operation = StepOperation.MODIFY;
-           break;
-         case 'INSERT':
-           parsed.action.operation = StepOperation.INSERT;
-           break;
-         case 'REMOVE':
-           parsed.action.operation = StepOperation.REMOVE;
-           break;
-         default:
-           parsed.action.operation = StepOperation.MODIFY;
-       }
-     }
-
-     return parsed as RepairSuggestionResponse;
-   }
-
-   /**
-    * Assess repair confidence and generate advice
-    */
-   async assessRepairConfidence(
-     originalScript: string,
-     updatedScript: string,
-     model: string = DEFAULT_SIMPLER_MODEL
-   ): Promise<RepairConfidenceResponse> {
-     const request: LLMRequest = {
-       model,
-       systemPrompt: PROMPTS.REPAIR_CONFIDENCE.SYSTEM,
-       userPrompt: PROMPTS.REPAIR_CONFIDENCE.USER(originalScript, updatedScript)
-     };
-
-     const response = await this.callLLM(request);
-     return JSON.parse(response.answer) as RepairConfidenceResponse;
-   }
-
-   /**
-    * Generate final script with repair advice
-    */
-   async generateFinalScript(
-     originalScript: string,
-     updatedScript: string,
-     newRepairAdvice: string,
-     model: string = DEFAULT_SIMPLER_MODEL
-   ): Promise<string> {
-     const request: LLMRequest = {
-       model,
-       systemPrompt: PROMPTS.FINAL_SCRIPT.SYSTEM,
-       userPrompt: PROMPTS.FINAL_SCRIPT.USER(originalScript, updatedScript, newRepairAdvice)
-     };
-
-     const response = await this.callLLM(request);
-     try {
-       const parsed = JSON.parse(response.answer);
-       return parsed.script || updatedScript;
-     } catch (error) {
-       this.log(`Failed to parse final script response: ${error}`, 'error');
-       return updatedScript;
-     }
-   }
-
-   /**
-    * Build attempt history for current step
-    */
-   private buildAttemptHistory(currentStep?: ScenarioStep): string {
-     if (!currentStep || !currentStep.attempts || currentStep.attempts.length === 0) {
-       return 'This is the first attempt for this step.';
-     }
-
-     const attempts = currentStep.attempts.map((attempt, index) => {
-       const status = attempt.success ? '✅ SUCCESS' : '❌ FAILED';
-       return `Attempt ${attempt.attemptNumber} (${status}):
- Command: ${attempt.command || 'No command generated'}
- ${attempt.error ? `Error: ${attempt.error}` : 'No error'}
- Timestamp: ${new Date(attempt.timestamp).toISOString()}`;
-     }).join('\n\n');
-
-     return `Current step attempt history:
- ${attempts}
-
- LEARNING FROM FAILURES:
- - Analyze what went wrong in each attempt
- - Try completely different approaches for failed attempts
- - If a selector failed, try alternative selectors
- - If timing failed, add proper waits
- - If element not found, try different strategies`;
-   }
-
-   /**
-    * Build error context for LLM analysis
-    */
-   private buildErrorContext(lastError?: string, currentStep?: ScenarioStep): string {
-     if (!lastError && (!currentStep || !currentStep.error)) {
-       return '';
-     }
-
-     const errors = [];
-     if (lastError) errors.push(lastError);
-     if (currentStep?.error) errors.push(currentStep.error);
-
-     const errorText = errors.join(' | ');
-
-     // Detect if we're repeatedly looking for elements that don't exist
-     const attemptedCommands = currentStep?.attempts
-       ?.map(a => a.command)
-       .filter(Boolean) || [];
-
-     const lookingForNonExistent = attemptedCommands.some(cmd =>
-       cmd?.includes('getByText') ||
-       cmd?.includes('toBeVisible') ||
-       cmd?.includes('waitFor')
-     ) && errors.some(err =>
-       err.includes('not found') ||
-       err.includes('Timeout') ||
-       err.includes('Expected: visible')
-     );
-
-     let hallucinationWarning = '';
-     if (lookingForNonExistent && attemptedCommands.length >= 2) {
-       hallucinationWarning = `
- ⚠️ HALLUCINATION ALERT:
- You've made ${attemptedCommands.length} attempts trying to find/verify elements that don't exist.
- STOP looking for these elements. They are NOT in the DOM.
- Instead:
- - Check if the goal is ALREADY COMPLETE (action succeeded = goal done)
- - Use alternative verification (state changes, network, page load)
- - Move on if the primary action succeeded
- `;
-     }
-
-     return `ERROR CONTEXT:
- Last Error: ${errorText}
- ${hallucinationWarning}
- ANALYZE THE ERROR AND ADAPT:
- - Study the error message to understand what went wrong
- - If element "not found" after 2+ attempts, it probably doesn't exist - stop looking for it
- - Try a completely different approach than what failed
- - Consider alternative selectors, timing, or interaction methods
- - Never repeat the exact same command that failed`;
-   }
-
-   /**
-    * Cleanup generated script - remove redundancies and make minor adjustments
-    */
-   async cleanupScript(script: string, model?: string): Promise<{ script: string; changes: string[]; skipped?: string }> {
-     try {
-       const response = await this.llmProvider.callLLM({
-         model: model || DEFAULT_MODEL,
-         systemPrompt: PROMPTS.SCRIPT_CLEANUP.SYSTEM,
-         userPrompt: PROMPTS.SCRIPT_CLEANUP.USER(script)
-       });
-
-       // Parse JSON response
-       const jsonMatch = response.answer.match(/\{[\s\S]*\}/);
-       if (!jsonMatch) {
-         console.log('[LLMFacade] Cleanup response not in JSON format, returning original script');
-         return { script, changes: [], skipped: 'Response not in JSON format' };
-       }
-
-       const parsed = JSON.parse(jsonMatch[0]);
-
-       // Validate response
-       if (!parsed.script) {
-         console.log('[LLMFacade] Cleanup response missing script field, returning original');
-         return { script, changes: [], skipped: 'Invalid response format' };
-       }
-
-       console.log(`[LLMFacade] Script cleanup completed. Changes: ${parsed.changes?.length || 0}`);
-       if (parsed.changes && parsed.changes.length > 0) {
-         parsed.changes.forEach((change: string, i: number) => {
-           console.log(`[LLMFacade] ${i + 1}. ${change}`);
-         });
-       }
-
-       return {
-         script: parsed.script,
-         changes: parsed.changes || [],
-         skipped: parsed.skipped
-       };
-     } catch (error: any) {
-       console.error('[LLMFacade] Script cleanup failed:', error.message);
-       // Return original script on error
-       return { script, changes: [], skipped: `Error: ${error.message}` };
-     }
-   }
- }
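
Note for anyone pinned to 0.0.35: this release removes LLMFacade (and the rest of src/) from the published package. As a minimal sketch of how the deleted class was wired up, the TypeScript below reconstructs the provider contract from the facade's own call sites. The LLMRequest/LLMResponse/LLMProvider shapes are inferred (the real definitions lived in src/llm-provider.ts, also deleted in this version), and EchoProvider with its canned JSON reply is a hypothetical stand-in, not anything the package ships.

// Assumed shapes, inferred from how LLMFacade (deleted above) uses its provider.
interface LLMRequest {
  model: string;
  systemPrompt: string;
  userPrompt: string;
  imageUrl?: string; // full data URL when a screenshot accompanies the prompt
}

interface LLMResponse {
  answer: string; // the facade JSON.parse()s this field
  usage?: { inputTokens: number; outputTokens: number };
}

interface LLMProvider {
  callLLM(request: LLMRequest): Promise<LLMResponse>;
  setLogger?(logger: (message: string, level?: 'log' | 'error' | 'warn') => void): void;
}

// Hypothetical provider returning a canned scenario breakdown, so the
// facade's breakdownScenario() path can be exercised offline.
class EchoProvider implements LLMProvider {
  async callLLM(request: LLMRequest): Promise<LLMResponse> {
    return {
      answer: JSON.stringify({ steps: ['Navigate to the app', 'Click "Sign in"'] }),
      usage: { inputTokens: request.userPrompt.length, outputTokens: 32 },
    };
  }
}

// Wiring as the deleted facade expected: provider in, logger and token callback attached.
const facade = new LLMFacade(new EchoProvider()); // LLMFacade as defined in the diff above
facade.setLogger((message, level = 'log') => console[level](message));
facade.setTokenUsageCallback((inputTokens, outputTokens, includesImage) => {
  console.log(`tokens: in=${inputTokens} out=${outputTokens} image=${includesImage}`);
});
facade.breakdownScenario('Log in and verify the dashboard loads')
  .then(steps => console.log(steps)); // [{ stepNumber: 1, description: '...' }, ...]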