testchimp-runner-core 0.0.34 → 0.0.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/dist/execution-service.d.ts +1 -4
  2. package/dist/execution-service.d.ts.map +1 -1
  3. package/dist/execution-service.js +155 -468
  4. package/dist/execution-service.js.map +1 -1
  5. package/dist/index.d.ts +3 -1
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +11 -1
  8. package/dist/index.js.map +1 -1
  9. package/dist/orchestrator/decision-parser.d.ts +18 -0
  10. package/dist/orchestrator/decision-parser.d.ts.map +1 -0
  11. package/dist/orchestrator/decision-parser.js +127 -0
  12. package/dist/orchestrator/decision-parser.js.map +1 -0
  13. package/dist/orchestrator/index.d.ts +4 -2
  14. package/dist/orchestrator/index.d.ts.map +1 -1
  15. package/dist/orchestrator/index.js +14 -2
  16. package/dist/orchestrator/index.js.map +1 -1
  17. package/dist/orchestrator/orchestrator-agent.d.ts +17 -14
  18. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  19. package/dist/orchestrator/orchestrator-agent.js +534 -204
  20. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  21. package/dist/orchestrator/orchestrator-prompts.d.ts +14 -2
  22. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
  23. package/dist/orchestrator/orchestrator-prompts.js +529 -247
  24. package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
  25. package/dist/orchestrator/page-som-handler.d.ts +106 -0
  26. package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
  27. package/dist/orchestrator/page-som-handler.js +1353 -0
  28. package/dist/orchestrator/page-som-handler.js.map +1 -0
  29. package/dist/orchestrator/som-types.d.ts +149 -0
  30. package/dist/orchestrator/som-types.d.ts.map +1 -0
  31. package/dist/orchestrator/som-types.js +87 -0
  32. package/dist/orchestrator/som-types.js.map +1 -0
  33. package/dist/orchestrator/tool-registry.d.ts +2 -0
  34. package/dist/orchestrator/tool-registry.d.ts.map +1 -1
  35. package/dist/orchestrator/tool-registry.js.map +1 -1
  36. package/dist/orchestrator/tools/index.d.ts +4 -1
  37. package/dist/orchestrator/tools/index.d.ts.map +1 -1
  38. package/dist/orchestrator/tools/index.js +7 -2
  39. package/dist/orchestrator/tools/index.js.map +1 -1
  40. package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
  41. package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
  42. package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
  43. package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
  44. package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
  45. package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
  46. package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
  47. package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
  48. package/dist/orchestrator/types.d.ts +23 -1
  49. package/dist/orchestrator/types.d.ts.map +1 -1
  50. package/dist/orchestrator/types.js +11 -1
  51. package/dist/orchestrator/types.js.map +1 -1
  52. package/dist/scenario-service.d.ts +5 -0
  53. package/dist/scenario-service.d.ts.map +1 -1
  54. package/dist/scenario-service.js +17 -0
  55. package/dist/scenario-service.js.map +1 -1
  56. package/dist/scenario-worker-class.d.ts +4 -0
  57. package/dist/scenario-worker-class.d.ts.map +1 -1
  58. package/dist/scenario-worker-class.js +18 -3
  59. package/dist/scenario-worker-class.js.map +1 -1
  60. package/dist/testing/agent-tester.d.ts +35 -0
  61. package/dist/testing/agent-tester.d.ts.map +1 -0
  62. package/dist/testing/agent-tester.js +84 -0
  63. package/dist/testing/agent-tester.js.map +1 -0
  64. package/dist/testing/ref-translator-tester.d.ts +44 -0
  65. package/dist/testing/ref-translator-tester.d.ts.map +1 -0
  66. package/dist/testing/ref-translator-tester.js +104 -0
  67. package/dist/testing/ref-translator-tester.js.map +1 -0
  68. package/dist/utils/hierarchical-selector.d.ts +47 -0
  69. package/dist/utils/hierarchical-selector.d.ts.map +1 -0
  70. package/dist/utils/hierarchical-selector.js +212 -0
  71. package/dist/utils/hierarchical-selector.js.map +1 -0
  72. package/dist/utils/page-info-retry.d.ts +14 -0
  73. package/dist/utils/page-info-retry.d.ts.map +1 -0
  74. package/dist/utils/page-info-retry.js +60 -0
  75. package/dist/utils/page-info-retry.js.map +1 -0
  76. package/dist/utils/page-info-utils.d.ts +1 -0
  77. package/dist/utils/page-info-utils.d.ts.map +1 -1
  78. package/dist/utils/page-info-utils.js +46 -18
  79. package/dist/utils/page-info-utils.js.map +1 -1
  80. package/dist/utils/ref-attacher.d.ts +21 -0
  81. package/dist/utils/ref-attacher.d.ts.map +1 -0
  82. package/dist/utils/ref-attacher.js +149 -0
  83. package/dist/utils/ref-attacher.js.map +1 -0
  84. package/dist/utils/ref-translator.d.ts +49 -0
  85. package/dist/utils/ref-translator.d.ts.map +1 -0
  86. package/dist/utils/ref-translator.js +276 -0
  87. package/dist/utils/ref-translator.js.map +1 -0
  88. package/package.json +6 -1
  89. package/RELEASE_0.0.26.md +0 -165
  90. package/RELEASE_0.0.27.md +0 -236
  91. package/RELEASE_0.0.28.md +0 -286
  92. package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
  93. package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
  94. package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
  95. package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
  96. package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
  97. package/plandocs/INTEGRATION_COMPLETE.md +0 -322
  98. package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
  99. package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
  100. package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
  101. package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
  102. package/plandocs/PHASE_1_COMPLETE.md +0 -165
  103. package/plandocs/PHASE_1_SUMMARY.md +0 -184
  104. package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
  105. package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
  106. package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
  107. package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
  108. package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
  109. package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
  110. package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
  111. package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
  112. package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
  113. package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
  114. package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
  115. package/src/auth-config.ts +0 -84
  116. package/src/credit-usage-service.ts +0 -188
  117. package/src/env-loader.ts +0 -103
  118. package/src/execution-service.ts +0 -1413
  119. package/src/file-handler.ts +0 -104
  120. package/src/index.ts +0 -422
  121. package/src/llm-facade.ts +0 -821
  122. package/src/llm-provider.ts +0 -53
  123. package/src/model-constants.ts +0 -35
  124. package/src/orchestrator/index.ts +0 -34
  125. package/src/orchestrator/orchestrator-agent.ts +0 -862
  126. package/src/orchestrator/orchestrator-agent.ts.backup +0 -1386
  127. package/src/orchestrator/orchestrator-prompts.ts +0 -474
  128. package/src/orchestrator/tool-registry.ts +0 -182
  129. package/src/orchestrator/tools/check-page-ready.ts +0 -75
  130. package/src/orchestrator/tools/extract-data.ts +0 -92
  131. package/src/orchestrator/tools/index.ts +0 -12
  132. package/src/orchestrator/tools/inspect-page.ts +0 -42
  133. package/src/orchestrator/tools/recall-history.ts +0 -72
  134. package/src/orchestrator/tools/take-screenshot.ts +0 -128
  135. package/src/orchestrator/tools/verify-action-result.ts +0 -159
  136. package/src/orchestrator/types.ts +0 -248
  137. package/src/playwright-mcp-service.ts +0 -224
  138. package/src/progress-reporter.ts +0 -144
  139. package/src/prompts.ts +0 -842
  140. package/src/providers/backend-proxy-llm-provider.ts +0 -91
  141. package/src/providers/local-llm-provider.ts +0 -38
  142. package/src/scenario-service.ts +0 -232
  143. package/src/scenario-worker-class.ts +0 -1089
  144. package/src/script-utils.ts +0 -203
  145. package/src/types.ts +0 -239
  146. package/src/utils/browser-utils.ts +0 -348
  147. package/src/utils/coordinate-converter.ts +0 -162
  148. package/src/utils/page-info-utils.ts +0 -250
  149. package/testchimp-runner-core-0.0.33.tgz +0 -0
  150. package/tsconfig.json +0 -19
package/src/llm-facade.ts DELETED
@@ -1,821 +0,0 @@
1
- import { PROMPTS } from './prompts';
2
- import { PageInfo } from './utils/page-info-utils';
3
- import { StepOperation } from './types';
4
- import { DEFAULT_MODEL, DEFAULT_SIMPLER_MODEL, VISION_MODEL } from './model-constants';
5
- import { LLMProvider, LLMRequest, LLMResponse } from './llm-provider';
6
-
7
- // LLM Response interfaces
8
/**
 * Shape that the scenario-breakdown LLM answer is parsed into.
 */
export interface LLMScenarioBreakdownResponse {
  /** Step descriptions, one string per step. */
  steps: Array<string>;
}
11
-
12
/**
 * Shape that a command-generation LLM answer is parsed into.
 */
export interface LLMPlaywrightCommandResponse {
  /** The generated command text. */
  command: string;
  /** Optional explanation supplied alongside the command. */
  reasoning?: string;
}
16
-
17
/**
 * Shape that the test-name LLM answer is parsed into.
 */
export interface LLMTestNameResponse {
  /** Name proposed for the test. */
  testName: string;
}
20
-
21
/**
 * Shape of a repair suggestion parsed from an LLM answer.
 */
export interface RepairSuggestionResponse {
  /** Flag returned by the LLM; presumably tells the caller whether to keep repairing — confirm against callers. */
  shouldContinue: boolean;
  /** Free-text justification returned by the LLM. */
  reason: string;
  /** The edit being proposed. */
  action: {
    /** Kind of edit. */
    operation: StepOperation;
    /** NOTE(review): presumably the index of the step being edited — confirm against callers. */
    stepIndex?: number;
    /** Replacement or additional step, when one is supplied. */
    newStep?: {
      description: string;
      code: string;
    };
    /** NOTE(review): presumably the index after which `newStep` is inserted — confirm against callers. */
    insertAfterIndex?: number;
  };
}
34
-
35
/**
 * Shape that the repair-confidence LLM answer is parsed into.
 */
export interface RepairConfidenceResponse {
  /** Confidence value returned by the LLM (scale not visible here — TODO confirm against the prompt). */
  confidence: number;
  /** Advice text returned by the LLM. */
  advice: string;
}
39
-
40
/**
 * Result of asking the LLM whether a goal has been completed.
 */
export interface GoalCompletionResponse {
  /** Whether the goal is considered complete. */
  isComplete: boolean;
  /** Explanation for the verdict. */
  reason: string;
  /** Optional follow-up goal suggested when more work is needed. */
  nextSubGoal?: string;
}
45
-
46
/**
 * Result of asking the LLM whether a screenshot would help.
 */
export interface ScreenshotNeedResponse {
  /** Whether a screenshot should be taken. */
  needsScreenshot: boolean;
  /** Explanation for the verdict. */
  reason: string;
  /** Optional other approach suggested by the LLM. */
  alternativeApproach?: string;
}
51
-
52
/**
 * Diagnostic report parsed from the vision ("supervisor") LLM answer.
 */
export interface VisionDiagnosticResponse {
  /** What the supervisor sees in the screenshot. */
  visualAnalysis: string;
  /** Why previous attempts failed. */
  rootCause: string;
  /** Exact instructions for the worker agent. */
  specificInstructions: string;
  /** What strategy to use (selector-based, state-based, etc.). */
  recommendedApproach: string;
  /** What elements are actually visible. */
  elementsFound: Array<string>;
  /** What elements were expected but not visible. */
  elementsNotFound: Array<string>;
}
60
-
61
/**
 * One step of a scenario together with its execution bookkeeping.
 */
export interface ScenarioStep {
  /** 1-based position of the step within the scenario. */
  stepNumber: number;
  /** Natural-language description of the step. */
  description: string;
  /** Command associated with the step, once one exists. */
  playwrightCommand?: string;
  /** Outcome flag for the step. */
  success?: boolean;
  /** Error text recorded for the step. */
  error?: string;
  /** NOTE(review): presumably the number of retries so far — confirm against callers. */
  retryCount?: number;
  /** Per-attempt records for this step. */
  attempts?: {
    attemptNumber: number;
    command?: string;
    success: boolean;
    error?: string;
    /** Passed to `new Date(...)` when the attempt history is formatted. */
    timestamp: number;
  }[];
}
76
-
77
- export class LLMFacade {
78
- public llmProvider: LLMProvider; // Expose for orchestrator direct access
79
- private logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
80
- private tokenUsageCallback?: (inputTokens: number, outputTokens: number, includesImage: boolean) => void;
81
-
82
/**
 * Creates a facade that delegates every LLM request to the given provider.
 *
 * @param llmProvider Provider used for all LLM calls.
 */
constructor(llmProvider: LLMProvider) {
  const startupMessage = 'LLMFacade initialized with pluggable LLM provider';
  this.llmProvider = llmProvider;
  this.log(startupMessage);
}
86
-
87
/**
 * Registers the function that is told about token usage after an LLM call.
 *
 * @param callback Receives the input token count, the output token count,
 *   and whether the request carried an image.
 */
setTokenUsageCallback(
  callback: (inputTokens: number, outputTokens: number, includesImage: boolean) => void
): void {
  this.tokenUsageCallback = callback;
}
93
-
94
/**
 * Installs the logger used by this facade and forwards it to the provider
 * when the provider exposes a `setLogger` method.
 *
 * @param logger Receives each message and its optional level.
 */
setLogger(logger: (message: string, level?: 'log' | 'error' | 'warn') => void): void {
  const provider = this.llmProvider;
  this.logger = logger;
  // `setLogger` is optional on the provider, hence the optional call.
  provider.setLogger?.(logger);
}
101
-
102
/**
 * Sends a message to the configured logger (if any) and always echoes it to
 * the console channel matching the level.
 *
 * @param message Text to log.
 * @param level Severity; defaults to `'log'`.
 */
private log(message: string, level: 'log' | 'error' | 'warn' = 'log'): void {
  this.logger?.(message, level);

  // The console echo happens whether or not a logger is configured.
  switch (level) {
    case 'error':
      console.error(message);
      break;
    case 'warn':
      console.warn(message);
      break;
    default:
      console.log(message);
  }
}
118
-
119
/**
 * Sends one request to the provider and reports token usage when available.
 *
 * @param request Request forwarded unchanged to the provider.
 * @returns The provider's response.
 * @throws Re-throws whatever the provider (or the usage callback) throws,
 *   after logging it at error level.
 */
private async callLLM(request: LLMRequest): Promise<LLMResponse> {
  try {
    const response = await this.llmProvider.callLLM(request);
    const { usage } = response;

    // Usage is only reported when the provider returned it and a callback is registered.
    if (usage && this.tokenUsageCallback) {
      const includesImage = Boolean(request.imageUrl);
      this.tokenUsageCallback(usage.inputTokens, usage.outputTokens, includesImage);
    }

    return response;
  } catch (error: any) {
    // The provider owns its error messages; this layer only logs and re-throws.
    this.log(`LLM call failed: ${error}`, 'error');
    throw error;
  }
}
139
-
140
/**
 * Generates a test name from a scenario description.
 *
 * @param scenario Scenario text sent to the LLM.
 * @param model Model identifier; defaults to `DEFAULT_SIMPLER_MODEL`.
 * @returns The `testName` from the LLM answer, or
 *   `Test: <first 50 characters of scenario>...` when the call fails, the
 *   answer is not valid JSON, or it has no non-empty string `testName`.
 */
async generateTestName(scenario: string, model: string = DEFAULT_SIMPLER_MODEL): Promise<string> {
  this.log('Generating test name with LLM...');

  const request: LLMRequest = {
    model,
    systemPrompt: PROMPTS.TEST_NAME_GENERATION.SYSTEM,
    userPrompt: PROMPTS.TEST_NAME_GENERATION.USER(scenario)
  };

  try {
    const response = await this.callLLM(request);
    const parsed: unknown = JSON.parse(response.answer);
    const testName = (parsed as Partial<LLMTestNameResponse> | null)?.testName;

    // Valid JSON is not enough: without this check a missing or non-string
    // `testName` would be returned as `undefined` despite the `string` return type.
    if (typeof testName !== 'string' || testName.trim().length === 0) {
      throw new Error('LLM response did not contain a non-empty "testName" string');
    }

    return testName;
  } catch (error) {
    this.log(`Failed to generate test name: ${error}`, 'error');
    // Fallback to a simple generated name
    return `Test: ${scenario.substring(0, 50)}...`;
  }
}
162
-
163
/**
 * Generates hashtags for semantic grouping of a scenario.
 *
 * @param scenario Scenario text sent to the LLM.
 * @param model Model identifier; defaults to `DEFAULT_SIMPLER_MODEL`.
 * @returns The string entries of the answer's `hashtags` array, or an empty
 *   array when the call fails, the answer is not valid JSON, or `hashtags`
 *   is not an array.
 */
async generateHashtags(scenario: string, model: string = DEFAULT_SIMPLER_MODEL): Promise<string[]> {
  this.log('Generating hashtags with LLM...');

  const request: LLMRequest = {
    model,
    systemPrompt: PROMPTS.HASHTAG_GENERATION.SYSTEM,
    userPrompt: PROMPTS.HASHTAG_GENERATION.USER(scenario)
  };

  try {
    const response = await this.callLLM(request);
    const parsed: unknown = JSON.parse(response.answer);
    const hashtags = (parsed as { hashtags?: unknown } | null)?.hashtags;

    // Guard against a truthy non-array value (for example a single string),
    // which `hashtags || []` would have returned as if it were `string[]`.
    if (!Array.isArray(hashtags)) {
      return [];
    }

    return hashtags.filter((tag): tag is string => typeof tag === 'string');
  } catch (error) {
    this.log(`Failed to generate hashtags: ${error}`, 'error');
    // Fallback to empty array
    return [];
  }
}
185
-
186
/**
 * Asks the LLM whether a goal is complete, given the actions taken so far
 * and the current page state.
 *
 * @param goalDescription Goal being checked.
 * @param completedActions Actions already performed.
 * @param pageInfo Page state handed to the prompt builder.
 * @param model Model identifier; defaults to `DEFAULT_MODEL`.
 * @returns The parsed LLM answer, or `isComplete: false` with an error
 *   reason when the call or the JSON parsing fails.
 */
async checkGoalCompletion(
  goalDescription: string,
  completedActions: string[],
  pageInfo: any,
  model: string = DEFAULT_MODEL
): Promise<GoalCompletionResponse> {
  this.log('Checking goal completion with LLM...');

  const prompts = PROMPTS.GOAL_COMPLETION_CHECK;
  const request: LLMRequest = {
    model,
    systemPrompt: prompts.SYSTEM,
    userPrompt: prompts.USER(goalDescription, completedActions, pageInfo)
  };

  try {
    const { answer } = await this.callLLM(request);
    return JSON.parse(answer) as GoalCompletionResponse;
  } catch (error) {
    this.log(`Failed to check goal completion: ${error}`, 'error');
    // Stay conservative: when the verdict is unknown, report "not complete".
    const notComplete: GoalCompletionResponse = {
      isComplete: false,
      reason: 'Error checking completion status'
    };
    return notComplete;
  }
}
215
-
216
/**
 * Asks a vision model whether a goal is complete, using both a screenshot
 * and the current page state.
 *
 * @param goalDescription Goal being checked.
 * @param completedActions Actions already performed; listed 1-based in the prompt.
 * @param pageInfo Page state; `url`, `title` and `formattedElements` are read from it.
 * @param imageDataUrl Screenshot passed to the provider as `imageUrl`.
 * @param model Model identifier; defaults to `VISION_MODEL`.
 * @returns The parsed LLM answer, or `isComplete: false` with an error
 *   reason when the call or the JSON parsing fails.
 */
async checkGoalCompletionWithVision(
  goalDescription: string,
  completedActions: string[],
  pageInfo: any,
  imageDataUrl: string,
  model: string = VISION_MODEL
): Promise<GoalCompletionResponse> {
  this.log(`👔 Checking goal completion with vision (${model})...`);

  const systemPrompt = `You are checking if a test automation goal has been completed by analyzing both DOM state and visual appearance.

CRITICAL: For action goals (login, submit, click, navigate), check if the PRIMARY ACTION and its SIDE EFFECTS are complete:
- "Login" = Fill fields AND click button AND verify navigation/page change
- "Submit form" = Fill fields AND click submit AND verify submission (success message/page change)
- "Click X" = Click X AND verify expected page change or UI update

For verification goals (verify, check, confirm), verify the VISUAL PRESENCE of expected elements.`;

  const numberedActions = completedActions
    .map((action, i) => `${i + 1}. ${action}`)
    .join('\n');

  const userPrompt = `GOAL: ${goalDescription}

ACTIONS COMPLETED:
${numberedActions}

CURRENT PAGE STATE:
URL: ${pageInfo.url}
Title: ${pageInfo.title}
Interactive Elements:
${pageInfo.formattedElements}

Based on the screenshot AND page state, is this goal COMPLETE?

Respond ONLY with valid JSON:
{
"isComplete": true/false,
"reason": "Brief explanation based on what you SEE in the screenshot and DOM",
"nextSubGoal": "If incomplete, what specific next action is needed?"
}`;

  const request: LLMRequest = {
    model,
    systemPrompt,
    userPrompt,
    imageUrl: imageDataUrl
  };

  try {
    const response = await this.callLLM(request);
    const verdict = JSON.parse(response.answer) as GoalCompletionResponse;
    const verdictLabel = verdict.isComplete ? 'COMPLETE ✅' : 'INCOMPLETE ❌';
    this.log(`👔 Vision goal check result: ${verdictLabel}`);
    return verdict;
  } catch (error) {
    this.log(`Failed to check goal completion with vision: ${error}`, 'error');
    // Stay conservative: when the verdict is unknown, report "not complete".
    return {
      isComplete: false,
      reason: 'Error checking completion status with vision'
    };
  }
}
274
-
275
/**
 * Asks the LLM whether a screenshot would help debug the current failure.
 *
 * @param stepDescription Step that is failing.
 * @param errorMessage Error produced by the step.
 * @param attemptCount Number of attempts, as supplied by the caller.
 * @param pageInfo Page state handed to the prompt builder.
 * @param model Model identifier; defaults to `DEFAULT_SIMPLER_MODEL`.
 * @returns The parsed LLM answer, or `needsScreenshot: false` when the call
 *   or the JSON parsing fails.
 */
async assessScreenshotNeed(
  stepDescription: string,
  errorMessage: string,
  attemptCount: number,
  pageInfo: any,
  model: string = DEFAULT_SIMPLER_MODEL
): Promise<ScreenshotNeedResponse> {
  this.log('Assessing screenshot need with LLM...');

  const prompts = PROMPTS.SCREENSHOT_NEED_ASSESSMENT;
  const request: LLMRequest = {
    model,
    systemPrompt: prompts.SYSTEM,
    userPrompt: prompts.USER(stepDescription, errorMessage, attemptCount, pageInfo)
  };

  try {
    const { answer } = await this.callLLM(request);
    return JSON.parse(answer) as ScreenshotNeedResponse;
  } catch (error) {
    this.log(`Failed to assess screenshot need: ${error}`, 'error');
    // Screenshots are expensive, so the fallback is to not request one.
    const noScreenshot: ScreenshotNeedResponse = {
      needsScreenshot: false,
      reason: 'Error assessing need, defaulting to no screenshot'
    };
    return noScreenshot;
  }
}
305
-
306
/**
 * Gets a diagnostic analysis of a screenshot from the vision model
 * (the "supervisor" role) and logs its findings.
 *
 * @param stepDescription Step being diagnosed.
 * @param pageInfo Page state handed to the prompt builder.
 * @param previousSteps Earlier steps; their `playwrightCommand` values are
 *   joined with newlines for the prompt.
 * @param lastError Most recent error, if any.
 * @param imageDataUrl Screenshot passed to the provider as `imageUrl`.
 * @param model Model identifier; defaults to `VISION_MODEL`.
 * @returns The parsed diagnostic report.
 * @throws Error prefixed with `Vision diagnostic analysis failed:` when the
 *   call, the JSON parsing, or the logging of the report fails.
 */
async getVisionDiagnostics(
  stepDescription: string,
  pageInfo: any,
  previousSteps: any[],
  lastError: string | undefined,
  imageDataUrl: string,
  model: string = VISION_MODEL
): Promise<VisionDiagnosticResponse> {
  this.log('👔 SUPERVISOR: Analyzing screenshot for diagnostic insights...');

  const previousCommands = previousSteps
    .map(s => s.playwrightCommand)
    .filter(Boolean)
    .join('\n');

  let attemptHistory = '';
  if (previousSteps.length > 0) {
    attemptHistory = `Previous attempts context: ${previousSteps.length} commands executed`;
  }

  let errorContext = '';
  if (lastError) {
    errorContext = `Last Error: ${lastError}`;
  }

  const prompts = PROMPTS.VISION_DIAGNOSTIC_ANALYSIS;
  const request: LLMRequest = {
    model,
    systemPrompt: prompts.SYSTEM,
    userPrompt: prompts.USER(
      stepDescription,
      pageInfo,
      previousCommands,
      attemptHistory,
      errorContext
    ),
    imageUrl: imageDataUrl
  };

  try {
    const response = await this.callLLM(request);
    const diagnostics = JSON.parse(response.answer) as VisionDiagnosticResponse;

    // Report the supervisor's findings, header first.
    this.log(`👔 SUPERVISOR ANALYSIS:`);
    const findings: Array<[string, string]> = [
      ['📸 Visual', diagnostics.visualAnalysis],
      ['🔍 Root cause', diagnostics.rootCause],
      ['📋 Instructions', diagnostics.specificInstructions],
      ['💡 Approach', diagnostics.recommendedApproach]
    ];
    for (const [label, text] of findings) {
      this.log(` ${label}: ${text}`);
    }

    // Element lists are only logged when they are present and non-empty.
    if (diagnostics.elementsFound?.length > 0) {
      this.log(` ✅ Elements found: ${diagnostics.elementsFound.join(', ')}`);
    }
    if (diagnostics.elementsNotFound?.length > 0) {
      this.log(` ❌ Elements NOT found: ${diagnostics.elementsNotFound.join(', ')}`);
    }

    return diagnostics;
  } catch (error) {
    this.log(`Failed to get vision diagnostics: ${error}`, 'error');
    throw new Error(`Vision diagnostic analysis failed: ${error}`);
  }
}
368
-
369
/**
 * Generates a command (the "worker" role) from a supervisor's diagnostic report.
 *
 * @param stepDescription Step the command is for.
 * @param supervisorDiagnostics Report whose instructions, visual analysis and
 *   element lists are passed to the prompt builder; missing element lists
 *   are replaced with empty arrays.
 * @param pageInfo Page state handed to the prompt builder.
 * @param model Model identifier; defaults to `DEFAULT_MODEL`.
 * @returns The `command` field of the parsed LLM answer.
 * @throws Error prefixed with `Command generation from supervisor instructions failed:`
 *   when the call or the JSON parsing fails.
 */
async generateCommandFromSupervisorInstructions(
  stepDescription: string,
  supervisorDiagnostics: VisionDiagnosticResponse,
  pageInfo: any,
  model: string = DEFAULT_MODEL
): Promise<string> {
  this.log('🔨 WORKER: Generating command based on supervisor instructions...');

  const prompts = PROMPTS.PLAYWRIGHT_COMMAND_WITH_SUPERVISOR;
  const userPrompt = prompts.USER(
    stepDescription,
    supervisorDiagnostics.specificInstructions,
    supervisorDiagnostics.visualAnalysis,
    supervisorDiagnostics.elementsFound || [],
    supervisorDiagnostics.elementsNotFound || [],
    pageInfo
  );
  const request: LLMRequest = {
    model,
    systemPrompt: prompts.SYSTEM,
    userPrompt
  };

  try {
    const { answer } = await this.callLLM(request);
    const { command, reasoning } = JSON.parse(answer) as LLMPlaywrightCommandResponse;

    if (reasoning) {
      this.log(`🔨 WORKER reasoning: ${reasoning}`);
    }

    return command;
  } catch (error) {
    this.log(`Failed to generate command from supervisor instructions: ${error}`, 'error');
    throw new Error(`Command generation from supervisor instructions failed: ${error}`);
  }
}
407
-
408
/**
 * Generates a Playwright command with the help of a screenshot (vision model).
 *
 * @param stepDescription Step the command is for.
 * @param pageInfo Page state handed to the prompt builder.
 * @param previousSteps Earlier steps; their `playwrightCommand` values are
 *   joined with newlines for the prompt.
 * @param lastError Most recent error, if any.
 * @param imageDataUrl Full data URL (`data:image/png;base64,...`) of the screenshot.
 * @param model Model identifier; defaults to `VISION_MODEL`.
 * @returns The `command` field of the parsed LLM answer.
 * @throws Error prefixed with `Vision-enhanced command generation failed:`
 *   when the call or the JSON parsing fails.
 */
async generatePlaywrightCommandWithVision(
  stepDescription: string,
  pageInfo: any,
  previousSteps: any[],
  lastError: string | undefined,
  imageDataUrl: string, // Full data URL: data:image/png;base64,...
  model: string = VISION_MODEL
): Promise<string> {
  this.log(`⚠️ USING VISION MODE (${model})...`);

  const previousCommands = previousSteps
    .map(s => s.playwrightCommand)
    .filter(Boolean)
    .join('\n');

  let attemptHistory = '';
  if (previousSteps.length > 0) {
    attemptHistory = `Previous attempts context: ${previousSteps.length} commands executed`;
  }

  let errorContext = '';
  if (lastError) {
    errorContext = `Last Error: ${lastError}`;
  }

  const prompts = PROMPTS.PLAYWRIGHT_COMMAND_WITH_VISION;
  const request: LLMRequest = {
    model,
    systemPrompt: prompts.SYSTEM,
    userPrompt: prompts.USER(
      stepDescription,
      pageInfo,
      previousCommands,
      attemptHistory,
      errorContext
    ),
    imageUrl: imageDataUrl // Full data URL constructed by client
  };

  try {
    const response = await this.callLLM(request);
    const commandResponse = JSON.parse(response.answer) as LLMPlaywrightCommandResponse & {
      visualInsights?: string;
      failureRootCause?: string;
      recommendedAlternative?: string;
    };

    // Optional diagnostic fields are logged, in this order, only when present.
    const insights: Array<[string, string | undefined]> = [
      ['📸 Visual insights', commandResponse.visualInsights],
      ['🔍 Root cause analysis', commandResponse.failureRootCause],
      ['💡 Recommended alternative', commandResponse.recommendedAlternative],
      ['🧠 Vision-based reasoning', commandResponse.reasoning]
    ];
    for (const [label, text] of insights) {
      if (text) {
        this.log(`${label}: ${text}`);
      }
    }

    return commandResponse.command;
  } catch (error) {
    this.log(`Failed to generate command with vision: ${error}`, 'error');
    throw new Error(`Vision-enhanced command generation failed: ${error}`);
  }
}
478
-
479
/**
 * Breaks a scenario down into numbered steps.
 *
 * @param scenario Scenario text sent to the LLM.
 * @param model Model identifier; defaults to `DEFAULT_SIMPLER_MODEL`.
 * @returns Up to 10 trimmed, non-empty steps from the LLM answer, numbered
 *   from 1. When the call or parsing fails, the scenario's own non-empty
 *   lines are used instead (not capped at 10).
 */
async breakdownScenario(scenario: string, model: string = DEFAULT_SIMPLER_MODEL): Promise<ScenarioStep[]> {
  this.log('Breaking down scenario with LLM...');
  this.log(`📝 INPUT SCENARIO: ${scenario}`);

  // Numbers descriptions 1..n in the order given.
  const toNumberedSteps = (descriptions: string[]): ScenarioStep[] =>
    descriptions.map((desc, index) => ({
      stepNumber: index + 1,
      description: desc,
    }));

  const request: LLMRequest = {
    model,
    systemPrompt: PROMPTS.SCENARIO_BREAKDOWN.SYSTEM,
    userPrompt: PROMPTS.SCENARIO_BREAKDOWN.USER(scenario)
  };

  try {
    const { answer } = await this.callLLM(request);
    this.log(`🤖 RAW LLM RESPONSE: ${answer}`);

    const breakdownResponse = JSON.parse(answer) as LLMScenarioBreakdownResponse;
    this.log(`📋 PARSED BREAKDOWN: ${JSON.stringify(breakdownResponse, null, 2)}`);

    // Trim every step, drop the empty ones, then cap the list at 10 steps.
    const trimmedSteps = breakdownResponse.steps.map(step => step.trim());
    const nonEmptySteps = trimmedSteps.filter(step => step.length > 0);
    const cleanedSteps = nonEmptySteps.slice(0, 10);

    this.log(`✅ CLEANED STEPS: ${JSON.stringify(cleanedSteps, null, 2)}`);

    return toNumberedSteps(cleanedSteps);
  } catch (error) {
    this.log(`❌ Failed to breakdown scenario: ${error}`, 'error');
    // Splitting on newlines only keeps URLs inside a line intact.
    const stepDescriptions = scenario
      .split('\n')
      .map(s => s.trim())
      .filter(s => s.length > 0);
    this.log(`🔄 FALLBACK STEPS: ${JSON.stringify(stepDescriptions, null, 2)}`);
    return toNumberedSteps(stepDescriptions);
  }
}
522
-
523
/**
 * Generates a Playwright command for one step.
 *
 * @param stepDescription Step the command is for.
 * @param pageInfo Page state handed to the prompt builder.
 * @param previousSteps Earlier steps; only those with a command and
 *   `success` set are included in the prompt.
 * @param lastError Most recent error, if any.
 * @param currentStep The step being worked on; supplies attempt history and error context.
 * @param model Model identifier; defaults to `DEFAULT_MODEL`.
 * @returns The `command` field of the parsed LLM answer, or `null` when the
 *   call or the JSON parsing fails.
 */
async generatePlaywrightCommand(
  stepDescription: string,
  pageInfo: PageInfo,
  previousSteps: ScenarioStep[],
  lastError?: string,
  currentStep?: ScenarioStep,
  model: string = DEFAULT_MODEL
): Promise<string | null> {
  this.log('Generating Playwright command with LLM...');

  // Only successful steps that actually produced a command are shown to the LLM.
  const successfulSteps = previousSteps.filter(s => s.playwrightCommand && s.success);
  const previousCommands = successfulSteps
    .map(s => `// Step ${s.stepNumber}: ${s.description}\n${s.playwrightCommand}`)
    .join('\n');

  const attemptHistory = this.buildAttemptHistory(currentStep);
  const errorContext = this.buildErrorContext(lastError, currentStep);

  const request: LLMRequest = {
    model,
    systemPrompt: PROMPTS.PLAYWRIGHT_COMMAND.SYSTEM,
    userPrompt: PROMPTS.PLAYWRIGHT_COMMAND.USER(
      stepDescription,
      pageInfo,
      previousCommands,
      attemptHistory,
      errorContext
    )
  };

  try {
    const { answer } = await this.callLLM(request);
    const { command } = JSON.parse(answer) as LLMPlaywrightCommandResponse;
    return command;
  } catch (error) {
    this.log(`Failed to generate Playwright command: ${error}`, 'error');
    return null;
  }
}
570
-
571
/**
 * Asks the LLM to split a script into steps for AI repair.
 *
 * @param script Script text sent to the LLM.
 * @param model Model identifier; defaults to `DEFAULT_SIMPLER_MODEL`.
 * @returns The `steps` array of the parsed answer, or an empty array when
 *   the call fails, the answer is not valid JSON, or it has no `steps` array.
 */
async parseScriptIntoSteps(script: string, model: string = DEFAULT_SIMPLER_MODEL): Promise<Array<{ description: string; code: string; success?: boolean; error?: string }>> {
  this.log('Parsing script into steps with LLM...');

  const request: LLMRequest = {
    model,
    systemPrompt: PROMPTS.SCRIPT_PARSING.SYSTEM,
    userPrompt: PROMPTS.SCRIPT_PARSING.USER(script)
  };

  try {
    const { answer } = await this.callLLM(request);
    this.log(`Raw LLM parsing response (first 500 chars): ${answer.substring(0, 500)}`);

    const parsed = JSON.parse(answer);
    this.log(`Parsed JSON structure: ${JSON.stringify(parsed, null, 2).substring(0, 1000)}`);

    // The answer must be an object carrying a `steps` array.
    const hasStepsArray = parsed.steps && Array.isArray(parsed.steps);
    if (!hasStepsArray) {
      this.log(`Unexpected LLM response format - expected {steps: [...]}: ${JSON.stringify(parsed)}`, 'error');
      return [];
    }

    this.log(`LLM parsing successful, got ${parsed.steps.length} steps`);
    return parsed.steps;
  } catch (error) {
    this.log(`Failed to parse LLM response as JSON: ${error}`, 'error');
    return [];
  }
}
603
-
604
/**
 * Gets a repair suggestion for a failing step.
 *
 * The LLM names the operation as a string; it is converted to the
 * `StepOperation` enum, with anything other than `'INSERT'` or `'REMOVE'`
 * mapped to `StepOperation.MODIFY`.
 *
 * @param stepDescription Description of the failing step.
 * @param stepCode Code of the failing step.
 * @param errorMessage Error produced by the step.
 * @param pageInfo Page state handed to the prompt builder.
 * @param failureHistory Failure history text handed to the prompt builder.
 * @param recentRepairs Recent repairs text handed to the prompt builder.
 * @param model Model identifier; defaults to `DEFAULT_MODEL`.
 * @returns The parsed suggestion with `action.operation` converted to the enum.
 * @throws Whatever the LLM call or `JSON.parse` throws; nothing is caught here.
 */
async getRepairSuggestion(
  stepDescription: string,
  stepCode: string,
  errorMessage: string,
  pageInfo: PageInfo,
  failureHistory: string,
  recentRepairs: string,
  model: string = DEFAULT_MODEL
): Promise<RepairSuggestionResponse> {
  const userPrompt = PROMPTS.REPAIR_SUGGESTION.USER(
    stepDescription,
    stepCode,
    errorMessage,
    pageInfo,
    failureHistory,
    recentRepairs,
  );
  const request: LLMRequest = {
    model,
    systemPrompt: PROMPTS.REPAIR_SUGGESTION.SYSTEM,
    userPrompt
  };

  const response = await this.callLLM(request);
  this.log(`🤖 LLM Repair Response: ${response.answer}`);
  const parsed = JSON.parse(response.answer) as any;
  this.log(`🤖 Parsed Repair Action: ${JSON.stringify(parsed)}`);

  // Convert string operation to enum; unknown names fall back to MODIFY.
  const action = parsed.action;
  if (action?.operation) {
    const namedOperations = new Map<unknown, StepOperation>([
      ['INSERT', StepOperation.INSERT],
      ['REMOVE', StepOperation.REMOVE],
    ]);
    action.operation = namedOperations.get(action.operation) ?? StepOperation.MODIFY;
  }

  return parsed as RepairSuggestionResponse;
}
653
-
654
/**
 * Asks the LLM to rate a repair and produce advice by comparing the
 * original script with the updated one.
 *
 * @param originalScript Script before the repair.
 * @param updatedScript Script after the repair.
 * @param model Model identifier; defaults to `DEFAULT_SIMPLER_MODEL`.
 * @returns The parsed LLM answer.
 * @throws Whatever the LLM call or `JSON.parse` throws; nothing is caught here.
 */
async assessRepairConfidence(
  originalScript: string,
  updatedScript: string,
  model: string = DEFAULT_SIMPLER_MODEL
): Promise<RepairConfidenceResponse> {
  const prompts = PROMPTS.REPAIR_CONFIDENCE;
  const request: LLMRequest = {
    model,
    systemPrompt: prompts.SYSTEM,
    userPrompt: prompts.USER(originalScript, updatedScript)
  };

  const { answer } = await this.callLLM(request);
  return JSON.parse(answer) as RepairConfidenceResponse;
}
671
-
672
/**
 * Asks the LLM for the final script that incorporates the repair advice.
 *
 * @param originalScript Script before the repair.
 * @param updatedScript Script after the repair; also the fallback result.
 * @param newRepairAdvice Advice text handed to the prompt builder.
 * @param model Model identifier; defaults to `DEFAULT_SIMPLER_MODEL`.
 * @returns The `script` field of the parsed answer, or `updatedScript` when
 *   that field is falsy or the answer cannot be parsed.
 * @throws Whatever the LLM call throws; only parsing errors are caught.
 */
async generateFinalScript(
  originalScript: string,
  updatedScript: string,
  newRepairAdvice: string,
  model: string = DEFAULT_SIMPLER_MODEL
): Promise<string> {
  const prompts = PROMPTS.FINAL_SCRIPT;
  const request: LLMRequest = {
    model,
    systemPrompt: prompts.SYSTEM,
    userPrompt: prompts.USER(originalScript, updatedScript, newRepairAdvice)
  };

  // The call itself is deliberately outside the try: its failures propagate.
  const { answer } = await this.callLLM(request);

  let finalScript = updatedScript;
  try {
    const parsed = JSON.parse(answer);
    finalScript = parsed.script || updatedScript;
  } catch (error) {
    this.log(`Failed to parse final script response: ${error}`, 'error');
  }
  return finalScript;
}
696
-
697
- /**
698
- * Build attempt history for current step
699
- */
700
- private buildAttemptHistory(currentStep?: ScenarioStep): string {
701
- if (!currentStep || !currentStep.attempts || currentStep.attempts.length === 0) {
702
- return 'This is the first attempt for this step.';
703
- }
704
-
705
- const attempts = currentStep.attempts.map((attempt, index) => {
706
- const status = attempt.success ? '✅ SUCCESS' : '❌ FAILED';
707
- return `Attempt ${attempt.attemptNumber} (${status}):
708
- Command: ${attempt.command || 'No command generated'}
709
- ${attempt.error ? `Error: ${attempt.error}` : 'No error'}
710
- Timestamp: ${new Date(attempt.timestamp).toISOString()}`;
711
- }).join('\n\n');
712
-
713
- return `Current step attempt history:
714
- ${attempts}
715
-
716
- LEARNING FROM FAILURES:
717
- - Analyze what went wrong in each attempt
718
- - Try completely different approaches for failed attempts
719
- - If a selector failed, try alternative selectors
720
- - If timing failed, add proper waits
721
- - If element not found, try different strategies`;
722
- }
723
-
724
- /**
725
- * Build error context for LLM analysis
726
- */
727
- private buildErrorContext(lastError?: string, currentStep?: ScenarioStep): string {
728
- if (!lastError && (!currentStep || !currentStep.error)) {
729
- return '';
730
- }
731
-
732
- const errors = [];
733
- if (lastError) errors.push(lastError);
734
- if (currentStep?.error) errors.push(currentStep.error);
735
-
736
- const errorText = errors.join(' | ');
737
-
738
- // Detect if we're repeatedly looking for elements that don't exist
739
- const attemptedCommands = currentStep?.attempts
740
- ?.map(a => a.command)
741
- .filter(Boolean) || [];
742
-
743
- const lookingForNonExistent = attemptedCommands.some(cmd =>
744
- cmd?.includes('getByText') ||
745
- cmd?.includes('toBeVisible') ||
746
- cmd?.includes('waitFor')
747
- ) && errors.some(err =>
748
- err.includes('not found') ||
749
- err.includes('Timeout') ||
750
- err.includes('Expected: visible')
751
- );
752
-
753
- let hallucinationWarning = '';
754
- if (lookingForNonExistent && attemptedCommands.length >= 2) {
755
- hallucinationWarning = `
756
- ⚠️ HALLUCINATION ALERT:
757
- You've made ${attemptedCommands.length} attempts trying to find/verify elements that don't exist.
758
- STOP looking for these elements. They are NOT in the DOM.
759
- Instead:
760
- - Check if the goal is ALREADY COMPLETE (action succeeded = goal done)
761
- - Use alternative verification (state changes, network, page load)
762
- - Move on if the primary action succeeded
763
- `;
764
- }
765
-
766
- return `ERROR CONTEXT:
767
- Last Error: ${errorText}
768
- ${hallucinationWarning}
769
- ANALYZE THE ERROR AND ADAPT:
770
- - Study the error message to understand what went wrong
771
- - If element "not found" after 2+ attempts, it probably doesn't exist - stop looking for it
772
- - Try a completely different approach than what failed
773
- - Consider alternative selectors, timing, or interaction methods
774
- - Never repeat the exact same command that failed`;
775
- }
776
-
777
- /**
778
- * Cleanup generated script - remove redundancies and make minor adjustments
779
- */
780
- async cleanupScript(script: string, model?: string): Promise<{ script: string; changes: string[]; skipped?: string }> {
781
- try {
782
- const response = await this.llmProvider.callLLM({
783
- model: model || DEFAULT_MODEL,
784
- systemPrompt: PROMPTS.SCRIPT_CLEANUP.SYSTEM,
785
- userPrompt: PROMPTS.SCRIPT_CLEANUP.USER(script)
786
- });
787
-
788
- // Parse JSON response
789
- const jsonMatch = response.answer.match(/\{[\s\S]*\}/);
790
- if (!jsonMatch) {
791
- console.log('[LLMFacade] Cleanup response not in JSON format, returning original script');
792
- return { script, changes: [], skipped: 'Response not in JSON format' };
793
- }
794
-
795
- const parsed = JSON.parse(jsonMatch[0]);
796
-
797
- // Validate response
798
- if (!parsed.script) {
799
- console.log('[LLMFacade] Cleanup response missing script field, returning original');
800
- return { script, changes: [], skipped: 'Invalid response format' };
801
- }
802
-
803
- console.log(`[LLMFacade] Script cleanup completed. Changes: ${parsed.changes?.length || 0}`);
804
- if (parsed.changes && parsed.changes.length > 0) {
805
- parsed.changes.forEach((change: string, i: number) => {
806
- console.log(`[LLMFacade] ${i + 1}. ${change}`);
807
- });
808
- }
809
-
810
- return {
811
- script: parsed.script,
812
- changes: parsed.changes || [],
813
- skipped: parsed.skipped
814
- };
815
- } catch (error: any) {
816
- console.error('[LLMFacade] Script cleanup failed:', error.message);
817
- // Return original script on error
818
- return { script, changes: [], skipped: `Error: ${error.message}` };
819
- }
820
- }
821
- }