testchimp-runner-core 0.0.21 → 0.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/VISION_DIAGNOSTICS_IMPROVEMENTS.md +336 -0
  2. package/dist/credit-usage-service.d.ts +9 -0
  3. package/dist/credit-usage-service.d.ts.map +1 -1
  4. package/dist/credit-usage-service.js +20 -5
  5. package/dist/credit-usage-service.js.map +1 -1
  6. package/dist/execution-service.d.ts +7 -2
  7. package/dist/execution-service.d.ts.map +1 -1
  8. package/dist/execution-service.js +91 -36
  9. package/dist/execution-service.js.map +1 -1
  10. package/dist/index.d.ts +30 -2
  11. package/dist/index.d.ts.map +1 -1
  12. package/dist/index.js +91 -26
  13. package/dist/index.js.map +1 -1
  14. package/dist/llm-facade.d.ts +64 -8
  15. package/dist/llm-facade.d.ts.map +1 -1
  16. package/dist/llm-facade.js +361 -109
  17. package/dist/llm-facade.js.map +1 -1
  18. package/dist/llm-provider.d.ts +39 -0
  19. package/dist/llm-provider.d.ts.map +1 -0
  20. package/dist/llm-provider.js +7 -0
  21. package/dist/llm-provider.js.map +1 -0
  22. package/dist/model-constants.d.ts +21 -0
  23. package/dist/model-constants.d.ts.map +1 -0
  24. package/dist/model-constants.js +24 -0
  25. package/dist/model-constants.js.map +1 -0
  26. package/dist/orchestrator/index.d.ts +8 -0
  27. package/dist/orchestrator/index.d.ts.map +1 -0
  28. package/dist/orchestrator/index.js +23 -0
  29. package/dist/orchestrator/index.js.map +1 -0
  30. package/dist/orchestrator/orchestrator-agent.d.ts +66 -0
  31. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -0
  32. package/dist/orchestrator/orchestrator-agent.js +855 -0
  33. package/dist/orchestrator/orchestrator-agent.js.map +1 -0
  34. package/dist/orchestrator/tool-registry.d.ts +74 -0
  35. package/dist/orchestrator/tool-registry.d.ts.map +1 -0
  36. package/dist/orchestrator/tool-registry.js +131 -0
  37. package/dist/orchestrator/tool-registry.js.map +1 -0
  38. package/dist/orchestrator/tools/check-page-ready.d.ts +13 -0
  39. package/dist/orchestrator/tools/check-page-ready.d.ts.map +1 -0
  40. package/dist/orchestrator/tools/check-page-ready.js +72 -0
  41. package/dist/orchestrator/tools/check-page-ready.js.map +1 -0
  42. package/dist/orchestrator/tools/extract-data.d.ts +13 -0
  43. package/dist/orchestrator/tools/extract-data.d.ts.map +1 -0
  44. package/dist/orchestrator/tools/extract-data.js +84 -0
  45. package/dist/orchestrator/tools/extract-data.js.map +1 -0
  46. package/dist/orchestrator/tools/index.d.ts +10 -0
  47. package/dist/orchestrator/tools/index.d.ts.map +1 -0
  48. package/dist/orchestrator/tools/index.js +18 -0
  49. package/dist/orchestrator/tools/index.js.map +1 -0
  50. package/dist/orchestrator/tools/inspect-page.d.ts +13 -0
  51. package/dist/orchestrator/tools/inspect-page.d.ts.map +1 -0
  52. package/dist/orchestrator/tools/inspect-page.js +39 -0
  53. package/dist/orchestrator/tools/inspect-page.js.map +1 -0
  54. package/dist/orchestrator/tools/recall-history.d.ts +13 -0
  55. package/dist/orchestrator/tools/recall-history.d.ts.map +1 -0
  56. package/dist/orchestrator/tools/recall-history.js +64 -0
  57. package/dist/orchestrator/tools/recall-history.js.map +1 -0
  58. package/dist/orchestrator/tools/take-screenshot.d.ts +15 -0
  59. package/dist/orchestrator/tools/take-screenshot.d.ts.map +1 -0
  60. package/dist/orchestrator/tools/take-screenshot.js +112 -0
  61. package/dist/orchestrator/tools/take-screenshot.js.map +1 -0
  62. package/dist/orchestrator/types.d.ts +133 -0
  63. package/dist/orchestrator/types.d.ts.map +1 -0
  64. package/dist/orchestrator/types.js +28 -0
  65. package/dist/orchestrator/types.js.map +1 -0
  66. package/dist/playwright-mcp-service.d.ts +9 -0
  67. package/dist/playwright-mcp-service.d.ts.map +1 -1
  68. package/dist/playwright-mcp-service.js +20 -5
  69. package/dist/playwright-mcp-service.js.map +1 -1
  70. package/dist/progress-reporter.d.ts +97 -0
  71. package/dist/progress-reporter.d.ts.map +1 -0
  72. package/dist/progress-reporter.js +18 -0
  73. package/dist/progress-reporter.js.map +1 -0
  74. package/dist/prompts.d.ts +24 -0
  75. package/dist/prompts.d.ts.map +1 -1
  76. package/dist/prompts.js +593 -68
  77. package/dist/prompts.js.map +1 -1
  78. package/dist/providers/backend-proxy-llm-provider.d.ts +25 -0
  79. package/dist/providers/backend-proxy-llm-provider.d.ts.map +1 -0
  80. package/dist/providers/backend-proxy-llm-provider.js +76 -0
  81. package/dist/providers/backend-proxy-llm-provider.js.map +1 -0
  82. package/dist/providers/local-llm-provider.d.ts +21 -0
  83. package/dist/providers/local-llm-provider.d.ts.map +1 -0
  84. package/dist/providers/local-llm-provider.js +35 -0
  85. package/dist/providers/local-llm-provider.js.map +1 -0
  86. package/dist/scenario-service.d.ts +27 -1
  87. package/dist/scenario-service.d.ts.map +1 -1
  88. package/dist/scenario-service.js +48 -12
  89. package/dist/scenario-service.js.map +1 -1
  90. package/dist/scenario-worker-class.d.ts +39 -2
  91. package/dist/scenario-worker-class.d.ts.map +1 -1
  92. package/dist/scenario-worker-class.js +614 -86
  93. package/dist/scenario-worker-class.js.map +1 -1
  94. package/dist/script-utils.d.ts +2 -0
  95. package/dist/script-utils.d.ts.map +1 -1
  96. package/dist/script-utils.js +44 -4
  97. package/dist/script-utils.js.map +1 -1
  98. package/dist/types.d.ts +11 -0
  99. package/dist/types.d.ts.map +1 -1
  100. package/dist/types.js.map +1 -1
  101. package/dist/utils/browser-utils.d.ts +20 -1
  102. package/dist/utils/browser-utils.d.ts.map +1 -1
  103. package/dist/utils/browser-utils.js +102 -51
  104. package/dist/utils/browser-utils.js.map +1 -1
  105. package/dist/utils/page-info-utils.d.ts +23 -4
  106. package/dist/utils/page-info-utils.d.ts.map +1 -1
  107. package/dist/utils/page-info-utils.js +174 -43
  108. package/dist/utils/page-info-utils.js.map +1 -1
  109. package/package.json +1 -2
  110. package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +642 -0
  111. package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +844 -0
  112. package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +539 -0
  113. package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +241 -0
  114. package/plandocs/PHASE1_FINAL_STATUS.md +210 -0
  115. package/plandocs/PLANNING_SESSION_SUMMARY.md +372 -0
  116. package/plandocs/SCRIPT_CLEANUP_FEATURE.md +201 -0
  117. package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +364 -0
  118. package/plandocs/SELECTOR_IMPROVEMENTS.md +139 -0
  119. package/src/credit-usage-service.ts +23 -5
  120. package/src/execution-service.ts +152 -42
  121. package/src/index.ts +169 -26
  122. package/src/llm-facade.ts +500 -126
  123. package/src/llm-provider.ts +43 -0
  124. package/src/model-constants.ts +23 -0
  125. package/src/orchestrator/index.ts +33 -0
  126. package/src/orchestrator/orchestrator-agent.ts +1037 -0
  127. package/src/orchestrator/tool-registry.ts +182 -0
  128. package/src/orchestrator/tools/check-page-ready.ts +75 -0
  129. package/src/orchestrator/tools/extract-data.ts +92 -0
  130. package/src/orchestrator/tools/index.ts +11 -0
  131. package/src/orchestrator/tools/inspect-page.ts +42 -0
  132. package/src/orchestrator/tools/recall-history.ts +72 -0
  133. package/src/orchestrator/tools/take-screenshot.ts +128 -0
  134. package/src/orchestrator/types.ts +200 -0
  135. package/src/playwright-mcp-service.ts +23 -5
  136. package/src/progress-reporter.ts +109 -0
  137. package/src/prompts.ts +606 -69
  138. package/src/providers/backend-proxy-llm-provider.ts +91 -0
  139. package/src/providers/local-llm-provider.ts +38 -0
  140. package/src/scenario-service.ts +83 -13
  141. package/src/scenario-worker-class.ts +740 -72
  142. package/src/script-utils.ts +50 -5
  143. package/src/types.ts +13 -1
  144. package/src/utils/browser-utils.ts +123 -51
  145. package/src/utils/page-info-utils.ts +210 -53
  146. package/testchimp-runner-core-0.0.22.tgz +0 -0
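
Note: the headline change in this release is the removal of the hard-wired axios call to the TestChimp backend in favor of a pluggable LLMProvider abstraction (new files llm-provider.ts, model-constants.ts, and the providers/ directory). The llm-provider.ts source is not included below, so as orientation for reading the llm-facade.ts diff, here is a minimal sketch of the provider contract inferred from how the facade uses it; field and method names match the diff, but exact optionality and any additional members are assumptions.

// Sketch only: inferred from llm-facade.ts usage below; the actual
// declarations live in package/src/llm-provider.ts (+43, not shown here).
export interface LLMRequest {
  model?: string;
  systemPrompt?: string;
  userPrompt?: string;
  imageUrl?: string; // full data URL for vision calls: data:image/png;base64,...
}

export interface LLMResponse {
  answer: string; // raw model output; the facade JSON.parses this
  usage?: {
    inputTokens: number;
    outputTokens: number;
  };
}

export interface LLMProvider {
  callLLM(request: LLMRequest): Promise<LLMResponse>;
  // Optional: the facade invokes this with optional chaining (setLogger?.())
  setLogger?(logger: (message: string, level?: 'log' | 'error' | 'warn') => void): void;
}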
package/src/llm-facade.ts CHANGED
@@ -1,20 +1,8 @@
-import axios from 'axios';
 import { PROMPTS } from './prompts';
 import { PageInfo } from './utils/page-info-utils';
 import { StepOperation } from './types';
-import { AuthConfig, createAuthConfigFromEnv, getAuthHeaders } from './auth-config';
-import { loadEnvConfig } from './env-loader';
-
-// LLM Request/Response interfaces for backend proxy
-interface CallLLMRequest {
-  model?: string;
-  system_prompt?: string;
-  user_prompt?: string;
-}
-
-interface CallLLMResponse {
-  answer?: string;
-}
+import { DEFAULT_MODEL, VISION_MODEL } from './model-constants';
+import { LLMProvider, LLMRequest, LLMResponse } from './llm-provider';
 
 // LLM Response interfaces
 export interface LLMScenarioBreakdownResponse {
@@ -49,6 +37,27 @@ export interface RepairConfidenceResponse {
   advice: string;
 }
 
+export interface GoalCompletionResponse {
+  isComplete: boolean;
+  reason: string;
+  nextSubGoal?: string;
+}
+
+export interface ScreenshotNeedResponse {
+  needsScreenshot: boolean;
+  reason: string;
+  alternativeApproach?: string;
+}
+
+export interface VisionDiagnosticResponse {
+  visualAnalysis: string; // What the supervisor sees in the screenshot
+  rootCause: string; // Why previous attempts failed
+  specificInstructions: string; // Exact instructions for the worker agent
+  recommendedApproach: string; // What strategy to use (selector-based, state-based, etc.)
+  elementsFound: string[]; // What elements are actually visible
+  elementsNotFound: string[]; // What elements were expected but not visible
+}
+
 export interface ScenarioStep {
   stepNumber: number;
   description: string;
@@ -66,101 +75,86 @@ export interface ScenarioStep {
 }
 
 export class LLMFacade {
-  private backendUrl: string;
-  private authConfig: AuthConfig | null;
-
-  constructor(authConfig?: AuthConfig, backendUrl?: string) {
-    // Use provided backend URL or fall back to environment configuration
-    if (backendUrl) {
-      this.backendUrl = backendUrl;
-      console.log(`LLMFacade initialized with provided backend URL: ${this.backendUrl}`);
-    } else {
-      // Fall back to environment configuration for backward compatibility
-      const envConfig = loadEnvConfig();
-      this.backendUrl = envConfig.TESTCHIMP_BACKEND_URL;
-      console.log(`LLMFacade initialized with environment backend URL: ${this.backendUrl}`);
-    }
-
-    // Use provided auth config or try to create from environment
-    this.authConfig = authConfig || createAuthConfigFromEnv();
-
-    if (!this.authConfig) {
-      console.warn('TestChimp authentication not configured. LLM calls may fail.');
-    }
-  }
+  public llmProvider: LLMProvider; // Expose for orchestrator direct access
+  private logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
+  private tokenUsageCallback?: (inputTokens: number, outputTokens: number, includesImage: boolean) => void;
 
+  constructor(llmProvider: LLMProvider) {
+    this.llmProvider = llmProvider;
+    this.log('LLMFacade initialized with pluggable LLM provider');
+  }
+
   /**
-   * Update authentication configuration
+   * Set token usage callback for tracking
    */
-  setAuthConfig(authConfig: AuthConfig): void {
-    this.authConfig = authConfig;
+  setTokenUsageCallback(callback: (inputTokens: number, outputTokens: number, includesImage: boolean) => void): void {
+    this.tokenUsageCallback = callback;
   }
 
   /**
-   * Get current authentication configuration
+   * Set a logger callback for capturing execution logs
    */
-  getAuthConfig(): AuthConfig | null {
-    return this.authConfig;
+  setLogger(logger: (message: string, level?: 'log' | 'error' | 'warn') => void): void {
+    this.logger = logger;
+    this.llmProvider.setLogger?.(logger);
   }
 
-  private async callLLM(request: CallLLMRequest): Promise<string> {
-    if (!this.authConfig) {
-      throw new Error('Authentication not configured. Please set authentication credentials.');
+  /**
+   * Log a message using the configured logger
+   */
+  private log(message: string, level: 'log' | 'error' | 'warn' = 'log'): void {
+    if (this.logger) {
+      this.logger(message, level);
+    }
+    // Console fallback for debug visibility
+    if (level === 'error') {
+      console.error(message);
+    } else if (level === 'warn') {
+      console.warn(message);
+    } else {
+      console.log(message);
     }
+  }
 
+  private async callLLM(request: LLMRequest): Promise<LLMResponse> {
     try {
-      const authHeaders = getAuthHeaders(this.authConfig);
-      const url = `${this.backendUrl}/localagent/call_llm`;
-      console.log(`repairing step`);
+      const response = await this.llmProvider.callLLM(request);
 
-      const response = await axios.post(url, request, {
-        headers: {
-          ...authHeaders,
-          'Content-Type': 'application/json'
-        },
-        timeout: 30000 // 30 second timeout for LLM calls
-      });
-
-      if (response.data && response.data.answer) {
-        return response.data.answer;
-      } else {
-        throw new Error('Invalid response from LLM backend');
+      // Report token usage if callback is set
+      if (response.usage && this.tokenUsageCallback) {
+        this.tokenUsageCallback(
+          response.usage.inputTokens,
+          response.usage.outputTokens,
+          !!request.imageUrl
+        );
       }
+
+      return response;
     } catch (error: any) {
-      // Handle specific error cases with user-friendly messages
-      if (error.response?.status === 412) {
-        throw new Error('Insufficient credits. Please upgrade your TestChimp plan or add more credits to continue using AI features.');
-      } else if (error.response?.status === 401) {
-        throw new Error('Authentication failed. Please check your API credentials.');
-      } else if (error.response?.status === 403) {
-        throw new Error('Access denied. Please check your API permissions.');
-      } else if (error.response?.status === 429) {
-        throw new Error('Rate limit exceeded. Please try again later.');
-      } else {
-        console.error('LLM call failed:', error);
-        throw new Error(`LLM call failed: ${error.message}`);
-      }
+      // Let provider handle its own error messages, just re-throw
+      this.log(`LLM call failed: ${error}`, 'error');
+      throw error;
     }
   }
 
   /**
    * Generate a test name from scenario description
    */
-  async generateTestName(scenario: string, model: string = 'gpt-4.1-mini'): Promise<string> {
-    console.log('Generating test name with LLM...');
+  async generateTestName(scenario: string, model: string = DEFAULT_MODEL): Promise<string> {
+    this.log('Generating test name with LLM...');
 
-    const request: CallLLMRequest = {
+    const request: LLMRequest = {
       model,
-      system_prompt: PROMPTS.TEST_NAME_GENERATION.SYSTEM,
-      user_prompt: PROMPTS.TEST_NAME_GENERATION.USER(scenario)
+      systemPrompt: PROMPTS.TEST_NAME_GENERATION.SYSTEM,
+      userPrompt: PROMPTS.TEST_NAME_GENERATION.USER(scenario)
    };
 
     try {
       const response = await this.callLLM(request);
-      const testNameResponse = JSON.parse(response) as LLMTestNameResponse;
+      const testNameResponse = JSON.parse(response.answer) as LLMTestNameResponse;
       return testNameResponse.testName;
     } catch (error) {
-      console.error('Failed to generate test name:', error);
+      this.log(`Failed to generate test name: ${error}`, 'error');
       // Fallback to a simple generated name
       return `Test: ${scenario.substring(0, 50)}...`;
     }
@@ -170,40 +164,337 @@ export class LLMFacade {
    * Generate hashtags for semantic grouping
    */
   async generateHashtags(scenario: string, model: string = 'gpt-4o-mini'): Promise<string[]> {
-    console.log('Generating hashtags with LLM...');
+    this.log('Generating hashtags with LLM...');
 
-    const request: CallLLMRequest = {
+    const request: LLMRequest = {
       model,
-      system_prompt: PROMPTS.HASHTAG_GENERATION.SYSTEM,
-      user_prompt: PROMPTS.HASHTAG_GENERATION.USER(scenario)
+      systemPrompt: PROMPTS.HASHTAG_GENERATION.SYSTEM,
+      userPrompt: PROMPTS.HASHTAG_GENERATION.USER(scenario)
     };
 
     try {
       const response = await this.callLLM(request);
-      const hashtagResponse = JSON.parse(response) as { hashtags: string[] };
+      const hashtagResponse = JSON.parse(response.answer) as { hashtags: string[] };
       return hashtagResponse.hashtags || [];
     } catch (error) {
-      console.error('Failed to generate hashtags:', error);
+      this.log(`Failed to generate hashtags: ${error}`, 'error');
       // Fallback to empty array
       return [];
     }
   }
 
+  /**
+   * Check if a goal has been completed based on actions taken and current page state
+   */
+  async checkGoalCompletion(
+    goalDescription: string,
+    completedActions: string[],
+    pageInfo: any,
+    model: string = DEFAULT_MODEL
+  ): Promise<GoalCompletionResponse> {
+    this.log('Checking goal completion with LLM...');
+
+    const request: LLMRequest = {
+      model,
+      systemPrompt: PROMPTS.GOAL_COMPLETION_CHECK.SYSTEM,
+      userPrompt: PROMPTS.GOAL_COMPLETION_CHECK.USER(goalDescription, completedActions, pageInfo)
+    };
+
+    try {
+      const response = await this.callLLM(request);
+      return JSON.parse(response.answer) as GoalCompletionResponse;
+    } catch (error) {
+      this.log(`Failed to check goal completion: ${error}`, 'error');
+      // Conservative fallback - assume not complete if we can't determine
+      return {
+        isComplete: false,
+        reason: 'Error checking completion status'
+      };
+    }
+  }
+
+  /**
+   * Check goal completion with visual verification (uses vision model)
+   */
+  async checkGoalCompletionWithVision(
+    goalDescription: string,
+    completedActions: string[],
+    pageInfo: any,
+    imageDataUrl: string,
+    model: string = VISION_MODEL
+  ): Promise<GoalCompletionResponse> {
+    this.log(`👔 Checking goal completion with vision (${model})...`);
+
+    const request: LLMRequest = {
+      model,
+      systemPrompt: `You are checking if a test automation goal has been completed by analyzing both DOM state and visual appearance.
+
+CRITICAL: For action goals (login, submit, click, navigate), check if the PRIMARY ACTION and its SIDE EFFECTS are complete:
+- "Login" = Fill fields AND click button AND verify navigation/page change
+- "Submit form" = Fill fields AND click submit AND verify submission (success message/page change)
+- "Click X" = Click X AND verify expected page change or UI update
+
+For verification goals (verify, check, confirm), verify the VISUAL PRESENCE of expected elements.`,
+      userPrompt: `GOAL: ${goalDescription}
+
+ACTIONS COMPLETED:
+${completedActions.map((action, i) => `${i + 1}. ${action}`).join('\n')}
+
+CURRENT PAGE STATE:
+URL: ${pageInfo.url}
+Title: ${pageInfo.title}
+Interactive Elements:
+${pageInfo.formattedElements}
+
+Based on the screenshot AND page state, is this goal COMPLETE?
+
+Respond ONLY with valid JSON:
+{
+  "isComplete": true/false,
+  "reason": "Brief explanation based on what you SEE in the screenshot and DOM",
+  "nextSubGoal": "If incomplete, what specific next action is needed?"
+}`,
+      imageUrl: imageDataUrl
+    };
+
+    try {
+      const response = await this.callLLM(request);
+      const parsed = JSON.parse(response.answer) as GoalCompletionResponse;
+      this.log(`👔 Vision goal check result: ${parsed.isComplete ? 'COMPLETE ✅' : 'INCOMPLETE ❌'}`);
+      return parsed;
+    } catch (error) {
+      this.log(`Failed to check goal completion with vision: ${error}`, 'error');
+      // Conservative fallback - assume not complete if we can't determine
+      return {
+        isComplete: false,
+        reason: 'Error checking completion status with vision'
+      };
+    }
+  }
+
+  /**
+   * Ask LLM if a screenshot would help debug the current failure
+   */
+  async assessScreenshotNeed(
+    stepDescription: string,
+    errorMessage: string,
+    attemptCount: number,
+    pageInfo: any,
+    model: string = DEFAULT_MODEL
+  ): Promise<ScreenshotNeedResponse> {
+    this.log('Assessing screenshot need with LLM...');
+
+    const request: LLMRequest = {
+      model,
+      systemPrompt: PROMPTS.SCREENSHOT_NEED_ASSESSMENT.SYSTEM,
+      userPrompt: PROMPTS.SCREENSHOT_NEED_ASSESSMENT.USER(stepDescription, errorMessage, attemptCount, pageInfo)
+    };
+
+    try {
+      const response = await this.callLLM(request);
+      return JSON.parse(response.answer) as ScreenshotNeedResponse;
+    } catch (error) {
+      this.log(`Failed to assess screenshot need: ${error}`, 'error');
+      // Conservative fallback - don't use expensive screenshot unless we're sure
+      return {
+        needsScreenshot: false,
+        reason: 'Error assessing need, defaulting to no screenshot'
+      };
+    }
+  }
+
+  /**
+   * Get diagnostic analysis from screenshot (supervisor role)
+   */
+  async getVisionDiagnostics(
+    stepDescription: string,
+    pageInfo: any,
+    previousSteps: any[],
+    lastError: string | undefined,
+    imageDataUrl: string,
+    model: string = VISION_MODEL
+  ): Promise<VisionDiagnosticResponse> {
+    this.log('👔 SUPERVISOR: Analyzing screenshot for diagnostic insights...');
+
+    const previousCommands = previousSteps
+      .map(s => s.playwrightCommand)
+      .filter(Boolean)
+      .join('\n');
+
+    const attemptHistory = previousSteps.length > 0
+      ? `Previous attempts context: ${previousSteps.length} commands executed`
+      : '';
+
+    const errorContext = lastError
+      ? `Last Error: ${lastError}`
+      : '';
+
+    const request: LLMRequest = {
+      model,
+      systemPrompt: PROMPTS.VISION_DIAGNOSTIC_ANALYSIS.SYSTEM,
+      userPrompt: PROMPTS.VISION_DIAGNOSTIC_ANALYSIS.USER(
+        stepDescription,
+        pageInfo,
+        previousCommands,
+        attemptHistory,
+        errorContext
+      ),
+      imageUrl: imageDataUrl
+    };
+
+    try {
+      const response = await this.callLLM(request);
+      const diagnostics = JSON.parse(response.answer) as VisionDiagnosticResponse;
+
+      // Log supervisor's findings
+      this.log(`👔 SUPERVISOR ANALYSIS:`);
+      this.log(`  📸 Visual: ${diagnostics.visualAnalysis}`);
+      this.log(`  🔍 Root cause: ${diagnostics.rootCause}`);
+      this.log(`  📋 Instructions: ${diagnostics.specificInstructions}`);
+      this.log(`  💡 Approach: ${diagnostics.recommendedApproach}`);
+      if (diagnostics.elementsFound?.length > 0) {
+        this.log(`  ✅ Elements found: ${diagnostics.elementsFound.join(', ')}`);
+      }
+      if (diagnostics.elementsNotFound?.length > 0) {
+        this.log(`  ❌ Elements NOT found: ${diagnostics.elementsNotFound.join(', ')}`);
+      }
+
+      return diagnostics;
+    } catch (error) {
+      this.log(`Failed to get vision diagnostics: ${error}`, 'error');
+      throw new Error(`Vision diagnostic analysis failed: ${error}`);
+    }
+  }
+
+  /**
+   * Generate command based on supervisor's instructions
+   */
+  async generateCommandFromSupervisorInstructions(
+    stepDescription: string,
+    supervisorDiagnostics: VisionDiagnosticResponse,
+    pageInfo: any,
+    model: string = DEFAULT_MODEL
+  ): Promise<string> {
+    this.log('🔨 WORKER: Generating command based on supervisor instructions...');
+
+    const request: LLMRequest = {
+      model,
+      systemPrompt: PROMPTS.PLAYWRIGHT_COMMAND_WITH_SUPERVISOR.SYSTEM,
+      userPrompt: PROMPTS.PLAYWRIGHT_COMMAND_WITH_SUPERVISOR.USER(
+        stepDescription,
+        supervisorDiagnostics.specificInstructions,
+        supervisorDiagnostics.visualAnalysis,
+        supervisorDiagnostics.elementsFound || [],
+        supervisorDiagnostics.elementsNotFound || [],
+        pageInfo
+      )
+    };
+
+    try {
+      const response = await this.callLLM(request);
+      const commandResponse = JSON.parse(response.answer) as LLMPlaywrightCommandResponse;
+
+      if (commandResponse.reasoning) {
+        this.log(`🔨 WORKER reasoning: ${commandResponse.reasoning}`);
+      }
+
+      return commandResponse.command;
+    } catch (error) {
+      this.log(`Failed to generate command from supervisor instructions: ${error}`, 'error');
+      throw new Error(`Command generation from supervisor instructions failed: ${error}`);
+    }
+  }
+
+  /**
+   * Generate Playwright command with vision (uses vision model)
+   */
+  async generatePlaywrightCommandWithVision(
+    stepDescription: string,
+    pageInfo: any,
+    previousSteps: any[],
+    lastError: string | undefined,
+    imageDataUrl: string, // Full data URL: data:image/png;base64,...
+    model: string = VISION_MODEL
+  ): Promise<string> {
+    this.log(`⚠️ USING VISION MODE (${model})...`);
+
+    const previousCommands = previousSteps
+      .map(s => s.playwrightCommand)
+      .filter(Boolean)
+      .join('\n');
+
+    const attemptHistory = previousSteps.length > 0
+      ? `Previous attempts context: ${previousSteps.length} commands executed`
+      : '';
+
+    const errorContext = lastError
+      ? `Last Error: ${lastError}`
+      : '';
+
+    const request: LLMRequest = {
+      model,
+      systemPrompt: PROMPTS.PLAYWRIGHT_COMMAND_WITH_VISION.SYSTEM,
+      userPrompt: PROMPTS.PLAYWRIGHT_COMMAND_WITH_VISION.USER(
+        stepDescription,
+        pageInfo,
+        previousCommands,
+        attemptHistory,
+        errorContext
+      ),
+      imageUrl: imageDataUrl // Full data URL constructed by client
+    };
+
+    try {
+      const response = await this.callLLM(request);
+      const commandResponse = JSON.parse(response.answer) as LLMPlaywrightCommandResponse & {
+        visualInsights?: string;
+        failureRootCause?: string;
+        recommendedAlternative?: string;
+      };
+
+      // Log diagnostic insights from vision analysis
+      if (commandResponse.visualInsights) {
+        this.log(`📸 Visual insights: ${commandResponse.visualInsights}`);
+      }
+
+      if (commandResponse.failureRootCause) {
+        this.log(`🔍 Root cause analysis: ${commandResponse.failureRootCause}`);
+      }
+
+      if (commandResponse.recommendedAlternative) {
+        this.log(`💡 Recommended alternative: ${commandResponse.recommendedAlternative}`);
+      }
+
+      if (commandResponse.reasoning) {
+        this.log(`🧠 Vision-based reasoning: ${commandResponse.reasoning}`);
+      }
+
+      return commandResponse.command;
+    } catch (error) {
+      this.log(`Failed to generate command with vision: ${error}`, 'error');
+      throw new Error(`Vision-enhanced command generation failed: ${error}`);
+    }
+  }
+
   /**
    * Break down scenario into steps
    */
-  async breakdownScenario(scenario: string, model: string = 'gpt-4.1-mini'): Promise<ScenarioStep[]> {
-    console.log('Breaking down scenario with LLM...');
+  async breakdownScenario(scenario: string, model: string = DEFAULT_MODEL): Promise<ScenarioStep[]> {
+    this.log('Breaking down scenario with LLM...');
+    this.log(`📝 INPUT SCENARIO: ${scenario}`);
 
-    const request: CallLLMRequest = {
+    const request: LLMRequest = {
       model,
-      system_prompt: PROMPTS.SCENARIO_BREAKDOWN.SYSTEM,
-      user_prompt: PROMPTS.SCENARIO_BREAKDOWN.USER(scenario)
+      systemPrompt: PROMPTS.SCENARIO_BREAKDOWN.SYSTEM,
+      userPrompt: PROMPTS.SCENARIO_BREAKDOWN.USER(scenario)
     };
 
     try {
       const response = await this.callLLM(request);
-      const breakdownResponse = JSON.parse(response) as LLMScenarioBreakdownResponse;
+      this.log(`🤖 RAW LLM RESPONSE: ${response.answer}`);
+
+      const breakdownResponse = JSON.parse(response.answer) as LLMScenarioBreakdownResponse;
+      this.log(`📋 PARSED BREAKDOWN: ${JSON.stringify(breakdownResponse, null, 2)}`);
 
       // Validate and clean up steps
       const cleanedSteps = breakdownResponse.steps
@@ -211,14 +502,17 @@ export class LLMFacade {
         .filter(step => step.length > 0)
         .slice(0, 10); // Limit to 10 steps max
 
+      this.log(`✅ CLEANED STEPS: ${JSON.stringify(cleanedSteps, null, 2)}`);
+
       return cleanedSteps.map((desc, index) => ({
         stepNumber: index + 1,
         description: desc,
       }));
     } catch (error) {
-      console.error('Failed to breakdown scenario:', error);
-      // Fallback to simple breakdown
-      const stepDescriptions = scenario.split('.').map(s => s.trim()).filter(s => s.length > 0);
+      this.log(`❌ Failed to breakdown scenario: ${error}`, 'error');
+      // Fallback to simple breakdown by newlines (preserves URLs)
+      const stepDescriptions = scenario.split('\n').map(s => s.trim()).filter(s => s.length > 0);
+      this.log(`🔄 FALLBACK STEPS: ${JSON.stringify(stepDescriptions, null, 2)}`);
      return stepDescriptions.map((desc, index) => ({
        stepNumber: index + 1,
        description: desc,
@@ -235,9 +529,9 @@
     previousSteps: ScenarioStep[],
     lastError?: string,
     currentStep?: ScenarioStep,
-    model: string = 'gpt-4.1-mini'
+    model: string = DEFAULT_MODEL
   ): Promise<string | null> {
-    console.log('Generating Playwright command with LLM...');
+    this.log('Generating Playwright command with LLM...');
 
     const previousCommands = previousSteps
       .filter(s => s.playwrightCommand && s.success)
@@ -258,18 +552,18 @@
       errorContext
     );
 
-    const request: CallLLMRequest = {
+    const request: LLMRequest = {
       model,
-      system_prompt: PROMPTS.PLAYWRIGHT_COMMAND.SYSTEM,
-      user_prompt: prompt
+      systemPrompt: PROMPTS.PLAYWRIGHT_COMMAND.SYSTEM,
+      userPrompt: prompt
     };
 
     try {
       const response = await this.callLLM(request);
-      const commandResponse = JSON.parse(response) as LLMPlaywrightCommandResponse;
+      const commandResponse = JSON.parse(response.answer) as LLMPlaywrightCommandResponse;
       return commandResponse.command;
     } catch (error) {
-      console.error('Failed to generate Playwright command:', error);
+      this.log(`Failed to generate Playwright command: ${error}`, 'error');
      return null;
    }
  }
@@ -278,25 +572,31 @@
    * Parse script into steps for AI repair
    */
   async parseScriptIntoSteps(script: string, model: string = 'gpt-4o-mini'): Promise<Array<{ description: string; code: string; success?: boolean; error?: string }>> {
-    const request: CallLLMRequest = {
+    this.log('Parsing script into steps with LLM...');
+
+    const request: LLMRequest = {
       model,
-      system_prompt: PROMPTS.SCRIPT_PARSING.SYSTEM,
-      user_prompt: PROMPTS.SCRIPT_PARSING.USER(script)
+      systemPrompt: PROMPTS.SCRIPT_PARSING.SYSTEM,
+      userPrompt: PROMPTS.SCRIPT_PARSING.USER(script)
     };
 
     try {
       const response = await this.callLLM(request);
-      const parsed = JSON.parse(response);
+      this.log(`Raw LLM parsing response (first 500 chars): ${response.answer.substring(0, 500)}`);
+
+      const parsed = JSON.parse(response.answer);
+      this.log(`Parsed JSON structure: ${JSON.stringify(parsed, null, 2).substring(0, 1000)}`);
 
       // Expect JSON object with steps array
       if (parsed.steps && Array.isArray(parsed.steps)) {
+        this.log(`LLM parsing successful, got ${parsed.steps.length} steps`);
         return parsed.steps;
       } else {
-        console.error('Unexpected LLM response format - expected {steps: [...]}:', parsed);
+        this.log(`Unexpected LLM response format - expected {steps: [...]}: ${JSON.stringify(parsed)}`, 'error');
        return [];
      }
    } catch (error) {
-      console.error('Failed to parse LLM response as JSON:', error);
+      this.log(`Failed to parse LLM response as JSON: ${error}`, 'error');
      return [];
    }
  }
@@ -311,12 +611,12 @@
     pageInfo: PageInfo,
     failureHistory: string,
     recentRepairs: string,
-    model: string = 'gpt-4.1-mini'
+    model: string = DEFAULT_MODEL
   ): Promise<RepairSuggestionResponse> {
-    const request: CallLLMRequest = {
+    const request: LLMRequest = {
       model,
-      system_prompt: PROMPTS.REPAIR_SUGGESTION.SYSTEM,
-      user_prompt: PROMPTS.REPAIR_SUGGESTION.USER(
+      systemPrompt: PROMPTS.REPAIR_SUGGESTION.SYSTEM,
+      userPrompt: PROMPTS.REPAIR_SUGGESTION.USER(
         stepDescription,
         stepCode,
         errorMessage,
@@ -327,9 +627,9 @@
     };
 
     const response = await this.callLLM(request);
-    console.log(`🤖 LLM Repair Response:`, response);
-    const parsed = JSON.parse(response) as any;
-    console.log(`🤖 Parsed Repair Action:`, parsed);
+    this.log(`🤖 LLM Repair Response: ${response.answer}`);
+    const parsed = JSON.parse(response.answer) as any;
+    this.log(`🤖 Parsed Repair Action: ${JSON.stringify(parsed)}`);
 
     // Convert string operation to enum
     if (parsed.action && parsed.action.operation) {
@@ -357,16 +657,16 @@
   async assessRepairConfidence(
     originalScript: string,
     updatedScript: string,
-    model: string = 'gpt-4.1-mini'
+    model: string = DEFAULT_MODEL
   ): Promise<RepairConfidenceResponse> {
-    const request: CallLLMRequest = {
+    const request: LLMRequest = {
       model,
-      system_prompt: PROMPTS.REPAIR_CONFIDENCE.SYSTEM,
-      user_prompt: PROMPTS.REPAIR_CONFIDENCE.USER(originalScript, updatedScript)
+      systemPrompt: PROMPTS.REPAIR_CONFIDENCE.SYSTEM,
+      userPrompt: PROMPTS.REPAIR_CONFIDENCE.USER(originalScript, updatedScript)
    };
 
     const response = await this.callLLM(request);
-    return JSON.parse(response) as RepairConfidenceResponse;
+    return JSON.parse(response.answer) as RepairConfidenceResponse;
   }
 
   /**
@@ -378,18 +678,18 @@
     newRepairAdvice: string,
     model: string = 'gpt-4o-mini'
   ): Promise<string> {
-    const request: CallLLMRequest = {
+    const request: LLMRequest = {
       model,
-      system_prompt: PROMPTS.FINAL_SCRIPT.SYSTEM,
-      user_prompt: PROMPTS.FINAL_SCRIPT.USER(originalScript, updatedScript, newRepairAdvice)
+      systemPrompt: PROMPTS.FINAL_SCRIPT.SYSTEM,
+      userPrompt: PROMPTS.FINAL_SCRIPT.USER(originalScript, updatedScript, newRepairAdvice)
     };
 
     const response = await this.callLLM(request);
     try {
-      const parsed = JSON.parse(response);
+      const parsed = JSON.parse(response.answer);
       return parsed.script || updatedScript;
     } catch (error) {
-      console.error('Failed to parse final script response:', error);
+      this.log(`Failed to parse final script response: ${error}`, 'error');
      return updatedScript;
    }
  }
@@ -435,13 +735,87 @@ LEARNING FROM FAILURES:
 
     const errorText = errors.join(' | ');
 
+    // Detect if we're repeatedly looking for elements that don't exist
+    const attemptedCommands = currentStep?.attempts
+      ?.map(a => a.command)
+      .filter(Boolean) || [];
+
+    const lookingForNonExistent = attemptedCommands.some(cmd =>
+      cmd?.includes('getByText') ||
+      cmd?.includes('toBeVisible') ||
+      cmd?.includes('waitFor')
+    ) && errors.some(err =>
+      err.includes('not found') ||
+      err.includes('Timeout') ||
+      err.includes('Expected: visible')
+    );
+
+    let hallucinationWarning = '';
+    if (lookingForNonExistent && attemptedCommands.length >= 2) {
+      hallucinationWarning = `
+⚠️ HALLUCINATION ALERT:
+You've made ${attemptedCommands.length} attempts trying to find/verify elements that don't exist.
+STOP looking for these elements. They are NOT in the DOM.
+Instead:
+- Check if the goal is ALREADY COMPLETE (action succeeded = goal done)
+- Use alternative verification (state changes, network, page load)
+- Move on if the primary action succeeded
+`;
+    }
+
     return `ERROR CONTEXT:
 Last Error: ${errorText}
-
+${hallucinationWarning}
 ANALYZE THE ERROR AND ADAPT:
 - Study the error message to understand what went wrong
+- If element "not found" after 2+ attempts, it probably doesn't exist - stop looking for it
 - Try a completely different approach than what failed
 - Consider alternative selectors, timing, or interaction methods
 - Never repeat the exact same command that failed`;
   }
+
+  /**
+   * Cleanup generated script - remove redundancies and make minor adjustments
+   */
+  async cleanupScript(script: string, model?: string): Promise<{ script: string; changes: string[]; skipped?: string }> {
+    try {
+      const response = await this.llmProvider.callLLM({
+        model: model || DEFAULT_MODEL,
+        systemPrompt: PROMPTS.SCRIPT_CLEANUP.SYSTEM,
+        userPrompt: PROMPTS.SCRIPT_CLEANUP.USER(script)
+      });
+
+      // Parse JSON response
+      const jsonMatch = response.answer.match(/\{[\s\S]*\}/);
+      if (!jsonMatch) {
+        console.log('[LLMFacade] Cleanup response not in JSON format, returning original script');
+        return { script, changes: [], skipped: 'Response not in JSON format' };
+      }
+
+      const parsed = JSON.parse(jsonMatch[0]);
+
+      // Validate response
+      if (!parsed.script) {
+        console.log('[LLMFacade] Cleanup response missing script field, returning original');
+        return { script, changes: [], skipped: 'Invalid response format' };
+      }
+
+      console.log(`[LLMFacade] Script cleanup completed. Changes: ${parsed.changes?.length || 0}`);
+      if (parsed.changes && parsed.changes.length > 0) {
+        parsed.changes.forEach((change: string, i: number) => {
+          console.log(`[LLMFacade] ${i + 1}. ${change}`);
+        });
+      }
+
+      return {
+        script: parsed.script,
+        changes: parsed.changes || [],
+        skipped: parsed.skipped
+      };
+    } catch (error: any) {
+      console.error('[LLMFacade] Script cleanup failed:', error.message);
+      // Return original script on error
+      return { script, changes: [], skipped: `Error: ${error.message}` };
    }
  }
 }
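
For reference, a hypothetical consumer of the refactored facade. The stub provider below is illustrative only; the package's real providers live under src/providers/ (local-llm-provider.ts and backend-proxy-llm-provider.ts), whose constructor signatures are not part of this diff. The facade methods and callbacks shown are the ones introduced or changed above.

// Illustrative wiring only; assumes the LLMProvider sketch near the top of this diff.
const stubProvider: LLMProvider = {
  async callLLM(request: LLMRequest): Promise<LLMResponse> {
    // A real provider would call a model here; this stub echoes a fixed breakdown.
    return {
      answer: JSON.stringify({ steps: ['Navigate to the login page'] }),
      usage: { inputTokens: 0, outputTokens: 0 },
    };
  },
};

const facade = new LLMFacade(stubProvider);
facade.setLogger((message, level = 'log') => console[level](message));
facade.setTokenUsageCallback((inputTokens, outputTokens, includesImage) => {
  // Hook for credit accounting (cf. the credit-usage-service changes in this release).
  console.log(`LLM usage: in=${inputTokens} out=${outputTokens} image=${includesImage}`);
});

const steps = await facade.breakdownScenario('Log in and verify the dashboard loads');
// → [{ stepNumber: 1, description: 'Navigate to the login page' }]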