testchimp-runner-core 0.0.21 → 0.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/VISION_DIAGNOSTICS_IMPROVEMENTS.md +336 -0
  2. package/dist/credit-usage-service.d.ts +9 -0
  3. package/dist/credit-usage-service.d.ts.map +1 -1
  4. package/dist/credit-usage-service.js +20 -5
  5. package/dist/credit-usage-service.js.map +1 -1
  6. package/dist/execution-service.d.ts +7 -2
  7. package/dist/execution-service.d.ts.map +1 -1
  8. package/dist/execution-service.js +91 -36
  9. package/dist/execution-service.js.map +1 -1
  10. package/dist/index.d.ts +30 -2
  11. package/dist/index.d.ts.map +1 -1
  12. package/dist/index.js +91 -26
  13. package/dist/index.js.map +1 -1
  14. package/dist/llm-facade.d.ts +64 -8
  15. package/dist/llm-facade.d.ts.map +1 -1
  16. package/dist/llm-facade.js +361 -109
  17. package/dist/llm-facade.js.map +1 -1
  18. package/dist/llm-provider.d.ts +39 -0
  19. package/dist/llm-provider.d.ts.map +1 -0
  20. package/dist/llm-provider.js +7 -0
  21. package/dist/llm-provider.js.map +1 -0
  22. package/dist/model-constants.d.ts +21 -0
  23. package/dist/model-constants.d.ts.map +1 -0
  24. package/dist/model-constants.js +24 -0
  25. package/dist/model-constants.js.map +1 -0
  26. package/dist/orchestrator/index.d.ts +8 -0
  27. package/dist/orchestrator/index.d.ts.map +1 -0
  28. package/dist/orchestrator/index.js +23 -0
  29. package/dist/orchestrator/index.js.map +1 -0
  30. package/dist/orchestrator/orchestrator-agent.d.ts +66 -0
  31. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -0
  32. package/dist/orchestrator/orchestrator-agent.js +855 -0
  33. package/dist/orchestrator/orchestrator-agent.js.map +1 -0
  34. package/dist/orchestrator/tool-registry.d.ts +74 -0
  35. package/dist/orchestrator/tool-registry.d.ts.map +1 -0
  36. package/dist/orchestrator/tool-registry.js +131 -0
  37. package/dist/orchestrator/tool-registry.js.map +1 -0
  38. package/dist/orchestrator/tools/check-page-ready.d.ts +13 -0
  39. package/dist/orchestrator/tools/check-page-ready.d.ts.map +1 -0
  40. package/dist/orchestrator/tools/check-page-ready.js +72 -0
  41. package/dist/orchestrator/tools/check-page-ready.js.map +1 -0
  42. package/dist/orchestrator/tools/extract-data.d.ts +13 -0
  43. package/dist/orchestrator/tools/extract-data.d.ts.map +1 -0
  44. package/dist/orchestrator/tools/extract-data.js +84 -0
  45. package/dist/orchestrator/tools/extract-data.js.map +1 -0
  46. package/dist/orchestrator/tools/index.d.ts +10 -0
  47. package/dist/orchestrator/tools/index.d.ts.map +1 -0
  48. package/dist/orchestrator/tools/index.js +18 -0
  49. package/dist/orchestrator/tools/index.js.map +1 -0
  50. package/dist/orchestrator/tools/inspect-page.d.ts +13 -0
  51. package/dist/orchestrator/tools/inspect-page.d.ts.map +1 -0
  52. package/dist/orchestrator/tools/inspect-page.js +39 -0
  53. package/dist/orchestrator/tools/inspect-page.js.map +1 -0
  54. package/dist/orchestrator/tools/recall-history.d.ts +13 -0
  55. package/dist/orchestrator/tools/recall-history.d.ts.map +1 -0
  56. package/dist/orchestrator/tools/recall-history.js +64 -0
  57. package/dist/orchestrator/tools/recall-history.js.map +1 -0
  58. package/dist/orchestrator/tools/take-screenshot.d.ts +15 -0
  59. package/dist/orchestrator/tools/take-screenshot.d.ts.map +1 -0
  60. package/dist/orchestrator/tools/take-screenshot.js +112 -0
  61. package/dist/orchestrator/tools/take-screenshot.js.map +1 -0
  62. package/dist/orchestrator/types.d.ts +133 -0
  63. package/dist/orchestrator/types.d.ts.map +1 -0
  64. package/dist/orchestrator/types.js +28 -0
  65. package/dist/orchestrator/types.js.map +1 -0
  66. package/dist/playwright-mcp-service.d.ts +9 -0
  67. package/dist/playwright-mcp-service.d.ts.map +1 -1
  68. package/dist/playwright-mcp-service.js +20 -5
  69. package/dist/playwright-mcp-service.js.map +1 -1
  70. package/dist/progress-reporter.d.ts +97 -0
  71. package/dist/progress-reporter.d.ts.map +1 -0
  72. package/dist/progress-reporter.js +18 -0
  73. package/dist/progress-reporter.js.map +1 -0
  74. package/dist/prompts.d.ts +24 -0
  75. package/dist/prompts.d.ts.map +1 -1
  76. package/dist/prompts.js +593 -68
  77. package/dist/prompts.js.map +1 -1
  78. package/dist/providers/backend-proxy-llm-provider.d.ts +25 -0
  79. package/dist/providers/backend-proxy-llm-provider.d.ts.map +1 -0
  80. package/dist/providers/backend-proxy-llm-provider.js +76 -0
  81. package/dist/providers/backend-proxy-llm-provider.js.map +1 -0
  82. package/dist/providers/local-llm-provider.d.ts +21 -0
  83. package/dist/providers/local-llm-provider.d.ts.map +1 -0
  84. package/dist/providers/local-llm-provider.js +35 -0
  85. package/dist/providers/local-llm-provider.js.map +1 -0
  86. package/dist/scenario-service.d.ts +27 -1
  87. package/dist/scenario-service.d.ts.map +1 -1
  88. package/dist/scenario-service.js +48 -12
  89. package/dist/scenario-service.js.map +1 -1
  90. package/dist/scenario-worker-class.d.ts +39 -2
  91. package/dist/scenario-worker-class.d.ts.map +1 -1
  92. package/dist/scenario-worker-class.js +614 -86
  93. package/dist/scenario-worker-class.js.map +1 -1
  94. package/dist/script-utils.d.ts +2 -0
  95. package/dist/script-utils.d.ts.map +1 -1
  96. package/dist/script-utils.js +44 -4
  97. package/dist/script-utils.js.map +1 -1
  98. package/dist/types.d.ts +11 -0
  99. package/dist/types.d.ts.map +1 -1
  100. package/dist/types.js.map +1 -1
  101. package/dist/utils/browser-utils.d.ts +20 -1
  102. package/dist/utils/browser-utils.d.ts.map +1 -1
  103. package/dist/utils/browser-utils.js +102 -51
  104. package/dist/utils/browser-utils.js.map +1 -1
  105. package/dist/utils/page-info-utils.d.ts +23 -4
  106. package/dist/utils/page-info-utils.d.ts.map +1 -1
  107. package/dist/utils/page-info-utils.js +174 -43
  108. package/dist/utils/page-info-utils.js.map +1 -1
  109. package/package.json +1 -2
  110. package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +642 -0
  111. package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +844 -0
  112. package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +539 -0
  113. package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +241 -0
  114. package/plandocs/PHASE1_FINAL_STATUS.md +210 -0
  115. package/plandocs/PLANNING_SESSION_SUMMARY.md +372 -0
  116. package/plandocs/SCRIPT_CLEANUP_FEATURE.md +201 -0
  117. package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +364 -0
  118. package/plandocs/SELECTOR_IMPROVEMENTS.md +139 -0
  119. package/src/credit-usage-service.ts +23 -5
  120. package/src/execution-service.ts +152 -42
  121. package/src/index.ts +169 -26
  122. package/src/llm-facade.ts +500 -126
  123. package/src/llm-provider.ts +43 -0
  124. package/src/model-constants.ts +23 -0
  125. package/src/orchestrator/index.ts +33 -0
  126. package/src/orchestrator/orchestrator-agent.ts +1037 -0
  127. package/src/orchestrator/tool-registry.ts +182 -0
  128. package/src/orchestrator/tools/check-page-ready.ts +75 -0
  129. package/src/orchestrator/tools/extract-data.ts +92 -0
  130. package/src/orchestrator/tools/index.ts +11 -0
  131. package/src/orchestrator/tools/inspect-page.ts +42 -0
  132. package/src/orchestrator/tools/recall-history.ts +72 -0
  133. package/src/orchestrator/tools/take-screenshot.ts +128 -0
  134. package/src/orchestrator/types.ts +200 -0
  135. package/src/playwright-mcp-service.ts +23 -5
  136. package/src/progress-reporter.ts +109 -0
  137. package/src/prompts.ts +606 -69
  138. package/src/providers/backend-proxy-llm-provider.ts +91 -0
  139. package/src/providers/local-llm-provider.ts +38 -0
  140. package/src/scenario-service.ts +83 -13
  141. package/src/scenario-worker-class.ts +740 -72
  142. package/src/script-utils.ts +50 -5
  143. package/src/types.ts +13 -1
  144. package/src/utils/browser-utils.ts +123 -51
  145. package/src/utils/page-info-utils.ts +210 -53
  146. package/testchimp-runner-core-0.0.22.tgz +0 -0
@@ -1,104 +1,77 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
3
  exports.LLMFacade = void 0;
7
- const axios_1 = __importDefault(require("axios"));
8
4
  const prompts_1 = require("./prompts");
9
5
  const types_1 = require("./types");
10
- const auth_config_1 = require("./auth-config");
11
- const env_loader_1 = require("./env-loader");
6
+ const model_constants_1 = require("./model-constants");
12
7
  class LLMFacade {
13
- constructor(authConfig, backendUrl) {
14
- // Use provided backend URL or fall back to environment configuration
15
- if (backendUrl) {
16
- this.backendUrl = backendUrl;
17
- console.log(`LLMFacade initialized with provided backend URL: ${this.backendUrl}`);
18
- }
19
- else {
20
- // Fall back to environment configuration for backward compatibility
21
- const envConfig = (0, env_loader_1.loadEnvConfig)();
22
- this.backendUrl = envConfig.TESTCHIMP_BACKEND_URL;
23
- console.log(`LLMFacade initialized with environment backend URL: ${this.backendUrl}`);
24
- }
25
- // Use provided auth config or try to create from environment
26
- this.authConfig = authConfig || (0, auth_config_1.createAuthConfigFromEnv)();
27
- if (!this.authConfig) {
28
- console.warn('TestChimp authentication not configured. LLM calls may fail.');
29
- }
8
+ constructor(llmProvider) {
9
+ this.llmProvider = llmProvider;
10
+ this.log('LLMFacade initialized with pluggable LLM provider');
30
11
  }
31
12
  /**
32
- * Update authentication configuration
13
+ * Set token usage callback for tracking
33
14
  */
34
- setAuthConfig(authConfig) {
35
- this.authConfig = authConfig;
15
+ setTokenUsageCallback(callback) {
16
+ this.tokenUsageCallback = callback;
36
17
  }
37
18
  /**
38
- * Get current authentication configuration
19
+ * Set a logger callback for capturing execution logs
39
20
  */
40
- getAuthConfig() {
41
- return this.authConfig;
21
+ setLogger(logger) {
22
+ this.logger = logger;
23
+ this.llmProvider.setLogger?.(logger);
42
24
  }
43
- async callLLM(request) {
44
- if (!this.authConfig) {
45
- throw new Error('Authentication not configured. Please set authentication credentials.');
25
+ /**
26
+ * Log a message using the configured logger
27
+ */
28
+ log(message, level = 'log') {
29
+ if (this.logger) {
30
+ this.logger(message, level);
31
+ }
32
+ // Console fallback for debug visibility
33
+ if (level === 'error') {
34
+ console.error(message);
46
35
  }
36
+ else if (level === 'warn') {
37
+ console.warn(message);
38
+ }
39
+ else {
40
+ console.log(message);
41
+ }
42
+ }
43
+ async callLLM(request) {
47
44
  try {
48
- const authHeaders = (0, auth_config_1.getAuthHeaders)(this.authConfig);
49
- const url = `${this.backendUrl}/localagent/call_llm`;
50
- console.log(`repairing step`);
51
- const response = await axios_1.default.post(url, request, {
52
- headers: {
53
- ...authHeaders,
54
- 'Content-Type': 'application/json'
55
- },
56
- timeout: 30000 // 30 second timeout for LLM calls
57
- });
58
- if (response.data && response.data.answer) {
59
- return response.data.answer;
60
- }
61
- else {
62
- throw new Error('Invalid response from LLM backend');
45
+ const response = await this.llmProvider.callLLM(request);
46
+ // Report token usage if callback is set
47
+ if (response.usage && this.tokenUsageCallback) {
48
+ this.tokenUsageCallback(response.usage.inputTokens, response.usage.outputTokens, !!request.imageUrl);
63
49
  }
50
+ return response;
64
51
  }
65
52
  catch (error) {
66
- // Handle specific error cases with user-friendly messages
67
- if (error.response?.status === 412) {
68
- throw new Error('Insufficient credits. Please upgrade your TestChimp plan or add more credits to continue using AI features.');
69
- }
70
- else if (error.response?.status === 401) {
71
- throw new Error('Authentication failed. Please check your API credentials.');
72
- }
73
- else if (error.response?.status === 403) {
74
- throw new Error('Access denied. Please check your API permissions.');
75
- }
76
- else if (error.response?.status === 429) {
77
- throw new Error('Rate limit exceeded. Please try again later.');
78
- }
79
- else {
80
- console.error('LLM call failed:', error);
81
- throw new Error(`LLM call failed: ${error.message}`);
82
- }
53
+ // Let provider handle its own error messages, just re-throw
54
+ this.log(`LLM call failed: ${error}`, 'error');
55
+ throw error;
83
56
  }
84
57
  }
85
58
  /**
86
59
  * Generate a test name from scenario description
87
60
  */
88
- async generateTestName(scenario, model = 'gpt-4.1-mini') {
89
- console.log('Generating test name with LLM...');
61
+ async generateTestName(scenario, model = model_constants_1.DEFAULT_MODEL) {
62
+ this.log('Generating test name with LLM...');
90
63
  const request = {
91
64
  model,
92
- system_prompt: prompts_1.PROMPTS.TEST_NAME_GENERATION.SYSTEM,
93
- user_prompt: prompts_1.PROMPTS.TEST_NAME_GENERATION.USER(scenario)
65
+ systemPrompt: prompts_1.PROMPTS.TEST_NAME_GENERATION.SYSTEM,
66
+ userPrompt: prompts_1.PROMPTS.TEST_NAME_GENERATION.USER(scenario)
94
67
  };
95
68
  try {
96
69
  const response = await this.callLLM(request);
97
- const testNameResponse = JSON.parse(response);
70
+ const testNameResponse = JSON.parse(response.answer);
98
71
  return testNameResponse.testName;
99
72
  }
100
73
  catch (error) {
101
- console.error('Failed to generate test name:', error);
74
+ this.log(`Failed to generate test name: ${error}`, 'error');
102
75
  // Fallback to a simple generated name
103
76
  return `Test: ${scenario.substring(0, 50)}...`;
104
77
  }
@@ -107,50 +80,263 @@ class LLMFacade {
107
80
  * Generate hashtags for semantic grouping
108
81
  */
109
82
  async generateHashtags(scenario, model = 'gpt-4o-mini') {
110
- console.log('Generating hashtags with LLM...');
83
+ this.log('Generating hashtags with LLM...');
111
84
  const request = {
112
85
  model,
113
- system_prompt: prompts_1.PROMPTS.HASHTAG_GENERATION.SYSTEM,
114
- user_prompt: prompts_1.PROMPTS.HASHTAG_GENERATION.USER(scenario)
86
+ systemPrompt: prompts_1.PROMPTS.HASHTAG_GENERATION.SYSTEM,
87
+ userPrompt: prompts_1.PROMPTS.HASHTAG_GENERATION.USER(scenario)
115
88
  };
116
89
  try {
117
90
  const response = await this.callLLM(request);
118
- const hashtagResponse = JSON.parse(response);
91
+ const hashtagResponse = JSON.parse(response.answer);
119
92
  return hashtagResponse.hashtags || [];
120
93
  }
121
94
  catch (error) {
122
- console.error('Failed to generate hashtags:', error);
95
+ this.log(`Failed to generate hashtags: ${error}`, 'error');
123
96
  // Fallback to empty array
124
97
  return [];
125
98
  }
126
99
  }
100
+ /**
101
+ * Check if a goal has been completed based on actions taken and current page state
102
+ */
103
+ async checkGoalCompletion(goalDescription, completedActions, pageInfo, model = model_constants_1.DEFAULT_MODEL) {
104
+ this.log('Checking goal completion with LLM...');
105
+ const request = {
106
+ model,
107
+ systemPrompt: prompts_1.PROMPTS.GOAL_COMPLETION_CHECK.SYSTEM,
108
+ userPrompt: prompts_1.PROMPTS.GOAL_COMPLETION_CHECK.USER(goalDescription, completedActions, pageInfo)
109
+ };
110
+ try {
111
+ const response = await this.callLLM(request);
112
+ return JSON.parse(response.answer);
113
+ }
114
+ catch (error) {
115
+ this.log(`Failed to check goal completion: ${error}`, 'error');
116
+ // Conservative fallback - assume not complete if we can't determine
117
+ return {
118
+ isComplete: false,
119
+ reason: 'Error checking completion status'
120
+ };
121
+ }
122
+ }
123
+ /**
124
+ * Check goal completion with visual verification (uses vision model)
125
+ */
126
+ async checkGoalCompletionWithVision(goalDescription, completedActions, pageInfo, imageDataUrl, model = model_constants_1.VISION_MODEL) {
127
+ this.log(`👔 Checking goal completion with vision (${model})...`);
128
+ const request = {
129
+ model,
130
+ systemPrompt: `You are checking if a test automation goal has been completed by analyzing both DOM state and visual appearance.
131
+
132
+ CRITICAL: For action goals (login, submit, click, navigate), check if the PRIMARY ACTION and its SIDE EFFECTS are complete:
133
+ - "Login" = Fill fields AND click button AND verify navigation/page change
134
+ - "Submit form" = Fill fields AND click submit AND verify submission (success message/page change)
135
+ - "Click X" = Click X AND verify expected page change or UI update
136
+
137
+ For verification goals (verify, check, confirm), verify the VISUAL PRESENCE of expected elements.`,
138
+ userPrompt: `GOAL: ${goalDescription}
139
+
140
+ ACTIONS COMPLETED:
141
+ ${completedActions.map((action, i) => `${i + 1}. ${action}`).join('\n')}
142
+
143
+ CURRENT PAGE STATE:
144
+ URL: ${pageInfo.url}
145
+ Title: ${pageInfo.title}
146
+ Interactive Elements:
147
+ ${pageInfo.formattedElements}
148
+
149
+ Based on the screenshot AND page state, is this goal COMPLETE?
150
+
151
+ Respond ONLY with valid JSON:
152
+ {
153
+ "isComplete": true/false,
154
+ "reason": "Brief explanation based on what you SEE in the screenshot and DOM",
155
+ "nextSubGoal": "If incomplete, what specific next action is needed?"
156
+ }`,
157
+ imageUrl: imageDataUrl
158
+ };
159
+ try {
160
+ const response = await this.callLLM(request);
161
+ const parsed = JSON.parse(response.answer);
162
+ this.log(`👔 Vision goal check result: ${parsed.isComplete ? 'COMPLETE ✅' : 'INCOMPLETE ❌'}`);
163
+ return parsed;
164
+ }
165
+ catch (error) {
166
+ this.log(`Failed to check goal completion with vision: ${error}`, 'error');
167
+ // Conservative fallback - assume not complete if we can't determine
168
+ return {
169
+ isComplete: false,
170
+ reason: 'Error checking completion status with vision'
171
+ };
172
+ }
173
+ }
174
+ /**
175
+ * Ask LLM if a screenshot would help debug the current failure
176
+ */
177
+ async assessScreenshotNeed(stepDescription, errorMessage, attemptCount, pageInfo, model = model_constants_1.DEFAULT_MODEL) {
178
+ this.log('Assessing screenshot need with LLM...');
179
+ const request = {
180
+ model,
181
+ systemPrompt: prompts_1.PROMPTS.SCREENSHOT_NEED_ASSESSMENT.SYSTEM,
182
+ userPrompt: prompts_1.PROMPTS.SCREENSHOT_NEED_ASSESSMENT.USER(stepDescription, errorMessage, attemptCount, pageInfo)
183
+ };
184
+ try {
185
+ const response = await this.callLLM(request);
186
+ return JSON.parse(response.answer);
187
+ }
188
+ catch (error) {
189
+ this.log(`Failed to assess screenshot need: ${error}`, 'error');
190
+ // Conservative fallback - don't use expensive screenshot unless we're sure
191
+ return {
192
+ needsScreenshot: false,
193
+ reason: 'Error assessing need, defaulting to no screenshot'
194
+ };
195
+ }
196
+ }
197
+ /**
198
+ * Get diagnostic analysis from screenshot (supervisor role)
199
+ */
200
+ async getVisionDiagnostics(stepDescription, pageInfo, previousSteps, lastError, imageDataUrl, model = model_constants_1.VISION_MODEL) {
201
+ this.log('👔 SUPERVISOR: Analyzing screenshot for diagnostic insights...');
202
+ const previousCommands = previousSteps
203
+ .map(s => s.playwrightCommand)
204
+ .filter(Boolean)
205
+ .join('\n');
206
+ const attemptHistory = previousSteps.length > 0
207
+ ? `Previous attempts context: ${previousSteps.length} commands executed`
208
+ : '';
209
+ const errorContext = lastError
210
+ ? `Last Error: ${lastError}`
211
+ : '';
212
+ const request = {
213
+ model,
214
+ systemPrompt: prompts_1.PROMPTS.VISION_DIAGNOSTIC_ANALYSIS.SYSTEM,
215
+ userPrompt: prompts_1.PROMPTS.VISION_DIAGNOSTIC_ANALYSIS.USER(stepDescription, pageInfo, previousCommands, attemptHistory, errorContext),
216
+ imageUrl: imageDataUrl
217
+ };
218
+ try {
219
+ const response = await this.callLLM(request);
220
+ const diagnostics = JSON.parse(response.answer);
221
+ // Log supervisor's findings
222
+ this.log(`👔 SUPERVISOR ANALYSIS:`);
223
+ this.log(` 📸 Visual: ${diagnostics.visualAnalysis}`);
224
+ this.log(` 🔍 Root cause: ${diagnostics.rootCause}`);
225
+ this.log(` 📋 Instructions: ${diagnostics.specificInstructions}`);
226
+ this.log(` 💡 Approach: ${diagnostics.recommendedApproach}`);
227
+ if (diagnostics.elementsFound?.length > 0) {
228
+ this.log(` ✅ Elements found: ${diagnostics.elementsFound.join(', ')}`);
229
+ }
230
+ if (diagnostics.elementsNotFound?.length > 0) {
231
+ this.log(` ❌ Elements NOT found: ${diagnostics.elementsNotFound.join(', ')}`);
232
+ }
233
+ return diagnostics;
234
+ }
235
+ catch (error) {
236
+ this.log(`Failed to get vision diagnostics: ${error}`, 'error');
237
+ throw new Error(`Vision diagnostic analysis failed: ${error}`);
238
+ }
239
+ }
240
+ /**
241
+ * Generate command based on supervisor's instructions
242
+ */
243
+ async generateCommandFromSupervisorInstructions(stepDescription, supervisorDiagnostics, pageInfo, model = model_constants_1.DEFAULT_MODEL) {
244
+ this.log('🔨 WORKER: Generating command based on supervisor instructions...');
245
+ const request = {
246
+ model,
247
+ systemPrompt: prompts_1.PROMPTS.PLAYWRIGHT_COMMAND_WITH_SUPERVISOR.SYSTEM,
248
+ userPrompt: prompts_1.PROMPTS.PLAYWRIGHT_COMMAND_WITH_SUPERVISOR.USER(stepDescription, supervisorDiagnostics.specificInstructions, supervisorDiagnostics.visualAnalysis, supervisorDiagnostics.elementsFound || [], supervisorDiagnostics.elementsNotFound || [], pageInfo)
249
+ };
250
+ try {
251
+ const response = await this.callLLM(request);
252
+ const commandResponse = JSON.parse(response.answer);
253
+ if (commandResponse.reasoning) {
254
+ this.log(`🔨 WORKER reasoning: ${commandResponse.reasoning}`);
255
+ }
256
+ return commandResponse.command;
257
+ }
258
+ catch (error) {
259
+ this.log(`Failed to generate command from supervisor instructions: ${error}`, 'error');
260
+ throw new Error(`Command generation from supervisor instructions failed: ${error}`);
261
+ }
262
+ }
263
+ /**
264
+ * Generate Playwright command with vision (uses vision model)
265
+ */
266
+ async generatePlaywrightCommandWithVision(stepDescription, pageInfo, previousSteps, lastError, imageDataUrl, // Full data URL: data:image/png;base64,...
267
+ model = model_constants_1.VISION_MODEL) {
268
+ this.log(`⚠️ USING VISION MODE (${model})...`);
269
+ const previousCommands = previousSteps
270
+ .map(s => s.playwrightCommand)
271
+ .filter(Boolean)
272
+ .join('\n');
273
+ const attemptHistory = previousSteps.length > 0
274
+ ? `Previous attempts context: ${previousSteps.length} commands executed`
275
+ : '';
276
+ const errorContext = lastError
277
+ ? `Last Error: ${lastError}`
278
+ : '';
279
+ const request = {
280
+ model,
281
+ systemPrompt: prompts_1.PROMPTS.PLAYWRIGHT_COMMAND_WITH_VISION.SYSTEM,
282
+ userPrompt: prompts_1.PROMPTS.PLAYWRIGHT_COMMAND_WITH_VISION.USER(stepDescription, pageInfo, previousCommands, attemptHistory, errorContext),
283
+ imageUrl: imageDataUrl // Full data URL constructed by client
284
+ };
285
+ try {
286
+ const response = await this.callLLM(request);
287
+ const commandResponse = JSON.parse(response.answer);
288
+ // Log diagnostic insights from vision analysis
289
+ if (commandResponse.visualInsights) {
290
+ this.log(`📸 Visual insights: ${commandResponse.visualInsights}`);
291
+ }
292
+ if (commandResponse.failureRootCause) {
293
+ this.log(`🔍 Root cause analysis: ${commandResponse.failureRootCause}`);
294
+ }
295
+ if (commandResponse.recommendedAlternative) {
296
+ this.log(`💡 Recommended alternative: ${commandResponse.recommendedAlternative}`);
297
+ }
298
+ if (commandResponse.reasoning) {
299
+ this.log(`🧠 Vision-based reasoning: ${commandResponse.reasoning}`);
300
+ }
301
+ return commandResponse.command;
302
+ }
303
+ catch (error) {
304
+ this.log(`Failed to generate command with vision: ${error}`, 'error');
305
+ throw new Error(`Vision-enhanced command generation failed: ${error}`);
306
+ }
307
+ }
127
308
  /**
128
309
  * Break down scenario into steps
129
310
  */
130
- async breakdownScenario(scenario, model = 'gpt-4.1-mini') {
131
- console.log('Breaking down scenario with LLM...');
311
+ async breakdownScenario(scenario, model = model_constants_1.DEFAULT_MODEL) {
312
+ this.log('Breaking down scenario with LLM...');
313
+ this.log(`📝 INPUT SCENARIO: ${scenario}`);
132
314
  const request = {
133
315
  model,
134
- system_prompt: prompts_1.PROMPTS.SCENARIO_BREAKDOWN.SYSTEM,
135
- user_prompt: prompts_1.PROMPTS.SCENARIO_BREAKDOWN.USER(scenario)
316
+ systemPrompt: prompts_1.PROMPTS.SCENARIO_BREAKDOWN.SYSTEM,
317
+ userPrompt: prompts_1.PROMPTS.SCENARIO_BREAKDOWN.USER(scenario)
136
318
  };
137
319
  try {
138
320
  const response = await this.callLLM(request);
139
- const breakdownResponse = JSON.parse(response);
321
+ this.log(`🤖 RAW LLM RESPONSE: ${response.answer}`);
322
+ const breakdownResponse = JSON.parse(response.answer);
323
+ this.log(`📋 PARSED BREAKDOWN: ${JSON.stringify(breakdownResponse, null, 2)}`);
140
324
  // Validate and clean up steps
141
325
  const cleanedSteps = breakdownResponse.steps
142
326
  .map(step => step.trim())
143
327
  .filter(step => step.length > 0)
144
328
  .slice(0, 10); // Limit to 10 steps max
329
+ this.log(`✅ CLEANED STEPS: ${JSON.stringify(cleanedSteps, null, 2)}`);
145
330
  return cleanedSteps.map((desc, index) => ({
146
331
  stepNumber: index + 1,
147
332
  description: desc,
148
333
  }));
149
334
  }
150
335
  catch (error) {
151
- console.error('Failed to breakdown scenario:', error);
152
- // Fallback to simple breakdown
153
- const stepDescriptions = scenario.split('.').map(s => s.trim()).filter(s => s.length > 0);
336
+ this.log(`❌ Failed to breakdown scenario: ${error}`, 'error');
337
+ // Fallback to simple breakdown by newlines (preserves URLs)
338
+ const stepDescriptions = scenario.split('\n').map(s => s.trim()).filter(s => s.length > 0);
339
+ this.log(`🔄 FALLBACK STEPS: ${JSON.stringify(stepDescriptions, null, 2)}`);
154
340
  return stepDescriptions.map((desc, index) => ({
155
341
  stepNumber: index + 1,
156
342
  description: desc,
@@ -160,8 +346,8 @@ class LLMFacade {
160
346
  /**
161
347
  * Generate Playwright command for a step
162
348
  */
163
- async generatePlaywrightCommand(stepDescription, pageInfo, previousSteps, lastError, currentStep, model = 'gpt-4.1-mini') {
164
- console.log('Generating Playwright command with LLM...');
349
+ async generatePlaywrightCommand(stepDescription, pageInfo, previousSteps, lastError, currentStep, model = model_constants_1.DEFAULT_MODEL) {
350
+ this.log('Generating Playwright command with LLM...');
165
351
  const previousCommands = previousSteps
166
352
  .filter(s => s.playwrightCommand && s.success)
167
353
  .map(s => `// Step ${s.stepNumber}: ${s.description}\n${s.playwrightCommand}`)
@@ -173,16 +359,16 @@ class LLMFacade {
173
359
  const prompt = prompts_1.PROMPTS.PLAYWRIGHT_COMMAND.USER(stepDescription, pageInfo, previousCommands, attemptHistory, errorContext);
174
360
  const request = {
175
361
  model,
176
- system_prompt: prompts_1.PROMPTS.PLAYWRIGHT_COMMAND.SYSTEM,
177
- user_prompt: prompt
362
+ systemPrompt: prompts_1.PROMPTS.PLAYWRIGHT_COMMAND.SYSTEM,
363
+ userPrompt: prompt
178
364
  };
179
365
  try {
180
366
  const response = await this.callLLM(request);
181
- const commandResponse = JSON.parse(response);
367
+ const commandResponse = JSON.parse(response.answer);
182
368
  return commandResponse.command;
183
369
  }
184
370
  catch (error) {
185
- console.error('Failed to generate Playwright command:', error);
371
+ this.log(`Failed to generate Playwright command: ${error}`, 'error');
186
372
  return null;
187
373
  }
188
374
  }
@@ -190,41 +376,45 @@ class LLMFacade {
190
376
  * Parse script into steps for AI repair
191
377
  */
192
378
  async parseScriptIntoSteps(script, model = 'gpt-4o-mini') {
379
+ this.log('Parsing script into steps with LLM...');
193
380
  const request = {
194
381
  model,
195
- system_prompt: prompts_1.PROMPTS.SCRIPT_PARSING.SYSTEM,
196
- user_prompt: prompts_1.PROMPTS.SCRIPT_PARSING.USER(script)
382
+ systemPrompt: prompts_1.PROMPTS.SCRIPT_PARSING.SYSTEM,
383
+ userPrompt: prompts_1.PROMPTS.SCRIPT_PARSING.USER(script)
197
384
  };
198
385
  try {
199
386
  const response = await this.callLLM(request);
200
- const parsed = JSON.parse(response);
387
+ this.log(`Raw LLM parsing response (first 500 chars): ${response.answer.substring(0, 500)}`);
388
+ const parsed = JSON.parse(response.answer);
389
+ this.log(`Parsed JSON structure: ${JSON.stringify(parsed, null, 2).substring(0, 1000)}`);
201
390
  // Expect JSON object with steps array
202
391
  if (parsed.steps && Array.isArray(parsed.steps)) {
392
+ this.log(`LLM parsing successful, got ${parsed.steps.length} steps`);
203
393
  return parsed.steps;
204
394
  }
205
395
  else {
206
- console.error('Unexpected LLM response format - expected {steps: [...]}:', parsed);
396
+ this.log(`Unexpected LLM response format - expected {steps: [...]}: ${JSON.stringify(parsed)}`, 'error');
207
397
  return [];
208
398
  }
209
399
  }
210
400
  catch (error) {
211
- console.error('Failed to parse LLM response as JSON:', error);
401
+ this.log(`Failed to parse LLM response as JSON: ${error}`, 'error');
212
402
  return [];
213
403
  }
214
404
  }
215
405
  /**
216
406
  * Get repair suggestion for a failing step
217
407
  */
218
- async getRepairSuggestion(stepDescription, stepCode, errorMessage, pageInfo, failureHistory, recentRepairs, model = 'gpt-4.1-mini') {
408
+ async getRepairSuggestion(stepDescription, stepCode, errorMessage, pageInfo, failureHistory, recentRepairs, model = model_constants_1.DEFAULT_MODEL) {
219
409
  const request = {
220
410
  model,
221
- system_prompt: prompts_1.PROMPTS.REPAIR_SUGGESTION.SYSTEM,
222
- user_prompt: prompts_1.PROMPTS.REPAIR_SUGGESTION.USER(stepDescription, stepCode, errorMessage, pageInfo, failureHistory, recentRepairs)
411
+ systemPrompt: prompts_1.PROMPTS.REPAIR_SUGGESTION.SYSTEM,
412
+ userPrompt: prompts_1.PROMPTS.REPAIR_SUGGESTION.USER(stepDescription, stepCode, errorMessage, pageInfo, failureHistory, recentRepairs)
223
413
  };
224
414
  const response = await this.callLLM(request);
225
- console.log(`🤖 LLM Repair Response:`, response);
226
- const parsed = JSON.parse(response);
227
- console.log(`🤖 Parsed Repair Action:`, parsed);
415
+ this.log(`🤖 LLM Repair Response: ${response.answer}`);
416
+ const parsed = JSON.parse(response.answer);
417
+ this.log(`🤖 Parsed Repair Action: ${JSON.stringify(parsed)}`);
228
418
  // Convert string operation to enum
229
419
  if (parsed.action && parsed.action.operation) {
230
420
  switch (parsed.action.operation) {
@@ -246,14 +436,14 @@ class LLMFacade {
246
436
  /**
247
437
  * Assess repair confidence and generate advice
248
438
  */
249
- async assessRepairConfidence(originalScript, updatedScript, model = 'gpt-4.1-mini') {
439
+ async assessRepairConfidence(originalScript, updatedScript, model = model_constants_1.DEFAULT_MODEL) {
250
440
  const request = {
251
441
  model,
252
- system_prompt: prompts_1.PROMPTS.REPAIR_CONFIDENCE.SYSTEM,
253
- user_prompt: prompts_1.PROMPTS.REPAIR_CONFIDENCE.USER(originalScript, updatedScript)
442
+ systemPrompt: prompts_1.PROMPTS.REPAIR_CONFIDENCE.SYSTEM,
443
+ userPrompt: prompts_1.PROMPTS.REPAIR_CONFIDENCE.USER(originalScript, updatedScript)
254
444
  };
255
445
  const response = await this.callLLM(request);
256
- return JSON.parse(response);
446
+ return JSON.parse(response.answer);
257
447
  }
258
448
  /**
259
449
  * Generate final script with repair advice
@@ -261,16 +451,16 @@ class LLMFacade {
261
451
  async generateFinalScript(originalScript, updatedScript, newRepairAdvice, model = 'gpt-4o-mini') {
262
452
  const request = {
263
453
  model,
264
- system_prompt: prompts_1.PROMPTS.FINAL_SCRIPT.SYSTEM,
265
- user_prompt: prompts_1.PROMPTS.FINAL_SCRIPT.USER(originalScript, updatedScript, newRepairAdvice)
454
+ systemPrompt: prompts_1.PROMPTS.FINAL_SCRIPT.SYSTEM,
455
+ userPrompt: prompts_1.PROMPTS.FINAL_SCRIPT.USER(originalScript, updatedScript, newRepairAdvice)
266
456
  };
267
457
  const response = await this.callLLM(request);
268
458
  try {
269
- const parsed = JSON.parse(response);
459
+ const parsed = JSON.parse(response.answer);
270
460
  return parsed.script || updatedScript;
271
461
  }
272
462
  catch (error) {
273
- console.error('Failed to parse final script response:', error);
463
+ this.log(`Failed to parse final script response: ${error}`, 'error');
274
464
  return updatedScript;
275
465
  }
276
466
  }
@@ -311,15 +501,77 @@ LEARNING FROM FAILURES:
311
501
  if (currentStep?.error)
312
502
  errors.push(currentStep.error);
313
503
  const errorText = errors.join(' | ');
504
+ // Detect if we're repeatedly looking for elements that don't exist
505
+ const attemptedCommands = currentStep?.attempts
506
+ ?.map(a => a.command)
507
+ .filter(Boolean) || [];
508
+ const lookingForNonExistent = attemptedCommands.some(cmd => cmd?.includes('getByText') ||
509
+ cmd?.includes('toBeVisible') ||
510
+ cmd?.includes('waitFor')) && errors.some(err => err.includes('not found') ||
511
+ err.includes('Timeout') ||
512
+ err.includes('Expected: visible'));
513
+ let hallucinationWarning = '';
514
+ if (lookingForNonExistent && attemptedCommands.length >= 2) {
515
+ hallucinationWarning = `
516
+ ⚠️ HALLUCINATION ALERT:
517
+ You've made ${attemptedCommands.length} attempts trying to find/verify elements that don't exist.
518
+ STOP looking for these elements. They are NOT in the DOM.
519
+ Instead:
520
+ - Check if the goal is ALREADY COMPLETE (action succeeded = goal done)
521
+ - Use alternative verification (state changes, network, page load)
522
+ - Move on if the primary action succeeded
523
+ `;
524
+ }
314
525
  return `ERROR CONTEXT:
315
526
  Last Error: ${errorText}
316
-
527
+ ${hallucinationWarning}
317
528
  ANALYZE THE ERROR AND ADAPT:
318
529
  - Study the error message to understand what went wrong
530
+ - If element "not found" after 2+ attempts, it probably doesn't exist - stop looking for it
319
531
  - Try a completely different approach than what failed
320
532
  - Consider alternative selectors, timing, or interaction methods
321
533
  - Never repeat the exact same command that failed`;
322
534
  }
535
+ /**
536
+ * Cleanup generated script - remove redundancies and make minor adjustments
537
+ */
538
+ async cleanupScript(script, model) {
539
+ try {
540
+ const response = await this.llmProvider.callLLM({
541
+ model: model || model_constants_1.DEFAULT_MODEL,
542
+ systemPrompt: prompts_1.PROMPTS.SCRIPT_CLEANUP.SYSTEM,
543
+ userPrompt: prompts_1.PROMPTS.SCRIPT_CLEANUP.USER(script)
544
+ });
545
+ // Parse JSON response
546
+ const jsonMatch = response.answer.match(/\{[\s\S]*\}/);
547
+ if (!jsonMatch) {
548
+ console.log('[LLMFacade] Cleanup response not in JSON format, returning original script');
549
+ return { script, changes: [], skipped: 'Response not in JSON format' };
550
+ }
551
+ const parsed = JSON.parse(jsonMatch[0]);
552
+ // Validate response
553
+ if (!parsed.script) {
554
+ console.log('[LLMFacade] Cleanup response missing script field, returning original');
555
+ return { script, changes: [], skipped: 'Invalid response format' };
556
+ }
557
+ console.log(`[LLMFacade] Script cleanup completed. Changes: ${parsed.changes?.length || 0}`);
558
+ if (parsed.changes && parsed.changes.length > 0) {
559
+ parsed.changes.forEach((change, i) => {
560
+ console.log(`[LLMFacade] ${i + 1}. ${change}`);
561
+ });
562
+ }
563
+ return {
564
+ script: parsed.script,
565
+ changes: parsed.changes || [],
566
+ skipped: parsed.skipped
567
+ };
568
+ }
569
+ catch (error) {
570
+ console.error('[LLMFacade] Script cleanup failed:', error.message);
571
+ // Return original script on error
572
+ return { script, changes: [], skipped: `Error: ${error.message}` };
573
+ }
574
+ }
323
575
  }
324
576
  exports.LLMFacade = LLMFacade;
325
577
  //# sourceMappingURL=llm-facade.js.map