testchimp-runner-core 0.0.21 → 0.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/VISION_DIAGNOSTICS_IMPROVEMENTS.md +336 -0
- package/dist/credit-usage-service.d.ts +9 -0
- package/dist/credit-usage-service.d.ts.map +1 -1
- package/dist/credit-usage-service.js +20 -5
- package/dist/credit-usage-service.js.map +1 -1
- package/dist/execution-service.d.ts +7 -2
- package/dist/execution-service.d.ts.map +1 -1
- package/dist/execution-service.js +91 -36
- package/dist/execution-service.js.map +1 -1
- package/dist/index.d.ts +30 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +91 -26
- package/dist/index.js.map +1 -1
- package/dist/llm-facade.d.ts +64 -8
- package/dist/llm-facade.d.ts.map +1 -1
- package/dist/llm-facade.js +361 -109
- package/dist/llm-facade.js.map +1 -1
- package/dist/llm-provider.d.ts +39 -0
- package/dist/llm-provider.d.ts.map +1 -0
- package/dist/llm-provider.js +7 -0
- package/dist/llm-provider.js.map +1 -0
- package/dist/model-constants.d.ts +21 -0
- package/dist/model-constants.d.ts.map +1 -0
- package/dist/model-constants.js +24 -0
- package/dist/model-constants.js.map +1 -0
- package/dist/orchestrator/index.d.ts +8 -0
- package/dist/orchestrator/index.d.ts.map +1 -0
- package/dist/orchestrator/index.js +23 -0
- package/dist/orchestrator/index.js.map +1 -0
- package/dist/orchestrator/orchestrator-agent.d.ts +66 -0
- package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -0
- package/dist/orchestrator/orchestrator-agent.js +855 -0
- package/dist/orchestrator/orchestrator-agent.js.map +1 -0
- package/dist/orchestrator/tool-registry.d.ts +74 -0
- package/dist/orchestrator/tool-registry.d.ts.map +1 -0
- package/dist/orchestrator/tool-registry.js +131 -0
- package/dist/orchestrator/tool-registry.js.map +1 -0
- package/dist/orchestrator/tools/check-page-ready.d.ts +13 -0
- package/dist/orchestrator/tools/check-page-ready.d.ts.map +1 -0
- package/dist/orchestrator/tools/check-page-ready.js +72 -0
- package/dist/orchestrator/tools/check-page-ready.js.map +1 -0
- package/dist/orchestrator/tools/extract-data.d.ts +13 -0
- package/dist/orchestrator/tools/extract-data.d.ts.map +1 -0
- package/dist/orchestrator/tools/extract-data.js +84 -0
- package/dist/orchestrator/tools/extract-data.js.map +1 -0
- package/dist/orchestrator/tools/index.d.ts +10 -0
- package/dist/orchestrator/tools/index.d.ts.map +1 -0
- package/dist/orchestrator/tools/index.js +18 -0
- package/dist/orchestrator/tools/index.js.map +1 -0
- package/dist/orchestrator/tools/inspect-page.d.ts +13 -0
- package/dist/orchestrator/tools/inspect-page.d.ts.map +1 -0
- package/dist/orchestrator/tools/inspect-page.js +39 -0
- package/dist/orchestrator/tools/inspect-page.js.map +1 -0
- package/dist/orchestrator/tools/recall-history.d.ts +13 -0
- package/dist/orchestrator/tools/recall-history.d.ts.map +1 -0
- package/dist/orchestrator/tools/recall-history.js +64 -0
- package/dist/orchestrator/tools/recall-history.js.map +1 -0
- package/dist/orchestrator/tools/take-screenshot.d.ts +15 -0
- package/dist/orchestrator/tools/take-screenshot.d.ts.map +1 -0
- package/dist/orchestrator/tools/take-screenshot.js +112 -0
- package/dist/orchestrator/tools/take-screenshot.js.map +1 -0
- package/dist/orchestrator/types.d.ts +133 -0
- package/dist/orchestrator/types.d.ts.map +1 -0
- package/dist/orchestrator/types.js +28 -0
- package/dist/orchestrator/types.js.map +1 -0
- package/dist/playwright-mcp-service.d.ts +9 -0
- package/dist/playwright-mcp-service.d.ts.map +1 -1
- package/dist/playwright-mcp-service.js +20 -5
- package/dist/playwright-mcp-service.js.map +1 -1
- package/dist/progress-reporter.d.ts +97 -0
- package/dist/progress-reporter.d.ts.map +1 -0
- package/dist/progress-reporter.js +18 -0
- package/dist/progress-reporter.js.map +1 -0
- package/dist/prompts.d.ts +24 -0
- package/dist/prompts.d.ts.map +1 -1
- package/dist/prompts.js +593 -68
- package/dist/prompts.js.map +1 -1
- package/dist/providers/backend-proxy-llm-provider.d.ts +25 -0
- package/dist/providers/backend-proxy-llm-provider.d.ts.map +1 -0
- package/dist/providers/backend-proxy-llm-provider.js +76 -0
- package/dist/providers/backend-proxy-llm-provider.js.map +1 -0
- package/dist/providers/local-llm-provider.d.ts +21 -0
- package/dist/providers/local-llm-provider.d.ts.map +1 -0
- package/dist/providers/local-llm-provider.js +35 -0
- package/dist/providers/local-llm-provider.js.map +1 -0
- package/dist/scenario-service.d.ts +27 -1
- package/dist/scenario-service.d.ts.map +1 -1
- package/dist/scenario-service.js +48 -12
- package/dist/scenario-service.js.map +1 -1
- package/dist/scenario-worker-class.d.ts +39 -2
- package/dist/scenario-worker-class.d.ts.map +1 -1
- package/dist/scenario-worker-class.js +614 -86
- package/dist/scenario-worker-class.js.map +1 -1
- package/dist/script-utils.d.ts +2 -0
- package/dist/script-utils.d.ts.map +1 -1
- package/dist/script-utils.js +44 -4
- package/dist/script-utils.js.map +1 -1
- package/dist/types.d.ts +11 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/browser-utils.d.ts +20 -1
- package/dist/utils/browser-utils.d.ts.map +1 -1
- package/dist/utils/browser-utils.js +102 -51
- package/dist/utils/browser-utils.js.map +1 -1
- package/dist/utils/page-info-utils.d.ts +23 -4
- package/dist/utils/page-info-utils.d.ts.map +1 -1
- package/dist/utils/page-info-utils.js +174 -43
- package/dist/utils/page-info-utils.js.map +1 -1
- package/package.json +1 -2
- package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +642 -0
- package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +844 -0
- package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +539 -0
- package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +241 -0
- package/plandocs/PHASE1_FINAL_STATUS.md +210 -0
- package/plandocs/PLANNING_SESSION_SUMMARY.md +372 -0
- package/plandocs/SCRIPT_CLEANUP_FEATURE.md +201 -0
- package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +364 -0
- package/plandocs/SELECTOR_IMPROVEMENTS.md +139 -0
- package/src/credit-usage-service.ts +23 -5
- package/src/execution-service.ts +152 -42
- package/src/index.ts +169 -26
- package/src/llm-facade.ts +500 -126
- package/src/llm-provider.ts +43 -0
- package/src/model-constants.ts +23 -0
- package/src/orchestrator/index.ts +33 -0
- package/src/orchestrator/orchestrator-agent.ts +1037 -0
- package/src/orchestrator/tool-registry.ts +182 -0
- package/src/orchestrator/tools/check-page-ready.ts +75 -0
- package/src/orchestrator/tools/extract-data.ts +92 -0
- package/src/orchestrator/tools/index.ts +11 -0
- package/src/orchestrator/tools/inspect-page.ts +42 -0
- package/src/orchestrator/tools/recall-history.ts +72 -0
- package/src/orchestrator/tools/take-screenshot.ts +128 -0
- package/src/orchestrator/types.ts +200 -0
- package/src/playwright-mcp-service.ts +23 -5
- package/src/progress-reporter.ts +109 -0
- package/src/prompts.ts +606 -69
- package/src/providers/backend-proxy-llm-provider.ts +91 -0
- package/src/providers/local-llm-provider.ts +38 -0
- package/src/scenario-service.ts +83 -13
- package/src/scenario-worker-class.ts +740 -72
- package/src/script-utils.ts +50 -5
- package/src/types.ts +13 -1
- package/src/utils/browser-utils.ts +123 -51
- package/src/utils/page-info-utils.ts +210 -53
- package/testchimp-runner-core-0.0.22.tgz +0 -0
package/dist/llm-facade.js
CHANGED
|
@@ -1,104 +1,77 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
3
|
exports.LLMFacade = void 0;
|
|
7
|
-
const axios_1 = __importDefault(require("axios"));
|
|
8
4
|
const prompts_1 = require("./prompts");
|
|
9
5
|
const types_1 = require("./types");
|
|
10
|
-
const
|
|
11
|
-
const env_loader_1 = require("./env-loader");
|
|
6
|
+
const model_constants_1 = require("./model-constants");
|
|
12
7
|
class LLMFacade {
|
|
13
|
-
constructor(
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
this.backendUrl = backendUrl;
|
|
17
|
-
console.log(`LLMFacade initialized with provided backend URL: ${this.backendUrl}`);
|
|
18
|
-
}
|
|
19
|
-
else {
|
|
20
|
-
// Fall back to environment configuration for backward compatibility
|
|
21
|
-
const envConfig = (0, env_loader_1.loadEnvConfig)();
|
|
22
|
-
this.backendUrl = envConfig.TESTCHIMP_BACKEND_URL;
|
|
23
|
-
console.log(`LLMFacade initialized with environment backend URL: ${this.backendUrl}`);
|
|
24
|
-
}
|
|
25
|
-
// Use provided auth config or try to create from environment
|
|
26
|
-
this.authConfig = authConfig || (0, auth_config_1.createAuthConfigFromEnv)();
|
|
27
|
-
if (!this.authConfig) {
|
|
28
|
-
console.warn('TestChimp authentication not configured. LLM calls may fail.');
|
|
29
|
-
}
|
|
8
|
+
constructor(llmProvider) {
|
|
9
|
+
this.llmProvider = llmProvider;
|
|
10
|
+
this.log('LLMFacade initialized with pluggable LLM provider');
|
|
30
11
|
}
|
|
31
12
|
/**
|
|
32
|
-
*
|
|
13
|
+
* Set token usage callback for tracking
|
|
33
14
|
*/
|
|
34
|
-
|
|
35
|
-
this.
|
|
15
|
+
setTokenUsageCallback(callback) {
|
|
16
|
+
this.tokenUsageCallback = callback;
|
|
36
17
|
}
|
|
37
18
|
/**
|
|
38
|
-
*
|
|
19
|
+
* Set a logger callback for capturing execution logs
|
|
39
20
|
*/
|
|
40
|
-
|
|
41
|
-
|
|
21
|
+
setLogger(logger) {
|
|
22
|
+
this.logger = logger;
|
|
23
|
+
this.llmProvider.setLogger?.(logger);
|
|
42
24
|
}
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
25
|
+
/**
|
|
26
|
+
* Log a message using the configured logger
|
|
27
|
+
*/
|
|
28
|
+
log(message, level = 'log') {
|
|
29
|
+
if (this.logger) {
|
|
30
|
+
this.logger(message, level);
|
|
31
|
+
}
|
|
32
|
+
// Console fallback for debug visibility
|
|
33
|
+
if (level === 'error') {
|
|
34
|
+
console.error(message);
|
|
46
35
|
}
|
|
36
|
+
else if (level === 'warn') {
|
|
37
|
+
console.warn(message);
|
|
38
|
+
}
|
|
39
|
+
else {
|
|
40
|
+
console.log(message);
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
async callLLM(request) {
|
|
47
44
|
try {
|
|
48
|
-
const
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
headers: {
|
|
53
|
-
...authHeaders,
|
|
54
|
-
'Content-Type': 'application/json'
|
|
55
|
-
},
|
|
56
|
-
timeout: 30000 // 30 second timeout for LLM calls
|
|
57
|
-
});
|
|
58
|
-
if (response.data && response.data.answer) {
|
|
59
|
-
return response.data.answer;
|
|
60
|
-
}
|
|
61
|
-
else {
|
|
62
|
-
throw new Error('Invalid response from LLM backend');
|
|
45
|
+
const response = await this.llmProvider.callLLM(request);
|
|
46
|
+
// Report token usage if callback is set
|
|
47
|
+
if (response.usage && this.tokenUsageCallback) {
|
|
48
|
+
this.tokenUsageCallback(response.usage.inputTokens, response.usage.outputTokens, !!request.imageUrl);
|
|
63
49
|
}
|
|
50
|
+
return response;
|
|
64
51
|
}
|
|
65
52
|
catch (error) {
|
|
66
|
-
//
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
}
|
|
70
|
-
else if (error.response?.status === 401) {
|
|
71
|
-
throw new Error('Authentication failed. Please check your API credentials.');
|
|
72
|
-
}
|
|
73
|
-
else if (error.response?.status === 403) {
|
|
74
|
-
throw new Error('Access denied. Please check your API permissions.');
|
|
75
|
-
}
|
|
76
|
-
else if (error.response?.status === 429) {
|
|
77
|
-
throw new Error('Rate limit exceeded. Please try again later.');
|
|
78
|
-
}
|
|
79
|
-
else {
|
|
80
|
-
console.error('LLM call failed:', error);
|
|
81
|
-
throw new Error(`LLM call failed: ${error.message}`);
|
|
82
|
-
}
|
|
53
|
+
// Let provider handle its own error messages, just re-throw
|
|
54
|
+
this.log(`LLM call failed: ${error}`, 'error');
|
|
55
|
+
throw error;
|
|
83
56
|
}
|
|
84
57
|
}
|
|
85
58
|
/**
|
|
86
59
|
* Generate a test name from scenario description
|
|
87
60
|
*/
|
|
88
|
-
async generateTestName(scenario, model =
|
|
89
|
-
|
|
61
|
+
async generateTestName(scenario, model = model_constants_1.DEFAULT_MODEL) {
|
|
62
|
+
this.log('Generating test name with LLM...');
|
|
90
63
|
const request = {
|
|
91
64
|
model,
|
|
92
|
-
|
|
93
|
-
|
|
65
|
+
systemPrompt: prompts_1.PROMPTS.TEST_NAME_GENERATION.SYSTEM,
|
|
66
|
+
userPrompt: prompts_1.PROMPTS.TEST_NAME_GENERATION.USER(scenario)
|
|
94
67
|
};
|
|
95
68
|
try {
|
|
96
69
|
const response = await this.callLLM(request);
|
|
97
|
-
const testNameResponse = JSON.parse(response);
|
|
70
|
+
const testNameResponse = JSON.parse(response.answer);
|
|
98
71
|
return testNameResponse.testName;
|
|
99
72
|
}
|
|
100
73
|
catch (error) {
|
|
101
|
-
|
|
74
|
+
this.log(`Failed to generate test name: ${error}`, 'error');
|
|
102
75
|
// Fallback to a simple generated name
|
|
103
76
|
return `Test: ${scenario.substring(0, 50)}...`;
|
|
104
77
|
}
|
|
@@ -107,50 +80,263 @@ class LLMFacade {
|
|
|
107
80
|
* Generate hashtags for semantic grouping
|
|
108
81
|
*/
|
|
109
82
|
async generateHashtags(scenario, model = 'gpt-4o-mini') {
|
|
110
|
-
|
|
83
|
+
this.log('Generating hashtags with LLM...');
|
|
111
84
|
const request = {
|
|
112
85
|
model,
|
|
113
|
-
|
|
114
|
-
|
|
86
|
+
systemPrompt: prompts_1.PROMPTS.HASHTAG_GENERATION.SYSTEM,
|
|
87
|
+
userPrompt: prompts_1.PROMPTS.HASHTAG_GENERATION.USER(scenario)
|
|
115
88
|
};
|
|
116
89
|
try {
|
|
117
90
|
const response = await this.callLLM(request);
|
|
118
|
-
const hashtagResponse = JSON.parse(response);
|
|
91
|
+
const hashtagResponse = JSON.parse(response.answer);
|
|
119
92
|
return hashtagResponse.hashtags || [];
|
|
120
93
|
}
|
|
121
94
|
catch (error) {
|
|
122
|
-
|
|
95
|
+
this.log(`Failed to generate hashtags: ${error}`, 'error');
|
|
123
96
|
// Fallback to empty array
|
|
124
97
|
return [];
|
|
125
98
|
}
|
|
126
99
|
}
|
|
100
|
+
/**
|
|
101
|
+
* Check if a goal has been completed based on actions taken and current page state
|
|
102
|
+
*/
|
|
103
|
+
async checkGoalCompletion(goalDescription, completedActions, pageInfo, model = model_constants_1.DEFAULT_MODEL) {
|
|
104
|
+
this.log('Checking goal completion with LLM...');
|
|
105
|
+
const request = {
|
|
106
|
+
model,
|
|
107
|
+
systemPrompt: prompts_1.PROMPTS.GOAL_COMPLETION_CHECK.SYSTEM,
|
|
108
|
+
userPrompt: prompts_1.PROMPTS.GOAL_COMPLETION_CHECK.USER(goalDescription, completedActions, pageInfo)
|
|
109
|
+
};
|
|
110
|
+
try {
|
|
111
|
+
const response = await this.callLLM(request);
|
|
112
|
+
return JSON.parse(response.answer);
|
|
113
|
+
}
|
|
114
|
+
catch (error) {
|
|
115
|
+
this.log(`Failed to check goal completion: ${error}`, 'error');
|
|
116
|
+
// Conservative fallback - assume not complete if we can't determine
|
|
117
|
+
return {
|
|
118
|
+
isComplete: false,
|
|
119
|
+
reason: 'Error checking completion status'
|
|
120
|
+
};
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Check goal completion with visual verification (uses vision model)
|
|
125
|
+
*/
|
|
126
|
+
async checkGoalCompletionWithVision(goalDescription, completedActions, pageInfo, imageDataUrl, model = model_constants_1.VISION_MODEL) {
|
|
127
|
+
this.log(`👔 Checking goal completion with vision (${model})...`);
|
|
128
|
+
const request = {
|
|
129
|
+
model,
|
|
130
|
+
systemPrompt: `You are checking if a test automation goal has been completed by analyzing both DOM state and visual appearance.
|
|
131
|
+
|
|
132
|
+
CRITICAL: For action goals (login, submit, click, navigate), check if the PRIMARY ACTION and its SIDE EFFECTS are complete:
|
|
133
|
+
- "Login" = Fill fields AND click button AND verify navigation/page change
|
|
134
|
+
- "Submit form" = Fill fields AND click submit AND verify submission (success message/page change)
|
|
135
|
+
- "Click X" = Click X AND verify expected page change or UI update
|
|
136
|
+
|
|
137
|
+
For verification goals (verify, check, confirm), verify the VISUAL PRESENCE of expected elements.`,
|
|
138
|
+
userPrompt: `GOAL: ${goalDescription}
|
|
139
|
+
|
|
140
|
+
ACTIONS COMPLETED:
|
|
141
|
+
${completedActions.map((action, i) => `${i + 1}. ${action}`).join('\n')}
|
|
142
|
+
|
|
143
|
+
CURRENT PAGE STATE:
|
|
144
|
+
URL: ${pageInfo.url}
|
|
145
|
+
Title: ${pageInfo.title}
|
|
146
|
+
Interactive Elements:
|
|
147
|
+
${pageInfo.formattedElements}
|
|
148
|
+
|
|
149
|
+
Based on the screenshot AND page state, is this goal COMPLETE?
|
|
150
|
+
|
|
151
|
+
Respond ONLY with valid JSON:
|
|
152
|
+
{
|
|
153
|
+
"isComplete": true/false,
|
|
154
|
+
"reason": "Brief explanation based on what you SEE in the screenshot and DOM",
|
|
155
|
+
"nextSubGoal": "If incomplete, what specific next action is needed?"
|
|
156
|
+
}`,
|
|
157
|
+
imageUrl: imageDataUrl
|
|
158
|
+
};
|
|
159
|
+
try {
|
|
160
|
+
const response = await this.callLLM(request);
|
|
161
|
+
const parsed = JSON.parse(response.answer);
|
|
162
|
+
this.log(`👔 Vision goal check result: ${parsed.isComplete ? 'COMPLETE ✅' : 'INCOMPLETE ❌'}`);
|
|
163
|
+
return parsed;
|
|
164
|
+
}
|
|
165
|
+
catch (error) {
|
|
166
|
+
this.log(`Failed to check goal completion with vision: ${error}`, 'error');
|
|
167
|
+
// Conservative fallback - assume not complete if we can't determine
|
|
168
|
+
return {
|
|
169
|
+
isComplete: false,
|
|
170
|
+
reason: 'Error checking completion status with vision'
|
|
171
|
+
};
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
/**
|
|
175
|
+
* Ask LLM if a screenshot would help debug the current failure
|
|
176
|
+
*/
|
|
177
|
+
async assessScreenshotNeed(stepDescription, errorMessage, attemptCount, pageInfo, model = model_constants_1.DEFAULT_MODEL) {
|
|
178
|
+
this.log('Assessing screenshot need with LLM...');
|
|
179
|
+
const request = {
|
|
180
|
+
model,
|
|
181
|
+
systemPrompt: prompts_1.PROMPTS.SCREENSHOT_NEED_ASSESSMENT.SYSTEM,
|
|
182
|
+
userPrompt: prompts_1.PROMPTS.SCREENSHOT_NEED_ASSESSMENT.USER(stepDescription, errorMessage, attemptCount, pageInfo)
|
|
183
|
+
};
|
|
184
|
+
try {
|
|
185
|
+
const response = await this.callLLM(request);
|
|
186
|
+
return JSON.parse(response.answer);
|
|
187
|
+
}
|
|
188
|
+
catch (error) {
|
|
189
|
+
this.log(`Failed to assess screenshot need: ${error}`, 'error');
|
|
190
|
+
// Conservative fallback - don't use expensive screenshot unless we're sure
|
|
191
|
+
return {
|
|
192
|
+
needsScreenshot: false,
|
|
193
|
+
reason: 'Error assessing need, defaulting to no screenshot'
|
|
194
|
+
};
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
/**
|
|
198
|
+
* Get diagnostic analysis from screenshot (supervisor role)
|
|
199
|
+
*/
|
|
200
|
+
async getVisionDiagnostics(stepDescription, pageInfo, previousSteps, lastError, imageDataUrl, model = model_constants_1.VISION_MODEL) {
|
|
201
|
+
this.log('👔 SUPERVISOR: Analyzing screenshot for diagnostic insights...');
|
|
202
|
+
const previousCommands = previousSteps
|
|
203
|
+
.map(s => s.playwrightCommand)
|
|
204
|
+
.filter(Boolean)
|
|
205
|
+
.join('\n');
|
|
206
|
+
const attemptHistory = previousSteps.length > 0
|
|
207
|
+
? `Previous attempts context: ${previousSteps.length} commands executed`
|
|
208
|
+
: '';
|
|
209
|
+
const errorContext = lastError
|
|
210
|
+
? `Last Error: ${lastError}`
|
|
211
|
+
: '';
|
|
212
|
+
const request = {
|
|
213
|
+
model,
|
|
214
|
+
systemPrompt: prompts_1.PROMPTS.VISION_DIAGNOSTIC_ANALYSIS.SYSTEM,
|
|
215
|
+
userPrompt: prompts_1.PROMPTS.VISION_DIAGNOSTIC_ANALYSIS.USER(stepDescription, pageInfo, previousCommands, attemptHistory, errorContext),
|
|
216
|
+
imageUrl: imageDataUrl
|
|
217
|
+
};
|
|
218
|
+
try {
|
|
219
|
+
const response = await this.callLLM(request);
|
|
220
|
+
const diagnostics = JSON.parse(response.answer);
|
|
221
|
+
// Log supervisor's findings
|
|
222
|
+
this.log(`👔 SUPERVISOR ANALYSIS:`);
|
|
223
|
+
this.log(` 📸 Visual: ${diagnostics.visualAnalysis}`);
|
|
224
|
+
this.log(` 🔍 Root cause: ${diagnostics.rootCause}`);
|
|
225
|
+
this.log(` 📋 Instructions: ${diagnostics.specificInstructions}`);
|
|
226
|
+
this.log(` 💡 Approach: ${diagnostics.recommendedApproach}`);
|
|
227
|
+
if (diagnostics.elementsFound?.length > 0) {
|
|
228
|
+
this.log(` ✅ Elements found: ${diagnostics.elementsFound.join(', ')}`);
|
|
229
|
+
}
|
|
230
|
+
if (diagnostics.elementsNotFound?.length > 0) {
|
|
231
|
+
this.log(` ❌ Elements NOT found: ${diagnostics.elementsNotFound.join(', ')}`);
|
|
232
|
+
}
|
|
233
|
+
return diagnostics;
|
|
234
|
+
}
|
|
235
|
+
catch (error) {
|
|
236
|
+
this.log(`Failed to get vision diagnostics: ${error}`, 'error');
|
|
237
|
+
throw new Error(`Vision diagnostic analysis failed: ${error}`);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
/**
|
|
241
|
+
* Generate command based on supervisor's instructions
|
|
242
|
+
*/
|
|
243
|
+
async generateCommandFromSupervisorInstructions(stepDescription, supervisorDiagnostics, pageInfo, model = model_constants_1.DEFAULT_MODEL) {
|
|
244
|
+
this.log('🔨 WORKER: Generating command based on supervisor instructions...');
|
|
245
|
+
const request = {
|
|
246
|
+
model,
|
|
247
|
+
systemPrompt: prompts_1.PROMPTS.PLAYWRIGHT_COMMAND_WITH_SUPERVISOR.SYSTEM,
|
|
248
|
+
userPrompt: prompts_1.PROMPTS.PLAYWRIGHT_COMMAND_WITH_SUPERVISOR.USER(stepDescription, supervisorDiagnostics.specificInstructions, supervisorDiagnostics.visualAnalysis, supervisorDiagnostics.elementsFound || [], supervisorDiagnostics.elementsNotFound || [], pageInfo)
|
|
249
|
+
};
|
|
250
|
+
try {
|
|
251
|
+
const response = await this.callLLM(request);
|
|
252
|
+
const commandResponse = JSON.parse(response.answer);
|
|
253
|
+
if (commandResponse.reasoning) {
|
|
254
|
+
this.log(`🔨 WORKER reasoning: ${commandResponse.reasoning}`);
|
|
255
|
+
}
|
|
256
|
+
return commandResponse.command;
|
|
257
|
+
}
|
|
258
|
+
catch (error) {
|
|
259
|
+
this.log(`Failed to generate command from supervisor instructions: ${error}`, 'error');
|
|
260
|
+
throw new Error(`Command generation from supervisor instructions failed: ${error}`);
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
/**
|
|
264
|
+
* Generate Playwright command with vision (uses vision model)
|
|
265
|
+
*/
|
|
266
|
+
async generatePlaywrightCommandWithVision(stepDescription, pageInfo, previousSteps, lastError, imageDataUrl, // Full data URL: data:image/png;base64,...
|
|
267
|
+
model = model_constants_1.VISION_MODEL) {
|
|
268
|
+
this.log(`⚠️ USING VISION MODE (${model})...`);
|
|
269
|
+
const previousCommands = previousSteps
|
|
270
|
+
.map(s => s.playwrightCommand)
|
|
271
|
+
.filter(Boolean)
|
|
272
|
+
.join('\n');
|
|
273
|
+
const attemptHistory = previousSteps.length > 0
|
|
274
|
+
? `Previous attempts context: ${previousSteps.length} commands executed`
|
|
275
|
+
: '';
|
|
276
|
+
const errorContext = lastError
|
|
277
|
+
? `Last Error: ${lastError}`
|
|
278
|
+
: '';
|
|
279
|
+
const request = {
|
|
280
|
+
model,
|
|
281
|
+
systemPrompt: prompts_1.PROMPTS.PLAYWRIGHT_COMMAND_WITH_VISION.SYSTEM,
|
|
282
|
+
userPrompt: prompts_1.PROMPTS.PLAYWRIGHT_COMMAND_WITH_VISION.USER(stepDescription, pageInfo, previousCommands, attemptHistory, errorContext),
|
|
283
|
+
imageUrl: imageDataUrl // Full data URL constructed by client
|
|
284
|
+
};
|
|
285
|
+
try {
|
|
286
|
+
const response = await this.callLLM(request);
|
|
287
|
+
const commandResponse = JSON.parse(response.answer);
|
|
288
|
+
// Log diagnostic insights from vision analysis
|
|
289
|
+
if (commandResponse.visualInsights) {
|
|
290
|
+
this.log(`📸 Visual insights: ${commandResponse.visualInsights}`);
|
|
291
|
+
}
|
|
292
|
+
if (commandResponse.failureRootCause) {
|
|
293
|
+
this.log(`🔍 Root cause analysis: ${commandResponse.failureRootCause}`);
|
|
294
|
+
}
|
|
295
|
+
if (commandResponse.recommendedAlternative) {
|
|
296
|
+
this.log(`💡 Recommended alternative: ${commandResponse.recommendedAlternative}`);
|
|
297
|
+
}
|
|
298
|
+
if (commandResponse.reasoning) {
|
|
299
|
+
this.log(`🧠 Vision-based reasoning: ${commandResponse.reasoning}`);
|
|
300
|
+
}
|
|
301
|
+
return commandResponse.command;
|
|
302
|
+
}
|
|
303
|
+
catch (error) {
|
|
304
|
+
this.log(`Failed to generate command with vision: ${error}`, 'error');
|
|
305
|
+
throw new Error(`Vision-enhanced command generation failed: ${error}`);
|
|
306
|
+
}
|
|
307
|
+
}
|
|
127
308
|
/**
|
|
128
309
|
* Break down scenario into steps
|
|
129
310
|
*/
|
|
130
|
-
async breakdownScenario(scenario, model =
|
|
131
|
-
|
|
311
|
+
async breakdownScenario(scenario, model = model_constants_1.DEFAULT_MODEL) {
|
|
312
|
+
this.log('Breaking down scenario with LLM...');
|
|
313
|
+
this.log(`📝 INPUT SCENARIO: ${scenario}`);
|
|
132
314
|
const request = {
|
|
133
315
|
model,
|
|
134
|
-
|
|
135
|
-
|
|
316
|
+
systemPrompt: prompts_1.PROMPTS.SCENARIO_BREAKDOWN.SYSTEM,
|
|
317
|
+
userPrompt: prompts_1.PROMPTS.SCENARIO_BREAKDOWN.USER(scenario)
|
|
136
318
|
};
|
|
137
319
|
try {
|
|
138
320
|
const response = await this.callLLM(request);
|
|
139
|
-
|
|
321
|
+
this.log(`🤖 RAW LLM RESPONSE: ${response.answer}`);
|
|
322
|
+
const breakdownResponse = JSON.parse(response.answer);
|
|
323
|
+
this.log(`📋 PARSED BREAKDOWN: ${JSON.stringify(breakdownResponse, null, 2)}`);
|
|
140
324
|
// Validate and clean up steps
|
|
141
325
|
const cleanedSteps = breakdownResponse.steps
|
|
142
326
|
.map(step => step.trim())
|
|
143
327
|
.filter(step => step.length > 0)
|
|
144
328
|
.slice(0, 10); // Limit to 10 steps max
|
|
329
|
+
this.log(`✅ CLEANED STEPS: ${JSON.stringify(cleanedSteps, null, 2)}`);
|
|
145
330
|
return cleanedSteps.map((desc, index) => ({
|
|
146
331
|
stepNumber: index + 1,
|
|
147
332
|
description: desc,
|
|
148
333
|
}));
|
|
149
334
|
}
|
|
150
335
|
catch (error) {
|
|
151
|
-
|
|
152
|
-
// Fallback to simple breakdown
|
|
153
|
-
const stepDescriptions = scenario.split('
|
|
336
|
+
this.log(`❌ Failed to breakdown scenario: ${error}`, 'error');
|
|
337
|
+
// Fallback to simple breakdown by newlines (preserves URLs)
|
|
338
|
+
const stepDescriptions = scenario.split('\n').map(s => s.trim()).filter(s => s.length > 0);
|
|
339
|
+
this.log(`🔄 FALLBACK STEPS: ${JSON.stringify(stepDescriptions, null, 2)}`);
|
|
154
340
|
return stepDescriptions.map((desc, index) => ({
|
|
155
341
|
stepNumber: index + 1,
|
|
156
342
|
description: desc,
|
|
@@ -160,8 +346,8 @@ class LLMFacade {
|
|
|
160
346
|
/**
|
|
161
347
|
* Generate Playwright command for a step
|
|
162
348
|
*/
|
|
163
|
-
async generatePlaywrightCommand(stepDescription, pageInfo, previousSteps, lastError, currentStep, model =
|
|
164
|
-
|
|
349
|
+
async generatePlaywrightCommand(stepDescription, pageInfo, previousSteps, lastError, currentStep, model = model_constants_1.DEFAULT_MODEL) {
|
|
350
|
+
this.log('Generating Playwright command with LLM...');
|
|
165
351
|
const previousCommands = previousSteps
|
|
166
352
|
.filter(s => s.playwrightCommand && s.success)
|
|
167
353
|
.map(s => `// Step ${s.stepNumber}: ${s.description}\n${s.playwrightCommand}`)
|
|
@@ -173,16 +359,16 @@ class LLMFacade {
|
|
|
173
359
|
const prompt = prompts_1.PROMPTS.PLAYWRIGHT_COMMAND.USER(stepDescription, pageInfo, previousCommands, attemptHistory, errorContext);
|
|
174
360
|
const request = {
|
|
175
361
|
model,
|
|
176
|
-
|
|
177
|
-
|
|
362
|
+
systemPrompt: prompts_1.PROMPTS.PLAYWRIGHT_COMMAND.SYSTEM,
|
|
363
|
+
userPrompt: prompt
|
|
178
364
|
};
|
|
179
365
|
try {
|
|
180
366
|
const response = await this.callLLM(request);
|
|
181
|
-
const commandResponse = JSON.parse(response);
|
|
367
|
+
const commandResponse = JSON.parse(response.answer);
|
|
182
368
|
return commandResponse.command;
|
|
183
369
|
}
|
|
184
370
|
catch (error) {
|
|
185
|
-
|
|
371
|
+
this.log(`Failed to generate Playwright command: ${error}`, 'error');
|
|
186
372
|
return null;
|
|
187
373
|
}
|
|
188
374
|
}
|
|
@@ -190,41 +376,45 @@ class LLMFacade {
|
|
|
190
376
|
* Parse script into steps for AI repair
|
|
191
377
|
*/
|
|
192
378
|
async parseScriptIntoSteps(script, model = 'gpt-4o-mini') {
|
|
379
|
+
this.log('Parsing script into steps with LLM...');
|
|
193
380
|
const request = {
|
|
194
381
|
model,
|
|
195
|
-
|
|
196
|
-
|
|
382
|
+
systemPrompt: prompts_1.PROMPTS.SCRIPT_PARSING.SYSTEM,
|
|
383
|
+
userPrompt: prompts_1.PROMPTS.SCRIPT_PARSING.USER(script)
|
|
197
384
|
};
|
|
198
385
|
try {
|
|
199
386
|
const response = await this.callLLM(request);
|
|
200
|
-
|
|
387
|
+
this.log(`Raw LLM parsing response (first 500 chars): ${response.answer.substring(0, 500)}`);
|
|
388
|
+
const parsed = JSON.parse(response.answer);
|
|
389
|
+
this.log(`Parsed JSON structure: ${JSON.stringify(parsed, null, 2).substring(0, 1000)}`);
|
|
201
390
|
// Expect JSON object with steps array
|
|
202
391
|
if (parsed.steps && Array.isArray(parsed.steps)) {
|
|
392
|
+
this.log(`LLM parsing successful, got ${parsed.steps.length} steps`);
|
|
203
393
|
return parsed.steps;
|
|
204
394
|
}
|
|
205
395
|
else {
|
|
206
|
-
|
|
396
|
+
this.log(`Unexpected LLM response format - expected {steps: [...]}: ${JSON.stringify(parsed)}`, 'error');
|
|
207
397
|
return [];
|
|
208
398
|
}
|
|
209
399
|
}
|
|
210
400
|
catch (error) {
|
|
211
|
-
|
|
401
|
+
this.log(`Failed to parse LLM response as JSON: ${error}`, 'error');
|
|
212
402
|
return [];
|
|
213
403
|
}
|
|
214
404
|
}
|
|
215
405
|
/**
|
|
216
406
|
* Get repair suggestion for a failing step
|
|
217
407
|
*/
|
|
218
|
-
async getRepairSuggestion(stepDescription, stepCode, errorMessage, pageInfo, failureHistory, recentRepairs, model =
|
|
408
|
+
async getRepairSuggestion(stepDescription, stepCode, errorMessage, pageInfo, failureHistory, recentRepairs, model = model_constants_1.DEFAULT_MODEL) {
|
|
219
409
|
const request = {
|
|
220
410
|
model,
|
|
221
|
-
|
|
222
|
-
|
|
411
|
+
systemPrompt: prompts_1.PROMPTS.REPAIR_SUGGESTION.SYSTEM,
|
|
412
|
+
userPrompt: prompts_1.PROMPTS.REPAIR_SUGGESTION.USER(stepDescription, stepCode, errorMessage, pageInfo, failureHistory, recentRepairs)
|
|
223
413
|
};
|
|
224
414
|
const response = await this.callLLM(request);
|
|
225
|
-
|
|
226
|
-
const parsed = JSON.parse(response);
|
|
227
|
-
|
|
415
|
+
this.log(`🤖 LLM Repair Response: ${response.answer}`);
|
|
416
|
+
const parsed = JSON.parse(response.answer);
|
|
417
|
+
this.log(`🤖 Parsed Repair Action: ${JSON.stringify(parsed)}`);
|
|
228
418
|
// Convert string operation to enum
|
|
229
419
|
if (parsed.action && parsed.action.operation) {
|
|
230
420
|
switch (parsed.action.operation) {
|
|
@@ -246,14 +436,14 @@ class LLMFacade {
|
|
|
246
436
|
/**
|
|
247
437
|
* Assess repair confidence and generate advice
|
|
248
438
|
*/
|
|
249
|
-
async assessRepairConfidence(originalScript, updatedScript, model =
|
|
439
|
+
async assessRepairConfidence(originalScript, updatedScript, model = model_constants_1.DEFAULT_MODEL) {
|
|
250
440
|
const request = {
|
|
251
441
|
model,
|
|
252
|
-
|
|
253
|
-
|
|
442
|
+
systemPrompt: prompts_1.PROMPTS.REPAIR_CONFIDENCE.SYSTEM,
|
|
443
|
+
userPrompt: prompts_1.PROMPTS.REPAIR_CONFIDENCE.USER(originalScript, updatedScript)
|
|
254
444
|
};
|
|
255
445
|
const response = await this.callLLM(request);
|
|
256
|
-
return JSON.parse(response);
|
|
446
|
+
return JSON.parse(response.answer);
|
|
257
447
|
}
|
|
258
448
|
/**
|
|
259
449
|
* Generate final script with repair advice
|
|
@@ -261,16 +451,16 @@ class LLMFacade {
|
|
|
261
451
|
async generateFinalScript(originalScript, updatedScript, newRepairAdvice, model = 'gpt-4o-mini') {
|
|
262
452
|
const request = {
|
|
263
453
|
model,
|
|
264
|
-
|
|
265
|
-
|
|
454
|
+
systemPrompt: prompts_1.PROMPTS.FINAL_SCRIPT.SYSTEM,
|
|
455
|
+
userPrompt: prompts_1.PROMPTS.FINAL_SCRIPT.USER(originalScript, updatedScript, newRepairAdvice)
|
|
266
456
|
};
|
|
267
457
|
const response = await this.callLLM(request);
|
|
268
458
|
try {
|
|
269
|
-
const parsed = JSON.parse(response);
|
|
459
|
+
const parsed = JSON.parse(response.answer);
|
|
270
460
|
return parsed.script || updatedScript;
|
|
271
461
|
}
|
|
272
462
|
catch (error) {
|
|
273
|
-
|
|
463
|
+
this.log(`Failed to parse final script response: ${error}`, 'error');
|
|
274
464
|
return updatedScript;
|
|
275
465
|
}
|
|
276
466
|
}
|
|
@@ -311,15 +501,77 @@ LEARNING FROM FAILURES:
|
|
|
311
501
|
if (currentStep?.error)
|
|
312
502
|
errors.push(currentStep.error);
|
|
313
503
|
const errorText = errors.join(' | ');
|
|
504
|
+
// Detect if we're repeatedly looking for elements that don't exist
|
|
505
|
+
const attemptedCommands = currentStep?.attempts
|
|
506
|
+
?.map(a => a.command)
|
|
507
|
+
.filter(Boolean) || [];
|
|
508
|
+
const lookingForNonExistent = attemptedCommands.some(cmd => cmd?.includes('getByText') ||
|
|
509
|
+
cmd?.includes('toBeVisible') ||
|
|
510
|
+
cmd?.includes('waitFor')) && errors.some(err => err.includes('not found') ||
|
|
511
|
+
err.includes('Timeout') ||
|
|
512
|
+
err.includes('Expected: visible'));
|
|
513
|
+
let hallucinationWarning = '';
|
|
514
|
+
if (lookingForNonExistent && attemptedCommands.length >= 2) {
|
|
515
|
+
hallucinationWarning = `
|
|
516
|
+
⚠️ HALLUCINATION ALERT:
|
|
517
|
+
You've made ${attemptedCommands.length} attempts trying to find/verify elements that don't exist.
|
|
518
|
+
STOP looking for these elements. They are NOT in the DOM.
|
|
519
|
+
Instead:
|
|
520
|
+
- Check if the goal is ALREADY COMPLETE (action succeeded = goal done)
|
|
521
|
+
- Use alternative verification (state changes, network, page load)
|
|
522
|
+
- Move on if the primary action succeeded
|
|
523
|
+
`;
|
|
524
|
+
}
|
|
314
525
|
return `ERROR CONTEXT:
|
|
315
526
|
Last Error: ${errorText}
|
|
316
|
-
|
|
527
|
+
${hallucinationWarning}
|
|
317
528
|
ANALYZE THE ERROR AND ADAPT:
|
|
318
529
|
- Study the error message to understand what went wrong
|
|
530
|
+
- If element "not found" after 2+ attempts, it probably doesn't exist - stop looking for it
|
|
319
531
|
- Try a completely different approach than what failed
|
|
320
532
|
- Consider alternative selectors, timing, or interaction methods
|
|
321
533
|
- Never repeat the exact same command that failed`;
|
|
322
534
|
}
|
|
535
|
+
/**
|
|
536
|
+
* Cleanup generated script - remove redundancies and make minor adjustments
|
|
537
|
+
*/
|
|
538
|
+
async cleanupScript(script, model) {
|
|
539
|
+
try {
|
|
540
|
+
const response = await this.llmProvider.callLLM({
|
|
541
|
+
model: model || model_constants_1.DEFAULT_MODEL,
|
|
542
|
+
systemPrompt: prompts_1.PROMPTS.SCRIPT_CLEANUP.SYSTEM,
|
|
543
|
+
userPrompt: prompts_1.PROMPTS.SCRIPT_CLEANUP.USER(script)
|
|
544
|
+
});
|
|
545
|
+
// Parse JSON response
|
|
546
|
+
const jsonMatch = response.answer.match(/\{[\s\S]*\}/);
|
|
547
|
+
if (!jsonMatch) {
|
|
548
|
+
console.log('[LLMFacade] Cleanup response not in JSON format, returning original script');
|
|
549
|
+
return { script, changes: [], skipped: 'Response not in JSON format' };
|
|
550
|
+
}
|
|
551
|
+
const parsed = JSON.parse(jsonMatch[0]);
|
|
552
|
+
// Validate response
|
|
553
|
+
if (!parsed.script) {
|
|
554
|
+
console.log('[LLMFacade] Cleanup response missing script field, returning original');
|
|
555
|
+
return { script, changes: [], skipped: 'Invalid response format' };
|
|
556
|
+
}
|
|
557
|
+
console.log(`[LLMFacade] Script cleanup completed. Changes: ${parsed.changes?.length || 0}`);
|
|
558
|
+
if (parsed.changes && parsed.changes.length > 0) {
|
|
559
|
+
parsed.changes.forEach((change, i) => {
|
|
560
|
+
console.log(`[LLMFacade] ${i + 1}. ${change}`);
|
|
561
|
+
});
|
|
562
|
+
}
|
|
563
|
+
return {
|
|
564
|
+
script: parsed.script,
|
|
565
|
+
changes: parsed.changes || [],
|
|
566
|
+
skipped: parsed.skipped
|
|
567
|
+
};
|
|
568
|
+
}
|
|
569
|
+
catch (error) {
|
|
570
|
+
console.error('[LLMFacade] Script cleanup failed:', error.message);
|
|
571
|
+
// Return original script on error
|
|
572
|
+
return { script, changes: [], skipped: `Error: ${error.message}` };
|
|
573
|
+
}
|
|
574
|
+
}
|
|
323
575
|
}
|
|
324
576
|
exports.LLMFacade = LLMFacade;
|
|
325
577
|
//# sourceMappingURL=llm-facade.js.map
|