testchimp-runner-core 0.0.21 → 0.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/VISION_DIAGNOSTICS_IMPROVEMENTS.md +336 -0
- package/dist/credit-usage-service.d.ts +9 -0
- package/dist/credit-usage-service.d.ts.map +1 -1
- package/dist/credit-usage-service.js +20 -5
- package/dist/credit-usage-service.js.map +1 -1
- package/dist/execution-service.d.ts +7 -2
- package/dist/execution-service.d.ts.map +1 -1
- package/dist/execution-service.js +91 -36
- package/dist/execution-service.js.map +1 -1
- package/dist/index.d.ts +30 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +91 -26
- package/dist/index.js.map +1 -1
- package/dist/llm-facade.d.ts +64 -8
- package/dist/llm-facade.d.ts.map +1 -1
- package/dist/llm-facade.js +361 -109
- package/dist/llm-facade.js.map +1 -1
- package/dist/llm-provider.d.ts +39 -0
- package/dist/llm-provider.d.ts.map +1 -0
- package/dist/llm-provider.js +7 -0
- package/dist/llm-provider.js.map +1 -0
- package/dist/model-constants.d.ts +21 -0
- package/dist/model-constants.d.ts.map +1 -0
- package/dist/model-constants.js +24 -0
- package/dist/model-constants.js.map +1 -0
- package/dist/orchestrator/index.d.ts +8 -0
- package/dist/orchestrator/index.d.ts.map +1 -0
- package/dist/orchestrator/index.js +23 -0
- package/dist/orchestrator/index.js.map +1 -0
- package/dist/orchestrator/orchestrator-agent.d.ts +66 -0
- package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -0
- package/dist/orchestrator/orchestrator-agent.js +855 -0
- package/dist/orchestrator/orchestrator-agent.js.map +1 -0
- package/dist/orchestrator/tool-registry.d.ts +74 -0
- package/dist/orchestrator/tool-registry.d.ts.map +1 -0
- package/dist/orchestrator/tool-registry.js +131 -0
- package/dist/orchestrator/tool-registry.js.map +1 -0
- package/dist/orchestrator/tools/check-page-ready.d.ts +13 -0
- package/dist/orchestrator/tools/check-page-ready.d.ts.map +1 -0
- package/dist/orchestrator/tools/check-page-ready.js +72 -0
- package/dist/orchestrator/tools/check-page-ready.js.map +1 -0
- package/dist/orchestrator/tools/extract-data.d.ts +13 -0
- package/dist/orchestrator/tools/extract-data.d.ts.map +1 -0
- package/dist/orchestrator/tools/extract-data.js +84 -0
- package/dist/orchestrator/tools/extract-data.js.map +1 -0
- package/dist/orchestrator/tools/index.d.ts +10 -0
- package/dist/orchestrator/tools/index.d.ts.map +1 -0
- package/dist/orchestrator/tools/index.js +18 -0
- package/dist/orchestrator/tools/index.js.map +1 -0
- package/dist/orchestrator/tools/inspect-page.d.ts +13 -0
- package/dist/orchestrator/tools/inspect-page.d.ts.map +1 -0
- package/dist/orchestrator/tools/inspect-page.js +39 -0
- package/dist/orchestrator/tools/inspect-page.js.map +1 -0
- package/dist/orchestrator/tools/recall-history.d.ts +13 -0
- package/dist/orchestrator/tools/recall-history.d.ts.map +1 -0
- package/dist/orchestrator/tools/recall-history.js +64 -0
- package/dist/orchestrator/tools/recall-history.js.map +1 -0
- package/dist/orchestrator/tools/take-screenshot.d.ts +15 -0
- package/dist/orchestrator/tools/take-screenshot.d.ts.map +1 -0
- package/dist/orchestrator/tools/take-screenshot.js +112 -0
- package/dist/orchestrator/tools/take-screenshot.js.map +1 -0
- package/dist/orchestrator/types.d.ts +133 -0
- package/dist/orchestrator/types.d.ts.map +1 -0
- package/dist/orchestrator/types.js +28 -0
- package/dist/orchestrator/types.js.map +1 -0
- package/dist/playwright-mcp-service.d.ts +9 -0
- package/dist/playwright-mcp-service.d.ts.map +1 -1
- package/dist/playwright-mcp-service.js +20 -5
- package/dist/playwright-mcp-service.js.map +1 -1
- package/dist/progress-reporter.d.ts +97 -0
- package/dist/progress-reporter.d.ts.map +1 -0
- package/dist/progress-reporter.js +18 -0
- package/dist/progress-reporter.js.map +1 -0
- package/dist/prompts.d.ts +24 -0
- package/dist/prompts.d.ts.map +1 -1
- package/dist/prompts.js +593 -68
- package/dist/prompts.js.map +1 -1
- package/dist/providers/backend-proxy-llm-provider.d.ts +25 -0
- package/dist/providers/backend-proxy-llm-provider.d.ts.map +1 -0
- package/dist/providers/backend-proxy-llm-provider.js +76 -0
- package/dist/providers/backend-proxy-llm-provider.js.map +1 -0
- package/dist/providers/local-llm-provider.d.ts +21 -0
- package/dist/providers/local-llm-provider.d.ts.map +1 -0
- package/dist/providers/local-llm-provider.js +35 -0
- package/dist/providers/local-llm-provider.js.map +1 -0
- package/dist/scenario-service.d.ts +27 -1
- package/dist/scenario-service.d.ts.map +1 -1
- package/dist/scenario-service.js +48 -12
- package/dist/scenario-service.js.map +1 -1
- package/dist/scenario-worker-class.d.ts +39 -2
- package/dist/scenario-worker-class.d.ts.map +1 -1
- package/dist/scenario-worker-class.js +614 -86
- package/dist/scenario-worker-class.js.map +1 -1
- package/dist/script-utils.d.ts +2 -0
- package/dist/script-utils.d.ts.map +1 -1
- package/dist/script-utils.js +44 -4
- package/dist/script-utils.js.map +1 -1
- package/dist/types.d.ts +11 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/browser-utils.d.ts +20 -1
- package/dist/utils/browser-utils.d.ts.map +1 -1
- package/dist/utils/browser-utils.js +102 -51
- package/dist/utils/browser-utils.js.map +1 -1
- package/dist/utils/page-info-utils.d.ts +23 -4
- package/dist/utils/page-info-utils.d.ts.map +1 -1
- package/dist/utils/page-info-utils.js +174 -43
- package/dist/utils/page-info-utils.js.map +1 -1
- package/package.json +1 -2
- package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +642 -0
- package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +844 -0
- package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +539 -0
- package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +241 -0
- package/plandocs/PHASE1_FINAL_STATUS.md +210 -0
- package/plandocs/PLANNING_SESSION_SUMMARY.md +372 -0
- package/plandocs/SCRIPT_CLEANUP_FEATURE.md +201 -0
- package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +364 -0
- package/plandocs/SELECTOR_IMPROVEMENTS.md +139 -0
- package/src/credit-usage-service.ts +23 -5
- package/src/execution-service.ts +152 -42
- package/src/index.ts +169 -26
- package/src/llm-facade.ts +500 -126
- package/src/llm-provider.ts +43 -0
- package/src/model-constants.ts +23 -0
- package/src/orchestrator/index.ts +33 -0
- package/src/orchestrator/orchestrator-agent.ts +1037 -0
- package/src/orchestrator/tool-registry.ts +182 -0
- package/src/orchestrator/tools/check-page-ready.ts +75 -0
- package/src/orchestrator/tools/extract-data.ts +92 -0
- package/src/orchestrator/tools/index.ts +11 -0
- package/src/orchestrator/tools/inspect-page.ts +42 -0
- package/src/orchestrator/tools/recall-history.ts +72 -0
- package/src/orchestrator/tools/take-screenshot.ts +128 -0
- package/src/orchestrator/types.ts +200 -0
- package/src/playwright-mcp-service.ts +23 -5
- package/src/progress-reporter.ts +109 -0
- package/src/prompts.ts +606 -69
- package/src/providers/backend-proxy-llm-provider.ts +91 -0
- package/src/providers/local-llm-provider.ts +38 -0
- package/src/scenario-service.ts +83 -13
- package/src/scenario-worker-class.ts +740 -72
- package/src/script-utils.ts +50 -5
- package/src/types.ts +13 -1
- package/src/utils/browser-utils.ts +123 -51
- package/src/utils/page-info-utils.ts +210 -53
- package/testchimp-runner-core-0.0.22.tgz +0 -0
package/src/llm-facade.ts
CHANGED
|
@@ -1,20 +1,8 @@
|
|
|
1
|
-
import axios from 'axios';
|
|
2
1
|
import { PROMPTS } from './prompts';
|
|
3
2
|
import { PageInfo } from './utils/page-info-utils';
|
|
4
3
|
import { StepOperation } from './types';
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
|
|
8
|
-
// LLM Request/Response interfaces for backend proxy
|
|
9
|
-
interface CallLLMRequest {
|
|
10
|
-
model?: string;
|
|
11
|
-
system_prompt?: string;
|
|
12
|
-
user_prompt?: string;
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
interface CallLLMResponse {
|
|
16
|
-
answer?: string;
|
|
17
|
-
}
|
|
4
|
+
import { DEFAULT_MODEL, VISION_MODEL } from './model-constants';
|
|
5
|
+
import { LLMProvider, LLMRequest, LLMResponse } from './llm-provider';
|
|
18
6
|
|
|
19
7
|
// LLM Response interfaces
|
|
20
8
|
export interface LLMScenarioBreakdownResponse {
|
|
@@ -49,6 +37,27 @@ export interface RepairConfidenceResponse {
|
|
|
49
37
|
advice: string;
|
|
50
38
|
}
|
|
51
39
|
|
|
40
|
+
export interface GoalCompletionResponse {
|
|
41
|
+
isComplete: boolean;
|
|
42
|
+
reason: string;
|
|
43
|
+
nextSubGoal?: string;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
export interface ScreenshotNeedResponse {
|
|
47
|
+
needsScreenshot: boolean;
|
|
48
|
+
reason: string;
|
|
49
|
+
alternativeApproach?: string;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
export interface VisionDiagnosticResponse {
|
|
53
|
+
visualAnalysis: string; // What the supervisor sees in the screenshot
|
|
54
|
+
rootCause: string; // Why previous attempts failed
|
|
55
|
+
specificInstructions: string; // Exact instructions for the worker agent
|
|
56
|
+
recommendedApproach: string; // What strategy to use (selector-based, state-based, etc.)
|
|
57
|
+
elementsFound: string[]; // What elements are actually visible
|
|
58
|
+
elementsNotFound: string[]; // What elements were expected but not visible
|
|
59
|
+
}
|
|
60
|
+
|
|
52
61
|
export interface ScenarioStep {
|
|
53
62
|
stepNumber: number;
|
|
54
63
|
description: string;
|
|
@@ -66,101 +75,86 @@ export interface ScenarioStep {
|
|
|
66
75
|
}
|
|
67
76
|
|
|
68
77
|
export class LLMFacade {
|
|
69
|
-
|
|
70
|
-
private
|
|
71
|
-
|
|
72
|
-
constructor(authConfig?: AuthConfig, backendUrl?: string) {
|
|
73
|
-
// Use provided backend URL or fall back to environment configuration
|
|
74
|
-
if (backendUrl) {
|
|
75
|
-
this.backendUrl = backendUrl;
|
|
76
|
-
console.log(`LLMFacade initialized with provided backend URL: ${this.backendUrl}`);
|
|
77
|
-
} else {
|
|
78
|
-
// Fall back to environment configuration for backward compatibility
|
|
79
|
-
const envConfig = loadEnvConfig();
|
|
80
|
-
this.backendUrl = envConfig.TESTCHIMP_BACKEND_URL;
|
|
81
|
-
console.log(`LLMFacade initialized with environment backend URL: ${this.backendUrl}`);
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
// Use provided auth config or try to create from environment
|
|
85
|
-
this.authConfig = authConfig || createAuthConfigFromEnv();
|
|
86
|
-
|
|
87
|
-
if (!this.authConfig) {
|
|
88
|
-
console.warn('TestChimp authentication not configured. LLM calls may fail.');
|
|
89
|
-
}
|
|
90
|
-
}
|
|
78
|
+
public llmProvider: LLMProvider; // Expose for orchestrator direct access
|
|
79
|
+
private logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
|
|
80
|
+
private tokenUsageCallback?: (inputTokens: number, outputTokens: number, includesImage: boolean) => void;
|
|
91
81
|
|
|
82
|
+
constructor(llmProvider: LLMProvider) {
|
|
83
|
+
this.llmProvider = llmProvider;
|
|
84
|
+
this.log('LLMFacade initialized with pluggable LLM provider');
|
|
85
|
+
}
|
|
86
|
+
|
|
92
87
|
/**
|
|
93
|
-
*
|
|
88
|
+
* Set token usage callback for tracking
|
|
94
89
|
*/
|
|
95
|
-
|
|
96
|
-
this.
|
|
90
|
+
setTokenUsageCallback(callback: (inputTokens: number, outputTokens: number, includesImage: boolean) => void): void {
|
|
91
|
+
this.tokenUsageCallback = callback;
|
|
97
92
|
}
|
|
98
93
|
|
|
99
94
|
/**
|
|
100
|
-
*
|
|
95
|
+
* Set a logger callback for capturing execution logs
|
|
101
96
|
*/
|
|
102
|
-
|
|
103
|
-
|
|
97
|
+
setLogger(logger: (message: string, level?: 'log' | 'error' | 'warn') => void): void {
|
|
98
|
+
this.logger = logger;
|
|
99
|
+
this.llmProvider.setLogger?.(logger);
|
|
104
100
|
}
|
|
105
101
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
102
|
+
/**
|
|
103
|
+
* Log a message using the configured logger
|
|
104
|
+
*/
|
|
105
|
+
private log(message: string, level: 'log' | 'error' | 'warn' = 'log'): void {
|
|
106
|
+
if (this.logger) {
|
|
107
|
+
this.logger(message, level);
|
|
108
|
+
}
|
|
109
|
+
// Console fallback for debug visibility
|
|
110
|
+
if (level === 'error') {
|
|
111
|
+
console.error(message);
|
|
112
|
+
} else if (level === 'warn') {
|
|
113
|
+
console.warn(message);
|
|
114
|
+
} else {
|
|
115
|
+
console.log(message);
|
|
109
116
|
}
|
|
117
|
+
}
|
|
110
118
|
|
|
119
|
+
private async callLLM(request: LLMRequest): Promise<LLMResponse> {
|
|
111
120
|
try {
|
|
112
|
-
const
|
|
113
|
-
const url = `${this.backendUrl}/localagent/call_llm`;
|
|
114
|
-
console.log(`repairing step`);
|
|
121
|
+
const response = await this.llmProvider.callLLM(request);
|
|
115
122
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
if (response.data && response.data.answer) {
|
|
125
|
-
return response.data.answer;
|
|
126
|
-
} else {
|
|
127
|
-
throw new Error('Invalid response from LLM backend');
|
|
123
|
+
// Report token usage if callback is set
|
|
124
|
+
if (response.usage && this.tokenUsageCallback) {
|
|
125
|
+
this.tokenUsageCallback(
|
|
126
|
+
response.usage.inputTokens,
|
|
127
|
+
response.usage.outputTokens,
|
|
128
|
+
!!request.imageUrl
|
|
129
|
+
);
|
|
128
130
|
}
|
|
131
|
+
|
|
132
|
+
return response;
|
|
129
133
|
} catch (error: any) {
|
|
130
|
-
//
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
} else if (error.response?.status === 401) {
|
|
134
|
-
throw new Error('Authentication failed. Please check your API credentials.');
|
|
135
|
-
} else if (error.response?.status === 403) {
|
|
136
|
-
throw new Error('Access denied. Please check your API permissions.');
|
|
137
|
-
} else if (error.response?.status === 429) {
|
|
138
|
-
throw new Error('Rate limit exceeded. Please try again later.');
|
|
139
|
-
} else {
|
|
140
|
-
console.error('LLM call failed:', error);
|
|
141
|
-
throw new Error(`LLM call failed: ${error.message}`);
|
|
142
|
-
}
|
|
134
|
+
// Let provider handle its own error messages, just re-throw
|
|
135
|
+
this.log(`LLM call failed: ${error}`, 'error');
|
|
136
|
+
throw error;
|
|
143
137
|
}
|
|
144
138
|
}
|
|
145
139
|
|
|
146
140
|
/**
|
|
147
141
|
* Generate a test name from scenario description
|
|
148
142
|
*/
|
|
149
|
-
async generateTestName(scenario: string, model: string =
|
|
150
|
-
|
|
143
|
+
async generateTestName(scenario: string, model: string = DEFAULT_MODEL): Promise<string> {
|
|
144
|
+
this.log('Generating test name with LLM...');
|
|
151
145
|
|
|
152
|
-
const request:
|
|
146
|
+
const request: LLMRequest = {
|
|
153
147
|
model,
|
|
154
|
-
|
|
155
|
-
|
|
148
|
+
systemPrompt: PROMPTS.TEST_NAME_GENERATION.SYSTEM,
|
|
149
|
+
userPrompt: PROMPTS.TEST_NAME_GENERATION.USER(scenario)
|
|
156
150
|
};
|
|
157
151
|
|
|
158
152
|
try {
|
|
159
153
|
const response = await this.callLLM(request);
|
|
160
|
-
const testNameResponse = JSON.parse(response) as LLMTestNameResponse;
|
|
154
|
+
const testNameResponse = JSON.parse(response.answer) as LLMTestNameResponse;
|
|
161
155
|
return testNameResponse.testName;
|
|
162
156
|
} catch (error) {
|
|
163
|
-
|
|
157
|
+
this.log(`Failed to generate test name: ${error}`, 'error');
|
|
164
158
|
// Fallback to a simple generated name
|
|
165
159
|
return `Test: ${scenario.substring(0, 50)}...`;
|
|
166
160
|
}
|
|
@@ -170,40 +164,337 @@ export class LLMFacade {
|
|
|
170
164
|
* Generate hashtags for semantic grouping
|
|
171
165
|
*/
|
|
172
166
|
async generateHashtags(scenario: string, model: string = 'gpt-4o-mini'): Promise<string[]> {
|
|
173
|
-
|
|
167
|
+
this.log('Generating hashtags with LLM...');
|
|
174
168
|
|
|
175
|
-
const request:
|
|
169
|
+
const request: LLMRequest = {
|
|
176
170
|
model,
|
|
177
|
-
|
|
178
|
-
|
|
171
|
+
systemPrompt: PROMPTS.HASHTAG_GENERATION.SYSTEM,
|
|
172
|
+
userPrompt: PROMPTS.HASHTAG_GENERATION.USER(scenario)
|
|
179
173
|
};
|
|
180
174
|
|
|
181
175
|
try {
|
|
182
176
|
const response = await this.callLLM(request);
|
|
183
|
-
const hashtagResponse = JSON.parse(response) as { hashtags: string[] };
|
|
177
|
+
const hashtagResponse = JSON.parse(response.answer) as { hashtags: string[] };
|
|
184
178
|
return hashtagResponse.hashtags || [];
|
|
185
179
|
} catch (error) {
|
|
186
|
-
|
|
180
|
+
this.log(`Failed to generate hashtags: ${error}`, 'error');
|
|
187
181
|
// Fallback to empty array
|
|
188
182
|
return [];
|
|
189
183
|
}
|
|
190
184
|
}
|
|
191
185
|
|
|
186
|
+
/**
|
|
187
|
+
* Check if a goal has been completed based on actions taken and current page state
|
|
188
|
+
*/
|
|
189
|
+
async checkGoalCompletion(
|
|
190
|
+
goalDescription: string,
|
|
191
|
+
completedActions: string[],
|
|
192
|
+
pageInfo: any,
|
|
193
|
+
model: string = DEFAULT_MODEL
|
|
194
|
+
): Promise<GoalCompletionResponse> {
|
|
195
|
+
this.log('Checking goal completion with LLM...');
|
|
196
|
+
|
|
197
|
+
const request: LLMRequest = {
|
|
198
|
+
model,
|
|
199
|
+
systemPrompt: PROMPTS.GOAL_COMPLETION_CHECK.SYSTEM,
|
|
200
|
+
userPrompt: PROMPTS.GOAL_COMPLETION_CHECK.USER(goalDescription, completedActions, pageInfo)
|
|
201
|
+
};
|
|
202
|
+
|
|
203
|
+
try {
|
|
204
|
+
const response = await this.callLLM(request);
|
|
205
|
+
return JSON.parse(response.answer) as GoalCompletionResponse;
|
|
206
|
+
} catch (error) {
|
|
207
|
+
this.log(`Failed to check goal completion: ${error}`, 'error');
|
|
208
|
+
// Conservative fallback - assume not complete if we can't determine
|
|
209
|
+
return {
|
|
210
|
+
isComplete: false,
|
|
211
|
+
reason: 'Error checking completion status'
|
|
212
|
+
};
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
/**
|
|
217
|
+
* Check goal completion with visual verification (uses vision model)
|
|
218
|
+
*/
|
|
219
|
+
async checkGoalCompletionWithVision(
|
|
220
|
+
goalDescription: string,
|
|
221
|
+
completedActions: string[],
|
|
222
|
+
pageInfo: any,
|
|
223
|
+
imageDataUrl: string,
|
|
224
|
+
model: string = VISION_MODEL
|
|
225
|
+
): Promise<GoalCompletionResponse> {
|
|
226
|
+
this.log(`👔 Checking goal completion with vision (${model})...`);
|
|
227
|
+
|
|
228
|
+
const request: LLMRequest = {
|
|
229
|
+
model,
|
|
230
|
+
systemPrompt: `You are checking if a test automation goal has been completed by analyzing both DOM state and visual appearance.
|
|
231
|
+
|
|
232
|
+
CRITICAL: For action goals (login, submit, click, navigate), check if the PRIMARY ACTION and its SIDE EFFECTS are complete:
|
|
233
|
+
- "Login" = Fill fields AND click button AND verify navigation/page change
|
|
234
|
+
- "Submit form" = Fill fields AND click submit AND verify submission (success message/page change)
|
|
235
|
+
- "Click X" = Click X AND verify expected page change or UI update
|
|
236
|
+
|
|
237
|
+
For verification goals (verify, check, confirm), verify the VISUAL PRESENCE of expected elements.`,
|
|
238
|
+
userPrompt: `GOAL: ${goalDescription}
|
|
239
|
+
|
|
240
|
+
ACTIONS COMPLETED:
|
|
241
|
+
${completedActions.map((action, i) => `${i + 1}. ${action}`).join('\n')}
|
|
242
|
+
|
|
243
|
+
CURRENT PAGE STATE:
|
|
244
|
+
URL: ${pageInfo.url}
|
|
245
|
+
Title: ${pageInfo.title}
|
|
246
|
+
Interactive Elements:
|
|
247
|
+
${pageInfo.formattedElements}
|
|
248
|
+
|
|
249
|
+
Based on the screenshot AND page state, is this goal COMPLETE?
|
|
250
|
+
|
|
251
|
+
Respond ONLY with valid JSON:
|
|
252
|
+
{
|
|
253
|
+
"isComplete": true/false,
|
|
254
|
+
"reason": "Brief explanation based on what you SEE in the screenshot and DOM",
|
|
255
|
+
"nextSubGoal": "If incomplete, what specific next action is needed?"
|
|
256
|
+
}`,
|
|
257
|
+
imageUrl: imageDataUrl
|
|
258
|
+
};
|
|
259
|
+
|
|
260
|
+
try {
|
|
261
|
+
const response = await this.callLLM(request);
|
|
262
|
+
const parsed = JSON.parse(response.answer) as GoalCompletionResponse;
|
|
263
|
+
this.log(`👔 Vision goal check result: ${parsed.isComplete ? 'COMPLETE ✅' : 'INCOMPLETE ❌'}`);
|
|
264
|
+
return parsed;
|
|
265
|
+
} catch (error) {
|
|
266
|
+
this.log(`Failed to check goal completion with vision: ${error}`, 'error');
|
|
267
|
+
// Conservative fallback - assume not complete if we can't determine
|
|
268
|
+
return {
|
|
269
|
+
isComplete: false,
|
|
270
|
+
reason: 'Error checking completion status with vision'
|
|
271
|
+
};
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
/**
|
|
276
|
+
* Ask LLM if a screenshot would help debug the current failure
|
|
277
|
+
*/
|
|
278
|
+
async assessScreenshotNeed(
|
|
279
|
+
stepDescription: string,
|
|
280
|
+
errorMessage: string,
|
|
281
|
+
attemptCount: number,
|
|
282
|
+
pageInfo: any,
|
|
283
|
+
model: string = DEFAULT_MODEL
|
|
284
|
+
): Promise<ScreenshotNeedResponse> {
|
|
285
|
+
this.log('Assessing screenshot need with LLM...');
|
|
286
|
+
|
|
287
|
+
const request: LLMRequest = {
|
|
288
|
+
model,
|
|
289
|
+
systemPrompt: PROMPTS.SCREENSHOT_NEED_ASSESSMENT.SYSTEM,
|
|
290
|
+
userPrompt: PROMPTS.SCREENSHOT_NEED_ASSESSMENT.USER(stepDescription, errorMessage, attemptCount, pageInfo)
|
|
291
|
+
};
|
|
292
|
+
|
|
293
|
+
try {
|
|
294
|
+
const response = await this.callLLM(request);
|
|
295
|
+
return JSON.parse(response.answer) as ScreenshotNeedResponse;
|
|
296
|
+
} catch (error) {
|
|
297
|
+
this.log(`Failed to assess screenshot need: ${error}`, 'error');
|
|
298
|
+
// Conservative fallback - don't use expensive screenshot unless we're sure
|
|
299
|
+
return {
|
|
300
|
+
needsScreenshot: false,
|
|
301
|
+
reason: 'Error assessing need, defaulting to no screenshot'
|
|
302
|
+
};
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
/**
|
|
307
|
+
* Get diagnostic analysis from screenshot (supervisor role)
|
|
308
|
+
*/
|
|
309
|
+
async getVisionDiagnostics(
|
|
310
|
+
stepDescription: string,
|
|
311
|
+
pageInfo: any,
|
|
312
|
+
previousSteps: any[],
|
|
313
|
+
lastError: string | undefined,
|
|
314
|
+
imageDataUrl: string,
|
|
315
|
+
model: string = VISION_MODEL
|
|
316
|
+
): Promise<VisionDiagnosticResponse> {
|
|
317
|
+
this.log('👔 SUPERVISOR: Analyzing screenshot for diagnostic insights...');
|
|
318
|
+
|
|
319
|
+
const previousCommands = previousSteps
|
|
320
|
+
.map(s => s.playwrightCommand)
|
|
321
|
+
.filter(Boolean)
|
|
322
|
+
.join('\n');
|
|
323
|
+
|
|
324
|
+
const attemptHistory = previousSteps.length > 0
|
|
325
|
+
? `Previous attempts context: ${previousSteps.length} commands executed`
|
|
326
|
+
: '';
|
|
327
|
+
|
|
328
|
+
const errorContext = lastError
|
|
329
|
+
? `Last Error: ${lastError}`
|
|
330
|
+
: '';
|
|
331
|
+
|
|
332
|
+
const request: LLMRequest = {
|
|
333
|
+
model,
|
|
334
|
+
systemPrompt: PROMPTS.VISION_DIAGNOSTIC_ANALYSIS.SYSTEM,
|
|
335
|
+
userPrompt: PROMPTS.VISION_DIAGNOSTIC_ANALYSIS.USER(
|
|
336
|
+
stepDescription,
|
|
337
|
+
pageInfo,
|
|
338
|
+
previousCommands,
|
|
339
|
+
attemptHistory,
|
|
340
|
+
errorContext
|
|
341
|
+
),
|
|
342
|
+
imageUrl: imageDataUrl
|
|
343
|
+
};
|
|
344
|
+
|
|
345
|
+
try {
|
|
346
|
+
const response = await this.callLLM(request);
|
|
347
|
+
const diagnostics = JSON.parse(response.answer) as VisionDiagnosticResponse;
|
|
348
|
+
|
|
349
|
+
// Log supervisor's findings
|
|
350
|
+
this.log(`👔 SUPERVISOR ANALYSIS:`);
|
|
351
|
+
this.log(` 📸 Visual: ${diagnostics.visualAnalysis}`);
|
|
352
|
+
this.log(` 🔍 Root cause: ${diagnostics.rootCause}`);
|
|
353
|
+
this.log(` 📋 Instructions: ${diagnostics.specificInstructions}`);
|
|
354
|
+
this.log(` 💡 Approach: ${diagnostics.recommendedApproach}`);
|
|
355
|
+
if (diagnostics.elementsFound?.length > 0) {
|
|
356
|
+
this.log(` ✅ Elements found: ${diagnostics.elementsFound.join(', ')}`);
|
|
357
|
+
}
|
|
358
|
+
if (diagnostics.elementsNotFound?.length > 0) {
|
|
359
|
+
this.log(` ❌ Elements NOT found: ${diagnostics.elementsNotFound.join(', ')}`);
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
return diagnostics;
|
|
363
|
+
} catch (error) {
|
|
364
|
+
this.log(`Failed to get vision diagnostics: ${error}`, 'error');
|
|
365
|
+
throw new Error(`Vision diagnostic analysis failed: ${error}`);
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
/**
|
|
370
|
+
* Generate command based on supervisor's instructions
|
|
371
|
+
*/
|
|
372
|
+
async generateCommandFromSupervisorInstructions(
|
|
373
|
+
stepDescription: string,
|
|
374
|
+
supervisorDiagnostics: VisionDiagnosticResponse,
|
|
375
|
+
pageInfo: any,
|
|
376
|
+
model: string = DEFAULT_MODEL
|
|
377
|
+
): Promise<string> {
|
|
378
|
+
this.log('🔨 WORKER: Generating command based on supervisor instructions...');
|
|
379
|
+
|
|
380
|
+
const request: LLMRequest = {
|
|
381
|
+
model,
|
|
382
|
+
systemPrompt: PROMPTS.PLAYWRIGHT_COMMAND_WITH_SUPERVISOR.SYSTEM,
|
|
383
|
+
userPrompt: PROMPTS.PLAYWRIGHT_COMMAND_WITH_SUPERVISOR.USER(
|
|
384
|
+
stepDescription,
|
|
385
|
+
supervisorDiagnostics.specificInstructions,
|
|
386
|
+
supervisorDiagnostics.visualAnalysis,
|
|
387
|
+
supervisorDiagnostics.elementsFound || [],
|
|
388
|
+
supervisorDiagnostics.elementsNotFound || [],
|
|
389
|
+
pageInfo
|
|
390
|
+
)
|
|
391
|
+
};
|
|
392
|
+
|
|
393
|
+
try {
|
|
394
|
+
const response = await this.callLLM(request);
|
|
395
|
+
const commandResponse = JSON.parse(response.answer) as LLMPlaywrightCommandResponse;
|
|
396
|
+
|
|
397
|
+
if (commandResponse.reasoning) {
|
|
398
|
+
this.log(`🔨 WORKER reasoning: ${commandResponse.reasoning}`);
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
return commandResponse.command;
|
|
402
|
+
} catch (error) {
|
|
403
|
+
this.log(`Failed to generate command from supervisor instructions: ${error}`, 'error');
|
|
404
|
+
throw new Error(`Command generation from supervisor instructions failed: ${error}`);
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
/**
|
|
409
|
+
* Generate Playwright command with vision (uses vision model)
|
|
410
|
+
*/
|
|
411
|
+
async generatePlaywrightCommandWithVision(
|
|
412
|
+
stepDescription: string,
|
|
413
|
+
pageInfo: any,
|
|
414
|
+
previousSteps: any[],
|
|
415
|
+
lastError: string | undefined,
|
|
416
|
+
imageDataUrl: string, // Full data URL: data:image/png;base64,...
|
|
417
|
+
model: string = VISION_MODEL
|
|
418
|
+
): Promise<string> {
|
|
419
|
+
this.log(`⚠️ USING VISION MODE (${model})...`);
|
|
420
|
+
|
|
421
|
+
const previousCommands = previousSteps
|
|
422
|
+
.map(s => s.playwrightCommand)
|
|
423
|
+
.filter(Boolean)
|
|
424
|
+
.join('\n');
|
|
425
|
+
|
|
426
|
+
const attemptHistory = previousSteps.length > 0
|
|
427
|
+
? `Previous attempts context: ${previousSteps.length} commands executed`
|
|
428
|
+
: '';
|
|
429
|
+
|
|
430
|
+
const errorContext = lastError
|
|
431
|
+
? `Last Error: ${lastError}`
|
|
432
|
+
: '';
|
|
433
|
+
|
|
434
|
+
const request: LLMRequest = {
|
|
435
|
+
model,
|
|
436
|
+
systemPrompt: PROMPTS.PLAYWRIGHT_COMMAND_WITH_VISION.SYSTEM,
|
|
437
|
+
userPrompt: PROMPTS.PLAYWRIGHT_COMMAND_WITH_VISION.USER(
|
|
438
|
+
stepDescription,
|
|
439
|
+
pageInfo,
|
|
440
|
+
previousCommands,
|
|
441
|
+
attemptHistory,
|
|
442
|
+
errorContext
|
|
443
|
+
),
|
|
444
|
+
imageUrl: imageDataUrl // Full data URL constructed by client
|
|
445
|
+
};
|
|
446
|
+
|
|
447
|
+
try {
|
|
448
|
+
const response = await this.callLLM(request);
|
|
449
|
+
const commandResponse = JSON.parse(response.answer) as LLMPlaywrightCommandResponse & {
|
|
450
|
+
visualInsights?: string;
|
|
451
|
+
failureRootCause?: string;
|
|
452
|
+
recommendedAlternative?: string;
|
|
453
|
+
};
|
|
454
|
+
|
|
455
|
+
// Log diagnostic insights from vision analysis
|
|
456
|
+
if (commandResponse.visualInsights) {
|
|
457
|
+
this.log(`📸 Visual insights: ${commandResponse.visualInsights}`);
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
if (commandResponse.failureRootCause) {
|
|
461
|
+
this.log(`🔍 Root cause analysis: ${commandResponse.failureRootCause}`);
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
if (commandResponse.recommendedAlternative) {
|
|
465
|
+
this.log(`💡 Recommended alternative: ${commandResponse.recommendedAlternative}`);
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
if (commandResponse.reasoning) {
|
|
469
|
+
this.log(`🧠 Vision-based reasoning: ${commandResponse.reasoning}`);
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
return commandResponse.command;
|
|
473
|
+
} catch (error) {
|
|
474
|
+
this.log(`Failed to generate command with vision: ${error}`, 'error');
|
|
475
|
+
throw new Error(`Vision-enhanced command generation failed: ${error}`);
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
|
|
192
479
|
/**
|
|
193
480
|
* Break down scenario into steps
|
|
194
481
|
*/
|
|
195
|
-
async breakdownScenario(scenario: string, model: string =
|
|
196
|
-
|
|
482
|
+
async breakdownScenario(scenario: string, model: string = DEFAULT_MODEL): Promise<ScenarioStep[]> {
|
|
483
|
+
this.log('Breaking down scenario with LLM...');
|
|
484
|
+
this.log(`📝 INPUT SCENARIO: ${scenario}`);
|
|
197
485
|
|
|
198
|
-
const request:
|
|
486
|
+
const request: LLMRequest = {
|
|
199
487
|
model,
|
|
200
|
-
|
|
201
|
-
|
|
488
|
+
systemPrompt: PROMPTS.SCENARIO_BREAKDOWN.SYSTEM,
|
|
489
|
+
userPrompt: PROMPTS.SCENARIO_BREAKDOWN.USER(scenario)
|
|
202
490
|
};
|
|
203
491
|
|
|
204
492
|
try {
|
|
205
493
|
const response = await this.callLLM(request);
|
|
206
|
-
|
|
494
|
+
this.log(`🤖 RAW LLM RESPONSE: ${response.answer}`);
|
|
495
|
+
|
|
496
|
+
const breakdownResponse = JSON.parse(response.answer) as LLMScenarioBreakdownResponse;
|
|
497
|
+
this.log(`📋 PARSED BREAKDOWN: ${JSON.stringify(breakdownResponse, null, 2)}`);
|
|
207
498
|
|
|
208
499
|
// Validate and clean up steps
|
|
209
500
|
const cleanedSteps = breakdownResponse.steps
|
|
@@ -211,14 +502,17 @@ export class LLMFacade {
|
|
|
211
502
|
.filter(step => step.length > 0)
|
|
212
503
|
.slice(0, 10); // Limit to 10 steps max
|
|
213
504
|
|
|
505
|
+
this.log(`✅ CLEANED STEPS: ${JSON.stringify(cleanedSteps, null, 2)}`);
|
|
506
|
+
|
|
214
507
|
return cleanedSteps.map((desc, index) => ({
|
|
215
508
|
stepNumber: index + 1,
|
|
216
509
|
description: desc,
|
|
217
510
|
}));
|
|
218
511
|
} catch (error) {
|
|
219
|
-
|
|
220
|
-
// Fallback to simple breakdown
|
|
221
|
-
const stepDescriptions = scenario.split('
|
|
512
|
+
this.log(`❌ Failed to breakdown scenario: ${error}`, 'error');
|
|
513
|
+
// Fallback to simple breakdown by newlines (preserves URLs)
|
|
514
|
+
const stepDescriptions = scenario.split('\n').map(s => s.trim()).filter(s => s.length > 0);
|
|
515
|
+
this.log(`🔄 FALLBACK STEPS: ${JSON.stringify(stepDescriptions, null, 2)}`);
|
|
222
516
|
return stepDescriptions.map((desc, index) => ({
|
|
223
517
|
stepNumber: index + 1,
|
|
224
518
|
description: desc,
|
|
@@ -235,9 +529,9 @@ export class LLMFacade {
|
|
|
235
529
|
previousSteps: ScenarioStep[],
|
|
236
530
|
lastError?: string,
|
|
237
531
|
currentStep?: ScenarioStep,
|
|
238
|
-
model: string =
|
|
532
|
+
model: string = DEFAULT_MODEL
|
|
239
533
|
): Promise<string | null> {
|
|
240
|
-
|
|
534
|
+
this.log('Generating Playwright command with LLM...');
|
|
241
535
|
|
|
242
536
|
const previousCommands = previousSteps
|
|
243
537
|
.filter(s => s.playwrightCommand && s.success)
|
|
@@ -258,18 +552,18 @@ export class LLMFacade {
|
|
|
258
552
|
errorContext
|
|
259
553
|
);
|
|
260
554
|
|
|
261
|
-
const request:
|
|
555
|
+
const request: LLMRequest = {
|
|
262
556
|
model,
|
|
263
|
-
|
|
264
|
-
|
|
557
|
+
systemPrompt: PROMPTS.PLAYWRIGHT_COMMAND.SYSTEM,
|
|
558
|
+
userPrompt: prompt
|
|
265
559
|
};
|
|
266
560
|
|
|
267
561
|
try {
|
|
268
562
|
const response = await this.callLLM(request);
|
|
269
|
-
const commandResponse = JSON.parse(response) as LLMPlaywrightCommandResponse;
|
|
563
|
+
const commandResponse = JSON.parse(response.answer) as LLMPlaywrightCommandResponse;
|
|
270
564
|
return commandResponse.command;
|
|
271
565
|
} catch (error) {
|
|
272
|
-
|
|
566
|
+
this.log(`Failed to generate Playwright command: ${error}`, 'error');
|
|
273
567
|
return null;
|
|
274
568
|
}
|
|
275
569
|
}
|
|
@@ -278,25 +572,31 @@ export class LLMFacade {
|
|
|
278
572
|
* Parse script into steps for AI repair
|
|
279
573
|
*/
|
|
280
574
|
async parseScriptIntoSteps(script: string, model: string = 'gpt-4o-mini'): Promise<Array<{ description: string; code: string; success?: boolean; error?: string }>> {
|
|
281
|
-
|
|
575
|
+
this.log('Parsing script into steps with LLM...');
|
|
576
|
+
|
|
577
|
+
const request: LLMRequest = {
|
|
282
578
|
model,
|
|
283
|
-
|
|
284
|
-
|
|
579
|
+
systemPrompt: PROMPTS.SCRIPT_PARSING.SYSTEM,
|
|
580
|
+
userPrompt: PROMPTS.SCRIPT_PARSING.USER(script)
|
|
285
581
|
};
|
|
286
582
|
|
|
287
583
|
try {
|
|
288
584
|
const response = await this.callLLM(request);
|
|
289
|
-
|
|
585
|
+
this.log(`Raw LLM parsing response (first 500 chars): ${response.answer.substring(0, 500)}`);
|
|
586
|
+
|
|
587
|
+
const parsed = JSON.parse(response.answer);
|
|
588
|
+
this.log(`Parsed JSON structure: ${JSON.stringify(parsed, null, 2).substring(0, 1000)}`);
|
|
290
589
|
|
|
291
590
|
// Expect JSON object with steps array
|
|
292
591
|
if (parsed.steps && Array.isArray(parsed.steps)) {
|
|
592
|
+
this.log(`LLM parsing successful, got ${parsed.steps.length} steps`);
|
|
293
593
|
return parsed.steps;
|
|
294
594
|
} else {
|
|
295
|
-
|
|
595
|
+
this.log(`Unexpected LLM response format - expected {steps: [...]}: ${JSON.stringify(parsed)}`, 'error');
|
|
296
596
|
return [];
|
|
297
597
|
}
|
|
298
598
|
} catch (error) {
|
|
299
|
-
|
|
599
|
+
this.log(`Failed to parse LLM response as JSON: ${error}`, 'error');
|
|
300
600
|
return [];
|
|
301
601
|
}
|
|
302
602
|
}
|
|
@@ -311,12 +611,12 @@ export class LLMFacade {
|
|
|
311
611
|
pageInfo: PageInfo,
|
|
312
612
|
failureHistory: string,
|
|
313
613
|
recentRepairs: string,
|
|
314
|
-
model: string =
|
|
614
|
+
model: string = DEFAULT_MODEL
|
|
315
615
|
): Promise<RepairSuggestionResponse> {
|
|
316
|
-
const request:
|
|
616
|
+
const request: LLMRequest = {
|
|
317
617
|
model,
|
|
318
|
-
|
|
319
|
-
|
|
618
|
+
systemPrompt: PROMPTS.REPAIR_SUGGESTION.SYSTEM,
|
|
619
|
+
userPrompt: PROMPTS.REPAIR_SUGGESTION.USER(
|
|
320
620
|
stepDescription,
|
|
321
621
|
stepCode,
|
|
322
622
|
errorMessage,
|
|
@@ -327,9 +627,9 @@ export class LLMFacade {
|
|
|
327
627
|
};
|
|
328
628
|
|
|
329
629
|
const response = await this.callLLM(request);
|
|
330
|
-
|
|
331
|
-
const parsed = JSON.parse(response) as any;
|
|
332
|
-
|
|
630
|
+
this.log(`🤖 LLM Repair Response: ${response.answer}`);
|
|
631
|
+
const parsed = JSON.parse(response.answer) as any;
|
|
632
|
+
this.log(`🤖 Parsed Repair Action: ${JSON.stringify(parsed)}`);
|
|
333
633
|
|
|
334
634
|
// Convert string operation to enum
|
|
335
635
|
if (parsed.action && parsed.action.operation) {
|
|
@@ -357,16 +657,16 @@ export class LLMFacade {
|
|
|
357
657
|
async assessRepairConfidence(
|
|
358
658
|
originalScript: string,
|
|
359
659
|
updatedScript: string,
|
|
360
|
-
model: string =
|
|
660
|
+
model: string = DEFAULT_MODEL
|
|
361
661
|
): Promise<RepairConfidenceResponse> {
|
|
362
|
-
const request:
|
|
662
|
+
const request: LLMRequest = {
|
|
363
663
|
model,
|
|
364
|
-
|
|
365
|
-
|
|
664
|
+
systemPrompt: PROMPTS.REPAIR_CONFIDENCE.SYSTEM,
|
|
665
|
+
userPrompt: PROMPTS.REPAIR_CONFIDENCE.USER(originalScript, updatedScript)
|
|
366
666
|
};
|
|
367
667
|
|
|
368
668
|
const response = await this.callLLM(request);
|
|
369
|
-
return JSON.parse(response) as RepairConfidenceResponse;
|
|
669
|
+
return JSON.parse(response.answer) as RepairConfidenceResponse;
|
|
370
670
|
}
|
|
371
671
|
|
|
372
672
|
/**
|
|
@@ -378,18 +678,18 @@ export class LLMFacade {
|
|
|
378
678
|
newRepairAdvice: string,
|
|
379
679
|
model: string = 'gpt-4o-mini'
|
|
380
680
|
): Promise<string> {
|
|
381
|
-
const request:
|
|
681
|
+
const request: LLMRequest = {
|
|
382
682
|
model,
|
|
383
|
-
|
|
384
|
-
|
|
683
|
+
systemPrompt: PROMPTS.FINAL_SCRIPT.SYSTEM,
|
|
684
|
+
userPrompt: PROMPTS.FINAL_SCRIPT.USER(originalScript, updatedScript, newRepairAdvice)
|
|
385
685
|
};
|
|
386
686
|
|
|
387
687
|
const response = await this.callLLM(request);
|
|
388
688
|
try {
|
|
389
|
-
const parsed = JSON.parse(response);
|
|
689
|
+
const parsed = JSON.parse(response.answer);
|
|
390
690
|
return parsed.script || updatedScript;
|
|
391
691
|
} catch (error) {
|
|
392
|
-
|
|
692
|
+
this.log(`Failed to parse final script response: ${error}`, 'error');
|
|
393
693
|
return updatedScript;
|
|
394
694
|
}
|
|
395
695
|
}
|
|
@@ -435,13 +735,87 @@ LEARNING FROM FAILURES:
|
|
|
435
735
|
|
|
436
736
|
const errorText = errors.join(' | ');
|
|
437
737
|
|
|
738
|
+
// Detect if we're repeatedly looking for elements that don't exist
|
|
739
|
+
const attemptedCommands = currentStep?.attempts
|
|
740
|
+
?.map(a => a.command)
|
|
741
|
+
.filter(Boolean) || [];
|
|
742
|
+
|
|
743
|
+
const lookingForNonExistent = attemptedCommands.some(cmd =>
|
|
744
|
+
cmd?.includes('getByText') ||
|
|
745
|
+
cmd?.includes('toBeVisible') ||
|
|
746
|
+
cmd?.includes('waitFor')
|
|
747
|
+
) && errors.some(err =>
|
|
748
|
+
err.includes('not found') ||
|
|
749
|
+
err.includes('Timeout') ||
|
|
750
|
+
err.includes('Expected: visible')
|
|
751
|
+
);
|
|
752
|
+
|
|
753
|
+
let hallucinationWarning = '';
|
|
754
|
+
if (lookingForNonExistent && attemptedCommands.length >= 2) {
|
|
755
|
+
hallucinationWarning = `
|
|
756
|
+
⚠️ HALLUCINATION ALERT:
|
|
757
|
+
You've made ${attemptedCommands.length} attempts trying to find/verify elements that don't exist.
|
|
758
|
+
STOP looking for these elements. They are NOT in the DOM.
|
|
759
|
+
Instead:
|
|
760
|
+
- Check if the goal is ALREADY COMPLETE (action succeeded = goal done)
|
|
761
|
+
- Use alternative verification (state changes, network, page load)
|
|
762
|
+
- Move on if the primary action succeeded
|
|
763
|
+
`;
|
|
764
|
+
}
|
|
765
|
+
|
|
438
766
|
return `ERROR CONTEXT:
|
|
439
767
|
Last Error: ${errorText}
|
|
440
|
-
|
|
768
|
+
${hallucinationWarning}
|
|
441
769
|
ANALYZE THE ERROR AND ADAPT:
|
|
442
770
|
- Study the error message to understand what went wrong
|
|
771
|
+
- If element "not found" after 2+ attempts, it probably doesn't exist - stop looking for it
|
|
443
772
|
- Try a completely different approach than what failed
|
|
444
773
|
- Consider alternative selectors, timing, or interaction methods
|
|
445
774
|
- Never repeat the exact same command that failed`;
|
|
446
775
|
}
|
|
776
|
+
|
|
777
|
+
/**
|
|
778
|
+
* Cleanup generated script - remove redundancies and make minor adjustments
|
|
779
|
+
*/
|
|
780
|
+
async cleanupScript(script: string, model?: string): Promise<{ script: string; changes: string[]; skipped?: string }> {
|
|
781
|
+
try {
|
|
782
|
+
const response = await this.llmProvider.callLLM({
|
|
783
|
+
model: model || DEFAULT_MODEL,
|
|
784
|
+
systemPrompt: PROMPTS.SCRIPT_CLEANUP.SYSTEM,
|
|
785
|
+
userPrompt: PROMPTS.SCRIPT_CLEANUP.USER(script)
|
|
786
|
+
});
|
|
787
|
+
|
|
788
|
+
// Parse JSON response
|
|
789
|
+
const jsonMatch = response.answer.match(/\{[\s\S]*\}/);
|
|
790
|
+
if (!jsonMatch) {
|
|
791
|
+
console.log('[LLMFacade] Cleanup response not in JSON format, returning original script');
|
|
792
|
+
return { script, changes: [], skipped: 'Response not in JSON format' };
|
|
793
|
+
}
|
|
794
|
+
|
|
795
|
+
const parsed = JSON.parse(jsonMatch[0]);
|
|
796
|
+
|
|
797
|
+
// Validate response
|
|
798
|
+
if (!parsed.script) {
|
|
799
|
+
console.log('[LLMFacade] Cleanup response missing script field, returning original');
|
|
800
|
+
return { script, changes: [], skipped: 'Invalid response format' };
|
|
801
|
+
}
|
|
802
|
+
|
|
803
|
+
console.log(`[LLMFacade] Script cleanup completed. Changes: ${parsed.changes?.length || 0}`);
|
|
804
|
+
if (parsed.changes && parsed.changes.length > 0) {
|
|
805
|
+
parsed.changes.forEach((change: string, i: number) => {
|
|
806
|
+
console.log(`[LLMFacade] ${i + 1}. ${change}`);
|
|
807
|
+
});
|
|
808
|
+
}
|
|
809
|
+
|
|
810
|
+
return {
|
|
811
|
+
script: parsed.script,
|
|
812
|
+
changes: parsed.changes || [],
|
|
813
|
+
skipped: parsed.skipped
|
|
814
|
+
};
|
|
815
|
+
} catch (error: any) {
|
|
816
|
+
console.error('[LLMFacade] Script cleanup failed:', error.message);
|
|
817
|
+
// Return original script on error
|
|
818
|
+
return { script, changes: [], skipped: `Error: ${error.message}` };
|
|
819
|
+
}
|
|
820
|
+
}
|
|
447
821
|
}
|