testchimp-runner-core 0.0.34 → 0.0.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/execution-service.d.ts +1 -4
- package/dist/execution-service.d.ts.map +1 -1
- package/dist/execution-service.js +155 -468
- package/dist/execution-service.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -1
- package/dist/index.js.map +1 -1
- package/dist/orchestrator/decision-parser.d.ts +18 -0
- package/dist/orchestrator/decision-parser.d.ts.map +1 -0
- package/dist/orchestrator/decision-parser.js +127 -0
- package/dist/orchestrator/decision-parser.js.map +1 -0
- package/dist/orchestrator/index.d.ts +4 -2
- package/dist/orchestrator/index.d.ts.map +1 -1
- package/dist/orchestrator/index.js +14 -2
- package/dist/orchestrator/index.js.map +1 -1
- package/dist/orchestrator/orchestrator-agent.d.ts +17 -14
- package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-agent.js +534 -204
- package/dist/orchestrator/orchestrator-agent.js.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.d.ts +14 -2
- package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.js +529 -247
- package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
- package/dist/orchestrator/page-som-handler.d.ts +106 -0
- package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
- package/dist/orchestrator/page-som-handler.js +1353 -0
- package/dist/orchestrator/page-som-handler.js.map +1 -0
- package/dist/orchestrator/som-types.d.ts +149 -0
- package/dist/orchestrator/som-types.d.ts.map +1 -0
- package/dist/orchestrator/som-types.js +87 -0
- package/dist/orchestrator/som-types.js.map +1 -0
- package/dist/orchestrator/tool-registry.d.ts +2 -0
- package/dist/orchestrator/tool-registry.d.ts.map +1 -1
- package/dist/orchestrator/tool-registry.js.map +1 -1
- package/dist/orchestrator/tools/index.d.ts +4 -1
- package/dist/orchestrator/tools/index.d.ts.map +1 -1
- package/dist/orchestrator/tools/index.js +7 -2
- package/dist/orchestrator/tools/index.js.map +1 -1
- package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
- package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
- package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
- package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
- package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
- package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
- package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
- package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
- package/dist/orchestrator/types.d.ts +23 -1
- package/dist/orchestrator/types.d.ts.map +1 -1
- package/dist/orchestrator/types.js +11 -1
- package/dist/orchestrator/types.js.map +1 -1
- package/dist/scenario-service.d.ts +5 -0
- package/dist/scenario-service.d.ts.map +1 -1
- package/dist/scenario-service.js +17 -0
- package/dist/scenario-service.js.map +1 -1
- package/dist/scenario-worker-class.d.ts +4 -0
- package/dist/scenario-worker-class.d.ts.map +1 -1
- package/dist/scenario-worker-class.js +18 -3
- package/dist/scenario-worker-class.js.map +1 -1
- package/dist/testing/agent-tester.d.ts +35 -0
- package/dist/testing/agent-tester.d.ts.map +1 -0
- package/dist/testing/agent-tester.js +84 -0
- package/dist/testing/agent-tester.js.map +1 -0
- package/dist/testing/ref-translator-tester.d.ts +44 -0
- package/dist/testing/ref-translator-tester.d.ts.map +1 -0
- package/dist/testing/ref-translator-tester.js +104 -0
- package/dist/testing/ref-translator-tester.js.map +1 -0
- package/dist/utils/hierarchical-selector.d.ts +47 -0
- package/dist/utils/hierarchical-selector.d.ts.map +1 -0
- package/dist/utils/hierarchical-selector.js +212 -0
- package/dist/utils/hierarchical-selector.js.map +1 -0
- package/dist/utils/page-info-retry.d.ts +14 -0
- package/dist/utils/page-info-retry.d.ts.map +1 -0
- package/dist/utils/page-info-retry.js +60 -0
- package/dist/utils/page-info-retry.js.map +1 -0
- package/dist/utils/page-info-utils.d.ts +1 -0
- package/dist/utils/page-info-utils.d.ts.map +1 -1
- package/dist/utils/page-info-utils.js +46 -18
- package/dist/utils/page-info-utils.js.map +1 -1
- package/dist/utils/ref-attacher.d.ts +21 -0
- package/dist/utils/ref-attacher.d.ts.map +1 -0
- package/dist/utils/ref-attacher.js +149 -0
- package/dist/utils/ref-attacher.js.map +1 -0
- package/dist/utils/ref-translator.d.ts +49 -0
- package/dist/utils/ref-translator.d.ts.map +1 -0
- package/dist/utils/ref-translator.js +276 -0
- package/dist/utils/ref-translator.js.map +1 -0
- package/package.json +6 -1
- package/RELEASE_0.0.26.md +0 -165
- package/RELEASE_0.0.27.md +0 -236
- package/RELEASE_0.0.28.md +0 -286
- package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
- package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
- package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
- package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
- package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
- package/plandocs/INTEGRATION_COMPLETE.md +0 -322
- package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
- package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
- package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
- package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
- package/plandocs/PHASE_1_COMPLETE.md +0 -165
- package/plandocs/PHASE_1_SUMMARY.md +0 -184
- package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
- package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
- package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
- package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
- package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
- package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
- package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
- package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
- package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
- package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
- package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
- package/src/auth-config.ts +0 -84
- package/src/credit-usage-service.ts +0 -188
- package/src/env-loader.ts +0 -103
- package/src/execution-service.ts +0 -1413
- package/src/file-handler.ts +0 -104
- package/src/index.ts +0 -422
- package/src/llm-facade.ts +0 -821
- package/src/llm-provider.ts +0 -53
- package/src/model-constants.ts +0 -35
- package/src/orchestrator/index.ts +0 -34
- package/src/orchestrator/orchestrator-agent.ts +0 -862
- package/src/orchestrator/orchestrator-agent.ts.backup +0 -1386
- package/src/orchestrator/orchestrator-prompts.ts +0 -474
- package/src/orchestrator/tool-registry.ts +0 -182
- package/src/orchestrator/tools/check-page-ready.ts +0 -75
- package/src/orchestrator/tools/extract-data.ts +0 -92
- package/src/orchestrator/tools/index.ts +0 -12
- package/src/orchestrator/tools/inspect-page.ts +0 -42
- package/src/orchestrator/tools/recall-history.ts +0 -72
- package/src/orchestrator/tools/take-screenshot.ts +0 -128
- package/src/orchestrator/tools/verify-action-result.ts +0 -159
- package/src/orchestrator/types.ts +0 -248
- package/src/playwright-mcp-service.ts +0 -224
- package/src/progress-reporter.ts +0 -144
- package/src/prompts.ts +0 -842
- package/src/providers/backend-proxy-llm-provider.ts +0 -91
- package/src/providers/local-llm-provider.ts +0 -38
- package/src/scenario-service.ts +0 -232
- package/src/scenario-worker-class.ts +0 -1089
- package/src/script-utils.ts +0 -203
- package/src/types.ts +0 -239
- package/src/utils/browser-utils.ts +0 -348
- package/src/utils/coordinate-converter.ts +0 -162
- package/src/utils/page-info-utils.ts +0 -250
- package/testchimp-runner-core-0.0.33.tgz +0 -0
- package/tsconfig.json +0 -19
package/src/llm-facade.ts
DELETED
|
@@ -1,821 +0,0 @@
|
|
|
1
|
-
import { PROMPTS } from './prompts';
|
|
2
|
-
import { PageInfo } from './utils/page-info-utils';
|
|
3
|
-
import { StepOperation } from './types';
|
|
4
|
-
import { DEFAULT_MODEL, DEFAULT_SIMPLER_MODEL, VISION_MODEL } from './model-constants';
|
|
5
|
-
import { LLMProvider, LLMRequest, LLMResponse } from './llm-provider';
|
|
6
|
-
|
|
7
|
-
// LLM Response interfaces
|
|
8
|
-
export interface LLMScenarioBreakdownResponse {
|
|
9
|
-
steps: string[];
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
export interface LLMPlaywrightCommandResponse {
|
|
13
|
-
command: string;
|
|
14
|
-
reasoning?: string;
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
export interface LLMTestNameResponse {
|
|
18
|
-
testName: string;
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
export interface RepairSuggestionResponse {
|
|
22
|
-
shouldContinue: boolean;
|
|
23
|
-
reason: string;
|
|
24
|
-
action: {
|
|
25
|
-
operation: StepOperation;
|
|
26
|
-
stepIndex?: number;
|
|
27
|
-
newStep?: {
|
|
28
|
-
description: string;
|
|
29
|
-
code: string;
|
|
30
|
-
};
|
|
31
|
-
insertAfterIndex?: number;
|
|
32
|
-
};
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
export interface RepairConfidenceResponse {
|
|
36
|
-
confidence: number;
|
|
37
|
-
advice: string;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
export interface GoalCompletionResponse {
|
|
41
|
-
isComplete: boolean;
|
|
42
|
-
reason: string;
|
|
43
|
-
nextSubGoal?: string;
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
export interface ScreenshotNeedResponse {
|
|
47
|
-
needsScreenshot: boolean;
|
|
48
|
-
reason: string;
|
|
49
|
-
alternativeApproach?: string;
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
export interface VisionDiagnosticResponse {
|
|
53
|
-
visualAnalysis: string; // What the supervisor sees in the screenshot
|
|
54
|
-
rootCause: string; // Why previous attempts failed
|
|
55
|
-
specificInstructions: string; // Exact instructions for the worker agent
|
|
56
|
-
recommendedApproach: string; // What strategy to use (selector-based, state-based, etc.)
|
|
57
|
-
elementsFound: string[]; // What elements are actually visible
|
|
58
|
-
elementsNotFound: string[]; // What elements were expected but not visible
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
export interface ScenarioStep {
|
|
62
|
-
stepNumber: number;
|
|
63
|
-
description: string;
|
|
64
|
-
playwrightCommand?: string;
|
|
65
|
-
success?: boolean;
|
|
66
|
-
error?: string;
|
|
67
|
-
retryCount?: number;
|
|
68
|
-
attempts?: Array<{
|
|
69
|
-
attemptNumber: number;
|
|
70
|
-
command?: string;
|
|
71
|
-
success: boolean;
|
|
72
|
-
error?: string;
|
|
73
|
-
timestamp: number;
|
|
74
|
-
}>;
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
export class LLMFacade {
|
|
78
|
-
public llmProvider: LLMProvider; // Expose for orchestrator direct access
|
|
79
|
-
private logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
|
|
80
|
-
private tokenUsageCallback?: (inputTokens: number, outputTokens: number, includesImage: boolean) => void;
|
|
81
|
-
|
|
82
|
-
constructor(llmProvider: LLMProvider) {
|
|
83
|
-
this.llmProvider = llmProvider;
|
|
84
|
-
this.log('LLMFacade initialized with pluggable LLM provider');
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
/**
|
|
88
|
-
* Set token usage callback for tracking
|
|
89
|
-
*/
|
|
90
|
-
setTokenUsageCallback(callback: (inputTokens: number, outputTokens: number, includesImage: boolean) => void): void {
|
|
91
|
-
this.tokenUsageCallback = callback;
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
/**
|
|
95
|
-
* Set a logger callback for capturing execution logs
|
|
96
|
-
*/
|
|
97
|
-
setLogger(logger: (message: string, level?: 'log' | 'error' | 'warn') => void): void {
|
|
98
|
-
this.logger = logger;
|
|
99
|
-
this.llmProvider.setLogger?.(logger);
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
/**
|
|
103
|
-
* Log a message using the configured logger
|
|
104
|
-
*/
|
|
105
|
-
private log(message: string, level: 'log' | 'error' | 'warn' = 'log'): void {
|
|
106
|
-
if (this.logger) {
|
|
107
|
-
this.logger(message, level);
|
|
108
|
-
}
|
|
109
|
-
// Console fallback for debug visibility
|
|
110
|
-
if (level === 'error') {
|
|
111
|
-
console.error(message);
|
|
112
|
-
} else if (level === 'warn') {
|
|
113
|
-
console.warn(message);
|
|
114
|
-
} else {
|
|
115
|
-
console.log(message);
|
|
116
|
-
}
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
private async callLLM(request: LLMRequest): Promise<LLMResponse> {
|
|
120
|
-
try {
|
|
121
|
-
const response = await this.llmProvider.callLLM(request);
|
|
122
|
-
|
|
123
|
-
// Report token usage if callback is set
|
|
124
|
-
if (response.usage && this.tokenUsageCallback) {
|
|
125
|
-
this.tokenUsageCallback(
|
|
126
|
-
response.usage.inputTokens,
|
|
127
|
-
response.usage.outputTokens,
|
|
128
|
-
!!request.imageUrl
|
|
129
|
-
);
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
return response;
|
|
133
|
-
} catch (error: any) {
|
|
134
|
-
// Let provider handle its own error messages, just re-throw
|
|
135
|
-
this.log(`LLM call failed: ${error}`, 'error');
|
|
136
|
-
throw error;
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
/**
|
|
141
|
-
* Generate a test name from scenario description
|
|
142
|
-
*/
|
|
143
|
-
async generateTestName(scenario: string, model: string = DEFAULT_SIMPLER_MODEL): Promise<string> {
|
|
144
|
-
this.log('Generating test name with LLM...');
|
|
145
|
-
|
|
146
|
-
const request: LLMRequest = {
|
|
147
|
-
model,
|
|
148
|
-
systemPrompt: PROMPTS.TEST_NAME_GENERATION.SYSTEM,
|
|
149
|
-
userPrompt: PROMPTS.TEST_NAME_GENERATION.USER(scenario)
|
|
150
|
-
};
|
|
151
|
-
|
|
152
|
-
try {
|
|
153
|
-
const response = await this.callLLM(request);
|
|
154
|
-
const testNameResponse = JSON.parse(response.answer) as LLMTestNameResponse;
|
|
155
|
-
return testNameResponse.testName;
|
|
156
|
-
} catch (error) {
|
|
157
|
-
this.log(`Failed to generate test name: ${error}`, 'error');
|
|
158
|
-
// Fallback to a simple generated name
|
|
159
|
-
return `Test: ${scenario.substring(0, 50)}...`;
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
/**
|
|
164
|
-
* Generate hashtags for semantic grouping
|
|
165
|
-
*/
|
|
166
|
-
async generateHashtags(scenario: string, model: string = DEFAULT_SIMPLER_MODEL): Promise<string[]> {
|
|
167
|
-
this.log('Generating hashtags with LLM...');
|
|
168
|
-
|
|
169
|
-
const request: LLMRequest = {
|
|
170
|
-
model,
|
|
171
|
-
systemPrompt: PROMPTS.HASHTAG_GENERATION.SYSTEM,
|
|
172
|
-
userPrompt: PROMPTS.HASHTAG_GENERATION.USER(scenario)
|
|
173
|
-
};
|
|
174
|
-
|
|
175
|
-
try {
|
|
176
|
-
const response = await this.callLLM(request);
|
|
177
|
-
const hashtagResponse = JSON.parse(response.answer) as { hashtags: string[] };
|
|
178
|
-
return hashtagResponse.hashtags || [];
|
|
179
|
-
} catch (error) {
|
|
180
|
-
this.log(`Failed to generate hashtags: ${error}`, 'error');
|
|
181
|
-
// Fallback to empty array
|
|
182
|
-
return [];
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
/**
|
|
187
|
-
* Check if a goal has been completed based on actions taken and current page state
|
|
188
|
-
*/
|
|
189
|
-
async checkGoalCompletion(
|
|
190
|
-
goalDescription: string,
|
|
191
|
-
completedActions: string[],
|
|
192
|
-
pageInfo: any,
|
|
193
|
-
model: string = DEFAULT_MODEL
|
|
194
|
-
): Promise<GoalCompletionResponse> {
|
|
195
|
-
this.log('Checking goal completion with LLM...');
|
|
196
|
-
|
|
197
|
-
const request: LLMRequest = {
|
|
198
|
-
model,
|
|
199
|
-
systemPrompt: PROMPTS.GOAL_COMPLETION_CHECK.SYSTEM,
|
|
200
|
-
userPrompt: PROMPTS.GOAL_COMPLETION_CHECK.USER(goalDescription, completedActions, pageInfo)
|
|
201
|
-
};
|
|
202
|
-
|
|
203
|
-
try {
|
|
204
|
-
const response = await this.callLLM(request);
|
|
205
|
-
return JSON.parse(response.answer) as GoalCompletionResponse;
|
|
206
|
-
} catch (error) {
|
|
207
|
-
this.log(`Failed to check goal completion: ${error}`, 'error');
|
|
208
|
-
// Conservative fallback - assume not complete if we can't determine
|
|
209
|
-
return {
|
|
210
|
-
isComplete: false,
|
|
211
|
-
reason: 'Error checking completion status'
|
|
212
|
-
};
|
|
213
|
-
}
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
/**
|
|
217
|
-
* Check goal completion with visual verification (uses vision model)
|
|
218
|
-
*/
|
|
219
|
-
async checkGoalCompletionWithVision(
|
|
220
|
-
goalDescription: string,
|
|
221
|
-
completedActions: string[],
|
|
222
|
-
pageInfo: any,
|
|
223
|
-
imageDataUrl: string,
|
|
224
|
-
model: string = VISION_MODEL
|
|
225
|
-
): Promise<GoalCompletionResponse> {
|
|
226
|
-
this.log(`👔 Checking goal completion with vision (${model})...`);
|
|
227
|
-
|
|
228
|
-
const request: LLMRequest = {
|
|
229
|
-
model,
|
|
230
|
-
systemPrompt: `You are checking if a test automation goal has been completed by analyzing both DOM state and visual appearance.
|
|
231
|
-
|
|
232
|
-
CRITICAL: For action goals (login, submit, click, navigate), check if the PRIMARY ACTION and its SIDE EFFECTS are complete:
|
|
233
|
-
- "Login" = Fill fields AND click button AND verify navigation/page change
|
|
234
|
-
- "Submit form" = Fill fields AND click submit AND verify submission (success message/page change)
|
|
235
|
-
- "Click X" = Click X AND verify expected page change or UI update
|
|
236
|
-
|
|
237
|
-
For verification goals (verify, check, confirm), verify the VISUAL PRESENCE of expected elements.`,
|
|
238
|
-
userPrompt: `GOAL: ${goalDescription}
|
|
239
|
-
|
|
240
|
-
ACTIONS COMPLETED:
|
|
241
|
-
${completedActions.map((action, i) => `${i + 1}. ${action}`).join('\n')}
|
|
242
|
-
|
|
243
|
-
CURRENT PAGE STATE:
|
|
244
|
-
URL: ${pageInfo.url}
|
|
245
|
-
Title: ${pageInfo.title}
|
|
246
|
-
Interactive Elements:
|
|
247
|
-
${pageInfo.formattedElements}
|
|
248
|
-
|
|
249
|
-
Based on the screenshot AND page state, is this goal COMPLETE?
|
|
250
|
-
|
|
251
|
-
Respond ONLY with valid JSON:
|
|
252
|
-
{
|
|
253
|
-
"isComplete": true/false,
|
|
254
|
-
"reason": "Brief explanation based on what you SEE in the screenshot and DOM",
|
|
255
|
-
"nextSubGoal": "If incomplete, what specific next action is needed?"
|
|
256
|
-
}`,
|
|
257
|
-
imageUrl: imageDataUrl
|
|
258
|
-
};
|
|
259
|
-
|
|
260
|
-
try {
|
|
261
|
-
const response = await this.callLLM(request);
|
|
262
|
-
const parsed = JSON.parse(response.answer) as GoalCompletionResponse;
|
|
263
|
-
this.log(`👔 Vision goal check result: ${parsed.isComplete ? 'COMPLETE ✅' : 'INCOMPLETE ❌'}`);
|
|
264
|
-
return parsed;
|
|
265
|
-
} catch (error) {
|
|
266
|
-
this.log(`Failed to check goal completion with vision: ${error}`, 'error');
|
|
267
|
-
// Conservative fallback - assume not complete if we can't determine
|
|
268
|
-
return {
|
|
269
|
-
isComplete: false,
|
|
270
|
-
reason: 'Error checking completion status with vision'
|
|
271
|
-
};
|
|
272
|
-
}
|
|
273
|
-
}
|
|
274
|
-
|
|
275
|
-
/**
|
|
276
|
-
* Ask LLM if a screenshot would help debug the current failure
|
|
277
|
-
*/
|
|
278
|
-
async assessScreenshotNeed(
|
|
279
|
-
stepDescription: string,
|
|
280
|
-
errorMessage: string,
|
|
281
|
-
attemptCount: number,
|
|
282
|
-
pageInfo: any,
|
|
283
|
-
model: string = DEFAULT_SIMPLER_MODEL
|
|
284
|
-
): Promise<ScreenshotNeedResponse> {
|
|
285
|
-
this.log('Assessing screenshot need with LLM...');
|
|
286
|
-
|
|
287
|
-
const request: LLMRequest = {
|
|
288
|
-
model,
|
|
289
|
-
systemPrompt: PROMPTS.SCREENSHOT_NEED_ASSESSMENT.SYSTEM,
|
|
290
|
-
userPrompt: PROMPTS.SCREENSHOT_NEED_ASSESSMENT.USER(stepDescription, errorMessage, attemptCount, pageInfo)
|
|
291
|
-
};
|
|
292
|
-
|
|
293
|
-
try {
|
|
294
|
-
const response = await this.callLLM(request);
|
|
295
|
-
return JSON.parse(response.answer) as ScreenshotNeedResponse;
|
|
296
|
-
} catch (error) {
|
|
297
|
-
this.log(`Failed to assess screenshot need: ${error}`, 'error');
|
|
298
|
-
// Conservative fallback - don't use expensive screenshot unless we're sure
|
|
299
|
-
return {
|
|
300
|
-
needsScreenshot: false,
|
|
301
|
-
reason: 'Error assessing need, defaulting to no screenshot'
|
|
302
|
-
};
|
|
303
|
-
}
|
|
304
|
-
}
|
|
305
|
-
|
|
306
|
-
/**
|
|
307
|
-
* Get diagnostic analysis from screenshot (supervisor role)
|
|
308
|
-
*/
|
|
309
|
-
async getVisionDiagnostics(
|
|
310
|
-
stepDescription: string,
|
|
311
|
-
pageInfo: any,
|
|
312
|
-
previousSteps: any[],
|
|
313
|
-
lastError: string | undefined,
|
|
314
|
-
imageDataUrl: string,
|
|
315
|
-
model: string = VISION_MODEL
|
|
316
|
-
): Promise<VisionDiagnosticResponse> {
|
|
317
|
-
this.log('👔 SUPERVISOR: Analyzing screenshot for diagnostic insights...');
|
|
318
|
-
|
|
319
|
-
const previousCommands = previousSteps
|
|
320
|
-
.map(s => s.playwrightCommand)
|
|
321
|
-
.filter(Boolean)
|
|
322
|
-
.join('\n');
|
|
323
|
-
|
|
324
|
-
const attemptHistory = previousSteps.length > 0
|
|
325
|
-
? `Previous attempts context: ${previousSteps.length} commands executed`
|
|
326
|
-
: '';
|
|
327
|
-
|
|
328
|
-
const errorContext = lastError
|
|
329
|
-
? `Last Error: ${lastError}`
|
|
330
|
-
: '';
|
|
331
|
-
|
|
332
|
-
const request: LLMRequest = {
|
|
333
|
-
model,
|
|
334
|
-
systemPrompt: PROMPTS.VISION_DIAGNOSTIC_ANALYSIS.SYSTEM,
|
|
335
|
-
userPrompt: PROMPTS.VISION_DIAGNOSTIC_ANALYSIS.USER(
|
|
336
|
-
stepDescription,
|
|
337
|
-
pageInfo,
|
|
338
|
-
previousCommands,
|
|
339
|
-
attemptHistory,
|
|
340
|
-
errorContext
|
|
341
|
-
),
|
|
342
|
-
imageUrl: imageDataUrl
|
|
343
|
-
};
|
|
344
|
-
|
|
345
|
-
try {
|
|
346
|
-
const response = await this.callLLM(request);
|
|
347
|
-
const diagnostics = JSON.parse(response.answer) as VisionDiagnosticResponse;
|
|
348
|
-
|
|
349
|
-
// Log supervisor's findings
|
|
350
|
-
this.log(`👔 SUPERVISOR ANALYSIS:`);
|
|
351
|
-
this.log(` 📸 Visual: ${diagnostics.visualAnalysis}`);
|
|
352
|
-
this.log(` 🔍 Root cause: ${diagnostics.rootCause}`);
|
|
353
|
-
this.log(` 📋 Instructions: ${diagnostics.specificInstructions}`);
|
|
354
|
-
this.log(` 💡 Approach: ${diagnostics.recommendedApproach}`);
|
|
355
|
-
if (diagnostics.elementsFound?.length > 0) {
|
|
356
|
-
this.log(` ✅ Elements found: ${diagnostics.elementsFound.join(', ')}`);
|
|
357
|
-
}
|
|
358
|
-
if (diagnostics.elementsNotFound?.length > 0) {
|
|
359
|
-
this.log(` ❌ Elements NOT found: ${diagnostics.elementsNotFound.join(', ')}`);
|
|
360
|
-
}
|
|
361
|
-
|
|
362
|
-
return diagnostics;
|
|
363
|
-
} catch (error) {
|
|
364
|
-
this.log(`Failed to get vision diagnostics: ${error}`, 'error');
|
|
365
|
-
throw new Error(`Vision diagnostic analysis failed: ${error}`);
|
|
366
|
-
}
|
|
367
|
-
}
|
|
368
|
-
|
|
369
|
-
/**
|
|
370
|
-
* Generate command based on supervisor's instructions
|
|
371
|
-
*/
|
|
372
|
-
async generateCommandFromSupervisorInstructions(
|
|
373
|
-
stepDescription: string,
|
|
374
|
-
supervisorDiagnostics: VisionDiagnosticResponse,
|
|
375
|
-
pageInfo: any,
|
|
376
|
-
model: string = DEFAULT_MODEL
|
|
377
|
-
): Promise<string> {
|
|
378
|
-
this.log('🔨 WORKER: Generating command based on supervisor instructions...');
|
|
379
|
-
|
|
380
|
-
const request: LLMRequest = {
|
|
381
|
-
model,
|
|
382
|
-
systemPrompt: PROMPTS.PLAYWRIGHT_COMMAND_WITH_SUPERVISOR.SYSTEM,
|
|
383
|
-
userPrompt: PROMPTS.PLAYWRIGHT_COMMAND_WITH_SUPERVISOR.USER(
|
|
384
|
-
stepDescription,
|
|
385
|
-
supervisorDiagnostics.specificInstructions,
|
|
386
|
-
supervisorDiagnostics.visualAnalysis,
|
|
387
|
-
supervisorDiagnostics.elementsFound || [],
|
|
388
|
-
supervisorDiagnostics.elementsNotFound || [],
|
|
389
|
-
pageInfo
|
|
390
|
-
)
|
|
391
|
-
};
|
|
392
|
-
|
|
393
|
-
try {
|
|
394
|
-
const response = await this.callLLM(request);
|
|
395
|
-
const commandResponse = JSON.parse(response.answer) as LLMPlaywrightCommandResponse;
|
|
396
|
-
|
|
397
|
-
if (commandResponse.reasoning) {
|
|
398
|
-
this.log(`🔨 WORKER reasoning: ${commandResponse.reasoning}`);
|
|
399
|
-
}
|
|
400
|
-
|
|
401
|
-
return commandResponse.command;
|
|
402
|
-
} catch (error) {
|
|
403
|
-
this.log(`Failed to generate command from supervisor instructions: ${error}`, 'error');
|
|
404
|
-
throw new Error(`Command generation from supervisor instructions failed: ${error}`);
|
|
405
|
-
}
|
|
406
|
-
}
|
|
407
|
-
|
|
408
|
-
/**
|
|
409
|
-
* Generate Playwright command with vision (uses vision model)
|
|
410
|
-
*/
|
|
411
|
-
async generatePlaywrightCommandWithVision(
|
|
412
|
-
stepDescription: string,
|
|
413
|
-
pageInfo: any,
|
|
414
|
-
previousSteps: any[],
|
|
415
|
-
lastError: string | undefined,
|
|
416
|
-
imageDataUrl: string, // Full data URL: data:image/png;base64,...
|
|
417
|
-
model: string = VISION_MODEL
|
|
418
|
-
): Promise<string> {
|
|
419
|
-
this.log(`⚠️ USING VISION MODE (${model})...`);
|
|
420
|
-
|
|
421
|
-
const previousCommands = previousSteps
|
|
422
|
-
.map(s => s.playwrightCommand)
|
|
423
|
-
.filter(Boolean)
|
|
424
|
-
.join('\n');
|
|
425
|
-
|
|
426
|
-
const attemptHistory = previousSteps.length > 0
|
|
427
|
-
? `Previous attempts context: ${previousSteps.length} commands executed`
|
|
428
|
-
: '';
|
|
429
|
-
|
|
430
|
-
const errorContext = lastError
|
|
431
|
-
? `Last Error: ${lastError}`
|
|
432
|
-
: '';
|
|
433
|
-
|
|
434
|
-
const request: LLMRequest = {
|
|
435
|
-
model,
|
|
436
|
-
systemPrompt: PROMPTS.PLAYWRIGHT_COMMAND_WITH_VISION.SYSTEM,
|
|
437
|
-
userPrompt: PROMPTS.PLAYWRIGHT_COMMAND_WITH_VISION.USER(
|
|
438
|
-
stepDescription,
|
|
439
|
-
pageInfo,
|
|
440
|
-
previousCommands,
|
|
441
|
-
attemptHistory,
|
|
442
|
-
errorContext
|
|
443
|
-
),
|
|
444
|
-
imageUrl: imageDataUrl // Full data URL constructed by client
|
|
445
|
-
};
|
|
446
|
-
|
|
447
|
-
try {
|
|
448
|
-
const response = await this.callLLM(request);
|
|
449
|
-
const commandResponse = JSON.parse(response.answer) as LLMPlaywrightCommandResponse & {
|
|
450
|
-
visualInsights?: string;
|
|
451
|
-
failureRootCause?: string;
|
|
452
|
-
recommendedAlternative?: string;
|
|
453
|
-
};
|
|
454
|
-
|
|
455
|
-
// Log diagnostic insights from vision analysis
|
|
456
|
-
if (commandResponse.visualInsights) {
|
|
457
|
-
this.log(`📸 Visual insights: ${commandResponse.visualInsights}`);
|
|
458
|
-
}
|
|
459
|
-
|
|
460
|
-
if (commandResponse.failureRootCause) {
|
|
461
|
-
this.log(`🔍 Root cause analysis: ${commandResponse.failureRootCause}`);
|
|
462
|
-
}
|
|
463
|
-
|
|
464
|
-
if (commandResponse.recommendedAlternative) {
|
|
465
|
-
this.log(`💡 Recommended alternative: ${commandResponse.recommendedAlternative}`);
|
|
466
|
-
}
|
|
467
|
-
|
|
468
|
-
if (commandResponse.reasoning) {
|
|
469
|
-
this.log(`🧠 Vision-based reasoning: ${commandResponse.reasoning}`);
|
|
470
|
-
}
|
|
471
|
-
|
|
472
|
-
return commandResponse.command;
|
|
473
|
-
} catch (error) {
|
|
474
|
-
this.log(`Failed to generate command with vision: ${error}`, 'error');
|
|
475
|
-
throw new Error(`Vision-enhanced command generation failed: ${error}`);
|
|
476
|
-
}
|
|
477
|
-
}
|
|
478
|
-
|
|
479
|
-
/**
|
|
480
|
-
* Break down scenario into steps
|
|
481
|
-
*/
|
|
482
|
-
async breakdownScenario(scenario: string, model: string = DEFAULT_SIMPLER_MODEL): Promise<ScenarioStep[]> {
|
|
483
|
-
this.log('Breaking down scenario with LLM...');
|
|
484
|
-
this.log(`📝 INPUT SCENARIO: ${scenario}`);
|
|
485
|
-
|
|
486
|
-
const request: LLMRequest = {
|
|
487
|
-
model,
|
|
488
|
-
systemPrompt: PROMPTS.SCENARIO_BREAKDOWN.SYSTEM,
|
|
489
|
-
userPrompt: PROMPTS.SCENARIO_BREAKDOWN.USER(scenario)
|
|
490
|
-
};
|
|
491
|
-
|
|
492
|
-
try {
|
|
493
|
-
const response = await this.callLLM(request);
|
|
494
|
-
this.log(`🤖 RAW LLM RESPONSE: ${response.answer}`);
|
|
495
|
-
|
|
496
|
-
const breakdownResponse = JSON.parse(response.answer) as LLMScenarioBreakdownResponse;
|
|
497
|
-
this.log(`📋 PARSED BREAKDOWN: ${JSON.stringify(breakdownResponse, null, 2)}`);
|
|
498
|
-
|
|
499
|
-
// Validate and clean up steps
|
|
500
|
-
const cleanedSteps = breakdownResponse.steps
|
|
501
|
-
.map(step => step.trim())
|
|
502
|
-
.filter(step => step.length > 0)
|
|
503
|
-
.slice(0, 10); // Limit to 10 steps max
|
|
504
|
-
|
|
505
|
-
this.log(`✅ CLEANED STEPS: ${JSON.stringify(cleanedSteps, null, 2)}`);
|
|
506
|
-
|
|
507
|
-
return cleanedSteps.map((desc, index) => ({
|
|
508
|
-
stepNumber: index + 1,
|
|
509
|
-
description: desc,
|
|
510
|
-
}));
|
|
511
|
-
} catch (error) {
|
|
512
|
-
this.log(`❌ Failed to breakdown scenario: ${error}`, 'error');
|
|
513
|
-
// Fallback to simple breakdown by newlines (preserves URLs)
|
|
514
|
-
const stepDescriptions = scenario.split('\n').map(s => s.trim()).filter(s => s.length > 0);
|
|
515
|
-
this.log(`🔄 FALLBACK STEPS: ${JSON.stringify(stepDescriptions, null, 2)}`);
|
|
516
|
-
return stepDescriptions.map((desc, index) => ({
|
|
517
|
-
stepNumber: index + 1,
|
|
518
|
-
description: desc,
|
|
519
|
-
}));
|
|
520
|
-
}
|
|
521
|
-
}
|
|
522
|
-
|
|
523
|
-
/**
|
|
524
|
-
* Generate Playwright command for a step
|
|
525
|
-
*/
|
|
526
|
-
async generatePlaywrightCommand(
|
|
527
|
-
stepDescription: string,
|
|
528
|
-
pageInfo: PageInfo,
|
|
529
|
-
previousSteps: ScenarioStep[],
|
|
530
|
-
lastError?: string,
|
|
531
|
-
currentStep?: ScenarioStep,
|
|
532
|
-
model: string = DEFAULT_MODEL
|
|
533
|
-
): Promise<string | null> {
|
|
534
|
-
this.log('Generating Playwright command with LLM...');
|
|
535
|
-
|
|
536
|
-
const previousCommands = previousSteps
|
|
537
|
-
.filter(s => s.playwrightCommand && s.success)
|
|
538
|
-
.map(s => `// Step ${s.stepNumber}: ${s.description}\n${s.playwrightCommand}`)
|
|
539
|
-
.join('\n');
|
|
540
|
-
|
|
541
|
-
// Build comprehensive attempt history for current step
|
|
542
|
-
const attemptHistory = this.buildAttemptHistory(currentStep);
|
|
543
|
-
|
|
544
|
-
// Provide raw error context for LLM analysis
|
|
545
|
-
const errorContext = this.buildErrorContext(lastError, currentStep);
|
|
546
|
-
|
|
547
|
-
const prompt = PROMPTS.PLAYWRIGHT_COMMAND.USER(
|
|
548
|
-
stepDescription,
|
|
549
|
-
pageInfo,
|
|
550
|
-
previousCommands,
|
|
551
|
-
attemptHistory,
|
|
552
|
-
errorContext
|
|
553
|
-
);
|
|
554
|
-
|
|
555
|
-
const request: LLMRequest = {
|
|
556
|
-
model,
|
|
557
|
-
systemPrompt: PROMPTS.PLAYWRIGHT_COMMAND.SYSTEM,
|
|
558
|
-
userPrompt: prompt
|
|
559
|
-
};
|
|
560
|
-
|
|
561
|
-
try {
|
|
562
|
-
const response = await this.callLLM(request);
|
|
563
|
-
const commandResponse = JSON.parse(response.answer) as LLMPlaywrightCommandResponse;
|
|
564
|
-
return commandResponse.command;
|
|
565
|
-
} catch (error) {
|
|
566
|
-
this.log(`Failed to generate Playwright command: ${error}`, 'error');
|
|
567
|
-
return null;
|
|
568
|
-
}
|
|
569
|
-
}
|
|
570
|
-
|
|
571
|
-
/**
|
|
572
|
-
* Parse script into steps for AI repair
|
|
573
|
-
*/
|
|
574
|
-
async parseScriptIntoSteps(script: string, model: string = DEFAULT_SIMPLER_MODEL): Promise<Array<{ description: string; code: string; success?: boolean; error?: string }>> {
|
|
575
|
-
this.log('Parsing script into steps with LLM...');
|
|
576
|
-
|
|
577
|
-
const request: LLMRequest = {
|
|
578
|
-
model,
|
|
579
|
-
systemPrompt: PROMPTS.SCRIPT_PARSING.SYSTEM,
|
|
580
|
-
userPrompt: PROMPTS.SCRIPT_PARSING.USER(script)
|
|
581
|
-
};
|
|
582
|
-
|
|
583
|
-
try {
|
|
584
|
-
const response = await this.callLLM(request);
|
|
585
|
-
this.log(`Raw LLM parsing response (first 500 chars): ${response.answer.substring(0, 500)}`);
|
|
586
|
-
|
|
587
|
-
const parsed = JSON.parse(response.answer);
|
|
588
|
-
this.log(`Parsed JSON structure: ${JSON.stringify(parsed, null, 2).substring(0, 1000)}`);
|
|
589
|
-
|
|
590
|
-
// Expect JSON object with steps array
|
|
591
|
-
if (parsed.steps && Array.isArray(parsed.steps)) {
|
|
592
|
-
this.log(`LLM parsing successful, got ${parsed.steps.length} steps`);
|
|
593
|
-
return parsed.steps;
|
|
594
|
-
} else {
|
|
595
|
-
this.log(`Unexpected LLM response format - expected {steps: [...]}: ${JSON.stringify(parsed)}`, 'error');
|
|
596
|
-
return [];
|
|
597
|
-
}
|
|
598
|
-
} catch (error) {
|
|
599
|
-
this.log(`Failed to parse LLM response as JSON: ${error}`, 'error');
|
|
600
|
-
return [];
|
|
601
|
-
}
|
|
602
|
-
}
|
|
603
|
-
|
|
604
|
-
/**
|
|
605
|
-
* Get repair suggestion for a failing step
|
|
606
|
-
*/
|
|
607
|
-
async getRepairSuggestion(
|
|
608
|
-
stepDescription: string,
|
|
609
|
-
stepCode: string,
|
|
610
|
-
errorMessage: string,
|
|
611
|
-
pageInfo: PageInfo,
|
|
612
|
-
failureHistory: string,
|
|
613
|
-
recentRepairs: string,
|
|
614
|
-
model: string = DEFAULT_MODEL
|
|
615
|
-
): Promise<RepairSuggestionResponse> {
|
|
616
|
-
const request: LLMRequest = {
|
|
617
|
-
model,
|
|
618
|
-
systemPrompt: PROMPTS.REPAIR_SUGGESTION.SYSTEM,
|
|
619
|
-
userPrompt: PROMPTS.REPAIR_SUGGESTION.USER(
|
|
620
|
-
stepDescription,
|
|
621
|
-
stepCode,
|
|
622
|
-
errorMessage,
|
|
623
|
-
pageInfo,
|
|
624
|
-
failureHistory,
|
|
625
|
-
recentRepairs,
|
|
626
|
-
)
|
|
627
|
-
};
|
|
628
|
-
|
|
629
|
-
const response = await this.callLLM(request);
|
|
630
|
-
this.log(`🤖 LLM Repair Response: ${response.answer}`);
|
|
631
|
-
const parsed = JSON.parse(response.answer) as any;
|
|
632
|
-
this.log(`🤖 Parsed Repair Action: ${JSON.stringify(parsed)}`);
|
|
633
|
-
|
|
634
|
-
// Convert string operation to enum
|
|
635
|
-
if (parsed.action && parsed.action.operation) {
|
|
636
|
-
switch (parsed.action.operation) {
|
|
637
|
-
case 'MODIFY':
|
|
638
|
-
parsed.action.operation = StepOperation.MODIFY;
|
|
639
|
-
break;
|
|
640
|
-
case 'INSERT':
|
|
641
|
-
parsed.action.operation = StepOperation.INSERT;
|
|
642
|
-
break;
|
|
643
|
-
case 'REMOVE':
|
|
644
|
-
parsed.action.operation = StepOperation.REMOVE;
|
|
645
|
-
break;
|
|
646
|
-
default:
|
|
647
|
-
parsed.action.operation = StepOperation.MODIFY;
|
|
648
|
-
}
|
|
649
|
-
}
|
|
650
|
-
|
|
651
|
-
return parsed as RepairSuggestionResponse;
|
|
652
|
-
}
|
|
653
|
-
|
|
654
|
-
/**
|
|
655
|
-
* Assess repair confidence and generate advice
|
|
656
|
-
*/
|
|
657
|
-
async assessRepairConfidence(
|
|
658
|
-
originalScript: string,
|
|
659
|
-
updatedScript: string,
|
|
660
|
-
model: string = DEFAULT_SIMPLER_MODEL
|
|
661
|
-
): Promise<RepairConfidenceResponse> {
|
|
662
|
-
const request: LLMRequest = {
|
|
663
|
-
model,
|
|
664
|
-
systemPrompt: PROMPTS.REPAIR_CONFIDENCE.SYSTEM,
|
|
665
|
-
userPrompt: PROMPTS.REPAIR_CONFIDENCE.USER(originalScript, updatedScript)
|
|
666
|
-
};
|
|
667
|
-
|
|
668
|
-
const response = await this.callLLM(request);
|
|
669
|
-
return JSON.parse(response.answer) as RepairConfidenceResponse;
|
|
670
|
-
}
|
|
671
|
-
|
|
672
|
-
/**
|
|
673
|
-
* Generate final script with repair advice
|
|
674
|
-
*/
|
|
675
|
-
async generateFinalScript(
|
|
676
|
-
originalScript: string,
|
|
677
|
-
updatedScript: string,
|
|
678
|
-
newRepairAdvice: string,
|
|
679
|
-
model: string = DEFAULT_SIMPLER_MODEL
|
|
680
|
-
): Promise<string> {
|
|
681
|
-
const request: LLMRequest = {
|
|
682
|
-
model,
|
|
683
|
-
systemPrompt: PROMPTS.FINAL_SCRIPT.SYSTEM,
|
|
684
|
-
userPrompt: PROMPTS.FINAL_SCRIPT.USER(originalScript, updatedScript, newRepairAdvice)
|
|
685
|
-
};
|
|
686
|
-
|
|
687
|
-
const response = await this.callLLM(request);
|
|
688
|
-
try {
|
|
689
|
-
const parsed = JSON.parse(response.answer);
|
|
690
|
-
return parsed.script || updatedScript;
|
|
691
|
-
} catch (error) {
|
|
692
|
-
this.log(`Failed to parse final script response: ${error}`, 'error');
|
|
693
|
-
return updatedScript;
|
|
694
|
-
}
|
|
695
|
-
}
|
|
696
|
-
|
|
697
|
-
/**
 * Build a prompt-ready summary of every prior attempt at the current step,
 * followed by guidance telling the LLM to try different approaches.
 *
 * @param currentStep - The step being retried; may be undefined on the
 *                      very first attempt.
 * @returns A multi-line history string, or a fixed "first attempt" notice
 *          when there are no recorded attempts.
 */
private buildAttemptHistory(currentStep?: ScenarioStep): string {
  // No step or no attempts yet → nothing to summarize.
  if (!currentStep || !currentStep.attempts || currentStep.attempts.length === 0) {
    return 'This is the first attempt for this step.';
  }

  // One block per attempt; `index` is unused (attemptNumber comes from the
  // attempt record itself).
  const attempts = currentStep.attempts.map((attempt, index) => {
    const status = attempt.success ? '✅ SUCCESS' : '❌ FAILED';
    return `Attempt ${attempt.attemptNumber} (${status}):
Command: ${attempt.command || 'No command generated'}
${attempt.error ? `Error: ${attempt.error}` : 'No error'}
Timestamp: ${new Date(attempt.timestamp).toISOString()}`;
  }).join('\n\n');

  // The trailing guidance is part of the prompt text sent to the LLM.
  return `Current step attempt history:
${attempts}

LEARNING FROM FAILURES:
- Analyze what went wrong in each attempt
- Try completely different approaches for failed attempts
- If a selector failed, try alternative selectors
- If timing failed, add proper waits
- If element not found, try different strategies`;
}
|
|
723
|
-
|
|
724
|
-
/**
 * Build an error-context block for the LLM from the most recent error(s),
 * including a "hallucination alert" when repeated attempts appear to be
 * searching for elements that are not in the DOM.
 *
 * @param lastError - The most recent error message, if any.
 * @param currentStep - The step whose attempts/error are inspected.
 * @returns A multi-line context string, or '' when there is no error at all.
 */
private buildErrorContext(lastError?: string, currentStep?: ScenarioStep): string {
  // Nothing to report when neither source has an error.
  if (!lastError && (!currentStep || !currentStep.error)) {
    return '';
  }

  const errors = [];
  if (lastError) errors.push(lastError);
  if (currentStep?.error) errors.push(currentStep.error);

  const errorText = errors.join(' | ');

  // Detect if we're repeatedly looking for elements that don't exist
  const attemptedCommands = currentStep?.attempts
    ?.map(a => a.command)
    .filter(Boolean) || [];

  // Heuristic: any lookup/visibility/wait command combined with any
  // not-found/timeout/visibility error suggests the element is absent.
  const lookingForNonExistent = attemptedCommands.some(cmd =>
    cmd?.includes('getByText') ||
    cmd?.includes('toBeVisible') ||
    cmd?.includes('waitFor')
  ) && errors.some(err =>
    err.includes('not found') ||
    err.includes('Timeout') ||
    err.includes('Expected: visible')
  );

  // Only raise the alert after at least two such attempts, to avoid
  // flagging a single transient failure.
  let hallucinationWarning = '';
  if (lookingForNonExistent && attemptedCommands.length >= 2) {
    hallucinationWarning = `
⚠️ HALLUCINATION ALERT:
You've made ${attemptedCommands.length} attempts trying to find/verify elements that don't exist.
STOP looking for these elements. They are NOT in the DOM.
Instead:
- Check if the goal is ALREADY COMPLETE (action succeeded = goal done)
- Use alternative verification (state changes, network, page load)
- Move on if the primary action succeeded
`;
  }

  // Full context block sent verbatim to the LLM.
  return `ERROR CONTEXT:
Last Error: ${errorText}
${hallucinationWarning}
ANALYZE THE ERROR AND ADAPT:
- Study the error message to understand what went wrong
- If element "not found" after 2+ attempts, it probably doesn't exist - stop looking for it
- Try a completely different approach than what failed
- Consider alternative selectors, timing, or interaction methods
- Never repeat the exact same command that failed`;
}
|
|
776
|
-
|
|
777
|
-
/**
|
|
778
|
-
* Cleanup generated script - remove redundancies and make minor adjustments
|
|
779
|
-
*/
|
|
780
|
-
async cleanupScript(script: string, model?: string): Promise<{ script: string; changes: string[]; skipped?: string }> {
|
|
781
|
-
try {
|
|
782
|
-
const response = await this.llmProvider.callLLM({
|
|
783
|
-
model: model || DEFAULT_MODEL,
|
|
784
|
-
systemPrompt: PROMPTS.SCRIPT_CLEANUP.SYSTEM,
|
|
785
|
-
userPrompt: PROMPTS.SCRIPT_CLEANUP.USER(script)
|
|
786
|
-
});
|
|
787
|
-
|
|
788
|
-
// Parse JSON response
|
|
789
|
-
const jsonMatch = response.answer.match(/\{[\s\S]*\}/);
|
|
790
|
-
if (!jsonMatch) {
|
|
791
|
-
console.log('[LLMFacade] Cleanup response not in JSON format, returning original script');
|
|
792
|
-
return { script, changes: [], skipped: 'Response not in JSON format' };
|
|
793
|
-
}
|
|
794
|
-
|
|
795
|
-
const parsed = JSON.parse(jsonMatch[0]);
|
|
796
|
-
|
|
797
|
-
// Validate response
|
|
798
|
-
if (!parsed.script) {
|
|
799
|
-
console.log('[LLMFacade] Cleanup response missing script field, returning original');
|
|
800
|
-
return { script, changes: [], skipped: 'Invalid response format' };
|
|
801
|
-
}
|
|
802
|
-
|
|
803
|
-
console.log(`[LLMFacade] Script cleanup completed. Changes: ${parsed.changes?.length || 0}`);
|
|
804
|
-
if (parsed.changes && parsed.changes.length > 0) {
|
|
805
|
-
parsed.changes.forEach((change: string, i: number) => {
|
|
806
|
-
console.log(`[LLMFacade] ${i + 1}. ${change}`);
|
|
807
|
-
});
|
|
808
|
-
}
|
|
809
|
-
|
|
810
|
-
return {
|
|
811
|
-
script: parsed.script,
|
|
812
|
-
changes: parsed.changes || [],
|
|
813
|
-
skipped: parsed.skipped
|
|
814
|
-
};
|
|
815
|
-
} catch (error: any) {
|
|
816
|
-
console.error('[LLMFacade] Script cleanup failed:', error.message);
|
|
817
|
-
// Return original script on error
|
|
818
|
-
return { script, changes: [], skipped: `Error: ${error.message}` };
|
|
819
|
-
}
|
|
820
|
-
}
|
|
821
|
-
}
|