skopix 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +65 -0
- package/.github/workflows/docker.yml +78 -0
- package/cli/commands/agent.js +378 -0
- package/cli/commands/config.js +67 -0
- package/cli/commands/dashboard.js +3524 -0
- package/cli/commands/init.js +190 -0
- package/cli/commands/report.js +41 -0
- package/cli/commands/run.js +350 -0
- package/cli/index.js +85 -0
- package/cli/ui.js +126 -0
- package/core/auth.js +148 -0
- package/core/browser.js +1049 -0
- package/core/credentials.js +47 -0
- package/core/db.js +503 -0
- package/core/llm.js +641 -0
- package/core/recorder.js +653 -0
- package/core/reporter.js +282 -0
- package/core/tracker.js +768 -0
- package/package.json +54 -0
- package/web/app/index.html +5937 -0
- package/web/index.html +644 -0
- package/web/invite.html +244 -0
- package/web/login.html +271 -0
- package/web/reset.html +222 -0
- package/web/setup.html +300 -0
package/core/llm.js
ADDED
|
@@ -0,0 +1,641 @@
|
|
|
1
|
+
import dotenv from 'dotenv';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import axios from 'axios';
|
|
4
|
+
|
|
5
|
+
const envPath = path.resolve(process.cwd(), '.skopix.env');
|
|
6
|
+
dotenv.config({ path: envPath });
|
|
7
|
+
dotenv.config();
|
|
8
|
+
|
|
9
|
+
/**
 * Routes agent prompts to the configured LLM backend (Gemini, Ollama or OpenAI)
 * and converts the raw completion into a decision object via parseDecision().
 *
 * verify() must be called before decide(): it is the only place that resolves
 * `modelName`, and it checks that the required API key / server is available.
 */
export class LLMRouter {
  /**
   * @param {string} [provider] - 'gemini' | 'ollama' | 'openai'. Falls back to
   *   the SKOPIX_PROVIDER environment variable, then to 'gemini'.
   * @param {string} [modelOverride] - Model name to use instead of the provider default.
   */
  constructor(provider, modelOverride) {
    this.provider = provider || process.env.SKOPIX_PROVIDER || 'gemini';
    this.modelOverride = modelOverride;
    this.modelName = null;
  }

  /**
   * Checks that the selected provider is usable and resolves `this.modelName`.
   *
   * @returns {Promise<void>}
   * @throws {Error} When the API key is missing, Ollama is unreachable, or the
   *   provider name is unknown.
   */
  async verify() {
    switch (this.provider) {
      case 'gemini':
        if (!process.env.GEMINI_API_KEY) throw new Error('GEMINI_API_KEY not set. Run skopix init.');
        this.modelName = this.modelOverride || 'gemini-2.5-flash';
        break;
      case 'ollama':
        this.modelName = this.modelOverride || process.env.OLLAMA_MODEL || 'llama3.1';
        await this._verifyOllama();
        break;
      case 'openai':
        if (!process.env.OPENAI_API_KEY) throw new Error('OPENAI_API_KEY not set. Run skopix init.');
        this.modelName = this.modelOverride || 'gpt-4o-mini';
        break;
      default:
        throw new Error('Unknown provider: ' + this.provider);
    }
  }

  /**
   * Base URL of the Ollama server, without trailing slashes so that
   * `base + '/api/...'` never produces a double slash.
   *
   * @returns {string}
   */
  _ollamaBaseUrl() {
    return (process.env.OLLAMA_BASE_URL || 'http://localhost:11434').replace(/\/+$/, '');
  }

  /**
   * Pings the Ollama server's tag listing to confirm it is reachable.
   *
   * @returns {Promise<void>}
   * @throws {Error} When the request fails; the underlying error is kept as `cause`.
   */
  async _verifyOllama() {
    const base = this._ollamaBaseUrl();
    try {
      await axios.get(base + '/api/tags', { timeout: 5000 });
    } catch (err) {
      throw new Error('Cannot connect to Ollama at ' + base, { cause: err });
    }
  }

  /**
   * Asks the LLM for the next action and returns the parsed decision.
   *
   * Transport errors that look like rate limiting / overload (429, 503) are
   * retried with a growing delay. If the completion cannot be parsed, the model
   * is asked once more for a shorter answer; if that also fails, the ORIGINAL
   * parse error is rethrown.
   *
   * @param {object} params - Forwarded unchanged to buildUserPrompt().
   * @returns {Promise<object>} The value returned by parseDecision().
   * @throws {Error} Transport errors, unknown provider, or the first parse error.
   */
  async decide({ goal, url, currentUrl, domSnapshot, stepNumber, previousSteps, credentials }) {
    const systemPrompt = buildSystemPrompt();
    const userPrompt = buildUserPrompt({ goal, url, currentUrl, domSnapshot, stepNumber, previousSteps, credentials });

    const rawResponse = await this._callWithRetry(systemPrompt, userPrompt, 3);

    // Try to parse. On failure, retry ONCE with a tighter prompt asking for a shorter response.
    // This recovers from genuinely-truncated JSON outputs (token limit hit, etc).
    try {
      return parseDecision(rawResponse);
    } catch (parseErr) {
      const shorterPrompt = userPrompt + '\n\nIMPORTANT: Your last response was truncated or malformed JSON. Respond with a CONCISE decision: keep reasoning under 100 chars, flag at most 1 issue this step, no markdown. Just valid compact JSON.';
      try {
        const retryResponse = await this._callProvider(systemPrompt, shorterPrompt);
        return parseDecision(retryResponse);
      } catch {
        // Both attempts failed - the first parse error is the informative one.
        throw parseErr;
      }
    }
  }

  /**
   * Sends the prompts to the configured provider.
   *
   * @param {string} systemPrompt
   * @param {string} userPrompt
   * @returns {Promise<string>} Raw completion text ('' when the provider returned none).
   * @throws {Error} For an unknown provider, or whatever the HTTP call throws.
   */
  async _callProvider(systemPrompt, userPrompt) {
    switch (this.provider) {
      case 'gemini':
        return this._callGemini(systemPrompt, userPrompt);
      case 'ollama':
        return this._callOllama(systemPrompt, userPrompt);
      case 'openai':
        return this._callOpenAI(systemPrompt, userPrompt);
      default:
        throw new Error('Unknown provider: ' + this.provider);
    }
  }

  /**
   * Calls the provider, retrying rate-limit / overload failures.
   * The wait before retry k is k * 8 seconds.
   *
   * @param {string} systemPrompt
   * @param {string} userPrompt
   * @param {number} maxRetries - Total number of attempts.
   * @returns {Promise<string>}
   */
  async _callWithRetry(systemPrompt, userPrompt, maxRetries) {
    for (let attempt = 1; ; attempt++) {
      try {
        return await this._callProvider(systemPrompt, userPrompt);
      } catch (err) {
        if (attempt >= maxRetries || !this._isRetryable(err)) throw err;
        await new Promise((resolve) => setTimeout(resolve, attempt * 8000));
      }
    }
  }

  /**
   * True when an error signals HTTP 429 or 503, either through the response
   * status or (for errors without a response object) through its message.
   *
   * @param {*} err
   * @returns {boolean}
   */
  _isRetryable(err) {
    const status = err && err.response && err.response.status;
    if (status === 429 || status === 503) return true;
    const message = err && typeof err.message === 'string' ? err.message : '';
    return message.includes('429') || message.includes('503');
  }

  /**
   * Calls the Gemini generateContent endpoint.
   *
   * The API key travels in the `x-goog-api-key` header rather than the query
   * string, so it does not end up in request URLs that axios attaches to
   * errors (and that tend to be logged).
   *
   * @param {string} systemPrompt
   * @param {string} userPrompt
   * @returns {Promise<string>} Text of the first part of the first candidate, or ''.
   */
  async _callGemini(systemPrompt, userPrompt) {
    const apiKey = process.env.GEMINI_API_KEY;
    const endpoint = 'https://generativelanguage.googleapis.com/v1beta/models/' + this.modelName + ':generateContent';

    const body = {
      system_instruction: { parts: [{ text: systemPrompt }] },
      contents: [{ parts: [{ text: userPrompt }], role: 'user' }],
      generationConfig: {
        temperature: 0.2,
        maxOutputTokens: 8192,
        thinkingConfig: { thinkingBudget: 0 },
      },
    };

    const response = await axios.post(endpoint, body, {
      headers: { 'x-goog-api-key': apiKey },
      timeout: 60000,
    });
    const candidates = response.data.candidates;
    if (candidates && candidates[0] && candidates[0].content && candidates[0].content.parts && candidates[0].content.parts[0]) {
      return candidates[0].content.parts[0].text || '';
    }
    return '';
  }

  /**
   * Calls Ollama's non-streaming /api/generate endpoint.
   *
   * @param {string} systemPrompt
   * @param {string} userPrompt
   * @returns {Promise<string>} The `response` field of the reply, or ''.
   */
  async _callOllama(systemPrompt, userPrompt) {
    const response = await axios.post(
      this._ollamaBaseUrl() + '/api/generate',
      { model: this.modelName, system: systemPrompt, prompt: userPrompt, stream: false, options: { temperature: 0.2 } },
      { timeout: 60000 }
    );
    return response.data.response || '';
  }

  /**
   * Calls the OpenAI chat completions endpoint.
   *
   * @param {string} systemPrompt
   * @param {string} userPrompt
   * @returns {Promise<string>} Content of the first choice's message, or ''.
   */
  async _callOpenAI(systemPrompt, userPrompt) {
    const apiKey = process.env.OPENAI_API_KEY;
    const response = await axios.post(
      'https://api.openai.com/v1/chat/completions',
      {
        model: this.modelName,
        messages: [{ role: 'system', content: systemPrompt }, { role: 'user', content: userPrompt }],
        temperature: 0.2,
        max_tokens: 2048,
      },
      { headers: { Authorization: 'Bearer ' + apiKey }, timeout: 60000 }
    );
    const choices = response.data.choices;
    if (choices && choices[0] && choices[0].message) return choices[0].message.content || '';
    return '';
  }
}
|
|
153
|
+
|
|
154
|
+
/**
 * Builds the static system prompt for the QA agent.
 *
 * The prompt is assembled from thematic sections (each ending with its own
 * blank separator line where the prompt has one) and joined with newlines.
 *
 * @returns {string} The complete system prompt.
 */
function buildSystemPrompt() {
  // Who the agent is and the output contract.
  const role = [
    'You are Skopix, an expert QA agent that tests web applications like a skilled human tester would.',
    '',
    'Your job: analyse the DOM snapshot, decide the best next action(s), evaluate UI quality, flag bugs.',
    'Respond ONLY with valid JSON. No markdown. Just JSON.',
    '',
  ];

  // The two accepted JSON response shapes.
  const responseFormats = [
    'SINGLE ACTION FORMAT:',
    '{ "action": "...", "target": "...", "value": "...", "reasoning": "...", "observation": "...", "confidence": 8, "goalAchieved": false, "issues": [] }',
    '',
    'BATCH FORMAT - PREFERRED for filling multiple form fields in one turn:',
    '{ "action": "BATCH", "actions": [ {"action":"TYPE","target":"#fullname","value":"John Smith"}, {"action":"TYPE","target":"#email","value":"john@example.com"}, {"action":"SELECT","target":"#country","value":"United States"} ], "reasoning":"...", "observation":"...", "confidence": 9, "goalAchieved": false, "issues": [] }',
    '',
  ];

  // When to batch and how to keep a batch complete.
  const batchGuidance = [
    'WHEN TO USE BATCH (it makes tests much faster):',
    '- 2+ form fields are visible and you know what to fill in each',
    '- You can chain straightforward inputs without observing the result of each one',
    '- BATCH max 8 actions per turn',
    '- Do NOT batch CLICK actions that submit forms or navigate - keep those single',
    '- Ideal use: filling out a form with name, email, address, etc all at once',
    '',
    'BATCH COMPLETENESS - CRITICAL (most common cause of test failures):',
    '- Look at the FORM FIELDS section of the DOM. COUNT the required-looking input fields (username, email, password, name, etc).',
    '- Your BATCH actions array MUST contain ONE TYPE action for EACH of those fields. Never skip one.',
    '- If FORM FIELDS shows id="username" AND id="password", your BATCH MUST have 2 TYPE actions - one for #username AND one for #password. Never just one.',
    '- BEFORE submitting your JSON: re-read your actions array. Count the TYPE actions. Does that count match the number of required fields? If not, ADD the missing ones before responding.',
    '- This is the #1 source of test failures. Take an extra second to double-check completeness.',
    '- Example: form has username + password. WRONG: BATCH with only [TYPE password]. CORRECT: BATCH with [TYPE #username, TYPE #password].',
    '',
  ];

  // Vocabulary of actions and what target/value mean for each.
  const actionReference = [
    'Actions: CLICK, TYPE, SELECT, PRESS, SCROLL, NAVIGATE, WAIT, HOVER, CLICK_AT, OBSERVE, STOP, BATCH',
    '',
    'CLICK: target = JUST the visible text or label e.g. "Login" or "Submit". Do NOT use "button:Login" or similar prefixed format - just the text itself.',
    'CLICK_AT: target = x,y coords from pos:(x,y) in DOM. Use when CLICK fails.',
    'TYPE: target = id selector from FORM FIELDS section. value = text',
    'SELECT: target = selector. value = option text',
    'PRESS: value = key e.g. Enter, Tab',
    'SCROLL: value = down or up',
    'NAVIGATE: value = full URL',
    'WAIT: value = ms max 5000',
    'HOVER: hover over element to reveal tooltip',
    'OBSERVE: read and record page content, error messages, modal text',
    'STOP: goal complete or stuck',
    '',
  ];

  const criticalRules = [
    'Critical rules:',
    '- DOM shows TARGET: "#fieldid" for inputs - use those EXACT selectors for TYPE/SELECT',
    '- DOM marks WARNING-STYLED elements - target those for error icons',
    '- Every element shows pos:(x,y) - use CLICK_AT with coords if CLICK fails',
    '- After clicking icons always OBSERVE next to read tooltips/modals',
    '- When verifying error text, OBSERVE and quote EXACT text in observation',
    '- Always use credentials when login forms appear',
    '- Set goalAchieved true only when fully complete',
    '- USE BATCH for forms - it is dramatically faster than one-at-a-time',
    '',
  ];

  // Separating "did the goal succeed" from "what problems were noticed".
  const goalVersusIssues = [
    'FLAGGING ISSUES VS GOAL ACHIEVEMENT (very important - read carefully):',
    '- The GOAL is what the user asked you to test. Set goalAchieved=true ONLY if the specific goal succeeded as described.',
    '- ISSUES are problems you NOTICE while pursuing the goal - they are recorded as observations even if unrelated to the goal.',
    '',
    'How to decide goalAchieved (READ CAREFULLY):',
    '- Read the goal LITERALLY. If the goal says "verify successful login" and login fails → goalAchieved=FALSE. Do not assume the user wanted the opposite.',
    '- Goal "log in and verify dashboard loads" → did login succeed and a dashboard appear? Yes → goalAchieved=true. Unrelated bugs on dashboard do NOT fail the goal.',
    '- Goal "verify successful login with username X password Y" → did the user actually log in successfully? If you see an error message or stay on the login page, login FAILED → goalAchieved=FALSE.',
    '- Goal "verify all images load successfully" → are the images actually loading? No → goalAchieved=false.',
    '- Goal "verify error appears for invalid credentials" → did the error appear? Yes → goalAchieved=true (the goal was to see the error).',
    '- Rule: only the SPECIFIC THING the goal asks about determines goalAchieved. The actual outcome must match what the goal REQUIRES.',
    '- DO NOT mark goalAchieved=true just because you completed your investigation. Mark it true only when the GOAL\'s required outcome was actually observed.',
    '',
  ];

  const issueFlagging = [
    'When to flag issues:',
    '- Flag any problem you notice during testing: broken images, 404s, console errors, JS errors, broken links, layout problems, slow loads, error messages, accessibility issues, etc.',
    '- Each issue: { "title": "Short clear title", "description": "What is wrong and where", "severity": "low" | "medium" | "high" | "critical", "type": "bug" | "ux" | "performance" | "accessibility", "step": <current step number>, "url": "current page URL" }',
    '- Severity: "critical" for blocking core functionality (login broken, data loss, security). "high" for major bugs affecting key flows. "medium" for broken images, 404 resources, console errors. "low" for minor styling/polish.',
    '- CRITICAL: Flag each unique issue ONCE total across all steps - in the step where you first noticed it. Do not re-flag the same issue in subsequent steps.',
    '- "The same issue" means the same root cause - "Login failed with invalid credentials" and "Login failed with incorrect credentials" are the SAME issue, only flag once.',
    '- Look at issues already flagged in previous steps in this conversation. If the issue is already in the previous step\'s issues array, do NOT flag it again.',
    '- Do NOT flag normal app behaviour as issues (e.g. cookie warnings, expected validation errors during a form test).',
    '',
  ];

  // Note: this section ends with TWO blank lines in the prompt.
  const examples = [
    'Examples:',
    '- Goal "verify images load" + images are broken → goalAchieved=false (the goal target failed) + flag issue.',
    '- Goal "log in" + login works but dashboard has SQL error → goalAchieved=true (login worked) + flag the SQL error as a separate issue.',
    '- Goal "log in" + login fails → goalAchieved=false + flag issue describing the login failure.',
    '- Goal "checkout flow" + checkout completes but image broken on receipt page → goalAchieved=true + flag the image issue.',
    '',
    '',
  ];

  const goalDetection = [
    'CRITICAL - GOAL DETECTION:',
    '- After EVERY action, check the page for goal completion signals BEFORE deciding to act again',
    '- The CURRENT URL is your strongest signal - if URL has changed from the login/start page, you have likely progressed',
    '- For LOGIN goals: the goal is achieved as soon as you are NO LONGER on the login screen - check if dashboard, sidebar, navigation, or any post-login content is visible',
    '- If you see success messages like "Order placed", "Welcome", "Successfully", "Thank you", "Confirmed", "Dashboard" - the goal is likely DONE',
    '- If you see Order ID, confirmation number, success badges - the goal is likely DONE',
    '- If the original form has been replaced by a success state - the goal is likely DONE',
    '- When goal is done: respond with action OBSERVE, set goalAchieved: true, do NOT try more actions',
    '- Do NOT keep batching after success - check the observation field of previous steps for completion',
    '',
  ];

  const backgroundNoise = [
    'IMPORTANT - IGNORE BACKGROUND NOISE:',
    '- Cookie warnings, third-party storage notices, GDPR popups, and similar messages are NOT failure signals',
    '- These messages can appear ALONGSIDE successful login - look for ACTUAL form/dashboard state changes',
    '- If you see new content like a sidebar, dashboard panels, navigation menu, or different page structure than the start - login likely succeeded',
    '- Compare CURRENT URL to ORIGINAL URL - if path has changed (even just adding /dashboard or similar), action succeeded',
    '',
  ];

  const navigationDiscovery = [
    'NAVIGATION DISCOVERY - finding hidden menu items, categories, tree nodes:',
    '- Sidebars, menus and category lists are often COLLAPSED by default. If you are looking for an item (e.g. "Dates", "Reports", "Settings") and you do not see it in the current snapshot, it may be hidden behind a parent that needs expanding.',
    '- Signs of an expandable parent: a chevron icon (▸ ▶ ▼), an arrow class (caret/chevron/arrow/toggle/expand/collapse), aria-expanded="false", or a parent label that looks like a category header.',
    '- The DOM snapshot marks these with hints like [expandable] or [collapsed] in the NAV section when present.',
    '- Strategy: CLICK the parent first to expand it, then OBSERVE to see the new children, then CLICK the item you actually want.',
    '- Example: Goal "open the Dates category" + you see a "Dashboard" item with a ▸ arrow but no "Dates" → first CLICK "Dashboard" (or the arrow) to expand, THEN you will see "Dates" appear, THEN CLICK it.',
    '- Do NOT give up just because the item is not visible. Always try expanding plausible parent categories first.',
    '- IMPORTANT: When a nav item shows selector:"..." in the snapshot, prefer that EXACT selector as your CLICK target instead of the text. Selectors are more reliable than text matching for custom widgets. e.g. CLICK target = "[pi-test-identifier=\"Dashboard.organisationList.organisation.1\"]".',
    '',
  ];

  const stuckDetection = [
    'STUCK DETECTION - what to do when an action is not progressing:',
    '- If your last 2 steps tried the same action or the page state has not changed, you are stuck.',
    '- Things to try when stuck: SCROLL down to see more content, CLICK any expandable parents to reveal hidden options, HOVER over icons to reveal tooltips, or OBSERVE to re-read the page.',
    '- If the goal references a specific element that does not appear in the DOM, try expanding sidebars, scrolling, or clicking category headers BEFORE assuming the test cannot be completed.',
  ];

  const promptLines = [
    ...role,
    ...responseFormats,
    ...batchGuidance,
    ...actionReference,
    ...criticalRules,
    ...goalVersusIssues,
    ...issueFlagging,
    ...examples,
    ...goalDetection,
    ...backgroundNoise,
    ...navigationDiscovery,
    ...stuckDetection,
  ];
  return promptLines.join('\n');
}
|
|
267
|
+
|
|
268
|
+
/**
 * Assembles the per-step user prompt: goal and URLs, optional credentials,
 * step history, an optional stuck warning, the DOM snapshot and the closing
 * instruction.
 *
 * @param {object} params
 * @param {*} params.goal - Testing goal, interpolated as text.
 * @param {*} params.url - URL the test started from.
 * @param {*} params.currentUrl - URL of the page being looked at now.
 * @param {*} params.domSnapshot - Textual snapshot of the current page.
 * @param {*} params.stepNumber - Number of the step being decided.
 * @param {Array} [params.previousSteps] - Passed to buildHistorySection() and detectStuck().
 * @param {object} [params.credentials] - Passed to buildCredentialSection().
 * @returns {string} The user prompt.
 */
function buildUserPrompt({ goal, url, currentUrl, domSnapshot, stepNumber, previousSteps, credentials }) {
  // Derived sections are computed first, in the same order as before.
  const credentialBlock = buildCredentialSection(credentials);
  const historyBlock = buildHistorySection(previousSteps);
  const stuckWarning = detectStuck(previousSteps);

  const header = `TESTING GOAL: ${goal}\nORIGINAL URL: ${url}\nCURRENT URL: ${currentUrl}\nSTEP: ${stepNumber}\n`;
  const context = `${credentialBlock}\n${historyBlock}${stuckWarning}`;
  const pageState = `\n\nCURRENT PAGE STATE:\n${domSnapshot}`;
  const closing = '\n\nDecide the next action. Respond ONLY with valid JSON. Use BATCH if multiple form fields can be filled.';

  return header + context + pageState + closing;
}
|
|
274
|
+
|
|
275
|
+
/**
 * Produces a warning for the prompt when the agent appears to be looping.
 *
 * Two signals are checked, in this order:
 *   1. the last two steps have the same action and the same target;
 *   2. at least two of the last three steps have `success === false`.
 *
 * @param {Array<object>} [previousSteps] - Step records (action, target, success).
 * @returns {string} A hint starting with two newlines, or '' when no signal
 *   fires or fewer than two steps exist.
 */
function detectStuck(previousSteps) {
  if (!previousSteps || previousSteps.length < 2) return '';

  const total = previousSteps.length;
  const latest = previousSteps[total - 1];
  const earlier = previousSteps[total - 2];

  // Signal 1: identical action + target twice in a row (a missing target counts as '').
  const isRepeat = Boolean(latest && earlier)
    && latest.action === earlier.action
    && (latest.target || '') === (earlier.target || '');
  if (isRepeat) {
    return `\n\n⚠ STUCK SIGNAL: your last 2 actions were identical (${latest.action} on ${latest.target || '?'}). Try a different approach: expand a sidebar item, SCROLL down, HOVER over icons, or look for a different selector.`;
  }

  // Signal 2: most of the recent steps failed outright.
  let recentFailures = 0;
  for (const step of previousSteps.slice(-3)) {
    if (step.success === false) recentFailures += 1;
  }
  return recentFailures >= 2
    ? '\n\n⚠ STUCK SIGNAL: your last actions have been failing. Re-read the DOM snapshot carefully. If you are looking for an element that does not appear, try expanding parent categories (look for [expandable, COLLAPSED] in NAVIGATION) or scrolling.'
    : '';
}
|
|
294
|
+
|
|
295
|
+
/**
 * Renders the "AVAILABLE CREDENTIALS" prompt section.
 *
 * @param {Object<string, Object<string, *>>} [credentials] - Map of label to a
 *   map of field name to value.
 * @returns {string} A heading followed by one "[label]" line per entry and one
 *   "key: value" line per field, joined with newlines; '' when there are no
 *   credentials.
 */
function buildCredentialSection(credentials) {
  if (!credentials || Object.keys(credentials).length === 0) return '';

  const entryLines = Object.entries(credentials).flatMap(([label, fields]) => [
    ' [' + label + ']',
    ...Object.entries(fields).map(([key, value]) => ' ' + key + ': ' + value),
  ]);

  return ['\nAVAILABLE CREDENTIALS:', ...entryLines].join('\n');
}
|
|
306
|
+
|
|
307
|
+
/**
 * Renders the "PREVIOUS STEPS" prompt section.
 *
 * Each step becomes a line "<OK|FAIL> Step <n>: <action> -> <target, else
 * value, else '-'>", followed by an "obs:" line when the step recorded an
 * observation.
 *
 * @param {Array<object>} [previousSteps] - Step records (step, action, target,
 *   value, success, observation).
 * @returns {string} The section text, or '' when there is no history.
 */
function buildHistorySection(previousSteps) {
  if (!previousSteps || previousSteps.length === 0) return '';

  const stepLines = previousSteps.flatMap((entry) => {
    const outcome = entry.success ? 'OK' : 'FAIL';
    const subject = entry.target || entry.value || '-';
    const summary = ' ' + outcome + ' Step ' + entry.step + ': ' + entry.action + ' -> ' + subject;
    return entry.observation ? [summary, ' obs: ' + entry.observation] : [summary];
  });

  return ['\nPREVIOUS STEPS:', ...stepLines].join('\n');
}
|
|
317
|
+
|
|
318
|
+
/**
 * Attempts to repair truncated/malformed JSON produced by an LLM.
 *
 * Handles: unterminated strings, missing closing `]` / `}`, and trailing
 * commas or whitespace.
 *
 * Strategy:
 *   1. Simple repair - trim trailing whitespace/commas, close an open string,
 *      then append the missing `]` and `}`.
 *   2. Walk back from the end looking for the last "safe" cut point (outside a
 *      string, ending in `}`, `]`, `"` or a literal/number character), cut
 *      there and close the brackets.
 *
 * Missing brackets are always appended as all `]` first, then all `}`. An
 * object that is still open INSIDE an open array can therefore never be
 * closed validly, which is what makes step 2 drop a half-written trailing
 * array element instead of keeping it with missing fields.
 *
 * The input is scanned once; string state and bracket depth are recorded for
 * every prefix length, so the walk-back does not rescan the text per
 * candidate position.
 *
 * @param {string} jsonStr - Candidate JSON text.
 * @returns {*} The parsed value on success, null on failure or when the input
 *   is empty or not a string.
 */
function tryRepairJSON(jsonStr) {
  if (!jsonStr || typeof jsonStr !== 'string') return null;

  const total = jsonStr.length;

  // Prefix state: index i describes jsonStr.slice(0, i).
  const endsInString = new Uint8Array(total + 1);
  const curlyDepth = new Int32Array(total + 1);
  const squareDepth = new Int32Array(total + 1);
  {
    let inStr = false;
    let escaped = false;
    let curly = 0;
    let square = 0;
    for (let i = 0; i < total; i++) {
      const ch = jsonStr[i];
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inStr = !inStr;
      } else if (!inStr) {
        if (ch === '{') curly++;
        else if (ch === '}') curly--;
        else if (ch === '[') square++;
        else if (ch === ']') square--;
      }
      endsInString[i + 1] = inStr ? 1 : 0;
      curlyDepth[i + 1] = curly;
      squareDepth[i + 1] = square;
    }
  }

  // Length of jsonStr.slice(0, end) once trailing whitespace and commas are dropped.
  const trimmedLength = (end) => {
    let len = end;
    while (len > 0) {
      const ch = jsonStr[len - 1];
      if (ch !== ' ' && ch !== '\n' && ch !== '\r' && ch !== '\t' && ch !== ',') break;
      len--;
    }
    return len;
  };

  // Prefix of the given length (optionally with a closing quote), followed by
  // the missing brackets: every `]` first, then every `}`.
  const closeAt = (len, suffix) =>
    jsonStr.slice(0, len)
    + suffix
    + ']'.repeat(Math.max(0, squareDepth[len]))
    + '}'.repeat(Math.max(0, curlyDepth[len]));

  // Attempt 1: Simple repair (close unterminated string, balance brackets).
  // A closing quote never changes bracket depth, so the prefix depths apply.
  const fullLength = trimmedLength(total);
  try {
    return JSON.parse(closeAt(fullLength, endsInString[fullLength] ? '"' : ''));
  } catch {
    // Fall through to the walk-back.
  }

  // Attempt 2: Strip an incomplete tail. Walk back from the end to the last
  // position that is outside a string and ends on a complete-looking value.
  const safeLastChar = /[0-9truefalsn]/;
  let lastTriedLength = -1;
  for (let pos = total; pos > 0; pos--) {
    if (endsInString[pos]) continue;

    const len = trimmedLength(pos);
    if (len === 0) continue;
    // Neighbouring positions that differ only by trailing commas/whitespace
    // give the same candidate; it already failed once.
    if (len === lastTriedLength) continue;

    const last = jsonStr[len - 1];
    if (last !== '}' && last !== ']' && last !== '"' && !safeLastChar.test(last)) continue;

    lastTriedLength = len;
    try {
      return JSON.parse(closeAt(len, ''));
    } catch {
      continue;
    }
  }

  return null;
}
|
|
409
|
+
|
|
410
|
+
/**
 * Parses and normalises the raw LLM completion into a decision object.
 *
 * Steps: strip Markdown code fences, extract the outermost `{ ... }` span,
 * parse it (falling back to tryRepairJSON for truncated output), then coerce
 * each field to a known shape.
 *
 * Normalisation rules:
 *   - `action` is upper-cased. Unknown actions are mapped through a synonym
 *     table by prefix (e.g. FILL* becomes TYPE); anything else, including a
 *     missing or non-string action, becomes OBSERVE.
 *   - `confidence` is clamped to 0..10. A value that is not a number defaults
 *     to 5; an explicit 0 is kept.
 *   - `BATCH` keeps at most 8 sub-actions. Entries that are not objects are
 *     discarded, and a batch with no usable entries degrades to OBSERVE.
 *
 * @param {string} rawText - Raw completion text.
 * @returns {{action: string, target: *, value: *, reasoning: string,
 *   observation: string, confidence: number, goalAchieved: boolean,
 *   issues: Array, actions?: Array<{action: string, target: *, value: *}>}}
 * @throws {Error} When the text is empty, contains no `{`, or cannot be parsed
 *   even after repair (the JSON.parse error is attached as `cause`).
 */
function parseDecision(rawText) {
  if (!rawText) throw new Error('Empty response from LLM');

  const cleaned = rawText
    .trim()
    .replace(/^```json\s*/i, '')
    .replace(/^```\s*/i, '')
    .replace(/```\s*$/i, '')
    .trim();

  const start = cleaned.indexOf('{');
  if (start === -1) throw new Error('LLM response is not JSON: ' + cleaned.slice(0, 200));

  // Take everything from { up to the last }. If there is no } after the
  // opening brace (truncated output), keep the whole tail for repair.
  const end = cleaned.lastIndexOf('}');
  const jsonStr = end < start ? cleaned.slice(start) : cleaned.slice(start, end + 1);

  let parsed;
  try {
    parsed = JSON.parse(jsonStr);
  } catch (err) {
    // Auto-repair common LLM truncation patterns (unclosed [, ", {, trailing comma).
    parsed = tryRepairJSON(jsonStr);
    if (!parsed) {
      throw new Error('Failed to parse JSON: ' + err.message, { cause: err });
    }
  }

  const validActions = ['CLICK', 'TYPE', 'SELECT', 'PRESS', 'SCROLL', 'NAVIGATE', 'WAIT', 'HOVER', 'CLICK_AT', 'STOP', 'OBSERVE', 'BATCH'];
  // Models sometimes invent verbs; map the common ones onto supported actions.
  const actionMap = {
    'INSPECT': 'OBSERVE', 'READ': 'OBSERVE', 'VERIFY': 'OBSERVE', 'CHECK': 'OBSERVE',
    'ASSERT': 'OBSERVE', 'VALIDATE': 'OBSERVE', 'FIND': 'OBSERVE',
    'TAP': 'CLICK', 'PRESS_BUTTON': 'CLICK', 'SUBMIT': 'CLICK',
    'ENTER': 'TYPE', 'INPUT': 'TYPE', 'FILL': 'TYPE',
  };

  // A non-string action (number, object, ...) is treated like a missing one
  // rather than crashing on .toUpperCase().
  const requestedAction = typeof parsed.action === 'string' && parsed.action ? parsed.action : 'OBSERVE';
  let action = requestedAction.toUpperCase();
  if (!validActions.includes(action)) {
    const prefix = Object.keys(actionMap).find((k) => action.startsWith(k));
    action = prefix ? actionMap[prefix] : 'OBSERVE';
  }

  // Only fall back to 5 when there is no number at all: 0 is a legitimate
  // confidence and must survive.
  const numericConfidence = Number.parseInt(parsed.confidence, 10);
  const confidence = Number.isNaN(numericConfidence) ? 5 : Math.min(10, Math.max(0, numericConfidence));

  const result = {
    action,
    target: parsed.target || null,
    value: parsed.value || null,
    reasoning: parsed.reasoning || '',
    observation: parsed.observation || '',
    confidence,
    goalAchieved: Boolean(parsed.goalAchieved),
    issues: Array.isArray(parsed.issues) ? parsed.issues : [],
  };

  // For BATCH, validate the actions array.
  if (action === 'BATCH') {
    const entries = Array.isArray(parsed.actions)
      ? parsed.actions.filter((entry) => entry !== null && typeof entry === 'object')
      : [];
    if (entries.length === 0) {
      // Fall back to OBSERVE if BATCH has no usable actions.
      result.action = 'OBSERVE';
    } else {
      result.actions = entries.slice(0, 8).map((entry) => ({
        action: typeof entry.action === 'string' ? entry.action.toUpperCase() : '',
        target: entry.target || null,
        value: entry.value || null,
      }));
    }
  }

  return result;
}
|
|
479
|
+
|
|
480
|
+
// ─── Recording processing ─────────────────────────────────────────────────────
|
|
481
|
+
// Takes raw recorded steps and runs three LLM jobs:
|
|
482
|
+
// 1. Selector stabilisation - rewrite brittle selectors to resilient ones
|
|
483
|
+
// 2. Step descriptions - generate human-readable labels for each step
|
|
484
|
+
// 3. Playwright code generation - output clean JS and TS test files
|
|
485
|
+
//
|
|
486
|
+
// Returns: { steps: [...enriched steps], playwrightJs: '...', playwrightTs: '...' }
|
|
487
|
+
|
|
488
|
+
/** System instruction sent with every provider call made while processing a recording. */
const RECORDING_SYSTEM_PROMPT = 'You are a Playwright test engineer. Always respond with valid JSON only, no markdown.';

/** Placeholder forwarded to the LLM instead of a recorded password value. */
const RECORDING_PASSWORD_PLACEHOLDER = '[password - use process.env.TEST_PASSWORD]';

/** Environment variable that receives a caller-supplied API key, per provider. */
const RECORDING_API_KEY_ENV = new Map([
  ['gemini', 'GEMINI_API_KEY'],
  ['openai', 'OPENAI_API_KEY'],
  ['claude', 'CLAUDE_API_KEY'],
]);

/** LLMRouter method used to send the prompt, per provider. */
const RECORDING_PROVIDER_METHODS = new Map([
  ['gemini', '_callGemini'],
  ['openai', '_callOpenAI'],
  ['ollama', '_callOllama'],
]);

/**
 * Reduce one recorded step to the compact object that is serialised into the prompt.
 *
 * @param {object} step - Recorded step.
 * @param {number} position - Zero-based position of the step in its list.
 * @param {boolean} detailed - When true, also include `element.classes` and `label`
 *   (used for the main test steps; setup steps use the shorter form).
 * @returns {object} Plain, JSON-serialisable summary of the step.
 */
function toRecordingStepContext(step, position, detailed) {
  const { element } = step;
  let elementSummary = null;
  if (element) {
    elementSummary = {
      tag: element.tag,
      id: element.id,
      name: element.name,
      type: element.type,
      text: element.text,
    };
    if (detailed) elementSummary.classes = element.classes;
  }

  // Never forward a value captured from a password step, whatever action recorded it.
  const hideValue = Boolean(step.isPassword) && (step.action === 'type' || Boolean(step.value));

  const context = {
    index: position + 1,
    id: step.id,
    action: step.action,
    assertType: step.assertType || null,
    // The prompt refers to step.attribute (attribute_contains) and to checked=true/false
    // (check actions), so pass them through. JSON.stringify omits them when undefined.
    attribute: step.attribute,
    checked: step.checked,
    selector: step.selector,
    element: elementSummary,
    value: hideValue ? RECORDING_PASSWORD_PLACEHOLDER : (step.value || null),
    isPassword: step.isPassword || false,
  };
  if (detailed) context.label = step.label || null;
  context.description = step.description || null;
  context.url = step.url;
  return context;
}

/**
 * Assemble the user prompt asking the model for stable selectors, step descriptions
 * and Playwright code (JavaScript and TypeScript) as a single JSON object.
 *
 * @param {object} options
 * @param {string} [options.testName] - Test name; defaults to "Recorded test".
 * @param {string} options.url - Start URL of the recording.
 * @param {boolean} [options.isReusable] - Ask for an exported helper instead of a test() block.
 * @param {string} [options.setupName] - Name of the setup sequence; defaults to "setup".
 * @param {object[]} options.stepsContext - Compact test steps.
 * @param {object[]|null} options.setupContext - Compact setup steps, or null when there are none.
 * @returns {string} The full prompt text.
 */
function buildRecordingPrompt({ testName, url, isReusable, setupName, stepsContext, setupContext }) {
  const setupSection = setupContext
    ? '\nSetup steps (run before the test):\n' + JSON.stringify(setupContext, null, 2)
    : '';
  const reusableNote = isReusable ? 'This test is marked as REUSABLE — generate it as an exported async helper function, not a test() block.' : '';
  const setupNote = setupContext ? 'This test uses a setup sequence ("' + (setupName || 'setup') + '") that runs first. Extract setup steps into a shared helper function.' : '';

  return 'You are a Playwright test engineer. You have been given a list of recorded browser actions.\n\n'
    + 'Test name: "' + (testName || 'Recorded test') + '"\n'
    + 'Start URL: "' + url + '"\n'
    + (reusableNote ? reusableNote + '\n' : '')
    + (setupNote ? setupNote + '\n' : '')
    + '\nRecorded steps:\n'
    + JSON.stringify(stepsContext, null, 2)
    + setupSection
    + '\n\nYour jobs:\n'
    + '1. For each step, write a STABLE SELECTOR. Priority order:\n'
    + ' - data-testid, data-test, data-cy, data-qa, pi-test-identifier attributes use [attr="value"]\n'
    + ' - Unique meaningful id (NOT random/generated IDs) use #id\n'
    + ' - Semantic selector e.g. button:has-text("Login"), input[name="email"]\n'
    + ' - Role + text e.g. [role="button"]:has-text("Submit")\n'
    + ' - Class-based selector for well-named classes e.g. .chart-container, .save-btn\n'
    + ' - Fall back to the original selector if nothing better\n'
    + ' CRITICAL RULES FOR SELECTORS:\n'
    + ' - NEVER use IDs that look randomly generated (e.g. #highcharts-abc123-58, #ng-view-1, anything with random hex/numbers)\n'
    + ' - NEVER use :nth-child or :nth-of-type positional selectors\n'
    + ' - For chart/visualization containers: use class-based selectors like .highcharts-container, .chart-wrapper, [class*="chart"]\n'
    + ' - CRITICAL: pi-test-identifier values ending in a long number are DYNAMIC (e.g. "ChartColumn.operandOne.sort.option.desc.885249556") — strip the number and use *= contains: [pi-test-identifier*="ChartColumn.operandOne.sort.option.desc"] NOT exact =\n'
    + ' - Any attribute value ending in 5+ digits is almost certainly a dynamic runtime ID — use *= instead of =\n'
    + ' - For assert steps especially: make sure the selector will match on every run, not just once\n\n'
    + '2. For each step, write a SHORT human-readable description (max 10 words). Examples:\n'
    + ' - "Click the Login button"\n'
    + ' - "Type username into email field"\n'
    + ' - "Select Admin from role dropdown"\n'
    + ' - "Navigate to dashboard"\n\n'
    + '3. Generate clean Playwright code in JAVASCRIPT. Requirements:\n'
    + ' - Import from @playwright/test\n'
    + ' - At the top define: const BASE_URL = process.env.BASE_URL || "<the recorded start url>";\n'
    + ' - Use BASE_URL for all navigation (replace the recorded hostname with BASE_URL)\n'
    + ' - If this test is REUSABLE: generate export async function <camelCaseName>(page) { ... } with NO test() block.\n'
    + ' - If this test has a SETUP: extract setup steps into export async function <setupName>(page) { ... } at the top, then call await <setupName>(page) as the first line inside the test() block.\n'
    + ' - If neither reusable nor has setup: single test() block with a descriptive name matching the test\n'
    + ' - Use the stable selectors you generated\n'
    + ' - For type actions: use await page.locator(selector).pressSequentially("value", { delay: 50 }) NOT page.fill()\n'
    + ' After pressSequentially, always fire blur/change events:\n'
    + ' await page.locator(selector).evaluate(el => { el.dispatchEvent(new Event("blur",{bubbles:true})); el.dispatchEvent(new Event("change",{bubbles:true})); el.dispatchEvent(new Event("focusout",{bubbles:true})); });\n'
    + ' - Use await page.click() for clicks\n'
    + ' - Use await page.selectOption() for selects\n'
    + ' - For check actions: use await page.check(selector) when checked=true, await page.uncheck(selector) when checked=false\n'
    + ' - For scroll actions: if selector is window use await page.evaluate(() => window.scrollTo(x, y)), otherwise use document.querySelector(sel).scrollTo(x, y)\n'
    + ' - Add await page.waitForLoadState("networkidle") after navigation actions\n'
    + ' - For assert steps: generate correct Playwright expect() calls:\n'
    + ' visible -> await expect(page.locator(selector)).toBeVisible();\n'
    + ' text_contains -> await expect(page.locator(selector)).toContainText("value");\n'
    + ' text_equals -> await expect(page.locator(selector)).toHaveText("value");\n'
    + ' url_contains -> await expect(page).toHaveURL(/value/);\n'
    + ' element_count -> await expect(page.locator(selector)).toHaveCount(N);\n'
    + ' attribute_contains -> await expect(page.locator(selector)).toHaveAttribute("title", /value/); (use the actual attribute name from step.attribute)\n'
    + ' - For password fields (isPassword: true) use: process.env.TEST_PASSWORD\n'
    + ' - Add comments above each logical section\n\n'
    + '4. Generate the same test in TYPESCRIPT. Same requirements, add proper types where helpful.\n\n'
    + 'Respond ONLY with valid JSON in this exact structure:\n'
    + '{\n'
    + ' "steps": [{ "id": "step-001", "stableSelector": "...", "description": "..." }],\n'
    + ' "playwrightJs": "// full javascript test code here",\n'
    + ' "playwrightTs": "// full typescript test code here"\n'
    + '}';
}

/**
 * Extract the JSON object from a raw model reply.
 *
 * Strips an optional Markdown code fence, parses the text between the first `{` and
 * the last `}`, and falls back to `tryRepairJSON` when that fails.
 *
 * @param {*} raw - Reply returned by the provider call.
 * @returns {*} The parsed (or repaired) value.
 * @throws {Error} When neither direct parsing nor repair yields a value.
 */
function parseRecordingResponse(raw) {
  if (typeof raw === 'string') {
    const cleaned = raw.trim()
      .replace(/^```json\s*/i, '')
      .replace(/^```\s*/i, '')
      .replace(/```\s*$/i, '')
      .trim();
    const start = cleaned.indexOf('{');
    const end = cleaned.lastIndexOf('}');
    if (start !== -1 && end > start) {
      try {
        return JSON.parse(cleaned.slice(start, end + 1));
      } catch {
        // Not valid JSON as-is; fall through to the repair attempt below.
      }
    }
  }

  const repaired = tryRepairJSON(raw);
  if (!repaired) throw new Error('Failed to parse LLM response as JSON');
  return repaired;
}

/**
 * Copy the model's `stableSelector` / `description` suggestions onto the recorded steps.
 *
 * Steps without a usable suggestion keep their recorded selector and get an
 * "<action> <selector>" description.
 *
 * @param {object[]} steps - Recorded steps.
 * @param {*} parsed - Parsed model reply; only an array-valued `steps` property is used.
 * @returns {object[]} New step objects; the inputs are not mutated.
 */
function mergeRecordingEnrichments(steps, parsed) {
  const suggestions = Array.isArray(parsed?.steps) ? parsed.steps : [];

  // Ids are compared as strings so "1" and 1 still match; a Map keeps model-supplied
  // ids such as "__proto__" from touching object prototypes.
  const byId = new Map();
  for (const suggestion of suggestions) {
    if (suggestion && typeof suggestion === 'object') {
      byId.set(String(suggestion.id), suggestion);
    }
  }

  return steps.map((step) => {
    const suggestion = byId.get(String(step.id));
    return {
      ...step,
      stableSelector: suggestion?.stableSelector || step.selector,
      description: suggestion?.description || `${step.action} ${step.selector || ''}`,
    };
  });
}

/**
 * Turn a recorded browser session into enriched steps plus generated Playwright code.
 *
 * Side effect: when `apiKey` is given for gemini, openai or claude it is written to the
 * matching `process.env` variable before the router is created.
 *
 * NOTE(review): 'claude' has its key exported here but there is no dispatch entry for
 * it below, so such a call ends in "Unknown provider: claude" unless verify() rejects
 * first — confirm whether LLMRouter exposes a Claude call method.
 *
 * @param {object} options
 * @param {object[]} options.steps - Recorded test steps.
 * @param {string} [options.testName] - Human-readable test name.
 * @param {string} options.url - Start URL of the recording.
 * @param {string} [options.provider] - LLM provider; defaults to 'gemini'.
 * @param {string} [options.apiKey] - API key to export for the chosen provider.
 * @param {boolean} [options.isReusable] - Generate an exported helper instead of a test().
 * @param {object[]} [options.setupSteps] - Steps of a setup sequence that runs first.
 * @param {string} [options.setupName] - Name of that setup sequence.
 * @returns {Promise<{steps: object[], playwrightJs: string, playwrightTs: string}>}
 * @throws {TypeError} When `steps` is not an array.
 * @throws {Error} When the provider call fails (original error attached as `cause`)
 *   or the reply cannot be parsed as JSON.
 */
export async function processRecording({ steps, testName, url, provider, apiKey, isReusable, setupSteps, setupName }) {
  if (!Array.isArray(steps)) {
    throw new TypeError('processRecording: "steps" must be an array of recorded steps');
  }

  // Set the API key in env if provided
  const keyVariable = RECORDING_API_KEY_ENV.get(provider);
  if (apiKey && keyVariable) process.env[keyVariable] = apiKey;

  const llm = new LLMRouter(provider || 'gemini');
  await llm.verify();

  // Build a compact representation of steps for the LLM
  const stepsContext = steps.map((step, i) => toRecordingStepContext(step, i, true));
  const setupContext = setupSteps && setupSteps.length
    ? setupSteps.map((step, i) => toRecordingStepContext(step, i, false))
    : null;

  const prompt = buildRecordingPrompt({ testName, url, isReusable, setupName, stepsContext, setupContext });

  let raw;
  try {
    const methodName = RECORDING_PROVIDER_METHODS.get(llm.provider);
    if (!methodName) throw new Error('Unknown provider: ' + llm.provider);
    raw = await llm[methodName](RECORDING_SYSTEM_PROMPT, prompt);
  } catch (err) {
    // Keep the original error (and its stack) reachable for callers and logs.
    throw new Error('LLM call failed: ' + err.message, { cause: err });
  }

  // Parse the response
  const parsed = parseRecordingResponse(raw);

  // Merge stable selectors and descriptions back into steps
  return {
    steps: mergeRecordingEnrichments(steps, parsed),
    playwrightJs: parsed.playwrightJs || '// Code generation failed',
    playwrightTs: parsed.playwrightTs || '// Code generation failed',
  };
}
|