qazen-cli 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/visionNavigator.js +71 -19
- package/package.json +1 -1
|
@@ -104,6 +104,18 @@ export class VisionNavigator {
|
|
|
104
104
|
onEvent({ type: "error", message: `Redirected to ${currentHostname} — skipping` });
|
|
105
105
|
return;
|
|
106
106
|
}
|
|
107
|
+
// Post-goto dedup: if redirect landed on an already-mapped URL, skip.
|
|
108
|
+
const normalizedFinal = this.normalizeUrl(currentUrl);
|
|
109
|
+
if (normalizedFinal !== normalizedUrl && this.visitedUrls.has(normalizedFinal)) {
|
|
110
|
+
onEvent({
|
|
111
|
+
type: "vision_analysis",
|
|
112
|
+
message: `Skipping ${currentUrl} — already mapped`,
|
|
113
|
+
});
|
|
114
|
+
return;
|
|
115
|
+
}
|
|
116
|
+
this.visitedUrls.add(normalizedFinal);
|
|
117
|
+
// Mark original URL as visited too — prevents re-queuing via redirect.
|
|
118
|
+
this.visitedUrls.add(normalizedUrl);
|
|
107
119
|
const screenshot = await this.takeScreenshot(url);
|
|
108
120
|
const analysis = await this.analyzeScreenshot(screenshot, url, context, onEvent);
|
|
109
121
|
if (!analysis)
|
|
@@ -122,8 +134,26 @@ export class VisionNavigator {
|
|
|
122
134
|
message: `${analysis.pageDescription} — ${analysis.elements.length} elements found`,
|
|
123
135
|
});
|
|
124
136
|
const highPriority = analysis.elements
|
|
125
|
-
.filter((e) =>
|
|
126
|
-
(e.
|
|
137
|
+
.filter((e) => {
|
|
138
|
+
if (e.priority !== "high")
|
|
139
|
+
return false;
|
|
140
|
+
if (!["link", "tab", "menu"].includes(e.elementType))
|
|
141
|
+
return false;
|
|
142
|
+
// Skip if this action URL is already visited
|
|
143
|
+
if (e.action && e.action.startsWith("http")) {
|
|
144
|
+
const normalized = this.normalizeUrl(e.action);
|
|
145
|
+
if (this.visitedUrls.has(normalized))
|
|
146
|
+
return false;
|
|
147
|
+
}
|
|
148
|
+
// Skip if element description indicates it's the current page
|
|
149
|
+
const desc = e.description.toLowerCase();
|
|
150
|
+
if (desc.includes("currently active") ||
|
|
151
|
+
desc.includes("current page") ||
|
|
152
|
+
desc.includes("(active)")) {
|
|
153
|
+
return false;
|
|
154
|
+
}
|
|
155
|
+
return true;
|
|
156
|
+
})
|
|
127
157
|
.slice(0, 4);
|
|
128
158
|
for (const element of highPriority) {
|
|
129
159
|
try {
|
|
@@ -237,13 +267,9 @@ Return ONLY the JSON object. No markdown, no explanation.`,
|
|
|
237
267
|
return null;
|
|
238
268
|
}
|
|
239
269
|
}
|
|
240
|
-
async
|
|
241
|
-
onEvent({
|
|
242
|
-
type: "action",
|
|
243
|
-
message: `Clicking "${element.description}" (${element.visualLocation || element.elementType})`,
|
|
244
|
-
});
|
|
245
|
-
const urlBefore = this.page.url();
|
|
270
|
+
async getElementCoordinates(description, visualLocation) {
|
|
246
271
|
const screenshot = await this.takeScreenshot("click-target");
|
|
272
|
+
const hint = visualLocation ? ` (${visualLocation})` : "";
|
|
247
273
|
const coordResponse = await this.anthropic.messages.create({
|
|
248
274
|
model: MODEL,
|
|
249
275
|
max_tokens: 200,
|
|
@@ -257,7 +283,7 @@ Return ONLY the JSON object. No markdown, no explanation.`,
|
|
|
257
283
|
},
|
|
258
284
|
{
|
|
259
285
|
type: "text",
|
|
260
|
-
text: `Find "${
|
|
286
|
+
text: `Find "${description}"${hint} on screen.
|
|
261
287
|
Return ONLY JSON: {"x": number, "y": number, "found": boolean}
|
|
262
288
|
x and y are pixel coordinates (image is 1280x720).
|
|
263
289
|
If not found, return {"x": 0, "y": 0, "found": false}`,
|
|
@@ -267,24 +293,50 @@ If not found, return {"x": 0, "y": 0, "found": false}`,
|
|
|
267
293
|
],
|
|
268
294
|
});
|
|
269
295
|
const coordText = firstTextBlock(coordResponse.content);
|
|
270
|
-
let coords;
|
|
271
296
|
try {
|
|
272
|
-
|
|
297
|
+
return JSON.parse(stripCodeFences(coordText));
|
|
273
298
|
}
|
|
274
299
|
catch {
|
|
275
300
|
return null;
|
|
276
301
|
}
|
|
277
|
-
|
|
302
|
+
}
|
|
303
|
+
async clickElement(element, pageMap, onEvent) {
|
|
304
|
+
onEvent({
|
|
305
|
+
type: "action",
|
|
306
|
+
message: `Clicking "${element.description}"`,
|
|
307
|
+
});
|
|
308
|
+
const coords = await this.getElementCoordinates(element.description, element.visualLocation);
|
|
309
|
+
if (!coords?.found)
|
|
278
310
|
return null;
|
|
311
|
+
// Set up navigation listener BEFORE clicking so we catch the redirect.
|
|
312
|
+
let navigationUrl = null;
|
|
313
|
+
const navigationPromise = this.page.waitForNavigation({
|
|
314
|
+
timeout: 3000,
|
|
315
|
+
waitUntil: "domcontentloaded",
|
|
316
|
+
})
|
|
317
|
+
.then(() => {
|
|
318
|
+
navigationUrl = this.page.url();
|
|
319
|
+
})
|
|
320
|
+
.catch(() => {
|
|
321
|
+
/* no navigation — fine */
|
|
322
|
+
});
|
|
279
323
|
await this.page.mouse.click(coords.x, coords.y);
|
|
280
324
|
this.totalActions++;
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
325
|
+
await Promise.race([navigationPromise, this.page.waitForTimeout(2000)]);
|
|
326
|
+
if (navigationUrl) {
|
|
327
|
+
const normalizedNew = this.normalizeUrl(navigationUrl);
|
|
328
|
+
// If we landed on an already-mapped page, go back and skip.
|
|
329
|
+
if (this.visitedUrls.has(normalizedNew)) {
|
|
330
|
+
onEvent({
|
|
331
|
+
type: "vision_analysis",
|
|
332
|
+
message: `Skipping ${normalizedNew} — already mapped`,
|
|
333
|
+
});
|
|
334
|
+
await this.page.goBack({ waitUntil: "domcontentloaded", timeout: 10000 }).catch(() => { });
|
|
335
|
+
await this.page.waitForTimeout(1000);
|
|
336
|
+
return null;
|
|
337
|
+
}
|
|
338
|
+
pageMap.actions_taken.push(`Navigated to ${navigationUrl}`);
|
|
339
|
+
return navigationUrl;
|
|
288
340
|
}
|
|
289
341
|
return null;
|
|
290
342
|
}
|