qazen-cli 0.1.8 → 0.1.9

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects those package versions as they appear in their respective public registries.
@@ -114,6 +114,8 @@ export class VisionNavigator {
114
114
  return;
115
115
  }
116
116
  this.visitedUrls.add(normalizedFinal);
117
+ // Mark original URL as visited too — prevents re-queuing via redirect.
118
+ this.visitedUrls.add(normalizedUrl);
117
119
  const screenshot = await this.takeScreenshot(url);
118
120
  const analysis = await this.analyzeScreenshot(screenshot, url, context, onEvent);
119
121
  if (!analysis)
@@ -265,13 +267,9 @@ Return ONLY the JSON object. No markdown, no explanation.`,
265
267
  return null;
266
268
  }
267
269
  }
268
- async clickElement(element, pageMap, onEvent) {
269
- onEvent({
270
- type: "action",
271
- message: `Clicking "${element.description}" (${element.visualLocation || element.elementType})`,
272
- });
273
- const urlBefore = this.page.url();
270
+ async getElementCoordinates(description, visualLocation) {
274
271
  const screenshot = await this.takeScreenshot("click-target");
272
+ const hint = visualLocation ? ` (${visualLocation})` : "";
275
273
  const coordResponse = await this.anthropic.messages.create({
276
274
  model: MODEL,
277
275
  max_tokens: 200,
@@ -285,7 +283,7 @@ Return ONLY the JSON object. No markdown, no explanation.`,
285
283
  },
286
284
  {
287
285
  type: "text",
288
- text: `Find "${element.description}" on screen.
286
+ text: `Find "${description}"${hint} on screen.
289
287
  Return ONLY JSON: {"x": number, "y": number, "found": boolean}
290
288
  x and y are pixel coordinates (image is 1280x720).
291
289
  If not found, return {"x": 0, "y": 0, "found": false}`,
@@ -295,24 +293,50 @@ If not found, return {"x": 0, "y": 0, "found": false}`,
295
293
  ],
296
294
  });
297
295
  const coordText = firstTextBlock(coordResponse.content);
298
- let coords;
299
296
  try {
300
- coords = JSON.parse(stripCodeFences(coordText));
297
+ return JSON.parse(stripCodeFences(coordText));
301
298
  }
302
299
  catch {
303
300
  return null;
304
301
  }
305
- if (!coords.found)
302
+ }
303
+ async clickElement(element, pageMap, onEvent) {
304
+ onEvent({
305
+ type: "action",
306
+ message: `Clicking "${element.description}"`,
307
+ });
308
+ const coords = await this.getElementCoordinates(element.description, element.visualLocation);
309
+ if (!coords?.found)
306
310
  return null;
311
+ // Set up navigation listener BEFORE clicking so we catch the redirect.
312
+ let navigationUrl = null;
313
+ const navigationPromise = this.page.waitForNavigation({
314
+ timeout: 3000,
315
+ waitUntil: "domcontentloaded",
316
+ })
317
+ .then(() => {
318
+ navigationUrl = this.page.url();
319
+ })
320
+ .catch(() => {
321
+ /* no navigation — fine */
322
+ });
307
323
  await this.page.mouse.click(coords.x, coords.y);
308
324
  this.totalActions++;
309
- pageMap.actions_taken.push(`Clicked "${element.description}" at (${coords.x}, ${coords.y})`);
310
- await this.page.waitForTimeout(2000);
311
- const urlAfter = this.page.url();
312
- if (urlAfter !== urlBefore) {
313
- const newScreenshot = await this.takeScreenshot(urlAfter);
314
- this.screenshots.push(newScreenshot);
315
- return urlAfter;
325
+ await Promise.race([navigationPromise, this.page.waitForTimeout(2000)]);
326
+ if (navigationUrl) {
327
+ const normalizedNew = this.normalizeUrl(navigationUrl);
328
+ // If we landed on an already-mapped page, go back and skip.
329
+ if (this.visitedUrls.has(normalizedNew)) {
330
+ onEvent({
331
+ type: "vision_analysis",
332
+ message: `Skipping ${normalizedNew} — already mapped`,
333
+ });
334
+ await this.page.goBack({ waitUntil: "domcontentloaded", timeout: 10000 }).catch(() => { });
335
+ await this.page.waitForTimeout(1000);
336
+ return null;
337
+ }
338
+ pageMap.actions_taken.push(`Navigated to ${navigationUrl}`);
339
+ return navigationUrl;
316
340
  }
317
341
  return null;
318
342
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "qazen-cli",
3
- "version": "0.1.8",
3
+ "version": "0.1.9",
4
4
  "description": "QAZen CLI — capture authenticated browser sessions for enterprise SSO testing",
5
5
  "license": "MIT",
6
6
  "author": "QAZen",