qazen-cli 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -86,9 +86,45 @@ export async function recordCommand(options) {
86
86
  console.error(chalk.red(err instanceof Error ? err.message : String(err)));
87
87
  process.exit(1);
88
88
  }
89
- console.log(chalk.green(`\n ✓ Session captured`));
90
- console.log(chalk.gray(` Cookies: ${result.cookieCount} (including HttpOnly)`));
91
- console.log(chalk.gray(` Domain: ${result.domain}`));
89
+ const targetHostname = (() => {
90
+ try {
91
+ return new URL(targetUrl).hostname.replace(/^www\./i, "").toLowerCase();
92
+ }
93
+ catch {
94
+ return "";
95
+ }
96
+ })();
97
+ const ss = result.storageState;
98
+ const cookies = ss.cookies ?? [];
99
+ const origins = ss.origins ?? [];
100
+ const cookiesByDomain = cookies.reduce((acc, c) => {
101
+ acc[c.domain] = (acc[c.domain] ?? 0) + 1;
102
+ return acc;
103
+ }, {});
104
+ const matchesTarget = (cookieDomain) => {
105
+ if (!targetHostname)
106
+ return false;
107
+ const norm = cookieDomain.replace(/^\./, "").toLowerCase();
108
+ return (norm === targetHostname ||
109
+ norm.endsWith("." + targetHostname) ||
110
+ targetHostname.endsWith("." + norm));
111
+ };
112
+ console.log(chalk.green("\n ✓ Session captured"));
113
+ console.log(chalk.gray(" Cookies by domain:"));
114
+ Object.entries(cookiesByDomain).forEach(([domain, count]) => {
115
+ const marker = matchesTarget(domain) ? chalk.green(" ✓ ") : chalk.gray(" · ");
116
+ console.log(marker + chalk.gray(`${domain}: ${count} cookies`));
117
+ });
118
+ if (origins.length > 0) {
119
+ console.log(chalk.gray(` localStorage: ${origins.length} origin(s)`));
120
+ }
121
+ const hasTargetCookies = cookies.some((c) => matchesTarget(c.domain));
122
+ if (!hasTargetCookies && targetHostname) {
123
+ console.log(chalk.yellow(`\n ⚠ No cookies found for ${targetHostname}`));
124
+ console.log(chalk.yellow(" The session may not work for authenticated crawling."));
125
+ console.log(chalk.gray(" Tip: Make sure you are fully logged into the app"));
126
+ console.log(chalk.gray(" (not just the SSO provider) before pressing Enter."));
127
+ }
92
128
  const uploadSpinner = ora("Uploading to QAZen...").start();
93
129
  try {
94
130
  await uploadSession(apiUrl, cliToken, project.id, result.storageState, result.domain, result.cookieCount);
@@ -0,0 +1,114 @@
1
+ import chalk from "chalk";
2
+ import ora from "ora";
3
+ import inquirer from "inquirer";
4
+ import Anthropic from "@anthropic-ai/sdk";
5
+ import { getConfig, isConfigured } from "../lib/config.js";
6
+ import { fetchProjects, uploadVisionDiscovery } from "../lib/api.js";
7
+ import { VisionNavigator } from "../lib/visionNavigator.js";
8
/**
 * Handler for the `vision-scout` CLI command.
 *
 * Flow: check CLI login and Anthropic key, validate `--max-pages`, pick a
 * project (by `--project` substring match, the only project, or an
 * interactive prompt), ask for confirmation, run the VisionNavigator and
 * upload its result.
 *
 * Every failure path prints a message and terminates the process with a
 * non-zero exit code; a user cancel exits with code 0.
 *
 * @param {object} options - Parsed CLI options.
 * @param {string} [options.project] - Case-insensitive substring of the project name.
 * @param {string} [options.url] - URL to explore instead of the project's own URL.
 * @param {string} [options.maxPages] - Page budget as a decimal string (defaults to "20").
 * @param {string} [options.anthropicKey] - API key; falls back to ANTHROPIC_API_KEY.
 * @returns {Promise<void>}
 */
export async function visionScoutCommand(options) {
  if (!isConfigured()) {
    console.log(chalk.red("\n Not logged in. Run: qazen login --token <key>\n"));
    process.exit(1);
  }
  const config = getConfig();
  const anthropicKey = options.anthropicKey || process.env["ANTHROPIC_API_KEY"];
  if (!anthropicKey) {
    console.log(chalk.red("\n Anthropic API key required."));
    console.log(chalk.gray(" Set ANTHROPIC_API_KEY env var or use --anthropic-key\n"));
    process.exit(1);
  }

  // Validate the page budget up front: a NaN budget would make the
  // navigator's `pages.length < maxPages` loop guard false, so the run
  // would explore nothing and still upload an empty discovery.
  const maxPages = Number.parseInt(options.maxPages ?? "20", 10);
  if (!Number.isInteger(maxPages) || maxPages < 1) {
    console.log(chalk.red(`\n Invalid --max-pages value "${options.maxPages}" (expected a positive integer)\n`));
    process.exit(1);
  }

  const spinner = ora("Loading projects...").start();
  let projects;
  try {
    projects = await fetchProjects(config.apiUrl, config.cliToken);
  }
  catch (err) {
    // Stop the spinner before reporting, otherwise it keeps animating
    // over the error output.
    spinner.fail(chalk.red("Failed to load projects"));
    console.error(chalk.red(err instanceof Error ? err.message : String(err)));
    process.exit(1);
  }
  spinner.stop();
  if (projects.length === 0) {
    console.log(chalk.red("\n No projects found in your workspace.\n"));
    process.exit(1);
  }

  let project;
  if (options.project) {
    const wanted = options.project.toLowerCase();
    const found = projects.find((p) => p.name.toLowerCase().includes(wanted));
    if (!found) {
      console.log(chalk.red(`\n Project "${options.project}" not found\n`));
      process.exit(1);
    }
    project = found;
  }
  else if (projects.length === 1) {
    project = projects[0];
  }
  else {
    const { projectId } = await inquirer.prompt([
      {
        type: "list",
        name: "projectId",
        message: "Select project:",
        choices: projects.map((p) => ({
          name: `${p.name} ${chalk.gray(p.url)}`,
          value: p.id,
        })),
      },
    ]);
    project = projects.find((p) => p.id === projectId);
  }

  const targetUrl = options.url || project.url;
  console.log("\n" + chalk.hex("#6366F1").bold(" QAZen Vision Scout"));
  console.log(chalk.gray(" ─────────────────────────────────────"));
  console.log(` ${chalk.gray("Project:")} ${chalk.white(project.name)}`);
  console.log(` ${chalk.gray("URL:")} ${chalk.white(targetUrl)}`);
  console.log(` ${chalk.gray("Max pages:")} ${chalk.white(maxPages)}`);
  console.log(chalk.gray(" ─────────────────────────────────────"));
  console.log(chalk.yellow("\n A browser window will open."));
  console.log(chalk.white(" Claude Vision will navigate the app"));
  console.log(chalk.white(" visually — like a human QA engineer.\n"));
  const { confirm } = await inquirer.prompt([
    {
      type: "confirm",
      name: "confirm",
      message: "Start vision-based discovery?",
      default: true,
    },
  ]);
  if (!confirm) {
    console.log(chalk.gray("\n Cancelled.\n"));
    process.exit(0);
  }

  const anthropic = new Anthropic({ apiKey: anthropicKey });
  const navigator = new VisionNavigator(anthropic, targetUrl, maxPages);
  console.log("\n " + chalk.hex("#6366F1")("Starting Vision Scout...") + "\n");
  try {
    // Stream navigator progress events to the terminal as they arrive.
    const result = await navigator.explore((event) => {
      switch (event.type) {
        case "page_start":
          process.stdout.write(`\n ${chalk.gray("→")} ${chalk.white(event.url ?? "")}\n`);
          break;
        case "vision_analysis":
          console.log(` ${chalk.gray("[Vision]")} ${event.message}`);
          break;
        case "action":
          console.log(` ${chalk.hex("#6366F1")("[Click]")} ${event.message}`);
          break;
        case "page_mapped":
          console.log(` ${chalk.green("✓")} ${chalk.gray("Mapped:")} ${event.message}`);
          break;
        case "error":
          console.log(` ${chalk.red("✗")} ${event.message}`);
          break;
      }
    });

    const uploadSpinner = ora("Uploading discovery to QAZen...").start();
    try {
      await uploadVisionDiscovery(config.apiUrl, config.cliToken, project.id, result);
    }
    catch (err) {
      // Settle the spinner, then let the outer handler report and exit.
      uploadSpinner.fail(chalk.red("Upload failed"));
      throw err;
    }
    uploadSpinner.succeed(chalk.green("Discovery uploaded to QAZen"));
    console.log("\n" + chalk.hex("#6366F1").bold(" Vision Scout complete\n"));
    console.log(chalk.gray(` Pages explored: ${chalk.white(result.pages.length)}`));
    console.log(chalk.gray(` Elements found: ${chalk.white(result.totalElements)}`));
    console.log(chalk.gray(` Actions taken: ${chalk.white(result.totalActions)}`));
    console.log(chalk.gray(` Screenshots: ${chalk.white(result.screenshots.length)}`));
    console.log(chalk.gray(`\n View in QAZen: ${config.apiUrl}/discovery\n`));
  }
  catch (err) {
    console.log(chalk.red("\n ✗ Vision Scout failed"));
    console.error(chalk.red(err instanceof Error ? err.message : String(err)));
    process.exit(1);
  }
}
package/dist/index.js CHANGED
@@ -4,6 +4,7 @@ import chalk from "chalk";
4
4
  import { loginCommand } from "./commands/login.js";
5
5
  import { recordCommand } from "./commands/record.js";
6
6
  import { statusCommand } from "./commands/status.js";
7
+ import { visionScoutCommand } from "./commands/visionScout.js";
7
8
  console.log(chalk.hex("#6366F1").bold("\n QAZen") + chalk.gray(" — Autonomous QA Platform\n"));
8
9
  program
9
10
  .name("qazen")
@@ -25,4 +26,12 @@ program
25
26
  .command("status")
26
27
  .description("Check CLI connection and list projects")
27
28
  .action(statusCommand);
29
+ program
30
+ .command("vision-scout")
31
+ .description("Visually explore an app using Claude Vision — works on any SPA or SSO-protected app")
32
+ .option("-p, --project <name>", "Project name")
33
+ .option("-u, --url <url>", "Override project URL")
34
+ .option("--max-pages <n>", "Max pages to explore", "20")
35
+ .option("--anthropic-key <key>", "Anthropic API key (or set ANTHROPIC_API_KEY)")
36
+ .action(visionScoutCommand);
28
37
  program.parse();
package/dist/lib/api.js CHANGED
@@ -32,3 +32,47 @@ export async function uploadSession(apiUrl, cliToken, projectId, storageState, d
32
32
  throw new Error(`Upload failed (HTTP ${res.status}): ${text}`);
33
33
  }
34
34
  }
35
/**
 * Convert a vision-discovery result into the app-map payload and POST it to
 * `/api/cli/projects/<projectId>/vision-discovery`.
 *
 * Page data originates from model-generated JSON, so a page may lack an
 * `elements` array and the result may lack `screenshots`; both are treated
 * as empty instead of throwing before the request is sent.
 *
 * @param {string} apiUrl - Base URL of the API (no trailing slash expected).
 * @param {string} cliToken - CLI token passed to `authHeaders`.
 * @param {string} projectId - Target project id.
 * @param {object} result - Discovery result (`pages`, `totalElements`,
 *   `primaryWorkflows`, `screenshots`).
 * @returns {Promise<void>}
 * @throws {Error} "Invalid CLI token" on HTTP 401, or
 *   "Upload failed (HTTP <status>): <body>" on any other non-2xx response.
 */
export async function uploadVisionDiscovery(apiUrl, cliToken, projectId, result) {
  const pages = Array.isArray(result.pages) ? result.pages : [];
  const screenshots = Array.isArray(result.screenshots) ? result.screenshots : [];

  // Elements of one type on a page; tolerates a missing/non-array list.
  const elementsOfType = (page, type) =>
    (Array.isArray(page.elements) ? page.elements : []).filter((e) => e.elementType === type);

  const appMap = {
    baseUrl: pages[0]?.url ?? "",
    pages: pages.map((p) => ({
      url: p.url,
      title: p.title,
      buttons: elementsOfType(p, "button").map((e) => e.description),
      forms: elementsOfType(p, "input").map((e) => ({
        name: e.description,
        type: "text",
        placeholder: e.action,
      })),
      headings: [p.visualDescription],
      links: elementsOfType(p, "link").map((e) => e.action),
      visualDescription: p.visualDescription,
      workflow: p.workflow,
      actionsTaken: p.actions_taken,
    })),
    api_endpoints: [],
    total_pages: pages.length,
    total_elements: result.totalElements,
    discoveryMethod: "vision",
    primaryWorkflows: result.primaryWorkflows,
    // Only the first five screenshots are included in the payload.
    screenshots: screenshots.slice(0, 5),
  };

  const res = await fetch(`${apiUrl}/api/cli/projects/${projectId}/vision-discovery`, {
    method: "POST",
    headers: { "Content-Type": "application/json", ...authHeaders(cliToken) },
    body: JSON.stringify({
      appMap,
      totalPages: pages.length,
      totalElements: result.totalElements,
      primaryWorkflows: result.primaryWorkflows,
    }),
  });
  if (res.status === 401) {
    throw new Error("Invalid CLI token");
  }
  if (!res.ok) {
    // The body is best-effort context only; fall back to "" if unreadable.
    const text = await res.text().catch(() => "");
    throw new Error(`Upload failed (HTTP ${res.status}): ${text}`);
  }
}
@@ -1,11 +1,19 @@
1
1
  import { chromium } from "playwright";
2
2
  import * as readline from "node:readline";
3
+ import chalk from "chalk";
4
+ const MAX_DOMAIN_RETRIES = 3;
3
5
  /**
4
6
  * Launch a real, headed Chromium and let the user complete any
5
7
  * login flow (SSO, ADFS, Okta, MFA, …). When the user presses
6
8
  * Enter in the terminal we snapshot the full storage state via
7
9
  * CDP, which includes HttpOnly cookies that JavaScript cannot
8
10
  * read — this is the reason the CLI exists.
11
+ *
12
+ * Before capturing, we verify the browser is actually on the
13
+ * target app's hostname (or a subdomain). If the user pressed
14
+ * Enter while still on an SSO provider page (the most common
15
+ * recorder failure), we warn and wait again. After 3 attempts
16
+ * we capture anyway so the user can never be permanently stuck.
9
17
  */
10
18
  export async function recordSession(url, onReady) {
11
19
  const browser = await chromium.launch({
@@ -31,7 +39,14 @@ export async function recordSession(url, onReady) {
31
39
  // Some apps redirect to a different origin immediately; that's fine — keep going.
32
40
  }
33
41
  onReady();
34
- await waitForEnter();
42
+ const targetHostname = normalizeHost(safeHostname(url));
43
+ if (!targetHostname) {
44
+ console.log("\n " + chalk.yellow("⚠ Could not parse target hostname — skipping domain check."));
45
+ await waitForEnterWithStatus(page);
46
+ }
47
+ else {
48
+ await captureLoop(page, targetHostname);
49
+ }
35
50
  const storageState = await context.storageState();
36
51
  const cookieCount = storageState.cookies?.length ?? 0;
37
52
  let domain = "unknown";
@@ -49,14 +64,92 @@ export async function recordSession(url, onReady) {
49
64
  await browser.close();
50
65
  return { storageState, cookieCount, domain };
51
66
  }
52
- function waitForEnter() {
53
- return new Promise((resolve) => {
67
/**
 * Canonical form of a hostname for comparisons: lower-cased, with a single
 * leading "www." label removed.
 */
function normalizeHost(h) {
  const lowered = h.toLowerCase();
  const prefix = "www.";
  return lowered.startsWith(prefix) ? lowered.slice(prefix.length) : lowered;
}
70
/**
 * Hostname of `u`, or "" when `u` cannot be parsed as an absolute URL.
 * Never throws.
 */
function safeHostname(u) {
  let parsed;
  try {
    parsed = new URL(u);
  }
  catch {
    return "";
  }
  return parsed.hostname;
}
78
/**
 * True when the two hostnames are equal, or when either one is a
 * dot-separated suffix of the other (the current host is a subdomain of the
 * target, or the target is a subdomain of the current host). An empty
 * hostname on either side never matches.
 */
function isOnTargetDomain(currentHostname, targetHostname) {
  if (!currentHostname || !targetHostname) {
    return false;
  }
  if (currentHostname === targetHostname) {
    return true;
  }
  const currentIsSubdomain = currentHostname.endsWith(`.${targetHostname}`);
  const targetIsSubdomain = targetHostname.endsWith(`.${currentHostname}`);
  return currentIsSubdomain || targetIsSubdomain;
}
85
/**
 * Ask the user to press [Enter] until the browser is on the target host.
 *
 * Each round waits for [Enter], then compares the page's normalized
 * hostname with `targetHostname`. A match returns immediately. Otherwise a
 * warning is printed and the user is prompted again; once
 * MAX_DOMAIN_RETRIES rounds have been used the function returns anyway
 * (after a notice) so the user can never get stuck here.
 */
async function captureLoop(page, targetHostname) {
  let attempt = 0;
  while (attempt < MAX_DOMAIN_RETRIES) {
    attempt += 1;
    await waitForEnterWithStatus(page);

    let observedHost = "";
    try {
      observedHost = normalizeHost(safeHostname(page.url()));
    }
    catch {
      /* page/context may have closed — treat as unknown */
    }

    const onTarget = Boolean(observedHost) && isOnTargetDomain(observedHost, targetHostname);
    if (onTarget) {
      return;
    }

    const outOfAttempts = attempt >= MAX_DOMAIN_RETRIES;
    if (outOfAttempts) {
      const headline = chalk.yellow(`⚠ Capturing anyway after ${attempt} attempts`);
      const detail = chalk.yellow(" — session may be incomplete.\n");
      console.log("\n " + headline + detail);
      return;
    }

    const whereLabel = chalk.yellow("⚠ Warning: browser is currently on");
    const whereValue = chalk.red(" " + (observedHost || "<unknown>"));
    console.log("\n " + whereLabel + whereValue);
    console.log(" " + chalk.yellow("Expected:") + chalk.green(" " + targetHostname));
    console.log(chalk.white("\n Please complete the full login flow and"));
    console.log(chalk.white(" make sure you can see the app dashboard,"));
    console.log(chalk.white(" then press [Enter] again.\n"));
  }
}
118
/**
 * Print the "press [Enter]" prompt and, every 5 seconds, print the hostname
 * the browser is currently on so the user can see when they have landed on
 * the target app.
 *
 * Resolves when a line is entered or the readline interface closes (e.g.
 * stdin reaches EOF). Rejects on a readline 'SIGINT' event or on an input
 * stream error.
 *
 * The promise is settled through a single `finish` function. This matters
 * because `rl.close()` emits 'close' synchronously: if teardown ran before
 * the outcome was recorded, the 'close' handler would resolve the promise
 * before a rejection path had a chance to reject it.
 *
 * NOTE(review): the interface is created with `terminal: false`; readline
 * may never emit 'SIGINT' in that mode — confirm whether Ctrl+C is expected
 * to reach this handler.
 *
 * @param {{ url(): string }} page - Page whose current URL is reported.
 * @returns {Promise<void>}
 */
function waitForEnterWithStatus(page) {
  return new Promise((resolve, reject) => {
    const rl = readline.createInterface({ input: process.stdin, output: process.stdout, terminal: false });
    process.stdout.write("\n " + chalk.yellow("→ Complete your login, then press [Enter]") + "\n");
    const ticker = setInterval(() => {
      try {
        const hostname = new URL(page.url()).hostname;
        process.stdout.write("\r " + chalk.gray("Browser is on: ") + chalk.cyan(hostname) + " ");
      }
      catch {
        /* ignore — page may be navigating */
      }
    }, 5_000);

    let settled = false;
    // Records the outcome first, then tears down, then settles the promise.
    const finish = (err) => {
      if (settled) {
        return;
      }
      settled = true;
      clearInterval(ticker);
      // Detach our listener so repeated prompts don't pile up on stdin.
      process.stdin.removeListener("error", onInputError);
      try {
        rl.close(); // emits 'close' synchronously; `settled` makes that a no-op
      }
      catch { /* already closed */ }
      process.stdout.write("\n");
      if (err) {
        reject(err);
      }
      else {
        resolve();
      }
    };
    const onInputError = (err) => finish(err);

    rl.once("line", () => finish());
    rl.once("close", () => finish());
    rl.once("SIGINT", () => finish(new Error("Recording cancelled (Ctrl+C)")));
    // Handle an 'error' emitted by the interface itself so it cannot surface
    // as an unhandled 'error' event.
    rl.on("error", onInputError);
    process.stdin.once("error", onInputError);
  });
}
@@ -0,0 +1,315 @@
1
+ import { chromium } from "playwright";
2
+ import * as fs from "node:fs";
3
+ import * as path from "node:path";
4
+ import * as os from "node:os";
5
+ const MODEL = "claude-opus-4-5";
6
/**
 * Remove Markdown code-fence markers from a model reply so the remaining
 * text can be handed to `JSON.parse`.
 *
 * Strips every "```" marker together with an optional language tag (any
 * case, e.g. `json`, `JSON`, `javascript`), trailing spaces/tabs and one
 * line break (LF or CRLF), then trims the result. A tag is only treated as
 * such when it is followed by whitespace or the end of the text; the one
 * exception is `json`/`jso` written directly against the payload, which is
 * still removed.
 *
 * @param {string} text - Raw reply text.
 * @returns {string} Text without fence markers; "" for non-string input.
 */
function stripCodeFences(text) {
  if (typeof text !== "string") {
    return "";
  }
  return text
    .replace(/```(?:[a-z0-9_+-]+(?=\s|$)|json?)?[ \t]*\r?\n?/gi, "")
    .trim();
}
9
/**
 * Text of the first block in `content` whose `type` is "text".
 *
 * Scans the whole array instead of only looking at index 0, so a reply that
 * starts with a non-text block still yields its text.
 *
 * @param {Array<{type: string, text?: string}>} content - Response content blocks.
 * @returns {string} The block's text, or "" when `content` is not an array
 *   or holds no text block.
 */
function firstTextBlock(content) {
  if (!Array.isArray(content)) {
    return "";
  }
  const block = content.find((b) => b?.type === "text" && typeof b.text === "string");
  return block ? block.text : "";
}
13
/**
 * Explores a web app by screenshotting each page, asking the vision model
 * which elements matter, clicking the high-priority navigation elements by
 * pixel coordinate, and queueing any same-site URLs it discovers.
 *
 * Progress is reported through the `onEvent` callback passed to `explore`
 * as `{ type, message, url? }` objects with `type` one of "page_start",
 * "vision_analysis", "action", "page_mapped" or "error".
 */
export class VisionNavigator {
  // Viewport used for the browser context; the click prompt tells the model
  // the screenshot has exactly these dimensions.
  static #VIEWPORT = Object.freeze({ width: 1280, height: 720 });

  anthropic;
  baseUrl;
  maxPages;
  visitedUrls = new Set();
  explorationQueue = [];
  pages = [];
  screenshots = [];
  totalActions = 0;
  browser;
  browserContext;
  page;
  screenshotDir;

  /**
   * @param {object} anthropic - Client exposing `messages.create(...)`.
   * @param {string} baseUrl - First URL to explore; also defines the site to stay on.
   * @param {number} [maxPages=20] - Stop once this many pages have been mapped.
   */
  constructor(anthropic, baseUrl, maxPages = 20) {
    this.anthropic = anthropic;
    this.baseUrl = baseUrl;
    this.maxPages = maxPages;
    // NOTE(review): the directory is created eagerly but nothing in this
    // class writes to it or removes it; callers reach it via getScreenshotDir().
    this.screenshotDir = fs.mkdtempSync(path.join(os.tmpdir(), "qazen-vision-"));
  }

  /**
   * Site key used to decide whether a host belongs to the app: the last two
   * labels of the hostname, or the full host for IP literals. This is a
   * heuristic; for suffixes such as "co.uk" it yields the suffix itself.
   */
  static #siteOf(hostname) {
    const host = hostname.toLowerCase();
    const isIpLiteral = /^\d{1,3}(\.\d{1,3}){3}$/.test(host) || host.includes(":");
    return isIpLiteral ? host : host.split(".").slice(-2).join(".");
  }

  /** True when `hostname` equals `site` or is a dot-separated subdomain of it. */
  static #belongsToSite(hostname, site) {
    const host = hostname.toLowerCase();
    return host === site || host.endsWith(`.${site}`);
  }

  /**
   * Launch a headed browser, explore breadth-first from `baseUrl` until the
   * queue is empty or `maxPages` pages are mapped, then infer workflows.
   *
   * @param {(event: object) => void} onEvent - Progress callback.
   * @returns {Promise<object>} `{ pages, totalElements, totalActions, screenshots, primaryWorkflows }`.
   */
  async explore(onEvent) {
    this.browser = await chromium.launch({
      headless: false,
      args: [
        "--no-sandbox",
        "--disable-infobars",
        "--start-maximized",
        "--disable-blink-features=AutomationControlled",
      ],
      ignoreDefaultArgs: ["--enable-automation"],
    });
    // Everything after launch sits inside try/finally so the browser is
    // closed even when context or page creation fails.
    try {
      this.browserContext = await this.browser.newContext({
        viewport: { ...VisionNavigator.#VIEWPORT },
        userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        ignoreHTTPSErrors: true,
      });
      await this.browserContext.addInitScript(() => {
        Object.defineProperty(navigator, "webdriver", { get: () => false });
      });
      this.page = await this.browserContext.newPage();
      this.explorationQueue.push({
        url: this.baseUrl,
        context: "Starting page — explore the main navigation",
      });
      while (this.explorationQueue.length > 0 && this.pages.length < this.maxPages) {
        const next = this.explorationQueue.shift();
        await this.explorePage(next.url, next.context, onEvent);
        // Short randomized pause between pages.
        await this.page.waitForTimeout(800 + Math.random() * 400);
      }
    }
    finally {
      await this.browser.close();
    }
    const primaryWorkflows = await this.inferWorkflows(onEvent);
    return {
      pages: this.pages,
      totalElements: this.pages.reduce((sum, p) => sum + p.elements.length, 0),
      totalActions: this.totalActions,
      screenshots: this.screenshots,
      primaryWorkflows,
    };
  }

  /**
   * Visit `url` once, map it, click up to four high-priority navigation
   * elements and queue newly found same-site URLs. Failures are reported as
   * "error" events rather than thrown.
   */
  async explorePage(url, context, onEvent) {
    if (this.visitedUrls.has(url)) {
      return;
    }
    this.visitedUrls.add(url);
    onEvent({ type: "page_start", url, message: url });
    try {
      await this.page.goto(url, { waitUntil: "domcontentloaded", timeout: 30000 });
      await this.page.waitForTimeout(2500);
      const currentUrl = this.page.url();
      const currentHostname = new URL(currentUrl).hostname;
      const baseSite = VisionNavigator.#siteOf(new URL(this.baseUrl).hostname);
      // Match on label boundaries: a substring test would also accept
      // look-alike hosts such as "notexample.com" or "example.com.evil.net".
      if (!VisionNavigator.#belongsToSite(currentHostname, baseSite)) {
        onEvent({ type: "error", message: `Redirected to ${currentHostname} — skipping` });
        return;
      }
      const screenshot = await this.takeScreenshot(url);
      const analysis = await this.analyzeScreenshot(screenshot, url, context, onEvent);
      if (!analysis) {
        return;
      }
      const pageMap = {
        url: currentUrl,
        title: await this.page.title(),
        visualDescription: analysis.pageDescription,
        elements: analysis.elements,
        screenshot,
        ...(analysis.workflow ? { workflow: analysis.workflow } : {}),
        actions_taken: [],
      };
      onEvent({
        type: "page_mapped",
        message: `${analysis.pageDescription} — ${analysis.elements.length} elements found`,
      });
      const navigationTypes = new Set(["link", "tab", "menu"]);
      const highPriority = analysis.elements
        .filter((e) => e.priority === "high" && navigationTypes.has(e.elementType))
        .slice(0, 4);
      for (const element of highPriority) {
        try {
          const newUrl = await this.clickElement(element, pageMap, onEvent);
          if (newUrl && !this.visitedUrls.has(newUrl)) {
            this.explorationQueue.push({
              url: newUrl,
              context: `Navigated from ${url} by clicking "${element.description}"`,
            });
          }
          // Return to the page being mapped before trying the next element.
          await this.page.goto(url, { waitUntil: "domcontentloaded", timeout: 20000 });
          await this.page.waitForTimeout(1500);
        }
        catch {
          // click failed — continue
        }
      }
      this.pages.push(pageMap);
      const links = analysis.elements
        .filter((e) => e.elementType === "link" && typeof e.action === "string" && e.action.startsWith("http"))
        .map((e) => e.action)
        .filter((href) => {
          try {
            return VisionNavigator.#belongsToSite(new URL(href).hostname, baseSite);
          }
          catch {
            return false;
          }
        });
      for (const link of links) {
        if (!this.visitedUrls.has(link)) {
          this.explorationQueue.push({ url: link, context: `Link found on ${url}` });
        }
      }
    }
    catch (err) {
      onEvent({
        type: "error",
        message: `Failed to explore ${url}: ${err instanceof Error ? err.message : String(err)}`,
      });
    }
  }

  /**
   * Ask the vision model to describe a screenshot.
   *
   * @returns {Promise<object|null>} Parsed analysis whose `elements` is
   *   always an array of objects, or null when the request or parsing fails
   *   (an "error" event is emitted in that case).
   */
  async analyzeScreenshot(screenshot, url, context, onEvent) {
    onEvent({ type: "vision_analysis", message: "Sending screenshot to Claude Vision..." });
    const prompt = [
      `You are a QA engineer exploring a web application.`,
      `Context: ${context}`,
      `Current URL: ${url}`,
      ``,
      `Analyse this screenshot and return ONLY valid JSON:`,
      `{`,
      `"pageDescription": "One sentence describing what this page/section does",`,
      `"workflow": "The business workflow this page is part of (e.g. 'User Authentication', 'Product Catalog', 'Order Management')",`,
      `"elements": [`,
      `{`,
      `"description": "Clear description of the element",`,
      `"elementType": "button|link|input|tab|menu|dropdown|other",`,
      `"action": "What clicking/interacting would do OR the href URL if it's a link",`,
      `"priority": "high|medium|low",`,
      `"visualLocation": "Brief description of where it is on screen"`,
      `}`,
      `]`,
      `}`,
      ``,
      `Focus on:`,
      `- Navigation items and menu links (high priority)`,
      `- Primary action buttons (high priority)`,
      `- Tab bars and section switchers (high priority)`,
      `- Form inputs (medium priority)`,
      `- Secondary buttons (medium priority)`,
      ``,
      `Ignore: decorative elements, footer links, external links.`,
      `Return ONLY the JSON object. No markdown, no explanation.`,
    ].join("\n");
    try {
      const response = await this.anthropic.messages.create({
        model: MODEL,
        max_tokens: 2000,
        messages: [
          {
            role: "user",
            content: [
              {
                type: "image",
                source: { type: "base64", media_type: "image/jpeg", data: screenshot },
              },
              { type: "text", text: prompt },
            ],
          },
        ],
      });
      const text = firstTextBlock(response.content);
      const parsed = JSON.parse(stripCodeFences(text));
      if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
        throw new Error("response was not a JSON object");
      }
      // The reply is model output: make sure callers can always iterate
      // `elements` and read properties off each entry.
      parsed.elements = Array.isArray(parsed.elements)
        ? parsed.elements.filter((e) => e !== null && typeof e === "object")
        : [];
      onEvent({
        type: "vision_analysis",
        message: parsed.pageDescription + (parsed.workflow ? ` (${parsed.workflow})` : ""),
      });
      return parsed;
    }
    catch (err) {
      onEvent({
        type: "error",
        message: `Vision analysis failed: ${err instanceof Error ? err.message : String(err)}`,
      });
      return null;
    }
  }

  /**
   * Ask the model for the element's pixel coordinates and click there.
   *
   * @returns {Promise<string|null>} The new URL when the click changed the
   *   page URL; null when the element was not found, the reply was unusable,
   *   or the URL did not change.
   */
  async clickElement(element, pageMap, onEvent) {
    onEvent({
      type: "action",
      message: `Clicking "${element.description}" (${element.visualLocation || element.elementType})`,
    });
    const urlBefore = this.page.url();
    const screenshot = await this.takeScreenshot("click-target");
    const { width, height } = VisionNavigator.#VIEWPORT;
    const prompt = [
      `Find "${element.description}" on screen.`,
      `Return ONLY JSON: {"x": number, "y": number, "found": boolean}`,
      `x and y are pixel coordinates (image is ${width}x${height}).`,
      `If not found, return {"x": 0, "y": 0, "found": false}`,
    ].join("\n");
    const coordResponse = await this.anthropic.messages.create({
      model: MODEL,
      max_tokens: 200,
      messages: [
        {
          role: "user",
          content: [
            {
              type: "image",
              source: { type: "base64", media_type: "image/jpeg", data: screenshot },
            },
            { type: "text", text: prompt },
          ],
        },
      ],
    });
    const coordText = firstTextBlock(coordResponse.content);
    let coords;
    try {
      coords = JSON.parse(stripCodeFences(coordText));
    }
    catch {
      return null;
    }
    // Only click when the reply carries usable numeric coordinates.
    if (!coords?.found || !Number.isFinite(coords.x) || !Number.isFinite(coords.y)) {
      return null;
    }
    await this.page.mouse.click(coords.x, coords.y);
    this.totalActions++;
    pageMap.actions_taken.push(`Clicked "${element.description}" at (${coords.x}, ${coords.y})`);
    await this.page.waitForTimeout(2000);
    const urlAfter = this.page.url();
    if (urlAfter !== urlBefore) {
      // takeScreenshot already appends to this.screenshots; pushing its
      // return value again would store the same image twice.
      await this.takeScreenshot(urlAfter);
      return urlAfter;
    }
    return null;
  }

  /**
   * Capture the visible viewport as a JPEG, record it in `this.screenshots`
   * and return it base64-encoded. The label argument is currently unused.
   */
  async takeScreenshot(_label) {
    const buffer = await this.page.screenshot({ type: "jpeg", quality: 70, fullPage: false });
    const base64 = buffer.toString("base64");
    this.screenshots.push(base64);
    return base64;
  }

  /**
   * Ask the model to name the primary workflows across the mapped pages.
   *
   * @returns {Promise<Array>} Workflow names; [] when no page was mapped,
   *   the request fails, or the reply is not a JSON array.
   */
  async inferWorkflows(onEvent) {
    if (this.pages.length === 0) {
      return [];
    }
    onEvent({
      type: "vision_analysis",
      message: "Inferring primary workflows from explored pages...",
    });
    const pagesSummary = this.pages
      .map((p) => `${p.url}: ${p.visualDescription} (${p.workflow || "unknown"})`)
      .join("\n");
    const prompt = [
      `Based on these explored pages, identify 3-5 primary user workflows.`,
      `Return ONLY a JSON array of workflow names.`,
      `Example: ["User Authentication", "Product Catalog Browse", "Order Management"]`,
      ``,
      `Pages explored:`,
      `${pagesSummary}`,
      ``,
      `Return ONLY the JSON array.`,
    ].join("\n");
    try {
      const response = await this.anthropic.messages.create({
        model: MODEL,
        max_tokens: 500,
        messages: [{ role: "user", content: prompt }],
      });
      const text = firstTextBlock(response.content) || "[]";
      const parsed = JSON.parse(stripCodeFences(text));
      return Array.isArray(parsed) ? parsed : [];
    }
    catch {
      // Workflow inference is best-effort; the discovery is still usable without it.
      return [];
    }
  }

  // Surface temp dir for callers that want to clean up
  getScreenshotDir() {
    return this.screenshotDir;
  }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "qazen-cli",
3
- "version": "0.1.2",
3
+ "version": "0.1.3",
4
4
  "description": "QAZen CLI — capture authenticated browser sessions for enterprise SSO testing",
5
5
  "license": "MIT",
6
6
  "author": "QAZen",
@@ -37,7 +37,8 @@
37
37
  "conf": "^13.0.0",
38
38
  "inquirer": "^9.2.0",
39
39
  "ora": "^8.0.0",
40
- "playwright": "^1.60.0"
40
+ "playwright": "^1.60.0",
41
+ "@anthropic-ai/sdk": "^0.24.0"
41
42
  },
42
43
  "devDependencies": {
43
44
  "@types/inquirer": "^9.0.7",