npm - web-tester-for-claude - Versions diffs - 0.4.0 - Mend

web-tester-for-claude 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

package/LICENSE +21 -0
package/README.md +651 -0
package/bin/web-tester.js +35 -0
package/package.json +64 -0
package/src/browser/attrs.ts +79 -0
package/src/browser/session.ts +139 -0
package/src/cli.ts +1488 -0
package/src/impact.ts +165 -0
package/src/init.ts +260 -0
package/src/inspector/capture.ts +293 -0
package/src/inspector/deep.ts +147 -0
package/src/inspector/packs.ts +98 -0
package/src/inspector/report.ts +667 -0
package/src/inspector/run.ts +544 -0
package/src/inspector/steps.ts +380 -0
package/src/inspector/summarise.ts +178 -0
package/src/inspector/verdict.ts +275 -0
package/src/journeys.ts +78 -0
package/src/kb.ts +84 -0
package/src/map/classify.ts +149 -0
package/src/map/crawl.ts +394 -0
package/src/map/generate.ts +253 -0
package/src/map/report.ts +112 -0
package/src/map/run.ts +219 -0
package/src/sitemap.ts +75 -0
package/src/sweep.ts +476 -0
package/src/templates/agent-section.md +77 -0
package/src/templates/dot-web-tester/impact-rules.json +36 -0
package/src/templates/dot-web-tester/instructions/getting-started.md +62 -0
package/src/templates/dot-web-tester/instructions/recipes.md +105 -0
package/src/templates/dot-web-tester/journeys/example-signup.json +17 -0
package/src/templates/dot-web-tester/urls-smoke.txt +19 -0
package/src/templates/skill.md +59 -0
package/src/util/log.ts +26 -0
package/src/util/paths.ts +141 -0
package/src/util/prompt.ts +50 -0
package/tsconfig.json +14 -0

package/src/inspector/steps.ts ADDED Viewed

@@ -0,0 +1,380 @@
+import type { Page } from "playwright";
+import { waitForAttrsReady } from "../browser/attrs";
+/**
+ * What a `wait` step blocks on. Each variant corresponds to one branch of
+ * the `wait` case in `executeStep`.
+ */
+export type WaitTarget =
+  // A Playwright load state, handed to `page.waitForLoadState`.
+  | { kind: "loadState"; state: "load" | "domcontentloaded" | "networkidle" }
+  // A fixed pause of `ms` milliseconds.
+  | { kind: "ms"; ms: number }
+  // Wait for the first element matching `selector`.
+  | { kind: "selector"; selector: string }
+  // Wait for the first element found by `page.getByText(text)`.
+  | { kind: "text"; text: string }
+  // Wait for the URL to change at least once, then stay unchanged for `quietMs`.
+  | { kind: "urlStable"; quietMs: number }
+  // Poll until the URL contains `substring`; give up after `timeoutMs`.
+  | { kind: "urlContains"; substring: string; timeoutMs: number };
+/**
+ * One action in a scripted run, discriminated by `kind`. Produced by
+ * `parseStep` and consumed by `executeStep`.
+ */
+export type Step =
+  // Navigate; non-absolute URLs are resolved against the current page's origin.
+  | { kind: "goto"; url: string }
+  // Click the first element matching `selector`.
+  | { kind: "click"; selector: string }
+  // Playwright `fill` on the first element matching `selector`.
+  | { kind: "fill"; selector: string; value: string }
+  // Set the value through the native prototype setter and dispatch
+  // input/change events; the element is looked up with `document.querySelector`.
+  | { kind: "reactFill"; selector: string; value: string }
+  // Press `key` on the first element matching `selector`.
+  | { kind: "press"; selector: string; key: string }
+  // Choose the option `value` in the first element matching `selector`.
+  | { kind: "select"; selector: string; value: string }
+  // Hover the first element matching `selector`.
+  | { kind: "hover"; selector: string }
+  // Scroll the window; `px` is only read when `target` is "px" (defaults to 0).
+  | { kind: "scroll"; target: "top" | "bottom" | "px"; px?: number }
+  // Block until `target` is satisfied.
+  | { kind: "wait"; target: WaitTarget }
+  // Call `waitForAttrsReady`, optionally with a custom cap in milliseconds.
+  | { kind: "settle"; timeoutMs?: number }
+  // Marker step: `executeStep` only returns a label for it.
+  | { kind: "screenshot"; name?: string; fullPage?: boolean }
+  // Evaluate `script` in the page as a raw expression string.
+  | { kind: "eval"; script: string }
+  // Reload the current page.
+  | { kind: "reload" };
+// Load-state names that `parseStep` accepts verbatim as `wait:<state>`.
+const LOAD_STATES = new Set(["load", "domcontentloaded", "networkidle"]);
+/**
+ * Separate a `<selector>=<value>` step argument into its two halves.
+ *
+ * The split point is the first `=` that is not inside `[...]`, `(...)`, or a
+ * quoted string, so `input[name=email]=me@x.io` and `:has-text("a=b")=c`
+ * keep their inner `=` as part of the selector.
+ *
+ * @returns the selector/value pair, or null when there is no top-level `=`
+ *   (no value was supplied).
+ */
+function splitSelectorValue(
+  arg: string
+): { selector: string; value: string } | null {
+  let nesting = 0;
+  let openQuote = "";
+  // Inside a quoted string: true when the previous character was a backslash
+  // that itself was not escaped, i.e. the current character is escaped.
+  let escaped = false;
+  for (let pos = 0; pos < arg.length; pos += 1) {
+    const ch = arg.charAt(pos);
+    if (openQuote !== "") {
+      // `\"` keeps the string open, `\\"` closes it: every backslash toggles
+      // the flag and any other character clears it.
+      if (escaped) escaped = false;
+      else if (ch === "\\") escaped = true;
+      else if (ch === openQuote) openQuote = "";
+      continue;
+    }
+    switch (ch) {
+      case '"':
+      case "'":
+        openQuote = ch;
+        break;
+      case "[":
+      case "(":
+        nesting += 1;
+        break;
+      case "]":
+      case ")":
+        // Stray closers never push the depth below zero.
+        if (nesting > 0) nesting -= 1;
+        break;
+      case "=":
+        if (nesting === 0)
+          return {
+            selector: arg.substring(0, pos),
+            value: arg.substring(pos + 1)
+          };
+        break;
+    }
+  }
+  return null;
+}
+/**
+ * Convert a step argument to a positive whole number, or throw `message`.
+ * Rejects fractions, zero, negatives, NaN and infinities.
+ */
+function toPositiveInt(raw: string, message: string): number {
+  const n = Number(raw);
+  if (!Number.isSafeInteger(n) || n <= 0) throw new Error(message);
+  return n;
+}
+/**
+ * Split a `<selector>=<value>` argument and reject it when the separator is
+ * missing or the selector half is blank, so a malformed step fails at parse
+ * time instead of later inside the browser.
+ */
+function requireSelectorValue(
+  type: string,
+  arg: string,
+  valueName: "value" | "key"
+): { selector: string; value: string } {
+  const parts = splitSelectorValue(arg);
+  if (!parts || !parts.selector.trim())
+    throw new Error(`\`${type}\` needs \`<selector>=<${valueName}>\``);
+  return parts;
+}
+/**
+ * Parse a `--step <type>:<arg>` shorthand into a typed step.
+ *
+ * Examples:
+ *   "settle"                                      → settle
+ *   "goto:/checkout"                              → goto
+ *   "wait:networkidle"                            → wait loadState
+ *   "wait:2000"                                   → wait ms
+ *   "wait:#cta"                                   → wait selector
+ *   "wait:text=Submit"                            → wait text
+ *   "click:button:has-text(\"Submit\")"           → click (note: selector may contain `:`)
+ *   "fill:input[name=email]=user@example.com"     → fill (first top-level `=` splits value)
+ *   "screenshot:after-submit"                     → screenshot
+ *   "screenshot"                                  → screenshot anonymous
+ *
+ * @param raw the shorthand exactly as given on the command line.
+ * @returns the typed step.
+ * @throws Error when the shorthand is empty, the step type is unknown, or a
+ *   required argument is missing or malformed.
+ */
+export function parseStep(raw: string): Step {
+  const trimmed = raw.trim();
+  if (!trimmed) throw new Error("empty --step");
+  // Only the first `:` separates type from argument; selectors may contain more.
+  const colonAt = trimmed.indexOf(":");
+  const type = colonAt === -1 ? trimmed : trimmed.slice(0, colonAt);
+  const arg = colonAt === -1 ? "" : trimmed.slice(colonAt + 1);
+  switch (type) {
+    case "settle": {
+      if (!arg) return { kind: "settle" };
+      return {
+        kind: "settle",
+        timeoutMs: toPositiveInt(arg, "`settle:<ms>` needs a positive integer")
+      };
+    }
+    case "reload":
+      return { kind: "reload" };
+    case "goto":
+      if (!arg) throw new Error("`goto` needs a URL");
+      return { kind: "goto", url: arg };
+    case "click":
+      if (!arg) throw new Error("`click` needs a selector");
+      return { kind: "click", selector: arg };
+    case "hover":
+      if (!arg) throw new Error("`hover` needs a selector");
+      return { kind: "hover", selector: arg };
+    case "fill": {
+      // An empty value is allowed: it clears the field.
+      const parts = requireSelectorValue("fill", arg, "value");
+      return { kind: "fill", selector: parts.selector, value: parts.value };
+    }
+    case "react-fill": {
+      const parts = requireSelectorValue("react-fill", arg, "value");
+      return { kind: "reactFill", selector: parts.selector, value: parts.value };
+    }
+    case "press": {
+      const parts = requireSelectorValue("press", arg, "key");
+      // Unlike fill/select, an empty key can never be pressed.
+      if (!parts.value) throw new Error("`press` needs `<selector>=<key>`");
+      return { kind: "press", selector: parts.selector, key: parts.value };
+    }
+    case "select": {
+      const parts = requireSelectorValue("select", arg, "value");
+      return { kind: "select", selector: parts.selector, value: parts.value };
+    }
+    case "scroll":
+      if (arg === "top") return { kind: "scroll", target: "top" };
+      if (arg === "bottom") return { kind: "scroll", target: "bottom" };
+      if (/^\d+$/.test(arg))
+        return { kind: "scroll", target: "px", px: Number(arg) };
+      throw new Error(`unknown scroll target: ${arg}`);
+    case "wait": {
+      if (!arg) throw new Error("`wait` needs a target");
+      if (LOAD_STATES.has(arg))
+        return {
+          kind: "wait",
+          target: {
+            kind: "loadState",
+            state: arg as "load" | "domcontentloaded" | "networkidle"
+          }
+        };
+      if (/^\d+$/.test(arg))
+        return { kind: "wait", target: { kind: "ms", ms: Number(arg) } };
+      if (arg.startsWith("text=")) {
+        const text = arg.slice("text=".length);
+        if (!text) throw new Error("`wait:text=<text>` needs non-empty text");
+        return { kind: "wait", target: { kind: "text", text } };
+      }
+      if (arg === "url-stable")
+        return { kind: "wait", target: { kind: "urlStable", quietMs: 250 } };
+      if (arg.startsWith("url-stable=")) {
+        const quietMs = toPositiveInt(
+          arg.slice("url-stable=".length),
+          "`wait:url-stable=<ms>` needs a positive integer"
+        );
+        return { kind: "wait", target: { kind: "urlStable", quietMs } };
+      }
+      if (arg.startsWith("url-contains:")) {
+        const rest = arg.slice("url-contains:".length);
+        // Optional trailing `@<timeoutMs>` overrides the 10s default. The
+        // separator is `@` (not `=`) so the substring can itself contain `=`
+        // — e.g. `wait:url-contains:tab=details@30000`. An `@` that is not
+        // followed by digits only is treated as part of the substring.
+        const at = rest.lastIndexOf("@");
+        const hasTimeout = at !== -1 && /^\d+$/.test(rest.slice(at + 1));
+        const substring = hasTimeout ? rest.slice(0, at) : rest;
+        // An empty substring is contained in every URL, so the wait would
+        // pass immediately without checking anything.
+        if (!substring)
+          throw new Error(
+            "`wait:url-contains:<sub>` needs a non-empty substring"
+          );
+        if (!hasTimeout)
+          return {
+            kind: "wait",
+            target: { kind: "urlContains", substring, timeoutMs: 10_000 }
+          };
+        const timeoutMs = Number(rest.slice(at + 1));
+        if (!Number.isFinite(timeoutMs) || timeoutMs <= 0)
+          throw new Error(
+            "`wait:url-contains:<sub>@<ms>` timeout must be a positive integer"
+          );
+        return {
+          kind: "wait",
+          target: { kind: "urlContains", substring, timeoutMs }
+        };
+      }
+      // Anything else is taken to be a selector.
+      return { kind: "wait", target: { kind: "selector", selector: arg } };
+    }
+    case "screenshot":
+      return { kind: "screenshot", name: arg || undefined, fullPage: false };
+    case "screenshot-full":
+      return { kind: "screenshot", name: arg || undefined, fullPage: true };
+    case "eval":
+      if (!arg) throw new Error("`eval` needs a JS expression");
+      return { kind: "eval", script: arg };
+    default:
+      throw new Error(
+        `unknown step type "${type}". See \`pnpm web-tester help\`.`
+      );
+  }
+}
+/**
+ * Read a millisecond timeout from the environment variable `name`.
+ * Falls back to `fallbackMs` when the variable is unset, blank, not a
+ * number, or not strictly positive — `Number("")` is 0 and `Number("abc")`
+ * is NaN, and either would make deadline arithmetic expire immediately.
+ */
+function readTimeoutMs(name: string, fallbackMs: number): number {
+  const raw = process.env[name];
+  if (raw === undefined || raw.trim() === "") return fallbackMs;
+  const ms = Number(raw);
+  return Number.isFinite(ms) && ms > 0 ? ms : fallbackMs;
+}
+// Per-step timeout; override with STEP_TIMEOUT_MS.
+const DEFAULT_TIMEOUT_MS = readTimeoutMs("STEP_TIMEOUT_MS", 15_000);
+// Cap for `settle` steps without an explicit timeout; override with SETTLE_TIMEOUT_MS.
+const DEFAULT_SETTLE_MS = readTimeoutMs("SETTLE_TIMEOUT_MS", 30_000);
+/**
+ * Resolve the URL for a `goto` step. Absolute `http://` / `https://` URLs
+ * are used verbatim; anything else is resolved against the origin of the
+ * page's current URL.
+ *
+ * @throws Error when the URL cannot be resolved, e.g. because the current
+ *   page has no usable origin.
+ */
+function resolveGotoTarget(url: string, currentUrl: string): string {
+  // Require the full scheme prefix so relative paths that merely start with
+  // "http" (e.g. `http-status/404`) are still resolved against the origin.
+  if (/^https?:\/\//i.test(url)) return url;
+  try {
+    return new URL(url, new URL(currentUrl).origin).toString();
+  } catch {
+    throw new Error(
+      `goto: cannot resolve "${url}" against the current page URL "${currentUrl}" — use an absolute URL`
+    );
+  }
+}
+/**
+ * Wait for the URL to change at least once, then hold steady for `quietMs`.
+ * Requiring an observed change (not just "stable from the start") avoids a
+ * false pass when the action under test hasn't written to the URL yet.
+ * Useful after a debounced router.replace.
+ *
+ * @throws Error when the step timeout elapses first.
+ */
+async function waitForUrlStable(
+  page: Page,
+  quietMs: number
+): Promise<{ label: string }> {
+  const POLL_MS = 50;
+  const deadline = Date.now() + DEFAULT_TIMEOUT_MS;
+  const initial = page.url();
+  let last = initial;
+  let stableSince = Date.now();
+  let hasChanged = false;
+  while (Date.now() < deadline) {
+    await page.waitForTimeout(POLL_MS);
+    const current = page.url();
+    if (current !== last) {
+      last = current;
+      stableSince = Date.now();
+      if (current !== initial) hasChanged = true;
+    } else if (hasChanged && Date.now() - stableSince >= quietMs) {
+      return { label: `wait url-stable (${quietMs}ms quiet)` };
+    }
+  }
+  throw new Error(
+    hasChanged
+      ? `wait:url-stable timed out — URL kept changing past ${DEFAULT_TIMEOUT_MS}ms`
+      : `wait:url-stable timed out — URL never changed from "${initial}" within ${DEFAULT_TIMEOUT_MS}ms. If the action under test doesn't write to the URL, use \`wait:<ms>\` or \`wait:url-contains:<sub>\` instead.`
+  );
+}
+/**
+ * Deterministic alternative to url-stable: poll until the URL contains
+ * `substring`. Use when an action pushes a specific param or navigates to a
+ * known path.
+ *
+ * @throws Error when `timeoutMs` elapses first.
+ */
+async function waitForUrlContains(
+  page: Page,
+  substring: string,
+  timeoutMs: number
+): Promise<{ label: string }> {
+  const POLL_MS = 100;
+  const started = Date.now();
+  const deadline = started + timeoutMs;
+  while (Date.now() < deadline) {
+    if (page.url().includes(substring))
+      return {
+        label: `wait url-contains "${substring}" (${Date.now() - started}ms)`
+      };
+    await page.waitForTimeout(POLL_MS);
+  }
+  throw new Error(
+    `wait:url-contains "${substring}" timed out after ${timeoutMs}ms (final URL: ${page.url()})`
+  );
+}
+/**
+ * Execute a single step against the page.
+ *
+ * @param step the step to run.
+ * @param page the Playwright page to run it on.
+ * @returns a short label describing what happened and, for `eval` and
+ *   `reactFill` steps, an `evalResult`.
+ * @throws Error when the underlying Playwright call rejects, a wait times
+ *   out, `react-fill` cannot find or set its element, or the step kind is
+ *   not recognised.
+ */
+export async function executeStep(
+  step: Step,
+  page: Page
+): Promise<{ label: string; evalResult?: unknown }> {
+  switch (step.kind) {
+    case "goto": {
+      const target = resolveGotoTarget(step.url, page.url());
+      const response = await page.goto(target, {
+        waitUntil: "domcontentloaded",
+        timeout: DEFAULT_TIMEOUT_MS
+      });
+      return { label: `goto ${target} (${response?.status() ?? "?"})` };
+    }
+    case "reload":
+      // Same timeout as `goto`, so STEP_TIMEOUT_MS governs both navigations.
+      await page.reload({
+        waitUntil: "domcontentloaded",
+        timeout: DEFAULT_TIMEOUT_MS
+      });
+      return { label: "reload" };
+    case "click":
+      await page.locator(step.selector).first().click({ timeout: DEFAULT_TIMEOUT_MS });
+      return { label: `click ${step.selector}` };
+    case "hover":
+      await page.locator(step.selector).first().hover({ timeout: DEFAULT_TIMEOUT_MS });
+      return { label: `hover ${step.selector}` };
+    case "fill":
+      await page
+        .locator(step.selector)
+        .first()
+        .fill(step.value, { timeout: DEFAULT_TIMEOUT_MS });
+      return { label: `fill ${step.selector} = ${step.value}` };
+    case "reactFill": {
+      // React controlled inputs reset to their state value when you mutate
+      // the DOM value directly, so Playwright's `fill` doesn't stick. Call
+      // the native value setter on the prototype, then dispatch input/change
+      // so React's synthetic event system picks up the change.
+      // NOTE: the element is found with `document.querySelector`, so only
+      // plain CSS selectors work here, and there is no waiting — a missing
+      // element fails immediately.
+      const result = await page.evaluate(
+        ({ selector, value }) => {
+          const el = document.querySelector(
+            selector
+          ) as HTMLInputElement | HTMLTextAreaElement | null;
+          if (!el) return { ok: false, reason: `selector not found: ${selector}` };
+          const proto =
+            el.tagName === "TEXTAREA"
+              ? window.HTMLTextAreaElement.prototype
+              : window.HTMLInputElement.prototype;
+          const desc = Object.getOwnPropertyDescriptor(proto, "value");
+          if (!desc?.set)
+            return { ok: false, reason: "no value setter on prototype" };
+          desc.set.call(el, value);
+          el.dispatchEvent(new Event("input", { bubbles: true }));
+          el.dispatchEvent(new Event("change", { bubbles: true }));
+          el.blur();
+          return { ok: true, finalDomValue: el.value };
+        },
+        { selector: step.selector, value: step.value }
+      );
+      if (!result.ok)
+        throw new Error(`react-fill failed: ${result.reason ?? "unknown"}`);
+      return {
+        label: `react-fill ${step.selector} = ${step.value}`,
+        evalResult: result
+      };
+    }
+    case "press":
+      await page
+        .locator(step.selector)
+        .first()
+        .press(step.key, { timeout: DEFAULT_TIMEOUT_MS });
+      return { label: `press ${step.key} on ${step.selector}` };
+    case "select":
+      await page
+        .locator(step.selector)
+        .first()
+        .selectOption(step.value, { timeout: DEFAULT_TIMEOUT_MS });
+      return { label: `select ${step.value} in ${step.selector}` };
+    case "scroll": {
+      if (step.target === "top") {
+        await page.evaluate(() => window.scrollTo(0, 0));
+        return { label: "scroll top" };
+      }
+      if (step.target === "bottom") {
+        await page.evaluate(() =>
+          window.scrollTo(0, document.body.scrollHeight)
+        );
+        return { label: "scroll bottom" };
+      }
+      // Resolve the default once so the label reports the offset actually used.
+      const px = step.px ?? 0;
+      await page.evaluate((y) => window.scrollTo(0, y), px);
+      return { label: `scroll ${px}px` };
+    }
+    case "wait": {
+      const t = step.target;
+      if (t.kind === "loadState") {
+        await page.waitForLoadState(t.state, { timeout: DEFAULT_TIMEOUT_MS });
+        return { label: `wait load=${t.state}` };
+      }
+      if (t.kind === "ms") {
+        await page.waitForTimeout(t.ms);
+        return { label: `wait ${t.ms}ms` };
+      }
+      if (t.kind === "selector") {
+        await page.locator(t.selector).first().waitFor({ timeout: DEFAULT_TIMEOUT_MS });
+        return { label: `wait selector ${t.selector}` };
+      }
+      if (t.kind === "urlStable") return waitForUrlStable(page, t.quietMs);
+      if (t.kind === "urlContains")
+        return waitForUrlContains(page, t.substring, t.timeoutMs);
+      // Only the `text` variant remains.
+      await page.getByText(t.text).first().waitFor({ timeout: DEFAULT_TIMEOUT_MS });
+      return { label: `wait text="${t.text}"` };
+    }
+    case "settle":
+      await waitForAttrsReady(page, step.timeoutMs ?? DEFAULT_SETTLE_MS);
+      return {
+        label:
+          step.timeoutMs !== undefined
+            ? `settle (attrs ready, ${step.timeoutMs}ms cap)`
+            : "settle (attrs ready)"
+      };
+    case "screenshot":
+      // Screenshot is captured by the runner, which also names it. The runner
+      // looks at the step kind directly — we just pass through.
+      return {
+        label: step.fullPage
+          ? `screenshot-full ${step.name ?? ""}`
+          : `screenshot ${step.name ?? ""}`
+      };
+    case "eval": {
+      // Pass as a raw expression string so we don't go through esbuild's
+      // function compilation (which would inject `__name` helpers).
+      const value = await page.evaluate(step.script);
+      return { label: `eval ${step.script.slice(0, 40)}`, evalResult: value };
+    }
+    default: {
+      // Unreachable for well-typed callers. Guards against a step object
+      // built from untyped data, which would otherwise make this function
+      // resolve to `undefined` despite its declared return type.
+      const kind = (step as { kind?: unknown }).kind;
+      throw new Error(`unknown step kind: ${String(kind)}`);
+    }
+  }
+}

package/src/inspector/summarise.ts ADDED Viewed

@@ -0,0 +1,178 @@
+import { spawnSync } from "node:child_process";
+import { existsSync, readdirSync } from "node:fs";
+import { homedir } from "node:os";
+import { resolve } from "node:path";
+import { log } from "../util/log";
+import type { InspectResult } from "./run";
+/**
+ * Locate the `claude` binary. Order:
+ *   1. `which claude` (anything on PATH)
+ *   2. VSCode extension bundled binary (anthropic.claude-code-*), newest
+ *      version first
+ *   3. Anthropic desktop app (macOS)
+ * Returns the absolute path, or null if not found. Never throws: an
+ * unreadable extensions directory is treated as "nothing there".
+ */
+function findClaudeBinary(): string | null {
+  // When `which` itself cannot be spawned, `status` is null and we fall through.
+  const which = spawnSync("which", ["claude"], { encoding: "utf-8" });
+  if (which.status === 0) {
+    // Keep only the first line in case several paths are printed.
+    const onPath = which.stdout.trim().split("\n")[0]?.trim();
+    if (onPath) return onPath;
+  }
+  const extRoot = resolve(homedir(), ".vscode/extensions");
+  let extDirs: string[] = [];
+  try {
+    if (existsSync(extRoot)) extDirs = readdirSync(extRoot);
+  } catch {
+    // Unreadable directory: continue with the remaining candidates.
+  }
+  // Numeric-aware ordering so `…-1.10.0` ranks above `…-1.9.0`; a plain
+  // lexicographic sort would prefer the older one.
+  const newestFirst = extDirs
+    .filter((d) => d.startsWith("anthropic.claude-code-"))
+    .sort((a, b) => b.localeCompare(a, undefined, { numeric: true }));
+  for (const dir of newestFirst) {
+    const candidate = resolve(extRoot, dir, "resources/native-binary/claude");
+    if (existsSync(candidate)) return candidate;
+  }
+  const desktopCandidate = "/Applications/Claude.app/Contents/Resources/bin/claude";
+  if (existsSync(desktopCandidate)) return desktopCandidate;
+  return null;
+}
+/**
+ * Bullet list of at most six console entries of type `error`. Each bullet
+ * shows only the first line of the message, cut at 200 characters. Returns a
+ * placeholder line when there are none.
+ */
+function summariseConsole(result: InspectResult): string {
+  const bullets: string[] = [];
+  for (const entry of result.console.entries) {
+    if (entry.type !== "error") continue;
+    if (bullets.length === 6) break;
+    const [firstLine] = entry.text.split("\n");
+    bullets.push(`  - ${firstLine?.slice(0, 200)}`);
+  }
+  return bullets.length > 0 ? bullets.join("\n") : "  (no console errors)";
+}
+/**
+ * Bullet list of at most six network entries that either carry a
+ * `failureText` or have a status of 400 or above. Entries with a non-empty
+ * `failureText` show that text; the rest show the status code.
+ */
+function summariseNetworkFailures(result: InspectResult): string {
+  const lines: string[] = [];
+  for (const req of result.network.entries) {
+    const errored = req.failureText !== null;
+    const badStatus = req.status !== null && req.status >= 400;
+    if (!errored && !badStatus) continue;
+    if (lines.length === 6) break;
+    if (req.failureText) {
+      lines.push(`  - ${req.method} ${req.url} :: ${req.failureText}`);
+    } else {
+      lines.push(`  - ${req.status} ${req.method} ${req.url}`);
+    }
+  }
+  if (lines.length === 0) return "  (no failed/4xx requests)";
+  return lines.join("\n");
+}
+/**
+ * Bullet list of uncaught page errors, grouped by the first line of their
+ * message. At most six groups are shown, in first-seen order, with a `×n`
+ * suffix on groups that occurred more than once.
+ */
+function summarisePageErrors(result: InspectResult): string {
+  const errors = result.pageErrors;
+  if (errors.length === 0) return "  (no uncaught JS errors)";
+  const counts = new Map<string, number>();
+  for (const { message } of errors) {
+    const headline = message.split("\n")[0] ?? message;
+    counts.set(headline, (counts.get(headline) ?? 0) + 1);
+  }
+  const lines: string[] = [];
+  for (const [headline, seen] of counts) {
+    if (lines.length === 6) break;
+    const suffix = seen > 1 ? ` (×${seen})` : "";
+    lines.push(`  - ${headline}${suffix}`);
+  }
+  return lines.join("\n");
+}
+/**
+ * One line per executed step: ✓/✗, index, label, duration and — when the
+ * step produced an `evalResult` — that value rendered as JSON, cut at 120
+ * characters.
+ *
+ * Rendering never throws. Bigints are shown as `"<digits>n"`, and values
+ * `JSON.stringify` cannot encode (circular structures, functions, symbols)
+ * fall back to `String(value)`.
+ */
+function summariseSteps(result: InspectResult): string {
+  if (!result.steps.length) return "  (no steps — single-page snapshot)";
+  return result.steps
+    .map((s) => {
+      const tag = s.ok ? "✓" : "✗";
+      let evalStr = "";
+      if (s.evalResult !== undefined) {
+        let rendered: string;
+        try {
+          // JSON.stringify throws on bigint and returns undefined for
+          // functions/symbols, so handle both before slicing.
+          rendered =
+            JSON.stringify(s.evalResult, (_key, value: unknown) =>
+              typeof value === "bigint" ? `${value}n` : value
+            ) ?? String(s.evalResult);
+        } catch {
+          rendered = String(s.evalResult);
+        }
+        evalStr = ` -> ${rendered.slice(0, 120)}`;
+      }
+      return `  ${tag} ${s.index}. ${s.label} (${s.durationMs}ms)${evalStr}`;
+    })
+    .join("\n");
+}
+/**
+ * Comma-separated `name=label` list of the first 20 attrs. An attr with an
+ * empty label shows its raw value instead.
+ */
+function summariseAttrs(
+  attrs: { name: string; value: string; label: string }[]
+): string {
+  if (attrs.length === 0) return "(none captured)";
+  const parts: string[] = [];
+  for (const { name, value, label } of attrs.slice(0, 20)) {
+    parts.push(`${name}=${label || value}`);
+  }
+  return parts.join(", ");
+}
+/**
+ * Assemble the plain-text prompt that `summariseRun` pipes to the claude
+ * CLI. It contains the run context (URLs, title, duration, verdict), the
+ * outputs of the `summarise*` helpers for steps, page errors, console
+ * errors, network failures and final attrs, and the formatting instructions
+ * for the summary. The template text is sent as-is, so its wording and
+ * whitespace are part of the behaviour.
+ */
+function buildPrompt(result: InspectResult): string {
+  return `You are summarising the result of a web-tester run against the developer's web app for a developer who is about to open the HTML report. Be concise and useful.
+# Run context
+- URL: ${result.requestedUrl}
+- Final URL: ${result.finalUrl}
+- Page title: ${result.title || "(unknown)"}
+- Duration: ${result.durationMs}ms
+- Verdict: ${result.ok ? "all steps executed" : `${result.failedSteps} step(s) failed`}
+# Steps (✓=ok, ✗=error during step)
+${summariseSteps(result)}
+# Page errors (uncaught JS)
+${summarisePageErrors(result)}
+# Console errors
+${summariseConsole(result)}
+# Failed / 4xx network requests
+${summariseNetworkFailures(result)}
+# Final on-page attrs
+${summariseAttrs(result.final.attrs)}
+---
+Write a short summary aimed at a developer scanning the report. Format:
+**TL;DR:** one sentence — what the run did and whether it looks healthy.
+**Notable findings:** 2–4 bullets, each one short line. Focus on signal: real failures, surprising state, unexpected URLs, missing attrs, suspicious network calls. Skip hydration warnings unless they are the only issue. Skip generic noise.
+**Suggested next look:** 1 bullet if there is anything specific in the report worth zooming into; omit otherwise.
+Output only the summary in markdown — no preamble, no closing remarks, no headers other than the three bolded labels above.`;
+}
+// Upper bound on how long the claude CLI may take before it is abandoned.
+const SUMMARY_TIMEOUT_MS = 60_000;
+/**
+ * Generate a short Sonnet-written summary of the run. Returns null if
+ * summaries are disabled, the claude CLI isn't available, the prompt cannot
+ * be built, or the CLI errors out, times out, or prints nothing — callers
+ * should treat a null as "no summary, render the report without one".
+ *
+ * The CLI is run synchronously, so this blocks until it exits or
+ * `SUMMARY_TIMEOUT_MS` elapses.
+ *
+ * @param result the finished inspection to summarise.
+ * @param opts `enabled: false` skips the summary without any lookup.
+ * @returns the trimmed summary text, or null.
+ */
+export async function summariseRun(
+  result: InspectResult,
+  opts: { enabled: boolean }
+): Promise<string | null> {
+  if (!opts.enabled) return null;
+  let bin: string | null;
+  let prompt: string;
+  try {
+    // The summary is optional: a failure while locating the binary or while
+    // rendering the prompt must not take the whole report down.
+    bin = findClaudeBinary();
+    if (!bin) {
+      log.dim("  summary: claude CLI not found, skipping");
+      return null;
+    }
+    prompt = buildPrompt(result);
+  } catch (err: unknown) {
+    const reason = err instanceof Error ? err.message : String(err);
+    log.dim(`  summary: ${reason}, skipping`);
+    return null;
+  }
+  log.dim("  summary: asking sonnet to summarise…");
+  const started = Date.now();
+  const proc = spawnSync(
+    bin,
+    ["-p", "--model", "claude-sonnet-4-6", "--output-format", "text"],
+    {
+      input: prompt,
+      encoding: "utf-8",
+      timeout: SUMMARY_TIMEOUT_MS,
+      maxBuffer: 4 * 1024 * 1024
+    }
+  );
+  if (proc.error) {
+    log.dim(`  summary: ${proc.error.message}, skipping`);
+    return null;
+  }
+  if (proc.status !== 0) {
+    // Surface the first line of stderr so the reason for the failure is visible.
+    const detail = proc.stderr?.trim().split("\n")[0]?.slice(0, 200);
+    log.dim(
+      `  summary: claude exited ${proc.status}${detail ? ` (${detail})` : ""}, skipping`
+    );
+    return null;
+  }
+  const text = proc.stdout.trim();
+  if (!text) return null;
+  log.dim(`  summary: done in ${Date.now() - started}ms`);
+  return text;
+}