npm - web-tester-for-claude - Versions diffs - 0.4.0 - Mend

web-tester-for-claude 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/LICENSE +21 -0
package/README.md +651 -0
package/bin/web-tester.js +35 -0
package/package.json +64 -0
package/src/browser/attrs.ts +79 -0
package/src/browser/session.ts +139 -0
package/src/cli.ts +1488 -0
package/src/impact.ts +165 -0
package/src/init.ts +260 -0
package/src/inspector/capture.ts +293 -0
package/src/inspector/deep.ts +147 -0
package/src/inspector/packs.ts +98 -0
package/src/inspector/report.ts +667 -0
package/src/inspector/run.ts +544 -0
package/src/inspector/steps.ts +380 -0
package/src/inspector/summarise.ts +178 -0
package/src/inspector/verdict.ts +275 -0
package/src/journeys.ts +78 -0
package/src/kb.ts +84 -0
package/src/map/classify.ts +149 -0
package/src/map/crawl.ts +394 -0
package/src/map/generate.ts +253 -0
package/src/map/report.ts +112 -0
package/src/map/run.ts +219 -0
package/src/sitemap.ts +75 -0
package/src/sweep.ts +476 -0
package/src/templates/agent-section.md +77 -0
package/src/templates/dot-web-tester/impact-rules.json +36 -0
package/src/templates/dot-web-tester/instructions/getting-started.md +62 -0
package/src/templates/dot-web-tester/instructions/recipes.md +105 -0
package/src/templates/dot-web-tester/journeys/example-signup.json +17 -0
package/src/templates/dot-web-tester/urls-smoke.txt +19 -0
package/src/templates/skill.md +59 -0
package/src/util/log.ts +26 -0
package/src/util/paths.ts +141 -0
package/src/util/prompt.ts +50 -0
package/tsconfig.json +14 -0

package/src/sweep.ts ADDED Viewed

@@ -0,0 +1,476 @@
+import { existsSync, mkdirSync, writeFileSync } from "node:fs";
+import { resolve } from "node:path";
+import {
+  chromium,
+  type Browser,
+  type BrowserContext
+} from "playwright";
+import { readUiAttributes } from "./browser/attrs";
+import {
+  configureContext,
+  DEFAULT_SESSION_UA,
+  DEFAULT_SESSION_VIEWPORT
+} from "./browser/session";
+import { attachCapture } from "./inspector/capture";
+import {
+  computeVerdict,
+  evaluateExpectations,
+  type Expectation,
+  type ExpectationResult,
+  type FailOnKind
+} from "./inspector/verdict";
+import { log } from "./util/log";
+import { newRunId, RUNS_DIR, SESSION_STATE_PATH } from "./util/paths";
+/**
+ * One URL in a sweep, plus the specific expectations to evaluate on it.
+ *
+ * `runSweep` receives a list of these in `SweepOptions.urls`; the `path` is
+ * also what the resulting `SweepEntry.url` reports back, unchanged.
+ */
+export type SweepUrl = {
+  /** Path or absolute URL — resolved against `baseUrl` if relative. */
+  path: string;
+  /** Per-URL expectations (already merged with any global packs). */
+  expectations: Expectation[];
+  /** Pack names this URL inherits from (for the aggregate report). */
+  packs: string[];
+};
+/** Input to `runSweep`: what to visit, how many workers to use, and what counts as a failure. */
+export type SweepOptions = {
+  /**
+   * Base that relative `SweepUrl.path` values are resolved against. Also
+   * handed to `configureContext` for every worker context.
+   */
+  baseUrl: string;
+  /** URLs to visit. The report lists entries in this order. */
+  urls: SweepUrl[];
+  /**
+   * Upper bound on parallel workers (one browser context each). The number
+   * actually started never exceeds `urls.length`.
+   */
+  concurrency: number;
+  /** Failure kinds forwarded to `computeVerdict` for every URL. */
+  failOn: FailOnKind[];
+  /** Timeout in milliseconds passed to `page.goto` for the initial navigation. */
+  gotoTimeoutMs: number;
+  /**
+   * Load `~/.web-tester/session.json` into each worker context when the
+   * file exists. Defaults to true; pass `false` (CLI `--no-session`) to
+   * force an anonymous sweep — useful when verifying a logged-out flow
+   * regression.
+   */
+  loadStorageState?: boolean;
+};
+/** Per-URL result row of a sweep, as produced by `inspectOne` and listed in `SweepReport.entries`. */
+export type SweepEntry = {
+  /** The `SweepUrl.path` exactly as it was given in the input. */
+  url: string;
+  /** `page.url()` after navigation (differs from the requested URL after redirects). */
+  finalUrl: string;
+  /** HTTP status of the navigation response, or `null` when navigation produced no response. */
+  status: number | null;
+  /** Document title; empty string when it could not be read. */
+  title: string;
+  /** Wall-clock time spent on this URL, in milliseconds. */
+  durationMs: number;
+  /** Verdict from `computeVerdict` for this URL. */
+  ok: boolean;
+  /** Verdict triggers from `computeVerdict`; shown next to the URL in the log and the HTML report. */
+  triggers: string[];
+  /** Results of the URL's expectations; empty when none were configured or evaluation did not complete. */
+  expectations: ExpectationResult[];
+  /** Number of captured page errors. */
+  pageErrors: number;
+  /** Number of captured console entries whose type is `"error"`. */
+  consoleErrors: number;
+  /** Number of captured network entries with a 400–499 status. */
+  http4xx: number;
+  /** Number of captured network entries with a status of 500 or above. */
+  http5xx: number;
+  /** Relative path under the sweep dir to the viewport screenshot (`<slug>.png`). */
+  screenshot: string;
+  /** Relative path under the sweep dir to a per-URL minimal JSON. */
+  detailJson: string;
+};
+/** Aggregate result of one sweep; returned by `runSweep` and serialised to `sweep.json`. */
+export type SweepReport = {
+  /** `sweep-<run id>`; also the name of the sweep's directory under `RUNS_DIR`. */
+  sweepId: string;
+  /** ISO-8601 timestamp taken when the sweep started. */
+  startedAt: string;
+  /** Wall-clock duration of the whole sweep, in milliseconds. */
+  durationMs: number;
+  /** The `baseUrl` the sweep was run with. */
+  baseUrl: string;
+  /** The concurrency the sweep was run with. */
+  concurrency: number;
+  /** Number of entries in `entries`. */
+  total: number;
+  /** Number of entries whose `ok` is true. */
+  passed: number;
+  /** `total - passed`. */
+  failed: number;
+  /** The failure kinds the sweep was run with. */
+  failOn: FailOnKind[];
+  /** Distinct pack names referenced anywhere in the input URL set. */
+  packs: string[];
+  /** One row per inspected URL, in input order. */
+  entries: SweepEntry[];
+};
+/**
+ * Builds a filesystem-friendly base name for one swept URL.
+ *
+ * The result is `<index>-<slug>`: `index` zero-padded to three digits, and the
+ * slug being the URL with a leading `http://` / `https://` dropped, every run
+ * of non-alphanumeric characters collapsed to a single `-`, edge dashes
+ * trimmed, lower-cased and capped at 60 characters. When nothing usable is
+ * left (e.g. for `/`), the slug falls back to `url`.
+ */
+function safeSlug(url: string, index: number): string {
+  const withoutProtocol = url.replace(/^https?:\/\//, "");
+  const dashed = withoutProtocol.replace(/[^a-z0-9]+/gi, "-");
+  const trimmed = dashed.replace(/^-+|-+$/g, "");
+  const slug = trimmed.toLowerCase().slice(0, 60);
+  const prefix = String(index).padStart(3, "0");
+  return prefix + "-" + (slug || "url");
+}
+/**
+ * Loads one sweep URL in a fresh page of `context` and records its health.
+ *
+ * Navigates to the URL, waits (briefly) for `load`, evaluates the URL's
+ * expectations, takes a viewport screenshot, reads the UI attributes, and
+ * computes a verdict from the captured page errors, console entries, network
+ * entries and expectation results. Two files are written into `sweepDir`:
+ * `<slug>.png` and `<slug>.json` (the full per-URL detail).
+ *
+ * The function is best-effort: navigation and probe failures are swallowed
+ * and surface as `status: null`, an empty title, or missing data rather than
+ * as a thrown error.
+ *
+ * @param context  Browser context the page is opened in.
+ * @param baseUrl  Base that a relative `sweepUrl.path` is resolved against.
+ * @param sweepUrl The URL to inspect and the expectations to evaluate on it.
+ * @param sweepDir Directory the screenshot and detail JSON are written to.
+ * @param slug     Base file name (no extension) for this URL's artefacts.
+ * @param opts     `failOn` kinds for the verdict and the `page.goto` timeout.
+ * @returns The summary row for this URL.
+ */
+async function inspectOne(
+  context: BrowserContext,
+  baseUrl: string,
+  sweepUrl: SweepUrl,
+  sweepDir: string,
+  slug: string,
+  opts: {
+    failOn: FailOnKind[];
+    gotoTimeoutMs: number;
+  }
+): Promise<SweepEntry> {
+  const started = Date.now();
+  const page = await context.newPage();
+  const buffers = attachCapture(context, page, {
+    allNetwork: false,
+    allConsole: false
+  });
+  // Only a real http(s) scheme marks an absolute URL. A plain prefix test on
+  // "http" would also catch relative paths such as "http-status" and send
+  // them to the browser unresolved.
+  let requestedUrl = sweepUrl.path;
+  if (!/^https?:\/\//i.test(sweepUrl.path)) {
+    try {
+      requestedUrl = new URL(sweepUrl.path, baseUrl).toString();
+    } catch {
+      // Unresolvable path/base: keep the raw path. The navigation below then
+      // fails and is reported as status null instead of aborting the sweep.
+    }
+  }
+  let status: number | null = null;
+  let title = "";
+  let finalUrl = requestedUrl;
+  let expectations: ExpectationResult[] = [];
+  // Navigation + expectation evaluation. Errors here are swallowed so
+  // sweep stays best-effort — partial data per URL is more useful than
+  // a thrown sweep, and the assertions we DID evaluate end up in the
+  // verdict either way.
+  try {
+    const response = await page
+      .goto(requestedUrl, {
+        waitUntil: "domcontentloaded",
+        timeout: opts.gotoTimeoutMs
+      })
+      .catch(() => null);
+    status = response?.status() ?? null;
+    finalUrl = page.url();
+    // Wait for `load` so the page renders and throws any hydration errors.
+    // Sweep is intentionally shallow — load-health, not full interactivity —
+    // so we don't run a deeper settle here.
+    await page.waitForLoadState("load", { timeout: 5_000 }).catch(() => {});
+    if (sweepUrl.expectations.length > 0) {
+      expectations = await evaluateExpectations(page, sweepUrl.expectations);
+    }
+    title = await page.title().catch(() => "");
+  } catch {
+    // best-effort sweep — keep partial data
+  }
+  // Cleanup + post-run probes (screenshot + attrs need the page open).
+  const screenshotRel = `${slug}.png`;
+  await page
+    .screenshot({ path: resolve(sweepDir, screenshotRel), fullPage: false })
+    .catch(() => {});
+  const attrs = await readUiAttributes(page).catch(() => []);
+  await page.close().catch(() => {});
+  const consoleErrors = buffers.consoleEntries.filter(
+    (e) => e.type === "error"
+  ).length;
+  const http4xx = buffers.networkEntries.filter(
+    (e) => e.status !== null && e.status >= 400 && e.status < 500
+  ).length;
+  const http5xx = buffers.networkEntries.filter(
+    (e) => e.status !== null && e.status >= 500
+  ).length;
+  // A sweep runs no steps, so `failedSteps` is always 0.
+  const verdict = computeVerdict({
+    failedSteps: 0,
+    pageErrors: buffers.pageErrors,
+    consoleEntries: buffers.consoleEntries,
+    networkEntries: buffers.networkEntries,
+    expectations,
+    failOn: opts.failOn
+  });
+  // Sample the clock once so the detail JSON and the report row agree.
+  const durationMs = Date.now() - started;
+  const detailRel = `${slug}.json`;
+  writeFileSync(
+    resolve(sweepDir, detailRel),
+    JSON.stringify(
+      {
+        url: sweepUrl.path,
+        packs: sweepUrl.packs,
+        requestedUrl,
+        finalUrl,
+        status,
+        title,
+        durationMs,
+        ok: verdict.ok,
+        triggers: verdict.triggers,
+        expectations,
+        console: { entries: buffers.consoleEntries },
+        network: { entries: buffers.networkEntries },
+        pageErrors: buffers.pageErrors,
+        attrs
+      },
+      null,
+      2
+    )
+  );
+  return {
+    url: sweepUrl.path,
+    finalUrl,
+    status,
+    title,
+    durationMs,
+    ok: verdict.ok,
+    triggers: verdict.triggers,
+    expectations,
+    pageErrors: buffers.pageErrors.length,
+    consoleErrors,
+    http4xx,
+    http5xx,
+    screenshot: screenshotRel,
+    detailJson: detailRel
+  };
+}
+/**
+ * Runs a shallow load-health sweep over `opts.urls` and writes the results.
+ *
+ * One headless Chromium instance is launched and a pool of workers — each with
+ * its own browser context — pulls URLs from a shared queue and inspects them
+ * with `inspectOne`. When a saved session file exists (and
+ * `opts.loadStorageState` is not `false`) it is loaded into every context.
+ *
+ * Artefacts are written to a new `sweep-<run id>` directory under `RUNS_DIR`:
+ * the per-URL screenshot and detail JSON, plus `sweep.json` and `sweep.html`.
+ * A summary is logged, including a hint when a large share of a non-local
+ * target's URLs answered HTTP 403.
+ *
+ * @param opts Sweep configuration; see `SweepOptions`.
+ * @returns The aggregate report, with entries in the order of `opts.urls`.
+ */
+export async function runSweep(opts: SweepOptions): Promise<SweepReport> {
+  const sweepId = `sweep-${newRunId()}`;
+  const sweepDir = resolve(RUNS_DIR, sweepId);
+  mkdirSync(sweepDir, { recursive: true });
+  log.dim(`sweep dir: ${sweepDir}`);
+  const startedAt = new Date();
+  const started = Date.now();
+  // A concurrency of 0, a negative number or NaN would start zero workers and
+  // the sweep would report "all ok" without testing anything. Clamp to at
+  // least one worker instead.
+  const requestedConcurrency = Math.floor(opts.concurrency);
+  const concurrency = Number.isNaN(requestedConcurrency)
+    ? 1
+    : Math.max(1, requestedConcurrency);
+  if (concurrency !== opts.concurrency) {
+    log.warn(`  · invalid concurrency ${opts.concurrency}; using ${concurrency}`);
+  }
+  const browser: Browser = await chromium.launch({ headless: true });
+  // One slot per input URL, filled by whichever worker handles it. Addressing
+  // results by input index keeps the report in input order even when the same
+  // path is listed more than once.
+  const slots = new Array<SweepEntry | undefined>(opts.urls.length);
+  const useStorageState =
+    opts.loadStorageState !== false && existsSync(SESSION_STATE_PATH);
+  if (useStorageState)
+    log.dim("  · loaded session from ~/.web-tester/session.json");
+  try {
+    // Worker pool: keep one browser, hand each worker its own context. Each
+    // worker pulls from the shared queue until empty. A fresh context per URL
+    // would be cleaner state-wise but costs ~200ms; a per-worker context
+    // amortises that across the queue while keeping cookies and storage
+    // scoped to one worker rather than shared across the whole sweep.
+    const queue = opts.urls.map((u, i) => ({ sweepUrl: u, index: i }));
+    let nextLog = 0;
+    const worker = async (): Promise<void> => {
+      const context = await browser.newContext({
+        viewport: DEFAULT_SESSION_VIEWPORT,
+        userAgent: DEFAULT_SESSION_UA,
+        // Each worker gets its own context but shares the on-disk session,
+        // so a sweep can include auth-gated routes without each worker
+        // logging in. No-op when the file doesn't exist.
+        ...(useStorageState ? { storageState: SESSION_STATE_PATH } : {})
+      });
+      await configureContext(context, opts.baseUrl);
+      try {
+        while (queue.length > 0) {
+          const job = queue.shift();
+          if (!job) break;
+          const slug = safeSlug(job.sweepUrl.path, job.index + 1);
+          const entry = await inspectOne(
+            context,
+            opts.baseUrl,
+            job.sweepUrl,
+            sweepDir,
+            slug,
+            {
+              failOn: opts.failOn,
+              gotoTimeoutMs: opts.gotoTimeoutMs
+            }
+          );
+          slots[job.index] = entry;
+          const idx = ++nextLog;
+          const tag = entry.ok ? "✓" : "✗";
+          const colour = entry.ok ? log.dim : log.fail;
+          const packTag = job.sweepUrl.packs.length
+            ? ` [${job.sweepUrl.packs.join(",")}]`
+            : "";
+          colour(
+            `  ${tag} [${idx}/${opts.urls.length}] ${entry.url}${packTag} → ${entry.status ?? "?"} (${entry.durationMs}ms)${
+              entry.triggers.length ? ` — ${entry.triggers.join("; ")}` : ""
+            }`
+          );
+        }
+      } finally {
+        await context.close().catch(() => {});
+      }
+    };
+    const workers = Array.from(
+      { length: Math.min(concurrency, opts.urls.length) },
+      () => worker()
+    );
+    await Promise.all(workers);
+  } finally {
+    await browser.close().catch(() => {});
+  }
+  // Workers complete in arbitrary order; reading the slots back in index
+  // order makes the report deterministic.
+  const entries = slots.filter(
+    (e): e is SweepEntry => e !== undefined
+  );
+  const passed = entries.filter((e) => e.ok).length;
+  const failed = entries.length - passed;
+  const distinctPacks = Array.from(
+    new Set(opts.urls.flatMap((u) => u.packs))
+  );
+  const report: SweepReport = {
+    sweepId,
+    startedAt: startedAt.toISOString(),
+    durationMs: Date.now() - started,
+    baseUrl: opts.baseUrl,
+    concurrency,
+    total: entries.length,
+    passed,
+    failed,
+    failOn: opts.failOn,
+    packs: distinctPacks,
+    entries
+  };
+  writeFileSync(resolve(sweepDir, "sweep.json"), JSON.stringify(report, null, 2));
+  writeFileSync(resolve(sweepDir, "sweep.html"), renderSweepHtml(report));
+  log.info("");
+  log.header(failed === 0 ? "sweep: all ok" : `sweep: ${failed}/${entries.length} failed`);
+  log.info(`  duration:   ${report.durationMs}ms`);
+  log.info(`  concurrency: ${concurrency}`);
+  log.info(`  passed:     ${passed}`);
+  log.info(`  failed:     ${failed}`);
+  // Detect prod throttling: if a meaningful share of URLs came back as 403
+  // we're almost certainly hitting WAF / VPN rate limits, not real bugs.
+  // Most developers don't recognise this pattern on first encounter, so
+  // print an explicit hint with the mitigation.
+  const httpForbidden = entries.filter((e) => e.status === 403).length;
+  const isLocal =
+    opts.baseUrl.includes("localhost") || opts.baseUrl.includes("127.0.0.1");
+  if (
+    !isLocal &&
+    httpForbidden >= 3 &&
+    httpForbidden / Math.max(1, entries.length) >= 0.15
+  ) {
+    log.info("");
+    log.warn(
+      `  ⚠ ${httpForbidden}/${entries.length} URLs returned HTTP 403 from ${opts.baseUrl}.`
+    );
+    log.warn(
+      "    This is almost certainly NOT a regression in your code — the remote"
+    );
+    log.warn(
+      "    target is responding 403. Common causes: WAF / VPN rate-limiting,"
+    );
+    log.warn(
+      "    prod-side partial outage, or a recent deploy gating those paths."
+    );
+    log.warn(
+      "    Mitigations (in order):"
+    );
+    log.warn(
+      "      · curl one of the failing URLs directly to confirm it really is 403"
+    );
+    log.warn(
+      `      · drop --concurrency to 1 (current: ${concurrency}) and retry`
+    );
+    log.warn(
+      "      · wait 5-10 minutes and retry (rate-limit windows usually clear)"
+    );
+    log.warn(
+      "      · sweep localhost instead (no env var → defaults to http://localhost:3000)"
+    );
+  }
+  log.ok(`  HTML report: ${sweepDir}/sweep.html`);
+  log.info(`  sweep.json:  ${sweepDir}/sweep.json`);
+  return report;
+}
+/**
+ * Escapes the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so
+ * that `s` can be placed in element content or a quoted attribute value.
+ */
+function esc(s: string): string {
+  const entities: Record<string, string> = {
+    "&": "&amp;",
+    "<": "&lt;",
+    ">": "&gt;",
+    '"': "&quot;",
+    "'": "&#039;"
+  };
+  // Single pass: each special character is swapped for its entity exactly once.
+  return s.replace(/[&<>"']/g, (ch) => entities[ch] ?? ch);
+}
+/**
+ * Renders the sweep report as a single self-contained HTML page: a header
+ * with run metadata, badges for packs and fail-on kinds, pass/fail totals, and
+ * one table row per entry linking to its detail JSON and screenshot.
+ *
+ * Every dynamic string is passed through `esc` before it reaches the markup —
+ * including pack names and fail-on kinds, which come from project
+ * configuration and must not be able to inject HTML into the report.
+ *
+ * @param report The aggregate sweep report.
+ * @returns The complete HTML document as a string.
+ */
+function renderSweepHtml(report: SweepReport): string {
+  const rows = report.entries
+    .map((e, i) => {
+      const verdict = e.ok
+        ? `<span class="ok">ok</span>`
+        : `<span class="fail">fail</span>`;
+      const triggers = e.triggers.length
+        ? `<ul class="triggers">${e.triggers.map((t) => `<li>${esc(t)}</li>`).join("")}</ul>`
+        : "";
+      // No response and 5xx are shown as failures, 4xx as a warning.
+      const statusClass =
+        e.status === null
+          ? "stat-fail"
+          : e.status >= 500
+            ? "stat-fail"
+            : e.status >= 400
+              ? "stat-warn"
+              : "stat-ok";
+      return `<tr class="${e.ok ? "row-ok" : "row-fail"}">
+        <td class="num">${i + 1}</td>
+        <td class="verdict">${verdict}</td>
+        <td><a href="${esc(e.detailJson)}">${esc(e.url)}</a><div class="dim">${esc(e.title || "")}</div></td>
+        <td class="status ${statusClass}">${e.status ?? "—"}</td>
+        <td class="num">${e.durationMs}ms</td>
+        <td class="num">${e.pageErrors}</td>
+        <td class="num">${e.consoleErrors}</td>
+        <td class="num">${e.http4xx}/${e.http5xx}</td>
+        <td><a href="${esc(e.screenshot)}" target="_blank"><img src="${esc(e.screenshot)}" loading="lazy"></a></td>
+        <td>${triggers}</td>
+      </tr>`;
+    })
+    .join("");
+  // Escape the joined lists: pack names are free-form configuration text.
+  const packsBadge = report.packs.length
+    ? `<span class="badge">packs: ${esc(report.packs.join(", "))}</span>`
+    : "";
+  const failOnBadge = report.failOn.length
+    ? `<span class="badge">fail-on: ${esc(report.failOn.join(", "))}</span>`
+    : "";
+  return `<!doctype html>
+<html lang="en"><head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+<title>web-tester sweep · ${report.total} URLs</title>
+<style>
+  :root { --bg:#fafaf9; --surface:#fff; --border:#e7e5e4; --muted:#57534e; --subtle:#a8a29e; --ok:#15803d; --warn:#a16207; --err:#b91c1c; --text:#18181b; }
+  * { box-sizing: border-box; }
+  body { font: 13px/1.5 -apple-system, BlinkMacSystemFont, "Inter", system-ui, sans-serif; margin: 0; padding: 24px; background: var(--bg); color: var(--text); }
+  h1 { font-size: 18px; margin: 0 0 4px; font-weight: 600; letter-spacing: -0.01em; }
+  .meta { color: var(--muted); font-size: 12px; margin-bottom: 12px; }
+  .totals { display: flex; gap: 16px; margin-bottom: 16px; font-size: 13px; }
+  .totals .stat { color: var(--muted); }
+  .totals .stat strong { color: var(--text); font-weight: 600; font-variant-numeric: tabular-nums; }
+  .totals .ok strong { color: var(--ok); }
+  .totals .fail strong { color: var(--err); }
+  .badges { margin: 0 0 12px; display: flex; gap: 6px; flex-wrap: wrap; }
+  .badge { font-size: 11px; padding: 2px 8px; border: 1px solid var(--border); border-radius: 99px; color: var(--muted); background: var(--surface); }
+  table { width: 100%; border-collapse: collapse; background: var(--surface); border: 1px solid var(--border); border-radius: 6px; overflow: hidden; }
+  th, td { padding: 8px 10px; text-align: left; border-bottom: 1px solid var(--border); vertical-align: top; }
+  th { font-size: 11px; text-transform: uppercase; letter-spacing: 0.04em; color: var(--muted); font-weight: 600; background: var(--bg); }
+  td.num { font-variant-numeric: tabular-nums; color: var(--muted); }
+  tr.row-fail { background: #fef2f2; }
+  td .ok { color: var(--ok); font-weight: 600; font-size: 11px; text-transform: uppercase; }
+  td .fail { color: var(--err); font-weight: 600; font-size: 11px; text-transform: uppercase; }
+  td.stat-ok { color: var(--ok); }
+  td.stat-warn { color: var(--warn); font-weight: 600; }
+  td.stat-fail { color: var(--err); font-weight: 600; }
+  td .dim { color: var(--subtle); font-size: 11px; }
+  td img { width: 120px; height: auto; border: 1px solid var(--border); border-radius: 3px; cursor: zoom-in; display: block; }
+  ul.triggers { margin: 0; padding-left: 16px; color: var(--err); font-size: 11px; }
+  a { color: var(--text); text-decoration: underline; text-decoration-color: var(--subtle); }
+</style>
+</head><body>
+<h1>sweep · ${report.total} URLs</h1>
+<div class="meta">${esc(report.sweepId)} · ${esc(report.baseUrl)} · ${esc(report.startedAt)} · ${report.durationMs}ms · concurrency ${report.concurrency}</div>
+<div class="badges">${packsBadge}${failOnBadge}</div>
+<div class="totals">
+  <div class="stat ok"><strong>${report.passed}</strong> passed</div>
+  <div class="stat fail"><strong>${report.failed}</strong> failed</div>
+  <div class="stat"><strong>${report.total}</strong> total</div>
+</div>
+<table>
+  <thead><tr>
+    <th>#</th><th>verdict</th><th>URL · title</th><th>status</th><th>ms</th>
+    <th>page err</th><th>con err</th><th>4xx/5xx</th><th>preview</th><th>triggers</th>
+  </tr></thead>
+  <tbody>${rows}</tbody>
+</table>
+</body></html>`;
+}

package/src/templates/agent-section.md ADDED Viewed

@@ -0,0 +1,77 @@
+## Inspecting the site with web-tester
+### The rule
+**For any runtime-behaviour question, your FIRST tool call is web-tester — not Read, Grep, or a code-search agent.** Bug reproductions, "does X work" verifications, "this URL renders weirdly" reports, "state doesn't match what I see" — these are all observable in a browser. Run it first, look at the evidence, *then* read code based on what the run shows you. Reading code before driving the live site is what burns sessions and frustrates the developer.
+### Triggers — these are all "web-tester first" questions
+- **Bug reproduction**: "the form submits but nothing happens", "this page renders weirdly", "the count is wrong", anything that starts with a QA report.
+- **Change verification**: "did my refactor still let users sign up?", "does the navbar still work after I touched the layout?", "verify <component> still works on <route>".
+- **Cross-page regression** (after touching shared code — layout, design-system primitives, data-fetching hooks, providers): use `web-tester sweep --preset smoke` (or whichever preset covers the high-value pages) against **localhost**. Each URL in the preset can be annotated with an expectation pack so a regression in one page family fails specifically.
+- **"What might my diff have broken?"** (the *fix one thing, break another* class): use `web-tester impact`. It reads `git diff` vs origin/main, matches changed paths against rules in `.web-tester/impact-rules.json`, and runs the matched sweeps + journeys against localhost. **Output is advisory only — it never blocks anything.**
+- **Behaviour discovery**: "what does the page actually do when I click X?", "what's in the network when the page settles?".
+- **Live diagnostics for an open issue**: console errors, hydration warnings, 4xx responses, missing data on a page.
+Don't reach for web-tester for pure code-reading ("what does this function do?"), unscoped implementation work ("add a feature"), test-file writing, or anything with no specific URL/flow in mind.
+### Ask before assuming the page or feature
+If the developer hasn't named a specific URL, page, or feature, **ask before running**. Many domain terms ("the form", "the buttons", "the page") overlap multiple page types. One short clarifying question is cheaper than running the wrong recipe.
+The shape of the question: *"Which page is this happening on?"* or *"Can you share a URL that reproduces it?"* Then route based on the URL, not your guess.
+### Auto-use opt-in (per developer)
+Auto-use is gated by `WEB_TESTER_AUTO_USE` in `.claude/settings.local.json` (the `env` block). Values:
+- `"on"` — proceed silently; run web-tester when the intent matches the triggers above.
+- `"ask"` — propose the run in **one sentence** as your **first** response (e.g. *"I'd verify with `web-tester inspect /pricing --quick --expect 'text=$49/mo' --fail-on http-5xx`; confirm and I'll run."*). **Do not Read source files, do not Grep, do not spawn an explorer agent — propose first.** When the developer confirms, your very next tool call must be `web-tester inspect …` or `… sweep …`. Reading code happens *after* the run, targeted by what the run shows.
+- `"off"` — never auto-run. Only run when the developer explicitly says so.
+**One-time introduction on the first session of a branch.** The first time you respond in a session where `env.WEB_TESTER_AUTO_USE` is not yet set, briefly introduce web-tester and capture the developer's preference **before** answering whatever else they asked. Then write their choice to the `env` block of `.claude/settings.local.json` (merge into the existing JSON; never overwrite other keys). From that turn on, honour the flag. If the developer ignores the intro and proceeds with their task, assume `"ask"` for the rest of the session and move on.
+### How to use it (the recipe-first flow)
+1. **Pick a recipe.** `web-tester kb` (and `web-tester kb <topic>`) lists the recipe notes the project has in `.web-tester/instructions/`. If none match cleanly, skim them once to learn the step grammar, then write your own.
+2. **Pick the base — this matters.** Default to **localhost** (your dev server). Prod / preview deployments are ONLY for "does this bug exist on the live site, before I touch any code". **Verifying your own local change against prod is meaningless — prod doesn't have your edit.**
+3. **Run it.** Always with `--quick` (skips video, full-page screenshots, AI summary) and `--fail-on http-5xx`. Add `--expect "<assertion>"` for the specific thing you're verifying.
+4. **Read `result.json`** at the path the CLI prints. Look at `ok`, `verdictTriggers`, `expectations[]`, `pageErrors`, `console.entries`, `network.entries`, `steps[N].evalResult`. **Only now** open code files to interpret findings.
+5. **Append a recipe if you went off-map. Required, not optional.** Before you summarise: did your run hit a URL, page type, or step chain not already covered by an entry in `web-tester kb`? If yes, **append a new entry to `.web-tester/instructions/recipes.md` now** (or create it). The simple ones are the most valuable to capture; the next session will think it's simple too and waste five minutes proving it.
+6. **Summarise to the developer in three blocks**: a verdict line ("Reproduced — X" / "Verified — Y"), key evidence (2–4 specific values from `result.json`), and a markdown link to `report.html` so they can review the run themselves (a video to scrub is only recorded when the run was not `--quick`).
+### When the DOM doesn't tell you enough — add logs
+If you've run web-tester and the DOM looks like X but your code says it should look like Y, **don't go grep the source.** Add a temporary `console.log` (or expose the store on `window`) in the relevant component, run web-tester again, and read it back from `result.json.console.entries`. Always prefix `// DEBUG-REMOVE:` and revert before the session ends.
+Every run already captures:
+- `result.json.console.entries` — every `console.log` / `warn` / `error` on the page.
+- `result.json.network.entries` (and `steps[N].network`) — every XHR / fetch / document request: method, URL, status, duration. Filter with `jq '.network.entries[] | select(.url | contains("<pattern>"))'`.
+- `result.json.pageErrors` — uncaught JS errors.
+For a payload bug or an exception you can't pin down, re-run with `--deep`: it adds request/response bodies, the **local-scope variables at every uncaught exception**, and unhandled promise rejections — in `result.json` as `deepErrors`, `unhandledRejections`, and `network.entries[].responseBody`. That often replaces the temporary-`console.log` loop entirely.
+The pattern is: **DOM evidence → state evidence (via logs / `--deep`) → only then read code**.
+### Anti-patterns — don't do these
+- **Don't run web-tester against prod to verify a local change.** Prod doesn't have your code. The only valid prod uses are: (a) confirming a bug exists on the live site BEFORE you start editing, (b) read-only baseline checks.
+- **Don't trust a single `--expect` for state that depends on derived / async logic.** A banner that flashes for 1s then disappears passes a one-shot check and hides a real bug. Add `--persist 2500` (or higher) — both checks must pass.
+- **Don't grep the codebase before running web-tester.** "Let me understand the code first" is the trap. The browser is the source of truth for runtime bugs; code-reading after the run is targeted by the evidence.
+- **Don't blame code when failures span unrelated pages.** If a sweep returns 5xx on routes that don't share the component you changed, the cause is almost certainly environmental, not a code regression. Read `result.json.pageErrors[0].message` — `Cannot find module …` / `ENOENT …` usually means a corrupt dev-server build cache, not your diff.
+- **Don't roll your own probe scripts or spin up a second dev server.** web-tester already captures `network.entries`, `console.entries`, `pageErrors`, and supports the temporary-log pattern above. If you want to write a separate script to capture data, you're off-piste — the tool already covers it.
+- **Don't write `--step` chains from scratch when a recipe exists.** Use `web-tester kb`. The grammar has gotchas — `click:` is a Playwright CSS locator, not `role=`; on apps that don't use the `data-attr-*` convention, prefer `wait:networkidle` over `settle`.
+- **Don't `--fail-on page-errors` by default.** Most sites have baseline framework warnings. Use `http-5xx` as the safe default.
+- **Don't leave temporary instrumentation or `DEBUG-REMOVE` edits in.** Edit → run → revert in the same turn. Never commit them.
+### Authentication — test credentials only
+For login-gated flows, drive the login once with `--save-session` (it saves cookies + localStorage to `~/.web-tester/session.json`); later runs reuse it. **Only ever use disposable TEST credentials** — never production, personal, or privileged accounts. Credentials you put in a `--step` are stored in plain text in `.web-tester/journeys/*.json` and are committed to the repo. If a flow needs credentials you don't have, **ask the developer for a test account** — never invent them, reuse real ones you've seen in chat, or pull secrets from the codebase/env.
+### Operating notes
+- `web-tester kb` lists every knowledge file in `.web-tester/instructions/`.
+- `web-tester map` crawls the site and generates a route map, a smoke preset, and starter recipes — run it once to bootstrap coverage.
+- When a run uncovers a non-obvious domain quirk, append it to the matching `.md` so the next session benefits.
+- **Self-verify your own medium-to-large changes with web-tester before reporting "done".** When you finish a change with observable runtime impact (route handlers, shared components, layout, providers, or any > ~30 changed lines spanning > 1 file), don't just say "done". Branch on `env.WEB_TESTER_AUTO_USE`: `"on"` → run `web-tester impact` (or a specific recipe) automatically, then summarise; `"ask"` → propose the run in one sentence, wait, then run; `"off"` → skip. Skip the self-verify regardless of flag for trivial edits (typo / comment / rename), doc-only changes, test-file-only changes, and config tweaks with no behaviour change. Verify ONCE at the end of a cohesive change set, not after each edit.

package/src/templates/dot-web-tester/impact-rules.json ADDED Viewed

@@ -0,0 +1,36 @@
+{
+  "$comment": "Maps changed file paths to web-tester sweeps/journeys to check. `web-tester impact` runs git diff against --base (default origin/main) and executes whatever rules match. Advisory only — never blocks a push. These are EXAMPLES — edit the globs and targets to match your project, then add rules as you find sensitive areas. Run `web-tester map` to discover routes worth covering.",
+  "rules": [
+    {
+      "name": "Shared layout changed — sweep top pages",
+      "when_changed_any": [
+        "src/components/Layout/**",
+        "src/components/Header/**",
+        "src/components/Footer/**",
+        "**/layout.tsx"
+      ],
+      "sweep": {
+        "preset": "smoke"
+      }
+    },
+    {
+      "name": "Auth code changed — sign-up journey",
+      "when_changed_any": [
+        "src/auth/**",
+        "**/api/auth/**"
+      ],
+      "journey": "example-signup"
+    },
+    {
+      "name": "Routing / data-fetching plumbing — sweep key pages",
+      "when_changed_any": [
+        "src/lib/**",
+        "src/middleware.ts"
+      ],
+      "sweep": {
+        "urls": ["/", "/pricing"],
+        "packs": ["homepage", "static"]
+      }
+    }
+  ]
+}

package/src/templates/dot-web-tester/instructions/getting-started.md ADDED Viewed

@@ -0,0 +1,62 @@
+# Getting started with web-tester in this project
+This file is part of the `.web-tester/instructions/` knowledge base. Anything
+in here is browseable with:
+```bash
+web-tester kb              # list topics
+web-tester kb <topic>      # print one
+```
+Your AI agent reads these files in fresh sessions instead of grepping your
+source to re-derive project knowledge. Keep them short and concrete.
+## The unit of work — recipes
+A "recipe" is a tested copy-paste `web-tester inspect …` one-liner for a
+specific page type or flow. See [`recipes.md`](recipes.md) for the format.
+Append new recipes whenever you run against an uncovered area.
+## What goes in here
+- `recipes.md` — copy-paste one-liners (the cookbook).
+- `architecture.md` — short notes on app structure that matter at runtime
+  (e.g. "the app store is exposed on `window.__store` in dev").
+- `<feature>.md` — domain quirks worth remembering (e.g. "the pricing table
+  takes ~3s to settle on cold loads — use `--persist 3000` for any pricing
+  assertion").
+- `auth.md` — how to drive sign-in, where the session lives, what test
+  credentials to use.
+Avoid:
+- General code documentation — that belongs in source comments / READMEs.
+- Anything that rots fast (specific commit refs, "the bug from last week").
+- Anything secret (real credentials, API keys).
+## Configuring the runner
+The runner reads these settings from `.env` or shell variables; each falls back to the default shown:
+| Var | Default | Purpose |
+|---|---|---|
+| `WEB_TESTER_BASE_URL` | `http://localhost:3000` | Bare paths resolve against this. |
+| `GOTO_TIMEOUT_MS` | `30000` | Initial navigation timeout. |
+| `STEP_TIMEOUT_MS` | `15000` | Per-step action timeout. |
+| `SETTLE_TIMEOUT_MS` | `30000` | `settle` step ceiling. |
+Override per-run via env:
+```bash
+WEB_TESTER_BASE_URL=https://staging.example.com \
+  web-tester inspect /pricing --quick
+```
+## Sibling files in `.web-tester/`
+- `impact-rules.json` — diff-aware rules for `web-tester impact`.
+- `urls-<name>.txt` — URL presets for `web-tester sweep --preset <name>`.
+- `journeys/<name>.json` — saved flows for `web-tester journey <name>`.
+See the package README for the full schema of each. Run `web-tester map` to
+auto-discover routes and generate a starter preset + recipes.