web-tester-for-claude 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,394 @@
1
+ import { resolve } from "node:path";
2
+ import { chromium, type Browser, type BrowserContext } from "playwright";
3
+ import { readUiAttributes } from "../browser/attrs";
4
+ import {
5
+ configureContext,
6
+ DEFAULT_SESSION_UA,
7
+ DEFAULT_SESSION_VIEWPORT
8
+ } from "../browser/session";
9
+ import { attachCapture } from "../inspector/capture";
10
+ import { log } from "../util/log";
11
+ import { SESSION_STATE_PATH } from "../util/paths";
12
+ import { existsSync } from "node:fs";
13
+ import { routeTemplate } from "./classify";
14
+
15
/** A form discovered on a page, with enough detail to draft a journey. */
export type PageForm = {
  /** Raw `action` attribute of the form, or null when the attribute is absent. */
  action: string | null;
  /** Lower-cased `method` attribute; "get" when the attribute is absent. */
  method: string;
  /**
   * Non-hidden input/select/textarea controls inside the form. `name` falls
   * back to the element id (then to "") when there is no `name` attribute;
   * `type` is the lower-cased input type for inputs and the tag name otherwise.
   */
  fields: { name: string; type: string; tag: string; required: boolean }[];
  /** Whitespace-collapsed text (or `value`) of the submit control, if one was found. */
  submitText: string | null;
};
22
+
23
/** Everything the crawler observes about one page. */
export type PageFacts = {
  /** Absolute URL requested. */
  url: string;
  /** Path requested (pathname + search). */
  path: string;
  /** Path after any redirects. */
  finalPath: string;
  /** HTTP status of the navigation response, or null when no response was obtained. */
  status: number | null;
  /** `document.title` of the loaded page. */
  title: string;
  /** True when navigation recorded no error and the status is below 400. */
  ok: boolean;
  /** Link hops from a seed path (seeds are depth 0). */
  depth: number;
  /** Whether a `<header>` element exists. */
  hasHeader: boolean;
  /** Whether a `<footer>` element exists. */
  hasFooter: boolean;
  /** Whether a `<main>` element exists. */
  hasMain: boolean;
  /** Whether a `<nav>` element exists. */
  hasNav: boolean;
  /** Whitespace-collapsed text of the first `<h1>`, or null when missing/empty. */
  h1: string | null;
  /** Number of `h1`, `h2` and `h3` elements. */
  headingCount: number;
  /** Forms found on the page. */
  forms: PageForm[];
  /** Same-origin paths linked from this page. */
  internalLinks: string[];
  /** Number of `<img>` elements. */
  imageCount: number;
  /** `main a[href^='/']:has(img)` — catalog-card shape. */
  cardLinkCount: number;
  /** Number of `input[type=password]` elements. */
  passwordFields: number;
  /** Number of inputs that look like a search box (type=search or name q/query/s). */
  searchInputs: number;
  /** Count of captured console entries whose type is "error". */
  consoleErrors: number;
  /** Count of captured page errors. */
  pageErrors: number;
  /** Number of entries returned by `readUiAttributes` for the page. */
  attrCount: number;
  /** Relative path to a viewport screenshot under the map dir, if captured. */
  screenshot?: string;
  /** Navigation error message, set when `page.goto` rejected. */
  error?: string;
};
56
+
57
/** Inputs that bound and configure a `crawlSite` run. */
export type CrawlOptions = {
  /** Base URL of the site; its origin defines what counts as "internal". */
  baseUrl: string;
  /** Seed paths to start from (the base path + any sitemap entries). */
  seeds: string[];
  /** Directory that screenshot file names are resolved against. */
  mapDir: string;
  /** Maximum number of pages to fetch. */
  limit: number;
  /** Maximum number of link hops to follow from a seed. */
  depth: number;
  /** Number of parallel workers (at least one worker always runs). */
  concurrency: number;
  /** Maximum pages fetched per route template (the base path is exempt). */
  perTemplate: number;
  /** Whether to save a viewport screenshot for each fetched page. */
  captureScreenshots: boolean;
  /** Whether to load the saved session state file when it exists. */
  loadStorageState: boolean;
  /** Timeout, in milliseconds, passed to `page.goto`. */
  gotoTimeoutMs: number;
  /** When set, only paths matching this pattern are queued. */
  filter?: RegExp;
  /** When set, paths matching this pattern are never queued. */
  exclude?: RegExp;
};
72
+
73
/**
 * The subset of `PageFacts` that is read from the DOM inside the page; the
 * omitted keys are filled in by the crawler itself (request/response data,
 * capture counters, screenshot and error details).
 */
type DomFacts = Omit<
  PageFacts,
  | "url"
  | "path"
  | "finalPath"
  | "status"
  | "depth"
  | "ok"
  | "consoleErrors"
  | "pageErrors"
  | "attrCount"
  | "screenshot"
  | "error"
>;
87
+
88
/**
 * Run inside the page to collect structural facts in one round-trip.
 *
 * Everything in here is serialised to the browser by page.evaluate, so it must
 * use inline callback arguments only — no `const fn = () => …` and no nested
 * `function` declarations. tsx's esbuild "keep names" transform wraps named
 * inner functions in a `__name(…)` helper that doesn't exist in the page; one
 * slips in and the whole evaluate throws `ReferenceError: __name is not
 * defined`, which the caller swallows — leaving every fact blank.
 *
 * @returns The DOM-derived portion of `PageFacts`. `internalLinks` holds the
 *   absolute, browser-resolved hrefs of every anchor; the caller is expected
 *   to reduce them to same-origin paths.
 */
function collectDomFacts(): DomFacts {
  const forms = Array.from(document.querySelectorAll("form")).map((form) => {
    const fields = Array.from(
      form.querySelectorAll("input, select, textarea")
    )
      .map((el) => {
        const tag = el.tagName.toLowerCase();
        // Inputs report their `type` attribute (default "text"); select and
        // textarea report their tag name instead.
        const type =
          tag === "input"
            ? (el.getAttribute("type") ?? "text").toLowerCase()
            : tag;
        return {
          name:
            el.getAttribute("name") ?? el.getAttribute("id") ?? "",
          type,
          tag,
          required: el.hasAttribute("required")
        };
      })
      // Hidden inputs are not something a journey can fill in.
      .filter((f) => f.type !== "hidden");
    const submit = form.querySelector(
      "button[type=submit], input[type=submit], button:not([type])"
    );
    return {
      action: form.getAttribute("action"),
      method: (form.getAttribute("method") ?? "get").toLowerCase(),
      fields,
      // Prefer the visible button text; fall back to the `value` attribute
      // (the label of an `<input type=submit>`).
      submitText: submit
        ? (submit.textContent ?? "").replace(/\s+/g, " ").trim() ||
          submit.getAttribute("value")
        : null
    };
  });

  // `.href` (not getAttribute) gives the browser-resolved absolute URL against
  // the live document base — so relative links resolve correctly even after a
  // redirect, where the requested URL and final URL differ.
  //
  // `a[href]` also matches SVG `<a>` elements, whose `.href` is an
  // SVGAnimatedString object rather than a string. A truthiness check would
  // let that object through and break string handling downstream, so only
  // non-empty strings are kept.
  const anchors = Array.from(document.querySelectorAll("a[href]"))
    .map((a) => (a as HTMLAnchorElement | SVGAElement).href)
    .filter((href): href is string => typeof href === "string" && href !== "");

  return {
    title: document.title ?? "",
    hasHeader: !!document.querySelector("header"),
    hasFooter: !!document.querySelector("footer"),
    hasMain: !!document.querySelector("main"),
    hasNav: !!document.querySelector("nav"),
    h1:
      (document.querySelector("h1")?.textContent ?? "")
        .replace(/\s+/g, " ")
        .trim() || null,
    headingCount: document.querySelectorAll("h1, h2, h3").length,
    forms,
    internalLinks: anchors,
    imageCount: document.querySelectorAll("img").length,
    cardLinkCount: document.querySelectorAll("main a[href^='/']:has(img)")
      .length,
    passwordFields: document.querySelectorAll("input[type=password]").length,
    searchInputs: document.querySelectorAll(
      "input[type=search], input[name=q], input[name=query], input[name=s]"
    ).length
  };
}
161
+
162
/**
 * Reduce an href to a same-origin path (pathname + search).
 *
 * @param href    Raw or absolute href taken from the page.
 * @param pageUrl URL used as the base when `href` is relative.
 * @param origin  Origin the result must belong to.
 * @returns The path with its query string, or null when the href is empty,
 *   fragment-only, uses a non-navigational scheme, cannot be parsed, points
 *   at another origin, or is not http(s).
 */
function toInternalPath(href: string, pageUrl: string, origin: string): string | null {
  const candidate = href.trim();
  if (candidate.length === 0 || candidate.charAt(0) === "#") return null;
  if (/^(mailto:|tel:|javascript:|data:)/i.test(candidate)) return null;

  let resolved: URL;
  try {
    resolved = new URL(candidate, pageUrl);
  } catch {
    return null;
  }

  const sameOrigin = resolved.origin === origin;
  const isHttp = /^https?:$/.test(resolved.protocol);
  // The hash is intentionally left out: fragment-only differences point at
  // the same document.
  return sameOrigin && isHttp ? resolved.pathname + resolved.search : null;
}
182
+
183
/**
 * Build a file-name-safe stem for a page: a zero-padded (3 digit minimum)
 * index followed by the lower-cased path with every run of non-alphanumeric
 * characters collapsed to a dash, capped at 50 characters. An empty stem
 * becomes "root".
 */
function slug(path: string, index: number): string {
  const ordinal = String(index).padStart(3, "0");
  const dashed = path.replace(/[^a-z0-9]+/gi, "-");
  const stem = dashed.replace(/^-+|-+$/g, "").toLowerCase().slice(0, 50);
  const label = stem === "" ? "root" : stem;
  return `${ordinal}-${label}`;
}
191
+
192
/**
 * Breadth-first crawl from the seed paths, same-origin only. Stops at
 * `limit` fetched pages or `depth` link hops, and fetches at most
 * `perTemplate` pages per dynamic-route template so a catalog of thousands
 * of detail pages doesn't dominate the crawl.
 *
 * @param opts Crawl bounds and configuration (see `CrawlOptions`).
 * @returns One `PageFacts` per fetched page, in the order fetches completed.
 * @throws If `opts.baseUrl` is not a valid URL, or if the browser or a
 *   browser context cannot be created.
 */
export async function crawlSite(opts: CrawlOptions): Promise<PageFacts[]> {
  const origin = new URL(opts.baseUrl).origin;
  const browser: Browser = await chromium.launch({ headless: true });
  const useStorageState =
    opts.loadStorageState && existsSync(SESSION_STATE_PATH);
  if (useStorageState) log.dim(" · loaded session from ~/.web-tester/session.json");

  const results: PageFacts[] = [];
  // Every path ever enqueued, so a path is never queued twice.
  const queued = new Set<string>();
  const templateCounts = new Map<string, number>();
  type Job = { path: string; depth: number };
  const frontier: Job[] = [];

  const wanted = (path: string): boolean => {
    // RegExp#test is stateful for patterns with the `g` or `y` flag: it
    // starts matching at `lastIndex` and moves it after every call. These
    // patterns come from the caller, so reset the cursor before each test to
    // make the verdict depend on the path alone.
    if (opts.filter) {
      opts.filter.lastIndex = 0;
      if (!opts.filter.test(path)) return false;
    }
    if (opts.exclude) {
      opts.exclude.lastIndex = 0;
      if (opts.exclude.test(path)) return false;
    }
    return true;
  };

  for (const seed of opts.seeds) {
    if (!queued.has(seed) && wanted(seed)) {
      queued.add(seed);
      frontier.push({ path: seed, depth: 0 });
    }
  }

  // `fetched` counts pages started (it also numbers screenshots); `active`
  // counts fetches currently in flight across all workers.
  let fetched = 0;
  let active = 0;

  // Path + query of the base URL with trailing slashes removed ("/" when the
  // path is empty or the URL cannot be parsed).
  const basePath = (() => {
    try {
      const u = new URL(opts.baseUrl);
      return (u.pathname.replace(/\/+$/, "") || "/") + u.search;
    } catch {
      return "/";
    }
  })();

  const claimTemplateSlot = (path: string): boolean => {
    // The base path always gets crawled. Everything else — including sitemap
    // seeds — is capped per route template so a large catalog can't crowd
    // out the rest of the site.
    if (path === basePath || path === "/") return true;
    const tpl = routeTemplate(path);
    const count = templateCounts.get(tpl) ?? 0;
    if (count >= opts.perTemplate) return false;
    templateCounts.set(tpl, count + 1);
    return true;
  };

  const fetchOne = async (
    context: BrowserContext,
    job: Job
  ): Promise<void> => {
    // Incremented before the first await, so the limit check in the worker
    // loop always sees an up-to-date count.
    const index = ++fetched;
    const page = await context.newPage();
    const buffers = attachCapture(context, page, {
      allNetwork: false,
      allConsole: false
    });
    const url = job.path.startsWith("http")
      ? job.path
      : new URL(job.path, opts.baseUrl).toString();
    let facts: PageFacts = {
      url,
      path: job.path,
      finalPath: job.path,
      status: null,
      title: "",
      ok: false,
      depth: job.depth,
      hasHeader: false,
      hasFooter: false,
      hasMain: false,
      hasNav: false,
      h1: null,
      headingCount: 0,
      forms: [],
      internalLinks: [],
      imageCount: 0,
      cardLinkCount: 0,
      passwordFields: 0,
      searchInputs: 0,
      consoleErrors: 0,
      pageErrors: 0,
      attrCount: 0
    };
    try {
      // A failed navigation is recorded on the facts rather than thrown, so
      // the page still appears in the results (marked not ok).
      const response = await page
        .goto(url, { waitUntil: "domcontentloaded", timeout: opts.gotoTimeoutMs })
        .catch((err) => {
          facts.error = err instanceof Error ? err.message : String(err);
          return null;
        });
      facts.status = response?.status() ?? null;
      facts.finalPath = (() => {
        try {
          const u = new URL(page.url());
          return u.pathname + u.search;
        } catch {
          return job.path;
        }
      })();
      // Best effort: give the page up to 5s to finish loading, then carry on.
      await page.waitForLoadState("load", { timeout: 5_000 }).catch(() => {});
      const dom = await page.evaluate(collectDomFacts).catch(() => null);
      if (dom) facts = { ...facts, ...dom };
      const attrs = await readUiAttributes(page).catch(() => []);
      facts.attrCount = attrs.length;
      if (opts.captureScreenshots) {
        const rel = `${slug(facts.finalPath, index)}.png`;
        // Only record the screenshot path when the capture succeeded.
        await page
          .screenshot({ path: resolve(opts.mapDir, rel), fullPage: false })
          .then(() => {
            facts.screenshot = rel;
          })
          .catch(() => {});
      }
    } finally {
      facts.consoleErrors = buffers.consoleEntries.filter(
        (e) => e.type === "error"
      ).length;
      facts.pageErrors = buffers.pageErrors.length;
      await page.close().catch(() => {});
    }

    facts.ok =
      facts.error === undefined &&
      facts.status !== null &&
      facts.status < 400;

    // Resolve and enqueue newly discovered links.
    const discovered = new Set<string>();
    for (const href of facts.internalLinks) {
      const path = toInternalPath(href, facts.url, origin);
      if (path) discovered.add(path);
    }
    facts.internalLinks = Array.from(discovered);
    if (job.depth < opts.depth) {
      for (const path of facts.internalLinks) {
        if (queued.has(path) || !wanted(path)) continue;
        queued.add(path);
        frontier.push({ path, depth: job.depth + 1 });
      }
    }

    results.push(facts);
    const tag = facts.ok ? "✓" : "✗";
    const colour = facts.ok ? log.dim : log.fail;
    colour(
      ` ${tag} [${results.length}] ${facts.path} → ${facts.status ?? "?"}${
        facts.error ? ` (${facts.error.split("\n")[0]})` : ""
      }`
    );
  };

  // Pull from the shared frontier with a fixed worker count. Each worker
  // keeps its own context. The frontier grows as pages reveal links, so the
  // loop continues until it drains or the fetch limit is hit.
  const worker = async (): Promise<void> => {
    const context = await browser.newContext({
      viewport: DEFAULT_SESSION_VIEWPORT,
      userAgent: DEFAULT_SESSION_UA,
      ...(useStorageState ? { storageState: SESSION_STATE_PATH } : {})
    });
    await configureContext(context, opts.baseUrl);
    try {
      for (;;) {
        const job = frontier.shift();
        if (!job) {
          // An empty frontier only means "done" when no fetch is in flight;
          // otherwise a running fetch may still add links, so poll again.
          if (active === 0) break;
          await new Promise((r) => setTimeout(r, 25));
          continue;
        }
        if (fetched >= opts.limit) break;
        if (!claimTemplateSlot(job.path)) continue;
        active++;
        try {
          await fetchOne(context, job);
        } finally {
          active--;
        }
      }
    } finally {
      await context.close().catch(() => {});
    }
  };

  try {
    await Promise.all(
      Array.from({ length: Math.max(1, opts.concurrency) }, () => worker())
    );
  } finally {
    await browser.close().catch(() => {});
  }

  return results;
}
@@ -0,0 +1,253 @@
1
+ import type { ClassifiedPage, RouteGroup } from "./classify";
2
+ import type { PageForm } from "./crawl";
3
+
4
/**
 * Choose the built-in expectation pack to attach to a crawled page. Packs are
 * tried from most to least specific and the first one whose conditions the
 * page met during the crawl wins, so the generated preset passes on a healthy
 * baseline and only fails when something regresses.
 *
 * @returns The pack name, or null when no pack fits the page.
 */
export function packForFacts(page: ClassifiedPage): string | null {
  const framed = page.hasHeader && page.hasFooter;

  if (framed && page.type === "home") {
    return "homepage";
  }
  if (page.cardLinkCount >= 1) {
    return "category";
  }
  if (page.hasMain) {
    return "has-main";
  }
  if (page.h1) {
    return "has-h1";
  }
  return framed ? "static" : null;
}
18
+
19
/**
 * Render the `urls-map.txt` preset: a comment header followed by one line per
 * healthy route group (error groups and groups whose representative page was
 * not ok are left out). Each line is the representative's path, optionally
 * followed by a `#pack=` annotation and a note on how many pages share the
 * template.
 */
export function buildPreset(groups: RouteGroup[], baseUrl: string): string {
  const header: string[] = [
    "# urls-map.txt — generated by `web-tester map`.",
    "#",
    `# Source: ${baseUrl}`,
    "# One representative path per route template, annotated with the",
    "# strongest expectation pack the page satisfied when crawled. Review and",
    "# prune, then run: web-tester sweep --preset map --fail-on http-5xx",
    ""
  ];

  const entries = groups
    .filter((group) => group.type !== "error" && group.representative.ok)
    .map((group) => {
      const page = group.representative;
      const pack = packForFacts(page);
      let entry = page.finalPath || page.path;
      if (pack) {
        entry += ` #pack=${pack}`;
      }
      if (group.count > 1) {
        entry += ` # ${group.count} pages share ${group.template}`;
      }
      return entry;
    });

  // The trailing empty element makes the file end with a newline.
  return [...header, ...entries, ""].join("\n");
}
42
+
43
/**
 * Lower-case a string and collapse every run of non-alphanumeric characters
 * into a single dash, with no dash at either end.
 */
function kebab(s: string): string {
  const dashed = s.replace(/[^a-z0-9]+/gi, "-");
  const trimmed = dashed.replace(/^-+|-+$/g, "");
  return trimmed.toLowerCase();
}
49
+
50
/** Draft values by input type. Types not listed here are not auto-filled. */
const SAMPLE_VALUES: ReadonlyMap<string, string> = new Map([
  ["email", "test@example.com"],
  ["password", "Password123!"],
  ["tel", "5551234567"],
  ["number", "1"],
  ["url", "https://example.com"],
  ["search", "test"],
  ["text", "test"],
  ["textarea", "Hello from web-tester"]
]);

/**
 * A draft value for a form field, by input type.
 *
 * @returns The sample text, or null for types that should be skipped
 *   (select / checkbox / radio / date etc.).
 */
function sampleValue(type: string): string | null {
  return SAMPLE_VALUES.get(type) ?? null;
}
72
+
73
/**
 * Build fill/click steps for a form.
 *
 * Starts with a `wait:networkidle` step, adds one `fill:` step per field that
 * has a name and a known sample value, and ends with a click on the submit
 * button.
 *
 * @returns The step list, or null if no field could be filled (the form
 *   can't be driven).
 */
function formSteps(form: PageForm): string[] | null {
  const steps: string[] = ["wait:networkidle"];
  let filled = 0;
  for (const field of form.fields) {
    if (!field.name) continue;
    const value = sampleValue(field.tag === "textarea" ? "textarea" : field.type);
    if (value === null) continue;
    const tag =
      field.tag === "textarea"
        ? "textarea"
        : field.tag === "select"
          ? "select"
          : "input";
    // Quote the attribute value so names with special characters
    // (`user[email]`, `items[]`, dotted names) produce valid CSS. Inside a
    // quoted CSS string a backslash or double quote must itself be escaped,
    // otherwise the name would end the string early and break the selector.
    const escapedName = field.name.replace(/[\\"]/g, "\\$&");
    const selector = `${tag}[name="${escapedName}"]`;
    steps.push(`fill:${selector}=${value}`);
    filled++;
  }
  if (filled === 0) return null;
  steps.push("click:button[type=submit]");
  return steps;
}
97
+
98
/** A generated journey draft: a unique name plus the journey definition. */
export type DraftJourney = {
  /** Unique name of the draft within one `buildJourneys` result. */
  name: string;
  journey: {
    /** Human-readable note on where the draft came from. */
    description: string;
    /** Path of the page that holds the form. */
    url: string;
    /** Steps to run, in order (wait / fill / click). */
    steps: string[];
    /** Expectations to check; generated drafts start with none. */
    expectations: string[];
    /** Failure condition for the run (generated drafts use "http-5xx"). */
    failOn: string;
  };
};
108
+
109
/**
 * Draft one journey per distinct form discovered. Forms are deduped by route
 * template + field signature so a form repeated across many pages yields a
 * single journey. These are starting points — values and expectations need a
 * human pass before they mean anything.
 *
 * @param pages Classified pages to scan; pages that are not ok are skipped.
 * @param limit Maximum number of drafts to return. Zero or a negative value
 *   yields no drafts.
 * @returns The drafts, in discovery order, each with a unique name.
 */
export function buildJourneys(
  pages: ClassifiedPage[],
  limit: number
): DraftJourney[] {
  const seen = new Set<string>();
  const usedNames = new Set<string>();
  const drafts: DraftJourney[] = [];

  // The limit is otherwise only checked after a draft is pushed, which would
  // let one draft through even when none were asked for.
  if (limit <= 0) return drafts;

  for (const page of pages) {
    if (!page.ok) continue;
    for (const form of page.forms) {
      // Field order is ignored: the same fields in a different order count
      // as the same form.
      const signature = `${page.template}::${form.fields
        .map((f) => `${f.name}:${f.tag}:${f.type}`)
        .sort()
        .join(",")}`;
      if (seen.has(signature)) continue;
      const steps = formSteps(form);
      if (!steps) continue;
      seen.add(signature);

      const path = page.finalPath || page.path;
      // Auth pages get a fixed name; everything else is named after its path.
      let base =
        page.type === "auth"
          ? /sign-?up|register/i.test(path)
            ? "signup"
            : "login"
          : kebab(path) || "form";
      base = `${base}-form`.replace(/-form-form$/, "-form");
      // Append -2, -3, … until the name is unique.
      let name = base;
      let n = 2;
      while (usedNames.has(name)) name = `${base}-${n++}`;
      usedNames.add(name);

      drafts.push({
        name,
        journey: {
          description: `Auto-generated by \`web-tester map\` from the form on ${path}. Review the selectors, values, and add expectations before relying on it.`,
          url: path,
          steps,
          expectations: [],
          failOn: "http-5xx"
        }
      });
      if (drafts.length >= limit) return drafts;
    }
  }
  return drafts;
}
163
+
164
// Markers that fence the generated block inside recipes.md, so a later run
// can find and replace it.
const RECIPES_START = "<!-- web-tester:map:start -->";
const RECIPES_END = "<!-- web-tester:map:end -->";

// Heading label for each page type; types missing here fall back to "Page"
// at the lookup site.
const TYPE_LABEL: Record<string, string> = {
  home: "Homepage",
  list: "List / index page",
  detail: "Detail page",
  form: "Form page",
  auth: "Auth page",
  search: "Search page",
  content: "Content page"
};
176
+
177
/**
 * Build the marker-fenced "Generated by map" recipe section.
 *
 * The section opens with a sweep-everything recipe, then adds one
 * `web-tester inspect` recipe per healthy route group (error groups and
 * groups whose representative page was not ok are left out). The
 * `--expect` flags mirror the pack chosen by `packForFacts`.
 *
 * @returns Markdown text starting with the start marker and ending with the
 *   end marker (no trailing newline).
 */
export function buildRecipesSection(groups: RouteGroup[]): string {
  const lines: string[] = [
    RECIPES_START,
    "",
    "## Generated by `web-tester map`",
    "",
    "One smoke recipe per route template discovered. Re-running `web-tester map`",
    "refreshes this section. Promote the useful ones into the project-specific",
    "section above (they won't be overwritten there).",
    "",
    "### Sweep every discovered route",
    "",
    "```bash",
    "web-tester sweep --preset map --fail-on http-5xx",
    "```",
    ""
  ];

  for (const g of groups) {
    if (g.type === "error" || !g.representative.ok) continue;
    const path = g.representative.finalPath || g.representative.path;
    // The path comes from a crawled site and is pasted into a double-quoted
    // shell argument. Backslash-escape the characters the shell still
    // interprets inside double quotes so a path containing `$(…)` or
    // backticks cannot run anything when the recipe is copy-pasted.
    const shellPath = path.replace(/[\\"$`]/g, "\\$&");
    const pack = packForFacts(g.representative);
    const expects: string[] = [];
    if (pack === "homepage" || pack === "static") {
      expects.push("--expect \"selector=header\"", "--expect \"selector=footer\"");
    } else if (pack === "category") {
      expects.push(
        "--expect \"selector=main\"",
        "--expect \"selector=main a[href^='/']:has(img)\""
      );
    } else if (pack === "has-main") {
      expects.push("--expect \"selector=main\"");
    } else if (pack === "has-h1") {
      expects.push("--expect \"selector=h1\"");
    }
    const label = TYPE_LABEL[g.type] ?? "Page";
    const cmd = [
      `web-tester inspect "${shellPath}" \\`,
      " --step wait:networkidle --quick \\",
      ...expects.map((e) => ` ${e} \\`),
      " --fail-on http-5xx"
    ].join("\n");
    lines.push(
      `### ${label} — \`${g.template}\``,
      "",
      `**When:** verifying \`${g.template}\` still renders.`,
      "",
      "```bash",
      cmd,
      "```",
      ""
    );
  }

  lines.push(RECIPES_END);
  return lines.join("\n");
}
235
+
236
/**
 * Merge the generated recipe section into an existing recipes.md body,
 * replacing a previous generated block if present. Returns the full file.
 *
 * @param existing Current file contents; null or an empty string produces a
 *   fresh file with a default heading.
 * @param section  The marker-fenced section to insert.
 * @returns The new file contents. When no complete marker pair is found the
 *   section is appended after a blank line.
 */
export function mergeRecipes(existing: string | null, section: string): string {
  if (!existing) {
    return `# Recipes\n\nCopy-paste one-liners for common web-tester runs.\n\n${section}\n`;
  }
  const start = existing.indexOf(RECIPES_START);
  // Look for the end marker only after the start marker. Searching from the
  // top of the file would pick up a stray end marker that appears earlier,
  // miss the real pair, and append a second generated block.
  const end =
    start === -1
      ? -1
      : existing.indexOf(RECIPES_END, start + RECIPES_START.length);
  if (start !== -1 && end !== -1) {
    const before = existing.slice(0, start);
    const after = existing.slice(end + RECIPES_END.length);
    return `${before}${section}${after}`;
  }
  const sep = existing.endsWith("\n") ? "\n" : "\n\n";
  return `${existing}${sep}${section}\n`;
}