web-tester-for-claude 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,544 @@
1
+ import { writeFileSync } from "node:fs";
2
+ import { resolve } from "node:path";
3
+ import type { Page } from "playwright";
4
+ import { readUiAttributes, type UiAttribute } from "../browser/attrs";
5
+ import { openSession } from "../browser/session";
6
+ import { log } from "../util/log";
7
+ import { ensureRunPaths, newRunId, type RunPaths } from "../util/paths";
8
+ import {
9
+ attachCapture,
10
+ type CaptureBuffers,
11
+ type ConsoleEntry,
12
+ flushBodies,
13
+ type NetworkEntry,
14
+ type PageErrorEntry,
15
+ sliceSince,
16
+ snapshotCursor
17
+ } from "./capture";
18
+ import { attachDeepCapture, type DeepError } from "./deep";
19
+ import { writeReport } from "./report";
20
+ import { executeStep, type Step } from "./steps";
21
+ import { summariseRun } from "./summarise";
22
+ import {
23
+ computeVerdict,
24
+ evaluateExpectations,
25
+ type Expectation,
26
+ type ExpectationResult,
27
+ type FailOnKind
28
+ } from "./verdict";
29
+
30
/** Everything `runInspect` needs to drive one inspection run. */
export type InspectOptions = {
  /** Base URL handed to the browser session when it is opened. */
  baseUrl: string;
  /** Page to open first; non-absolute values are resolved against the session's base URL. */
  url: string;
  /** Steps executed in order after the initial snapshot. */
  steps: Step[];
  /** Forwarded to the browser session as its `headed` flag. */
  headed: boolean;
  /** Write the page markup to `initial.html` / `final.html` in the run directory. */
  captureHtml: boolean;
  /** Include localStorage, sessionStorage and cookies in the initial/final snapshots. */
  captureStorage: boolean;
  /** Forwarded to the capture layer as `allNetwork`. */
  captureAllNetwork: boolean;
  /** Forwarded to the capture layer as `allConsole`. */
  captureAllConsole: boolean;
  /** Record a webm of the session. Off in `--quick` mode. */
  recordVideo: boolean;
  /** Take a full-page screenshot in addition to the viewport one. Off in `--quick`. */
  fullPageScreenshots: boolean;
  /** Ask Sonnet for a short human-readable summary. Opt-in (off by default). */
  summary: boolean;
  /** Final assertions evaluated once the last step has run. */
  expectations: Expectation[];
  /**
   * When > 0, every assertion in `expectations` is evaluated a second time
   * after waiting `persistMs`, and it only passes if BOTH evaluations pass.
   * This catches transient states: an alert that flashes for ~1s after load
   * and then disappears satisfies a single check but fails the persistence
   * check. Default 0 (single check, no persistence).
   */
  persistMs: number;
  /** Signals that flip the run verdict to fail when they are triggered. */
  failOn: FailOnKind[];
  /**
   * Deep capture (`--deep`): record request/response bodies and attach a CDP
   * debugger that snapshots the local scope of any uncaught exception, plus
   * unhandled promise rejections. Off by default because it adds protocol
   * overhead.
   */
  deep: boolean;
  /** Timeout, in milliseconds, for the initial navigation. */
  gotoTimeoutMs: number;
  /**
   * Load `~/.web-tester/session.json` into the context when it exists.
   * Defaults to true; pass false (CLI `--no-session`) to force an anonymous
   * context, e.g. to test the logged-out experience.
   */
  loadStorageState?: boolean;
  /**
   * Save the browser session (cookies + localStorage) to
   * `~/.web-tester/session.json` after a clean run (CLI `--save-session`).
   * Use it to bootstrap auth: run the login flow once with this enabled.
   */
  saveSession?: boolean;
};
77
+
78
/** Outcome of a single executed step, plus everything captured while it ran. */
export type StepReport = {
  /** 1-based position of the step within the run. */
  index: number;
  /** The step definition that was executed. */
  step: Step;
  /** Label returned by the step executor, or `<kind> (error)` when the step threw. */
  label: string;
  /** False when executing the step threw. */
  ok: boolean;
  /** Message of the thrown error; only present when the step failed. */
  error?: string;
  /** Wall-clock time spent executing the step, in milliseconds. */
  durationMs: number;
  /** Page URL read after the step (and its screenshot) finished. */
  url: string;
  /** Run-relative path (`steps/…`) of the step screenshot, when one was taken. */
  screenshot?: string;
  /** Value reported by the step executor as `evalResult`, when defined. */
  evalResult?: unknown;
  /** Console entries captured since just before the step started. */
  console: ConsoleEntry[];
  /** Network entries captured since just before the step started. */
  network: NetworkEntry[];
  /** Page errors captured since just before the step started. */
  pageErrors: PageErrorEntry[];
};
92
+
93
/** Complete record of an inspection run, as returned by `runInspect` and handed to the report writer. */
export type InspectResult = {
  /** Identifier of the run. */
  runId: string;
  /** Directory that holds the run's artefacts. */
  runDir: string;
  /** ISO-8601 timestamp taken when the run started. */
  startedAt: string;
  /** Milliseconds elapsed between the start of the run and assembling this result. */
  durationMs: number;
  /** Base URL reported by the browser session. */
  baseUrl: string;
  /** Fully resolved URL that was navigated to first. */
  requestedUrl: string;
  /** Page URL reported for the end of the run. */
  finalUrl: string;
  /** Document title read after the final snapshot (empty if it could not be read). */
  title: string;
  /** Viewport dimensions reported for the run. */
  viewport: { width: number; height: number };
  /** Relative path to the screen recording, if one was recorded. */
  video?: string;
  /** Sonnet-generated TL;DR rendered at the top of the HTML report. */
  summary?: string;
  /** Overall verdict of the run. */
  ok: boolean;
  /** Number of steps whose execution threw. */
  failedSteps: number;
  /** Human-readable reasons the verdict failed; empty when `ok`. */
  verdictTriggers: string[];
  /** Signals that were configured to flip the verdict, kept for reporting. */
  failOn: FailOnKind[];
  /** Expectations evaluated against the final page state. */
  expectations: ExpectationResult[];
  /** Snapshot taken right after the first navigation. */
  initial: {
    screenshot: string;
    screenshotFull?: string;
    attrs: UiAttribute[];
    html?: string;
    storage?: StorageSnapshot;
  };
  /** Snapshot taken after all steps and expectation checks. */
  final: {
    screenshot: string;
    screenshotFull?: string;
    attrs: UiAttribute[];
    html?: string;
    storage?: StorageSnapshot;
  };
  /** All captured console entries, with a per-type count. */
  console: { totals: Record<string, number>; entries: ConsoleEntry[] };
  /** All captured network entries; `failedCount` covers status >= 400 or a failure text. */
  network: { count: number; failedCount: number; entries: NetworkEntry[] };
  /** All captured page errors. */
  pageErrors: PageErrorEntry[];
  /** Uncaught exceptions with local-scope dumps. Present only under `--deep`. */
  deepErrors?: DeepError[];
  /** Unhandled promise rejections (missed by `pageerror`). `--deep` only. */
  unhandledRejections?: string[];
  /** One report per executed step, in execution order. */
  steps: StepReport[];
};
138
+
139
/** Browser-side state captured at one point in time. */
export type StorageSnapshot = {
  /** Key/value dump of `window.localStorage`. */
  localStorage: Record<string, string>;
  /** Key/value dump of `window.sessionStorage`. */
  sessionStorage: Record<string, string>;
  /** Cookies of the browser context, reduced to the four fields kept in reports. */
  cookies: { name: string; value: string; domain: string; path: string }[];
};
144
+
145
/**
 * Collect localStorage, sessionStorage and the context's cookies for `page`.
 *
 * Best-effort: if the in-page evaluation fails, both storages come back
 * empty; if the cookie lookup fails, the cookie list is empty.
 *
 * @param page - Page whose storage and cookies are read.
 * @returns The snapshot; each cookie is reduced to name, value, domain and path.
 */
async function readStorage(page: Page): Promise<StorageSnapshot> {
  let webStorage: Pick<StorageSnapshot, "localStorage" | "sessionStorage">;
  try {
    // This callback is executed inside the page, so it must be self-contained.
    webStorage = await page.evaluate(() => {
      const toRecord = (store: Storage): Record<string, string> => {
        const entries: Record<string, string> = {};
        for (let idx = 0; idx < store.length; idx += 1) {
          const key = store.key(idx);
          if (key === null) continue;
          entries[key] = store.getItem(key) ?? "";
        }
        return entries;
      };
      return {
        localStorage: toRecord(window.localStorage),
        sessionStorage: toRecord(window.sessionStorage)
      };
    });
  } catch {
    webStorage = { localStorage: {}, sessionStorage: {} };
  }

  const jar = await page
    .context()
    .cookies()
    .catch(() => []);

  return {
    localStorage: webStorage.localStorage,
    sessionStorage: webStorage.sessionStorage,
    cookies: jar.map(({ name, value, domain, path }) => ({
      name,
      value,
      domain,
      path
    }))
  };
}
173
+
174
/**
 * Capture the state of `page` into the run directory.
 *
 * Always takes a viewport screenshot (`<prefix>.png`) and reads the UI
 * attributes. Depending on the flags it also takes a full-page screenshot
 * (`<prefix>-full.png`), writes the page markup (`<prefix>.html`) and reads
 * browser storage. Screenshot and markup failures are ignored, so the
 * returned file names are not a guarantee that the files exist.
 *
 * @param page - Page to capture.
 * @param paths - Run paths; artefacts are written under `paths.runDir`.
 * @param prefix - File-name prefix identifying which snapshot this is.
 * @param captureHtml - Write the page markup to disk.
 * @param captureStorage - Include a storage snapshot.
 * @param fullPageScreenshots - Also take a full-page screenshot.
 * @returns Run-relative file names plus the captured data; optional keys are
 *   present only when the corresponding capture was requested.
 */
async function captureSnapshot(
  page: Page,
  paths: RunPaths,
  prefix: "initial" | "final",
  captureHtml: boolean,
  captureStorage: boolean,
  fullPageScreenshots: boolean
): Promise<{
  screenshot: string;
  screenshotFull?: string;
  attrs: UiAttribute[];
  html?: string;
  storage?: StorageSnapshot;
}> {
  const inRunDir = (file: string): string => resolve(paths.runDir, file);

  const screenshot = `${prefix}.png`;
  await page
    .screenshot({ path: inRunDir(screenshot), fullPage: false })
    .catch(() => {});

  const screenshotFull = fullPageScreenshots ? `${prefix}-full.png` : undefined;
  if (screenshotFull !== undefined) {
    await page
      .screenshot({ path: inRunDir(screenshotFull), fullPage: true })
      .catch(() => {});
  }

  const attrs = await readUiAttributes(page);

  let html: string | undefined;
  if (captureHtml) {
    // An unreadable page still produces an (empty) file so the report link resolves.
    const markup = await page.content().catch(() => "");
    const target = `${prefix}.html`;
    writeFileSync(inRunDir(target), markup);
    html = target;
  }

  const storage = captureStorage ? await readStorage(page) : undefined;

  // Optional keys are only added when set, in the same order as before.
  return {
    screenshot,
    attrs,
    ...(screenshotFull !== undefined ? { screenshotFull } : {}),
    ...(html !== undefined ? { html } : {}),
    ...(storage !== undefined ? { storage } : {})
  };
}
226
+
227
/**
 * Take a screenshot for one step and return its run-relative path.
 *
 * The file is named `<index padded to 2 digits>-<slug>.png` inside
 * `paths.stepsDir`. The slug is `name` lower-cased, with every run of
 * non-alphanumeric characters collapsed to `-`, trimmed of edge dashes and
 * limited to 60 characters. A missing name, or one that contains no
 * alphanumeric characters at all, falls back to `step` so the file name never
 * degenerates to `NN-.png`.
 *
 * Screenshot failures are ignored; the path is returned either way.
 *
 * @param page - Page to capture.
 * @param paths - Run paths; the image is written under `paths.stepsDir`.
 * @param index - Step index used as the file-name prefix.
 * @param name - Human-readable name to slugify; optional.
 * @param fullPage - Capture the full scrollable page instead of the viewport.
 * @returns The path relative to the run directory, always `steps/<file>`.
 */
async function captureStepScreenshot(
  page: Page,
  paths: RunPaths,
  index: number,
  name: string | undefined,
  fullPage: boolean
): Promise<string> {
  const slug = (name ?? "step")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .slice(0, 60)
    // Truncation can cut right after a separator; drop the dangling dash.
    .replace(/-+$/g, "");
  // Names such as "" or "!!!" sanitise to nothing; use the default instead.
  const safe = slug.length > 0 ? slug : "step";
  const fileName = `${String(index).padStart(2, "0")}-${safe}.png`;
  const abs = resolve(paths.stepsDir, fileName);
  await page.screenshot({ path: abs, fullPage }).catch(() => {});
  return `steps/${fileName}`;
}
244
+
245
/**
 * Count console entries per `type`.
 *
 * Counting happens in a `Map` so that a type which collides with an
 * `Object.prototype` member (`constructor`, `toString`, …) starts from zero
 * instead of picking up the inherited value.
 *
 * @param entries - Console entries to count.
 * @returns A plain object mapping each seen type to its number of entries;
 *   empty when `entries` is empty.
 */
function tally(entries: ConsoleEntry[]): Record<string, number> {
  const counts = new Map<string, number>();
  for (const entry of entries) {
    counts.set(entry.type, (counts.get(entry.type) ?? 0) + 1);
  }
  return Object.fromEntries(counts);
}
250
+
251
/**
 * Resolve the URL to open first.
 *
 * Only values that really start with an `http://` or `https://` scheme are
 * used verbatim; anything else (including relative paths that merely begin
 * with the letters "http", such as `http-status/404`) is resolved against
 * `baseUrl`.
 */
function resolveRequestedUrl(url: string, baseUrl: string): string {
  return /^https?:\/\//i.test(url) ? url : new URL(url, baseUrl).toString();
}

/**
 * Combine the first and the after-wait evaluation of the expectations.
 *
 * An expectation that failed the first check keeps that result. One that
 * passed first takes the later result when it passed again (or when no later
 * result exists), and is turned into a failure when the later check failed.
 */
function mergePersistedExpectations(
  first: ExpectationResult[],
  after: ExpectationResult[],
  persistMs: number
): ExpectationResult[] {
  return first.map((earlier, i) => {
    const later = after[i];
    if (!earlier.ok) return earlier;
    if (!later || later.ok) return later ?? earlier;
    return {
      expectation: earlier.expectation,
      ok: false,
      detail: `held at first check but failed after ${persistMs}ms: ${later.detail ?? "no detail"}`
    };
  });
}

/**
 * Run one inspection: open a browser session, navigate to the requested URL,
 * execute the configured steps, evaluate expectations, and write the run's
 * artefacts (console/network JSON, result JSON and HTML report).
 *
 * A step that throws is recorded as failed and the run continues with the
 * next step. A failure of the initial navigation or of a snapshot aborts the
 * run: the session is still closed, nothing is written, and the error is
 * rethrown.
 *
 * The browser session state is saved only when the run reached its end with
 * no failed step, and only if `saveSession` was requested or a stored session
 * had been loaded.
 *
 * @param opts - Run configuration.
 * @returns The complete result that was also written to disk.
 * @throws Whatever the session setup, initial navigation, snapshots or
 *   report writing throw.
 */
export async function runInspect(opts: InspectOptions): Promise<InspectResult> {
  const startedAt = new Date();
  const paths = ensureRunPaths(newRunId());
  log.dim(`run dir: ${paths.runDir}`);

  const session = await openSession({
    baseUrl: opts.baseUrl,
    headed: opts.headed,
    videoDir: opts.recordVideo ? paths.videoDir : undefined,
    loadStorageState: opts.loadStorageState
  });

  let buffers: CaptureBuffers;
  let deepCapture:
    | Awaited<ReturnType<typeof attachDeepCapture>>
    | undefined;
  let requestedUrl: string;
  try {
    buffers = attachCapture(session.context, session.page, {
      allNetwork: opts.captureAllNetwork,
      allConsole: opts.captureAllConsole,
      captureBodies: opts.deep
    });

    // Deep capture attaches a CDP debugger. Best-effort: if the protocol
    // session can't open, log and carry on with the normal capture pipeline.
    if (opts.deep) {
      deepCapture = await attachDeepCapture(session.page).catch((err) => {
        log.dim(
          ` deep capture unavailable: ${err instanceof Error ? err.message : String(err)}`
        );
        return undefined;
      });
    }

    requestedUrl = resolveRequestedUrl(opts.url, session.baseUrl);
  } catch (err) {
    // Setup failed after the browser was opened (e.g. an unusable base URL):
    // release the session instead of leaking it, then surface the real error.
    if (deepCapture) await deepCapture.detach().catch(() => {});
    try {
      await session.close();
    } catch {
      // Ignored on purpose: the setup error is the one worth reporting.
    }
    throw err;
  }

  let initial: InspectResult["initial"];
  let final: InspectResult["final"];
  let title = "";
  let finalUrl = requestedUrl;
  const steps: StepReport[] = [];
  let failedSteps = 0;
  let expectations: ExpectationResult[] = [];
  // Flipped at the very end of the try block; stays false if the run aborted.
  let completed = false;

  try {
    log.step(`→ ${requestedUrl}`);
    const response = await session.page.goto(requestedUrl, {
      waitUntil: "domcontentloaded",
      timeout: opts.gotoTimeoutMs
    });
    log.dim(` status: ${response?.status() ?? "?"}`);

    initial = await captureSnapshot(
      session.page,
      paths,
      "initial",
      opts.captureHtml,
      opts.captureStorage,
      opts.fullPageScreenshots
    );

    let stepIndex = 0;
    for (const step of opts.steps) {
      stepIndex++;
      const before = snapshotCursor(buffers);
      const started = Date.now();
      let label = "";
      let ok = true;
      let error: string | undefined;
      let evalResult: unknown;
      try {
        const result = await executeStep(step, session.page);
        label = result.label;
        evalResult = result.evalResult;
      } catch (err) {
        ok = false;
        failedSteps++;
        error = err instanceof Error ? err.message : String(err);
        label = `${step.kind} (error)`;
      }
      const durationMs = Date.now() - started;

      let screenshot: string | undefined;
      // Trivial steps don't change visible state, so the screenshot would just
      // duplicate the previous step's frame. The HTML report can fall back to
      // the prior screenshot for these cases.
      const trivial =
        step.kind === "eval" ||
        (step.kind === "wait" && step.target.kind === "ms");
      if (step.kind === "screenshot") {
        screenshot = await captureStepScreenshot(
          session.page,
          paths,
          stepIndex,
          step.name,
          step.fullPage ?? false
        );
      } else if (!ok) {
        screenshot = await captureStepScreenshot(
          session.page,
          paths,
          stepIndex,
          `error-${step.kind}`,
          false
        );
      } else if (!trivial) {
        screenshot = await captureStepScreenshot(
          session.page,
          paths,
          stepIndex,
          step.kind,
          false
        );
      }

      const slice = sliceSince(buffers, before);
      const report: StepReport = {
        index: stepIndex,
        step,
        label,
        ok,
        durationMs,
        url: session.page.url(),
        console: slice.console,
        network: slice.network,
        pageErrors: slice.pageErrors
      };
      if (error !== undefined) report.error = error;
      if (evalResult !== undefined) report.evalResult = evalResult;
      if (screenshot !== undefined) report.screenshot = screenshot;
      steps.push(report);
      const tag = ok ? "✓" : "✗";
      log.dim(` ${tag} step ${stepIndex}: ${label} (${durationMs}ms)`);
      if (!ok && error) log.fail(` error: ${error}`);
    }

    if (opts.expectations.length > 0) {
      expectations = await evaluateExpectations(session.page, opts.expectations);
      for (const r of expectations) {
        const tag = r.ok ? "✓" : "✗";
        const desc = describeExpectation(r.expectation);
        log.dim(` ${tag} expect ${desc}${r.detail ? ` — ${r.detail}` : ""}`);
      }

      // Persistence check: wait, then re-evaluate. Each expectation passes
      // only if BOTH the initial and after-wait check pass. Catches alerts /
      // toasts / transient banners that show for a beat then disappear.
      if (opts.persistMs > 0) {
        log.dim(` · persisting check ${opts.persistMs}ms…`);
        await session.page.waitForTimeout(opts.persistMs);
        const after = await evaluateExpectations(
          session.page,
          opts.expectations
        );
        expectations = mergePersistedExpectations(
          expectations,
          after,
          opts.persistMs
        );
        // Only failures are reported here, so the marker is always a cross.
        for (const merged of expectations) {
          if (merged.ok) continue;
          const desc = describeExpectation(merged.expectation);
          log.fail(` ✗ persist ${desc} — ${merged.detail ?? "failed"}`);
        }
      }
    }

    final = await captureSnapshot(
      session.page,
      paths,
      "final",
      opts.captureHtml,
      opts.captureStorage,
      opts.fullPageScreenshots
    );
    title = await session.page.title().catch(() => "");
    // Read the URL from the live page so redirects, and runs without any
    // steps, are reflected in the result.
    finalUrl = session.page.url();
    completed = true;
  } finally {
    // Save the browser session (cookies + localStorage) when:
    //   - `--save-session` was passed (bootstrap a login), OR
    //   - one was already loaded (refresh rotated auth tokens into the next run).
    // Only on a clean run: a failed or aborted run might mean the server
    // rejected the cookies, and saving the now-anonymous state would log the
    // user out on every subsequent run, so we keep the old file instead.
    const wantSave = opts.saveSession || session.storageStateLoaded;
    if (wantSave && completed && failedSteps === 0) {
      await session
        .saveStorageState()
        .then(() => log.dim(" · session saved to ~/.web-tester/session.json"))
        .catch((err) =>
          log.fail(
            ` · could not save session: ${err instanceof Error ? err.message : String(err)}`
          )
        );
    } else if (wantSave) {
      log.dim(
        completed
          ? " · session not saved (a step failed; preserving any previous session.json)"
          : " · session not saved (run aborted; preserving any previous session.json)"
      );
    }
    // Drain any in-flight body reads before the context closes — a closed
    // context can't return a response body.
    await flushBodies(buffers).catch(() => {});
    if (deepCapture) await deepCapture.detach().catch(() => {});
    await session.close();
  }

  // The video file is finalised on context close; resolve its path now and
  // store it relative to runDir for portable report references.
  let video: string | undefined;
  if (opts.recordVideo) {
    const videoAbs = await session.videoPath();
    if (videoAbs) {
      const rel = videoAbs.startsWith(paths.runDir)
        ? videoAbs.slice(paths.runDir.length + 1)
        : videoAbs;
      video = rel;
    }
  }

  const verdict = computeVerdict({
    failedSteps,
    pageErrors: buffers.pageErrors,
    consoleEntries: buffers.consoleEntries,
    networkEntries: buffers.networkEntries,
    expectations,
    failOn: opts.failOn
  });

  const result: InspectResult = {
    runId: paths.runId,
    runDir: paths.runDir,
    startedAt: startedAt.toISOString(),
    durationMs: Date.now() - startedAt.getTime(),
    baseUrl: session.baseUrl,
    requestedUrl,
    finalUrl,
    title,
    viewport: { width: 1280, height: 900 },
    ...(video !== undefined ? { video } : {}),
    ok: verdict.ok,
    failedSteps,
    verdictTriggers: verdict.triggers,
    failOn: opts.failOn,
    expectations,
    initial,
    final,
    console: {
      totals: tally(buffers.consoleEntries),
      entries: buffers.consoleEntries
    },
    network: {
      count: buffers.networkEntries.length,
      failedCount: buffers.networkEntries.filter(
        (e) => (e.status !== null && e.status >= 400) || e.failureText !== null
      ).length,
      entries: buffers.networkEntries
    },
    pageErrors: buffers.pageErrors,
    ...(deepCapture && deepCapture.buffers.errors.length
      ? { deepErrors: deepCapture.buffers.errors }
      : {}),
    ...(deepCapture && deepCapture.buffers.rejections.length
      ? { unhandledRejections: deepCapture.buffers.rejections }
      : {}),
    steps
  };

  writeFileSync(paths.consolePath, JSON.stringify(buffers.consoleEntries, null, 2));
  writeFileSync(paths.networkPath, JSON.stringify(buffers.networkEntries, null, 2));

  // Ask Sonnet for a short TL;DR before rendering — runs in a child process,
  // returns null on any failure so the report still writes either way.
  const summary = await summariseRun(result, { enabled: opts.summary });
  if (summary) result.summary = summary;

  // `writeReport` writes both result.json and report.html.
  writeReport(result, paths);

  return result;
}
537
+
538
/**
 * Render an expectation as a short `kind="value"` string for log lines.
 *
 * @param e - Expectation to describe.
 * @returns `text="…"`, `no-text="…"`, `selector="…"` or `no-selector="…"` for
 *   those kinds; any other kind is rendered as `attr <name>="<value>"`.
 */
function describeExpectation(e: Expectation): string {
  switch (e.kind) {
    case "text":
      return `text="${e.text}"`;
    case "no-text":
      return `no-text="${e.text}"`;
    case "selector":
      return `selector="${e.selector}"`;
    case "no-selector":
      return `no-selector="${e.selector}"`;
    default:
      return `attr ${e.name}="${e.value}"`;
  }
}