nodebench-mcp 2.25.0 → 2.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/NODEBENCH_AGENTS.md +5 -4
  2. package/README.md +145 -16
  3. package/dist/__tests__/architectComplex.test.js +3 -5
  4. package/dist/__tests__/architectComplex.test.js.map +1 -1
  5. package/dist/__tests__/batchAutopilot.test.d.ts +8 -0
  6. package/dist/__tests__/batchAutopilot.test.js +218 -0
  7. package/dist/__tests__/batchAutopilot.test.js.map +1 -0
  8. package/dist/__tests__/cliSubcommands.test.d.ts +1 -0
  9. package/dist/__tests__/cliSubcommands.test.js +138 -0
  10. package/dist/__tests__/cliSubcommands.test.js.map +1 -0
  11. package/dist/__tests__/evalHarness.test.js +1 -1
  12. package/dist/__tests__/forecastingDogfood.test.d.ts +9 -0
  13. package/dist/__tests__/forecastingDogfood.test.js +284 -0
  14. package/dist/__tests__/forecastingDogfood.test.js.map +1 -0
  15. package/dist/__tests__/forecastingScoring.test.d.ts +9 -0
  16. package/dist/__tests__/forecastingScoring.test.js +202 -0
  17. package/dist/__tests__/forecastingScoring.test.js.map +1 -0
  18. package/dist/__tests__/localDashboard.test.d.ts +1 -0
  19. package/dist/__tests__/localDashboard.test.js +226 -0
  20. package/dist/__tests__/localDashboard.test.js.map +1 -0
  21. package/dist/__tests__/multiHopDogfood.test.js +11 -11
  22. package/dist/__tests__/multiHopDogfood.test.js.map +1 -1
  23. package/dist/__tests__/openclawDogfood.test.d.ts +23 -0
  24. package/dist/__tests__/openclawDogfood.test.js +535 -0
  25. package/dist/__tests__/openclawDogfood.test.js.map +1 -0
  26. package/dist/__tests__/openclawMessaging.test.d.ts +14 -0
  27. package/dist/__tests__/openclawMessaging.test.js +232 -0
  28. package/dist/__tests__/openclawMessaging.test.js.map +1 -0
  29. package/dist/__tests__/presetRealWorldBench.test.js +0 -2
  30. package/dist/__tests__/presetRealWorldBench.test.js.map +1 -1
  31. package/dist/__tests__/tools.test.js +9 -157
  32. package/dist/__tests__/tools.test.js.map +1 -1
  33. package/dist/__tests__/toolsetGatingEval.test.js +0 -2
  34. package/dist/__tests__/toolsetGatingEval.test.js.map +1 -1
  35. package/dist/__tests__/traceabilityDogfood.test.d.ts +12 -0
  36. package/dist/__tests__/traceabilityDogfood.test.js +241 -0
  37. package/dist/__tests__/traceabilityDogfood.test.js.map +1 -0
  38. package/dist/__tests__/webmcpTools.test.d.ts +7 -0
  39. package/dist/__tests__/webmcpTools.test.js +195 -0
  40. package/dist/__tests__/webmcpTools.test.js.map +1 -0
  41. package/dist/dashboard/briefHtml.d.ts +20 -0
  42. package/dist/dashboard/briefHtml.js +1000 -0
  43. package/dist/dashboard/briefHtml.js.map +1 -0
  44. package/dist/dashboard/briefServer.d.ts +18 -0
  45. package/dist/dashboard/briefServer.js +320 -0
  46. package/dist/dashboard/briefServer.js.map +1 -0
  47. package/dist/dashboard/html.js +1470 -1230
  48. package/dist/dashboard/html.js.map +1 -1
  49. package/dist/dashboard/server.js +166 -41
  50. package/dist/dashboard/server.js.map +1 -1
  51. package/dist/index.js +210 -14
  52. package/dist/index.js.map +1 -1
  53. package/dist/tools/critterTools.js +4 -0
  54. package/dist/tools/critterTools.js.map +1 -1
  55. package/dist/tools/forecastingTools.d.ts +11 -0
  56. package/dist/tools/forecastingTools.js +616 -0
  57. package/dist/tools/forecastingTools.js.map +1 -0
  58. package/dist/tools/localDashboardTools.d.ts +8 -0
  59. package/dist/tools/localDashboardTools.js +332 -0
  60. package/dist/tools/localDashboardTools.js.map +1 -0
  61. package/dist/tools/metaTools.js +170 -1
  62. package/dist/tools/metaTools.js.map +1 -1
  63. package/dist/tools/openclawTools.d.ts +11 -0
  64. package/dist/tools/openclawTools.js +1017 -0
  65. package/dist/tools/openclawTools.js.map +1 -0
  66. package/dist/tools/overstoryTools.d.ts +14 -0
  67. package/dist/tools/overstoryTools.js +426 -0
  68. package/dist/tools/overstoryTools.js.map +1 -0
  69. package/dist/tools/progressiveDiscoveryTools.js +50 -115
  70. package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
  71. package/dist/tools/selfEvalTools.js +8 -1
  72. package/dist/tools/selfEvalTools.js.map +1 -1
  73. package/dist/tools/sessionMemoryTools.js +14 -2
  74. package/dist/tools/sessionMemoryTools.js.map +1 -1
  75. package/dist/tools/toolRegistry.d.ts +1 -15
  76. package/dist/tools/toolRegistry.js +243 -228
  77. package/dist/tools/toolRegistry.js.map +1 -1
  78. package/dist/tools/visualQaTools.d.ts +2 -0
  79. package/dist/tools/visualQaTools.js +1088 -0
  80. package/dist/tools/visualQaTools.js.map +1 -0
  81. package/dist/tools/webmcpTools.d.ts +16 -0
  82. package/dist/tools/webmcpTools.js +703 -0
  83. package/dist/tools/webmcpTools.js.map +1 -0
  84. package/dist/toolsetRegistry.js +6 -2
  85. package/dist/toolsetRegistry.js.map +1 -1
  86. package/package.json +2 -2
@@ -0,0 +1,1088 @@
1
+ /**
2
+ * Visual QA tools — burst capture + SSIM stability scoring + grid collage
3
+ * for detecting animation jank, flicker, and visual regressions in web apps.
4
+ *
5
+ * Uses Playwright for sequential screenshot capture and sharp for
6
+ * frame-to-frame SSIM computation. No Python server dependency.
7
+ *
8
+ * Reuses patterns from:
9
+ * - uiCaptureTools.ts: Playwright launch, viewport presets, base64 content blocks
10
+ * - visionTools.ts: sharp raw buffer pixel operations
11
+ * - db.ts: SQLite persistence via getDb()/genId()
12
+ */
13
+ import { join } from "path";
14
+ import { homedir } from "os";
15
+ import { mkdirSync, readFileSync } from "fs";
16
+ import { getDb, genId } from "../db.js";
17
+ // ═══ Constants ═══
18
// Side length, in pixels, of the square window used for block-wise SSIM.
const BLOCK_SIZE = 8;
// SSIM stabilising constants (Wang et al. 2004), using K1 = 0.01, K2 = 0.03
// and an 8-bit dynamic range of 255.
const C1 = (0.01 * 255) ** 2; // 6.5025
const C2 = (0.03 * 255) ** 2; // 58.5225
// Viewport sizes selectable through the `viewport` tool argument.
const VIEWPORT_PRESETS = {
    mobile: { width: 375, height: 812 },
    tablet: { width: 768, height: 1024 },
    desktop: { width: 1280, height: 800 },
    wide: { width: 1920, height: 1080 },
};
// Parent directory for burst sessions; ensureBurstDir() creates one
// sub-directory per session underneath it.
const BURST_DIR = join(homedir(), ".nodebench", "burst-captures");
29
+ // ═══ Helpers ═══
30
/**
 * Dynamically import the `playwright` package.
 *
 * @returns {Promise<object|null>} The module namespace, or `null` when the
 *   import rejects for any reason (for example, the package is not installed).
 */
async function getPlaywright() {
    return import("playwright").then((playwrightModule) => playwrightModule, () => null);
}
38
/**
 * Dynamically import the `sharp` package and unwrap its default export.
 *
 * @returns {Promise<Function|null>} The default export of `sharp`, or `null`
 *   when the import rejects for any reason (for example, the package is not
 *   installed).
 */
async function getSharp() {
    return import("sharp").then((sharpModule) => sharpModule.default, () => null);
}
46
/**
 * Create (if needed) and return a directory for one burst session.
 *
 * The directory is named `<slug>_<timestamp>` under BURST_DIR, where the slug
 * is the lower-cased label with every character outside `[a-zA-Z0-9-_]`
 * replaced by `-`, and the timestamp is the current ISO time with `:` and `.`
 * replaced by `-`.
 *
 * @param {string} [label] Session label; a falsy value yields the slug "burst".
 * @returns {string} Absolute path of the session directory.
 */
function ensureBurstDir(label) {
    const stamp = new Date().toISOString().replace(/[:.]/g, "-");
    let safeLabel = "burst";
    if (label) {
        safeLabel = label.replace(/[^a-zA-Z0-9-_]/g, "-").toLowerCase();
    }
    const sessionDir = join(BURST_DIR, `${safeLabel}_${stamp}`);
    mkdirSync(sessionDir, { recursive: true });
    return sessionDir;
}
55
/**
 * Reduce an interleaved 4-bytes-per-pixel RGBA buffer to one luminance value
 * per pixel, for use as SSIM input.
 *
 * Weights: Y = 0.299*R + 0.587*G + 0.114*B. The alpha byte is skipped.
 *
 * @param {ArrayLike<number>} rgba Pixel data, 4 entries per pixel (R, G, B, A).
 * @param {number} width Image width in pixels.
 * @param {number} height Image height in pixels.
 * @returns {Float64Array} `width * height` luminance values in pixel order.
 */
function toLuminance(rgba, width, height) {
    const pixelCount = width * height;
    const luma = new Float64Array(pixelCount);
    let byteOffset = 0;
    for (let p = 0; p < pixelCount; p += 1) {
        const red = rgba[byteOffset];
        const green = rgba[byteOffset + 1];
        const blue = rgba[byteOffset + 2];
        luma[p] = 0.299 * red + 0.587 * green + 0.114 * blue;
        byteOffset += 4;
    }
    return luma;
}
68
/**
 * Compute the mean SSIM between two luminance arrays using non-overlapping
 * BLOCK_SIZE x BLOCK_SIZE windows.
 *
 * Only whole blocks are scored: right/bottom remainders smaller than
 * BLOCK_SIZE are ignored. Identical inputs score exactly 1.0. Values are
 * normally in (0, 1], but a block whose pixels are anti-correlated (negative
 * covariance) contributes a negative term, so the mean can drop below 0.
 *
 * @param {ArrayLike<number>} lumA Luminance of the first frame, row-major.
 * @param {ArrayLike<number>} lumB Luminance of the second frame, row-major.
 * @param {number} width Row stride / image width in pixels.
 * @param {number} height Image height in pixels.
 * @returns {number} Mean block SSIM, or 1.0 when the image holds no whole block.
 * @throws {RangeError} If either array is too short to cover every scored
 *   block (previously this silently produced NaN).
 */
function computeSSIM(lumA, lumB, width, height) {
    const blocksX = Math.floor(width / BLOCK_SIZE);
    const blocksY = Math.floor(height / BLOCK_SIZE);
    // Covers zero, negative and NaN block counts: nothing to compare.
    if (!(blocksX > 0 && blocksY > 0))
        return 1.0; // too small to compare
    // Highest index read below is the last pixel of the bottom-right block.
    const requiredLength = (blocksY * BLOCK_SIZE - 1) * width + blocksX * BLOCK_SIZE;
    if (lumA.length < requiredLength || lumB.length < requiredLength) {
        throw new RangeError(`computeSSIM: luminance arrays must hold at least ${requiredLength} values for a ${width}x${height} image ` +
            `(got ${lumA.length} and ${lumB.length})`);
    }
    const n = BLOCK_SIZE * BLOCK_SIZE; // samples per block
    let totalSSIM = 0;
    for (let by = 0; by < blocksY; by++) {
        for (let bx = 0; bx < blocksX; bx++) {
            let sumA = 0;
            let sumB = 0;
            let sumA2 = 0;
            let sumB2 = 0;
            let sumAB = 0;
            for (let dy = 0; dy < BLOCK_SIZE; dy++) {
                const rowStart = (by * BLOCK_SIZE + dy) * width + bx * BLOCK_SIZE;
                for (let dx = 0; dx < BLOCK_SIZE; dx++) {
                    const a = lumA[rowStart + dx];
                    const b = lumB[rowStart + dx];
                    sumA += a;
                    sumB += b;
                    sumA2 += a * a;
                    sumB2 += b * b;
                    sumAB += a * b;
                }
            }
            // Block statistics via E[x^2] - E[x]^2 (population variance/covariance).
            const muA = sumA / n;
            const muB = sumB / n;
            const varA = sumA2 / n - muA * muA;
            const varB = sumB2 / n - muB * muB;
            const covAB = sumAB / n - muA * muB;
            totalSSIM += ((2 * muA * muB + C1) * (2 * covAB + C2)) /
                ((muA * muA + muB * muB + C1) * (varA + varB + C2));
        }
    }
    return totalSSIM / (blocksX * blocksY);
}
110
/**
 * Map a numeric stability score to a letter grade.
 *
 * Bands: >= 90 "A", >= 80 "B", >= 70 "C", >= 60 "D", anything else
 * (including NaN) "F".
 *
 * @param {number} score Stability score.
 * @returns {"A"|"B"|"C"|"D"|"F"} Letter grade.
 */
function stabilityGrade(score) {
    const bands = [
        [90, "A"],
        [80, "B"],
        [70, "C"],
        [60, "D"],
    ];
    for (const [minimum, letter] of bands) {
        if (score >= minimum) {
            return letter;
        }
    }
    return "F";
}
121
+ // ═══ DB Schema (added on first getDb() call via ensureVisualQaTable) ═══
122
// Set once the schema statements below have run in this process, so later
// calls return without touching the database.
let _tableCreated = false;
/**
 * Create the `visual_qa_runs` table and its url/label indexes if they do not
 * exist yet. The statements use IF NOT EXISTS, and the flag above makes every
 * call after the first successful one a no-op.
 */
function ensureVisualQaTable() {
    if (!_tableCreated) {
        getDb().exec(`
 CREATE TABLE IF NOT EXISTS visual_qa_runs (
 id TEXT PRIMARY KEY,
 url TEXT NOT NULL,
 label TEXT,
 viewport TEXT NOT NULL DEFAULT 'desktop',
 frame_count INTEGER NOT NULL,
 interval_ms INTEGER NOT NULL,
 stability_score INTEGER,
 stability_grade TEXT,
 mean_ssim REAL,
 jank_count INTEGER DEFAULT 0,
 effective_fps REAL,
 collage_path TEXT,
 metadata TEXT,
 created_at TEXT NOT NULL DEFAULT (datetime('now'))
 );
 CREATE INDEX IF NOT EXISTS idx_visual_qa_url ON visual_qa_runs(url);
 CREATE INDEX IF NOT EXISTS idx_visual_qa_label ON visual_qa_runs(label);
 `);
        _tableCreated = true;
    }
}
149
+ // ═══ Tools ═══
150
+ export const visualQaTools = [
151
+ // ─── Tool 1: burst_capture ───
152
+ {
153
+ name: "burst_capture",
154
+ description: "Capture N sequential screenshots at fixed intervals using Playwright. " +
155
+ "Navigate to a URL, optionally perform an action (click/scroll), then capture " +
156
+ "a burst of frames for animation stability analysis. Returns frame paths, " +
157
+ "timestamps, and first+last frames as inline images. Designed for detecting " +
158
+ "jank, flicker, and loading smoothness issues.",
159
+ rawContent: true,
160
+ inputSchema: {
161
+ type: "object",
162
+ properties: {
163
+ url: {
164
+ type: "string",
165
+ description: "URL to capture (e.g. http://localhost:5173)",
166
+ },
167
+ frameCount: {
168
+ type: "number",
169
+ description: "Number of frames to capture (default: 10, max: 30)",
170
+ },
171
+ intervalMs: {
172
+ type: "number",
173
+ description: "Interval between frames in ms (default: 50, min: 16)",
174
+ },
175
+ viewport: {
176
+ type: "string",
177
+ enum: ["mobile", "tablet", "desktop", "wide"],
178
+ description: "Viewport preset (default: desktop)",
179
+ },
180
+ preAction: {
181
+ type: "object",
182
+ properties: {
183
+ type: {
184
+ type: "string",
185
+ enum: ["click", "scroll", "navigate", "wait"],
186
+ },
187
+ target: {
188
+ type: "string",
189
+ description: "CSS selector (for click) or URL (for navigate)",
190
+ },
191
+ value: {
192
+ type: "string",
193
+ description: "Scroll amount (for scroll) or wait ms (for wait)",
194
+ },
195
+ },
196
+ description: "Action to perform before burst starts (e.g. click a tab, scroll down)",
197
+ },
198
+ settleMs: {
199
+ type: "number",
200
+ description: "Wait time after page load before burst (default: 500). Use 2000-3000 for pages with heavy initial animations.",
201
+ },
202
+ label: {
203
+ type: "string",
204
+ description: "Label for the burst session (used in directory name)",
205
+ },
206
+ reducedMotion: {
207
+ type: "boolean",
208
+ description: "Set prefers-reduced-motion:reduce before capture (default: false). Useful for comparing with/without animations.",
209
+ },
210
+ clearCache: {
211
+ type: "boolean",
212
+ description: "Clear browser cache, cookies, and localStorage before navigating (default: false). " +
213
+ "Simulates a cold first-load experience to detect loading jank.",
214
+ },
215
+ waitUntil: {
216
+ type: "string",
217
+ enum: ["networkidle", "commit", "domcontentloaded", "load"],
218
+ description: "Navigation wait strategy (default: networkidle). Use 'commit' to start capturing " +
219
+ "as soon as the page begins rendering — essential for detecting loading jank. " +
220
+ "'networkidle' waits until the page is fully loaded (hides loading issues).",
221
+ },
222
+ },
223
+ required: ["url"],
224
+ },
225
+ handler: async (args) => {
226
+ const pw = await getPlaywright();
227
+ if (!pw) {
228
+ return [
229
+ {
230
+ type: "text",
231
+ text: JSON.stringify({
232
+ error: true,
233
+ message: "Playwright is not installed. Run: npm install playwright && npx playwright install chromium",
234
+ }),
235
+ },
236
+ ];
237
+ }
238
+ const frameCount = Math.min(Math.max(args.frameCount ?? 10, 2), 30);
239
+ const intervalMs = Math.max(args.intervalMs ?? 50, 16);
240
+ const settleMs = args.settleMs ?? 500;
241
+ const viewportName = args.viewport ?? "desktop";
242
+ const viewportSize = VIEWPORT_PRESETS[viewportName];
243
+ const waitUntil = args.waitUntil ?? "networkidle";
244
+ if (!viewportSize) {
245
+ return [
246
+ {
247
+ type: "text",
248
+ text: JSON.stringify({
249
+ error: true,
250
+ message: `Unknown viewport: ${viewportName}. Use: mobile, tablet, desktop, wide`,
251
+ }),
252
+ },
253
+ ];
254
+ }
255
+ const burstDir = ensureBurstDir(args.label ?? "burst");
256
+ const framePaths = [];
257
+ const timestamps = [];
258
+ let browser;
259
+ try {
260
+ browser = await pw.chromium.launch({ headless: true });
261
+ const contextOptions = { viewport: viewportSize };
262
+ if (args.reducedMotion) {
263
+ contextOptions.reducedMotion = "reduce";
264
+ }
265
+ const context = await browser.newContext(contextOptions);
266
+ // Clear cache if requested (simulate cold first-load)
267
+ if (args.clearCache) {
268
+ await context.clearCookies();
269
+ // Service workers and cache storage cleared via CDP
270
+ const page0 = await context.newPage();
271
+ try {
272
+ const cdp = await page0.context().newCDPSession(page0);
273
+ await cdp.send("Network.clearBrowserCache");
274
+ await cdp.send("Network.clearBrowserCookies");
275
+ await page0.evaluate(() => {
276
+ try {
277
+ localStorage.clear();
278
+ }
279
+ catch { }
280
+ try {
281
+ sessionStorage.clear();
282
+ }
283
+ catch { }
284
+ try {
285
+ const w = globalThis;
286
+ if (w.caches)
287
+ w.caches.keys().then((keys) => keys.forEach((k) => w.caches.delete(k)));
288
+ }
289
+ catch { }
290
+ });
291
+ }
292
+ catch { /* CDP best-effort */ }
293
+ await page0.close();
294
+ }
295
+ const page = await context.newPage();
296
+ await page.goto(args.url, {
297
+ waitUntil: waitUntil,
298
+ timeout: 30000,
299
+ });
300
+ // Settle time — let animations start/finish
301
+ if (settleMs > 0) {
302
+ await page.waitForTimeout(settleMs);
303
+ }
304
+ // Pre-action
305
+ if (args.preAction) {
306
+ const pa = args.preAction;
307
+ switch (pa.type) {
308
+ case "click":
309
+ if (pa.target)
310
+ await page.click(pa.target, { timeout: 5000 });
311
+ break;
312
+ case "scroll":
313
+ await page.evaluate((amount) => globalThis.scrollBy(0, amount), parseInt(pa.value ?? "500", 10));
314
+ break;
315
+ case "navigate":
316
+ if (pa.target)
317
+ await page.goto(pa.target, {
318
+ waitUntil: "networkidle",
319
+ timeout: 15000,
320
+ });
321
+ break;
322
+ case "wait":
323
+ await page.waitForTimeout(parseInt(pa.value ?? "1000", 10));
324
+ break;
325
+ }
326
+ // Small settle after pre-action
327
+ await page.waitForTimeout(100);
328
+ }
329
+ // Burst capture loop
330
+ for (let i = 0; i < frameCount; i++) {
331
+ const framePath = join(burstDir, `frame_${String(i).padStart(3, "0")}.png`);
332
+ timestamps.push(Date.now());
333
+ await page.screenshot({ path: framePath, fullPage: false });
334
+ framePaths.push(framePath);
335
+ if (i < frameCount - 1) {
336
+ await page.waitForTimeout(intervalMs);
337
+ }
338
+ }
339
+ await browser.close();
340
+ browser = null;
341
+ // Read first and last frames for inline preview
342
+ const firstFrame = readFileSync(framePaths[0]).toString("base64");
343
+ const lastFrame = readFileSync(framePaths[framePaths.length - 1]).toString("base64");
344
+ const totalDuration = timestamps[timestamps.length - 1] - timestamps[0];
345
+ const content = [
346
+ {
347
+ type: "text",
348
+ text: JSON.stringify({
349
+ burstDir,
350
+ framePaths,
351
+ timestamps,
352
+ frameCount,
353
+ intervalMs,
354
+ settleMs,
355
+ totalDurationMs: totalDuration,
356
+ viewport: { preset: viewportName, ...viewportSize },
357
+ url: args.url,
358
+ label: args.label ?? null,
359
+ reducedMotion: args.reducedMotion ?? false,
360
+ instruction: "Burst captured. Run compute_web_stability to analyze frame-to-frame SSIM " +
361
+ "and detect jank. Or run generate_grid_collage to visually inspect all frames. " +
362
+ "First and last frames shown below.",
363
+ }),
364
+ },
365
+ { type: "text", text: "[FIRST FRAME]" },
366
+ { type: "image", data: firstFrame, mimeType: "image/png" },
367
+ { type: "text", text: "[LAST FRAME]" },
368
+ { type: "image", data: lastFrame, mimeType: "image/png" },
369
+ ];
370
+ return content;
371
+ }
372
+ catch (err) {
373
+ if (browser) {
374
+ try {
375
+ await browser.close();
376
+ }
377
+ catch {
378
+ /* ignore cleanup */
379
+ }
380
+ }
381
+ return [
382
+ {
383
+ type: "text",
384
+ text: JSON.stringify({
385
+ error: true,
386
+ message: `Burst capture failed: ${err.message}`,
387
+ url: args.url,
388
+ }),
389
+ },
390
+ ];
391
+ }
392
+ },
393
+ },
394
+ // ─── Tool 2: generate_grid_collage ───
395
+ {
396
+ name: "generate_grid_collage",
397
+ description: "Tile N screenshot images into a single grid collage PNG for visual inspection. " +
398
+ "Accepts frame paths from burst_capture or any PNG files. Labels each cell with " +
399
+ "its index and optional timestamp delta. Optionally highlights specific frames " +
400
+ "with a red border (e.g. jank frames from compute_web_stability).",
401
+ rawContent: true,
402
+ inputSchema: {
403
+ type: "object",
404
+ properties: {
405
+ framePaths: {
406
+ type: "array",
407
+ items: { type: "string" },
408
+ description: "Array of PNG file paths to tile into the grid",
409
+ },
410
+ columns: {
411
+ type: "number",
412
+ description: "Grid columns (default: 5)",
413
+ },
414
+ cellWidth: {
415
+ type: "number",
416
+ description: "Width of each cell in pixels (default: 384)",
417
+ },
418
+ timestamps: {
419
+ type: "array",
420
+ items: { type: "number" },
421
+ description: "Timestamps for delta labels (from burst_capture). If provided, each cell shows +Nms",
422
+ },
423
+ label: {
424
+ type: "string",
425
+ description: "Label for the collage (used in filename)",
426
+ },
427
+ highlightIndices: {
428
+ type: "array",
429
+ items: { type: "number" },
430
+ description: "Frame indices to highlight with red border (e.g. jank frames)",
431
+ },
432
+ outputPath: {
433
+ type: "string",
434
+ description: "Custom output path for the collage PNG. Default: auto-generated in ~/.nodebench/captures/",
435
+ },
436
+ },
437
+ required: ["framePaths"],
438
+ },
439
+ handler: async (args) => {
440
+ const sharp = await getSharp();
441
+ if (!sharp) {
442
+ return [
443
+ {
444
+ type: "text",
445
+ text: JSON.stringify({
446
+ error: true,
447
+ message: "sharp is not installed. Run: npm install sharp",
448
+ }),
449
+ },
450
+ ];
451
+ }
452
+ const paths = args.framePaths;
453
+ if (!paths || paths.length === 0) {
454
+ return [
455
+ {
456
+ type: "text",
457
+ text: JSON.stringify({
458
+ error: true,
459
+ message: "framePaths is required and must be non-empty",
460
+ }),
461
+ },
462
+ ];
463
+ }
464
+ const cols = args.columns ?? 5;
465
+ const cellWidth = args.cellWidth ?? 384;
466
+ const highlights = new Set(args.highlightIndices ?? []);
467
+ const timestamps = args.timestamps ?? null;
468
+ const t0 = timestamps ? timestamps[0] : 0;
469
+ try {
470
+ // Get dimensions from first frame
471
+ const firstMeta = await sharp(paths[0]).metadata();
472
+ const aspectRatio = (firstMeta.height ?? 600) / (firstMeta.width ?? 800);
473
+ const cellHeight = Math.round(cellWidth * aspectRatio);
474
+ const rows = Math.ceil(paths.length / cols);
475
+ const canvasWidth = cols * cellWidth;
476
+ const canvasHeight = rows * cellHeight;
477
+ // Resize all frames
478
+ const resizedBuffers = [];
479
+ for (const p of paths) {
480
+ const buf = await sharp(p)
481
+ .resize(cellWidth, cellHeight, { fit: "fill" })
482
+ .png()
483
+ .toBuffer();
484
+ resizedBuffers.push(buf);
485
+ }
486
+ // Build SVG overlay for labels and highlight borders
487
+ let svgParts = [];
488
+ for (let i = 0; i < paths.length; i++) {
489
+ const col = i % cols;
490
+ const row = Math.floor(i / cols);
491
+ const x = col * cellWidth;
492
+ const y = row * cellHeight;
493
+ // Frame index label
494
+ const deltaText = timestamps && i < timestamps.length
495
+ ? ` +${timestamps[i] - t0}ms`
496
+ : "";
497
+ svgParts.push(`<rect x="${x}" y="${y}" width="${Math.min(90, cellWidth)}" height="22" fill="rgba(0,0,0,0.7)" rx="3"/>`, `<text x="${x + 4}" y="${y + 16}" font-family="monospace" font-size="13" fill="white">#${i}${deltaText}</text>`);
498
+ // Highlight border for jank frames
499
+ if (highlights.has(i)) {
500
+ svgParts.push(`<rect x="${x + 1}" y="${y + 1}" width="${cellWidth - 2}" height="${cellHeight - 2}" fill="none" stroke="red" stroke-width="4" rx="2"/>`);
501
+ }
502
+ }
503
+ const overlaySvg = `<svg width="${canvasWidth}" height="${canvasHeight}" xmlns="http://www.w3.org/2000/svg">${svgParts.join("")}</svg>`;
504
+ // Composite: create blank canvas, overlay each frame, then SVG labels
505
+ const compositeInputs = resizedBuffers.map((buf, i) => ({
506
+ input: buf,
507
+ left: (i % cols) * cellWidth,
508
+ top: Math.floor(i / cols) * cellHeight,
509
+ }));
510
+ // Add SVG overlay
511
+ compositeInputs.push({
512
+ input: Buffer.from(overlaySvg),
513
+ left: 0,
514
+ top: 0,
515
+ });
516
+ const collage = await sharp({
517
+ create: {
518
+ width: canvasWidth,
519
+ height: canvasHeight,
520
+ channels: 4,
521
+ background: { r: 30, g: 30, b: 30, alpha: 1 },
522
+ },
523
+ })
524
+ .composite(compositeInputs)
525
+ .png()
526
+ .toBuffer();
527
+ // Save collage
528
+ const captureDir = join(homedir(), ".nodebench", "captures");
529
+ mkdirSync(captureDir, { recursive: true });
530
+ const slug = args.label
531
+ ? args.label.replace(/[^a-zA-Z0-9-_]/g, "-").toLowerCase()
532
+ : "collage";
533
+ const ts = new Date().toISOString().replace(/[:.]/g, "-");
534
+ const outputPath = args.outputPath ?? join(captureDir, `${slug}_grid_${ts}.png`);
535
+ const { dirname } = await import("node:path");
536
+ mkdirSync(dirname(outputPath), { recursive: true });
537
+ await sharp(collage).toFile(outputPath);
538
+ const base64 = collage.toString("base64");
539
+ return [
540
+ {
541
+ type: "text",
542
+ text: JSON.stringify({
543
+ collagePath: outputPath,
544
+ frames: paths.length,
545
+ grid: `${cols}x${rows}`,
546
+ cellSize: `${cellWidth}x${cellHeight}`,
547
+ highlightedFrames: args.highlightIndices ?? [],
548
+ instruction: "Grid collage generated. Red-bordered frames are jank frames. " +
549
+ "Send this image to analyze_screenshot for AI-powered visual analysis, " +
550
+ "or use diff_screenshots to compare against a baseline collage.",
551
+ }),
552
+ },
553
+ { type: "image", data: base64, mimeType: "image/png" },
554
+ ];
555
+ }
556
+ catch (err) {
557
+ return [
558
+ {
559
+ type: "text",
560
+ text: JSON.stringify({
561
+ error: true,
562
+ message: `Grid collage generation failed: ${err.message}`,
563
+ }),
564
+ },
565
+ ];
566
+ }
567
+ },
568
+ },
569
+ // ─── Tool 3: compute_web_stability ───
570
+ {
571
+ name: "compute_web_stability",
572
+ description: "Compute frame-to-frame stability metrics for a burst capture. Calculates " +
573
+ "SSIM (Structural Similarity Index) between consecutive frames, detects jank " +
574
+ "(frames where SSIM drops below threshold), estimates effective FPS from frame " +
575
+ "deltas, and produces a stability score (0-100) with letter grade (A-F). All " +
576
+ "computation in Node using sharp — no Python server required. " +
577
+ "Score >= 90 = A (smooth), 80-89 = B, 70-79 = C (noticeable jank), < 70 = D/F.",
578
+ inputSchema: {
579
+ type: "object",
580
+ properties: {
581
+ framePaths: {
582
+ type: "array",
583
+ items: { type: "string" },
584
+ description: "Array of PNG frame paths (from burst_capture)",
585
+ },
586
+ timestamps: {
587
+ type: "array",
588
+ items: { type: "number" },
589
+ description: "Timestamps in ms for each frame (from burst_capture). Used for FPS and delta calculations.",
590
+ },
591
+ ssimThreshold: {
592
+ type: "number",
593
+ description: "SSIM threshold for jank detection (default: 0.95). Lower = more tolerant of change. " +
594
+ "Use 0.90 for pages with intentional animations, 0.98 for static pages.",
595
+ },
596
+ label: {
597
+ type: "string",
598
+ description: "Label for this analysis (persisted to DB)",
599
+ },
600
+ url: {
601
+ type: "string",
602
+ description: "Source URL (for DB persistence)",
603
+ },
604
+ },
605
+ required: ["framePaths"],
606
+ },
607
+ handler: async (args) => {
608
+ const sharp = await getSharp();
609
+ if (!sharp) {
610
+ return {
611
+ error: true,
612
+ message: "sharp is not installed. Run: npm install sharp",
613
+ };
614
+ }
615
+ const paths = args.framePaths;
616
+ if (!paths || paths.length < 2) {
617
+ return {
618
+ error: true,
619
+ message: "framePaths must contain at least 2 frames",
620
+ };
621
+ }
622
+ const threshold = args.ssimThreshold ?? 0.95;
623
+ const timestamps = args.timestamps ?? null;
624
+ try {
625
+ // Load all frames as raw RGBA buffers at a common resolution
626
+ const firstMeta = await sharp(paths[0]).metadata();
627
+ const width = firstMeta.width;
628
+ const height = firstMeta.height;
629
+ const luminances = [];
630
+ for (const p of paths) {
631
+ const raw = await sharp(p)
632
+ .resize(width, height, { fit: "fill" })
633
+ .raw()
634
+ .ensureAlpha()
635
+ .toBuffer();
636
+ luminances.push(toLuminance(raw, width, height));
637
+ }
638
+ // Compute pairwise SSIM
639
+ const ssimScores = [];
640
+ for (let i = 0; i < luminances.length - 1; i++) {
641
+ const ssim = computeSSIM(luminances[i], luminances[i + 1], width, height);
642
+ ssimScores.push(Math.round(ssim * 10000) / 10000);
643
+ }
644
+ // Jank detection
645
+ const jankFrames = [];
646
+ for (let i = 0; i < ssimScores.length; i++) {
647
+ if (ssimScores[i] < threshold) {
648
+ jankFrames.push(i);
649
+ }
650
+ }
651
+ const meanSsim = ssimScores.reduce((a, b) => a + b, 0) / ssimScores.length;
652
+ const minSsim = Math.min(...ssimScores);
653
+ // Frame timing metrics
654
+ let frameDeltasMs = [];
655
+ let effectiveFps = 0;
656
+ let deltaVariance = 0;
657
+ if (timestamps && timestamps.length === paths.length) {
658
+ for (let i = 0; i < timestamps.length - 1; i++) {
659
+ frameDeltasMs.push(timestamps[i + 1] - timestamps[i]);
660
+ }
661
+ const meanDelta = frameDeltasMs.reduce((a, b) => a + b, 0) / frameDeltasMs.length;
662
+ effectiveFps = meanDelta > 0 ? Math.round((1000 / meanDelta) * 10) / 10 : 0;
663
+ deltaVariance =
664
+ Math.round((frameDeltasMs.reduce((sum, d) => sum + (d - meanDelta) ** 2, 0) /
665
+ frameDeltasMs.length) *
666
+ 100) / 100;
667
+ }
668
+ // Stability score: weight SSIM and jank ratio
669
+ const jankRatio = jankFrames.length / ssimScores.length;
670
+ const score = Math.round(100 * (1 - jankRatio) * meanSsim);
671
+ const grade = stabilityGrade(score);
672
+ // Persist to DB
673
+ try {
674
+ ensureVisualQaTable();
675
+ const db = getDb();
676
+ const id = genId("vqa");
677
+ db.prepare(`INSERT INTO visual_qa_runs (id, url, label, viewport, frame_count, interval_ms,
678
+ stability_score, stability_grade, mean_ssim, jank_count, effective_fps, metadata)
679
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`).run(id, args.url ?? "unknown", args.label ?? null, "desktop", paths.length, timestamps
680
+ ? Math.round((timestamps[timestamps.length - 1] - timestamps[0]) /
681
+ (paths.length - 1))
682
+ : 50, score, grade, Math.round(meanSsim * 10000) / 10000, jankFrames.length, effectiveFps, JSON.stringify({ ssimScores, jankFrames, frameDeltasMs }));
683
+ }
684
+ catch {
685
+ /* DB persistence is best-effort */
686
+ }
687
+ const summary = jankFrames.length === 0
688
+ ? `Stability: ${grade} (${score}/100). All ${ssimScores.length} frame transitions are smooth (mean SSIM ${meanSsim.toFixed(4)}).`
689
+ : `Stability: ${grade} (${score}/100). Detected ${jankFrames.length} jank frame(s) at indices [${jankFrames.join(", ")}] ` +
690
+ `(SSIM < ${threshold}). Min SSIM: ${minSsim.toFixed(4)}, Mean: ${meanSsim.toFixed(4)}.` +
691
+ (effectiveFps > 0 ? ` Effective capture rate: ${effectiveFps} fps.` : "");
692
+ return {
693
+ ssimScores,
694
+ meanSsim: Math.round(meanSsim * 10000) / 10000,
695
+ minSsim: Math.round(minSsim * 10000) / 10000,
696
+ jankFrames,
697
+ jankCount: jankFrames.length,
698
+ effectiveFps,
699
+ frameDeltasMs,
700
+ deltaVariance,
701
+ stabilityScore: score,
702
+ stabilityGrade: grade,
703
+ threshold,
704
+ frameCount: paths.length,
705
+ resolution: `${width}x${height}`,
706
+ summary,
707
+ };
708
+ }
709
+ catch (err) {
710
+ return {
711
+ error: true,
712
+ message: `Stability analysis failed: ${err.message}`,
713
+ };
714
+ }
715
+ },
716
+ },
717
+ // ─── Tool 4: run_visual_qa_suite ───
718
+ {
719
+ name: "run_visual_qa_suite",
720
+ description: "End-to-end visual QA pipeline: burst capture → SSIM stability analysis → " +
721
+ "grid collage with jank frames highlighted. Runs against a single URL. Returns " +
722
+ "stability score/grade, grid collage image, and jank details. Combine with " +
723
+ "diff_screenshots for regression detection against baselines. This is the " +
724
+ "recommended entry point for visual QA — it chains burst_capture, " +
725
+ "compute_web_stability, and generate_grid_collage in one call.",
726
+ rawContent: true,
727
+ inputSchema: {
728
+ type: "object",
729
+ properties: {
730
+ url: {
731
+ type: "string",
732
+ description: "URL to run visual QA against",
733
+ },
734
+ label: {
735
+ type: "string",
736
+ description: "Label for this QA run (e.g. 'cinematic-home', 'dashboard')",
737
+ },
738
+ frameCount: {
739
+ type: "number",
740
+ description: "Number of frames (default: 10)",
741
+ },
742
+ intervalMs: {
743
+ type: "number",
744
+ description: "Interval between frames in ms (default: 50)",
745
+ },
746
+ viewport: {
747
+ type: "string",
748
+ enum: ["mobile", "tablet", "desktop", "wide"],
749
+ description: "Viewport preset (default: desktop)",
750
+ },
751
+ preAction: {
752
+ type: "object",
753
+ properties: {
754
+ type: { type: "string", enum: ["click", "scroll", "navigate", "wait"] },
755
+ target: { type: "string" },
756
+ value: { type: "string" },
757
+ },
758
+ description: "Action to perform before burst starts",
759
+ },
760
+ settleMs: {
761
+ type: "number",
762
+ description: "Wait time after page load (default: 500)",
763
+ },
764
+ ssimThreshold: {
765
+ type: "number",
766
+ description: "SSIM threshold for jank detection (default: 0.95)",
767
+ },
768
+ reducedMotion: {
769
+ type: "boolean",
770
+ description: "Set prefers-reduced-motion:reduce (default: false)",
771
+ },
772
+ clearCache: {
773
+ type: "boolean",
774
+ description: "Clear browser cache/cookies/localStorage before navigating (default: false). " +
775
+ "Simulates cold first-load for detecting loading jank.",
776
+ },
777
+ waitUntil: {
778
+ type: "string",
779
+ enum: ["networkidle", "commit", "domcontentloaded", "load"],
780
+ description: "Navigation wait strategy (default: networkidle). Use 'commit' for cold-load analysis.",
781
+ },
782
+ collageColumns: {
783
+ type: "number",
784
+ description: "Grid columns for collage (default: 5)",
785
+ },
786
+ },
787
+ required: ["url"],
788
+ },
789
+ handler: async (args) => {
790
+ const pw = await getPlaywright();
791
+ if (!pw) {
792
+ return [
793
+ {
794
+ type: "text",
795
+ text: JSON.stringify({
796
+ error: true,
797
+ message: "Playwright is not installed. Run: npm install playwright && npx playwright install chromium",
798
+ }),
799
+ },
800
+ ];
801
+ }
802
+ const sharpMod = await getSharp();
803
+ if (!sharpMod) {
804
+ return [
805
+ {
806
+ type: "text",
807
+ text: JSON.stringify({
808
+ error: true,
809
+ message: "sharp is not installed. Run: npm install sharp",
810
+ }),
811
+ },
812
+ ];
813
+ }
814
+ // Step 1: Burst capture (inline — reuse logic from burst_capture)
815
+ const frameCount = Math.min(Math.max(args.frameCount ?? 10, 2), 30);
816
+ const intervalMs = Math.max(args.intervalMs ?? 50, 16);
817
+ const settleMs = args.settleMs ?? 500;
818
+ const viewportName = args.viewport ?? "desktop";
819
+ const viewportSize = VIEWPORT_PRESETS[viewportName];
820
+ const threshold = args.ssimThreshold ?? 0.95;
821
+ const waitUntil = args.waitUntil ?? "networkidle";
822
+ if (!viewportSize) {
823
+ return [
824
+ {
825
+ type: "text",
826
+ text: JSON.stringify({
827
+ error: true,
828
+ message: `Unknown viewport: ${viewportName}`,
829
+ }),
830
+ },
831
+ ];
832
+ }
833
+ const burstDir = ensureBurstDir(args.label ?? "suite");
834
+ const framePaths = [];
835
+ const timestamps = [];
836
+ let browser;
837
+ try {
838
+ browser = await pw.chromium.launch({ headless: true });
839
+ const contextOptions = { viewport: viewportSize };
840
+ if (args.reducedMotion) {
841
+ contextOptions.reducedMotion = "reduce";
842
+ }
843
+ const context = await browser.newContext(contextOptions);
844
+ // Clear cache if requested (simulate cold first-load)
845
+ if (args.clearCache) {
846
+ await context.clearCookies();
847
+ const page0 = await context.newPage();
848
+ try {
849
+ const cdp = await page0.context().newCDPSession(page0);
850
+ await cdp.send("Network.clearBrowserCache");
851
+ await cdp.send("Network.clearBrowserCookies");
852
+ await page0.evaluate(() => {
853
+ try {
854
+ localStorage.clear();
855
+ }
856
+ catch { }
857
+ try {
858
+ sessionStorage.clear();
859
+ }
860
+ catch { }
861
+ try {
862
+ const w = globalThis;
863
+ if (w.caches)
864
+ w.caches.keys().then((keys) => keys.forEach((k) => w.caches.delete(k)));
865
+ }
866
+ catch { }
867
+ });
868
+ }
869
+ catch { /* CDP best-effort */ }
870
+ await page0.close();
871
+ }
872
+ const page = await context.newPage();
873
+ await page.goto(args.url, {
874
+ waitUntil: waitUntil,
875
+ timeout: 30000,
876
+ });
877
+ if (settleMs > 0)
878
+ await page.waitForTimeout(settleMs);
879
+ if (args.preAction) {
880
+ const pa = args.preAction;
881
+ switch (pa.type) {
882
+ case "click":
883
+ if (pa.target)
884
+ await page.click(pa.target, { timeout: 5000 });
885
+ break;
886
+ case "scroll":
887
+ await page.evaluate((amount) => globalThis.scrollBy(0, amount), parseInt(pa.value ?? "500", 10));
888
+ break;
889
+ case "navigate":
890
+ if (pa.target)
891
+ await page.goto(pa.target, {
892
+ waitUntil: "networkidle",
893
+ timeout: 15000,
894
+ });
895
+ break;
896
+ case "wait":
897
+ await page.waitForTimeout(parseInt(pa.value ?? "1000", 10));
898
+ break;
899
+ }
900
+ await page.waitForTimeout(100);
901
+ }
902
+ for (let i = 0; i < frameCount; i++) {
903
+ const framePath = join(burstDir, `frame_${String(i).padStart(3, "0")}.png`);
904
+ timestamps.push(Date.now());
905
+ await page.screenshot({ path: framePath, fullPage: false });
906
+ framePaths.push(framePath);
907
+ if (i < frameCount - 1) {
908
+ await page.waitForTimeout(intervalMs);
909
+ }
910
+ }
911
+ await browser.close();
912
+ browser = null;
913
+ // Step 2: SSIM stability analysis
914
+ const firstMeta = await sharpMod(framePaths[0]).metadata();
915
+ const width = firstMeta.width;
916
+ const height = firstMeta.height;
917
+ const luminances = [];
918
+ for (const p of framePaths) {
919
+ const raw = await sharpMod(p)
920
+ .resize(width, height, { fit: "fill" })
921
+ .raw()
922
+ .ensureAlpha()
923
+ .toBuffer();
924
+ luminances.push(toLuminance(raw, width, height));
925
+ }
926
+ const ssimScores = [];
927
+ for (let i = 0; i < luminances.length - 1; i++) {
928
+ ssimScores.push(Math.round(computeSSIM(luminances[i], luminances[i + 1], width, height) *
929
+ 10000) / 10000);
930
+ }
931
+ const jankFrames = [];
932
+ for (let i = 0; i < ssimScores.length; i++) {
933
+ if (ssimScores[i] < threshold)
934
+ jankFrames.push(i);
935
+ }
936
+ const meanSsim = ssimScores.reduce((a, b) => a + b, 0) / ssimScores.length;
937
+ const minSsim = Math.min(...ssimScores);
938
+ const jankRatio = jankFrames.length / ssimScores.length;
939
+ const score = Math.round(100 * (1 - jankRatio) * meanSsim);
940
+ const grade = stabilityGrade(score);
941
+ // Frame timing
942
+ const frameDeltasMs = [];
943
+ for (let i = 0; i < timestamps.length - 1; i++) {
944
+ frameDeltasMs.push(timestamps[i + 1] - timestamps[i]);
945
+ }
946
+ const meanDelta = frameDeltasMs.length > 0
947
+ ? frameDeltasMs.reduce((a, b) => a + b, 0) / frameDeltasMs.length
948
+ : 50;
949
+ const effectiveFps = meanDelta > 0 ? Math.round((1000 / meanDelta) * 10) / 10 : 0;
950
+ // Persist to DB
951
+ try {
952
+ ensureVisualQaTable();
953
+ const db = getDb();
954
+ const id = genId("vqa");
955
+ db.prepare(`INSERT INTO visual_qa_runs (id, url, label, viewport, frame_count, interval_ms,
956
+ stability_score, stability_grade, mean_ssim, jank_count, effective_fps, metadata)
957
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`).run(id, args.url, args.label ?? null, viewportName, frameCount, Math.round(meanDelta), score, grade, Math.round(meanSsim * 10000) / 10000, jankFrames.length, effectiveFps, JSON.stringify({ ssimScores, jankFrames, frameDeltasMs }));
958
+ }
959
+ catch {
960
+ /* best-effort */
961
+ }
962
+ // Step 3: Grid collage
963
+ const cols = args.collageColumns ?? 5;
964
+ const cellWidth = 384;
965
+ const aspectRatio = height / width;
966
+ const cellHeight = Math.round(cellWidth * aspectRatio);
967
+ const gridRows = Math.ceil(framePaths.length / cols);
968
+ const canvasWidth = cols * cellWidth;
969
+ const canvasHeight = gridRows * cellHeight;
970
+ const highlights = new Set(jankFrames);
971
+ const resizedBuffers = [];
972
+ for (const p of framePaths) {
973
+ resizedBuffers.push(await sharpMod(p)
974
+ .resize(cellWidth, cellHeight, { fit: "fill" })
975
+ .png()
976
+ .toBuffer());
977
+ }
978
+ let svgParts = [];
979
+ const t0 = timestamps[0];
980
+ for (let i = 0; i < framePaths.length; i++) {
981
+ const col = i % cols;
982
+ const row = Math.floor(i / cols);
983
+ const x = col * cellWidth;
984
+ const y = row * cellHeight;
985
+ const delta = timestamps[i] - t0;
986
+ svgParts.push(`<rect x="${x}" y="${y}" width="90" height="22" fill="rgba(0,0,0,0.7)" rx="3"/>`, `<text x="${x + 4}" y="${y + 16}" font-family="monospace" font-size="13" fill="white">#${i} +${delta}ms</text>`);
987
+ if (highlights.has(i)) {
988
+ svgParts.push(`<rect x="${x + 1}" y="${y + 1}" width="${cellWidth - 2}" height="${cellHeight - 2}" fill="none" stroke="red" stroke-width="4" rx="2"/>`);
989
+ }
990
+ // Show SSIM score on frames that have one (all except last)
991
+ if (i < ssimScores.length) {
992
+ const ssimColor = ssimScores[i] < threshold ? "#ff4444" : "#44ff44";
993
+ svgParts.push(`<rect x="${x}" y="${y + 22}" width="80" height="18" fill="rgba(0,0,0,0.7)" rx="3"/>`, `<text x="${x + 4}" y="${y + 36}" font-family="monospace" font-size="11" fill="${ssimColor}">SSIM:${ssimScores[i].toFixed(3)}</text>`);
994
+ }
995
+ }
996
+ const overlaySvg = `<svg width="${canvasWidth}" height="${canvasHeight}" xmlns="http://www.w3.org/2000/svg">${svgParts.join("")}</svg>`;
997
+ const compositeInputs = resizedBuffers.map((buf, i) => ({
998
+ input: buf,
999
+ left: (i % cols) * cellWidth,
1000
+ top: Math.floor(i / cols) * cellHeight,
1001
+ }));
1002
+ compositeInputs.push({
1003
+ input: Buffer.from(overlaySvg),
1004
+ left: 0,
1005
+ top: 0,
1006
+ });
1007
+ const collage = await sharpMod({
1008
+ create: {
1009
+ width: canvasWidth,
1010
+ height: canvasHeight,
1011
+ channels: 4,
1012
+ background: { r: 30, g: 30, b: 30, alpha: 1 },
1013
+ },
1014
+ })
1015
+ .composite(compositeInputs)
1016
+ .png()
1017
+ .toBuffer();
1018
+ // Save collage
1019
+ const captureDir = join(homedir(), ".nodebench", "captures");
1020
+ mkdirSync(captureDir, { recursive: true });
1021
+ const slug = (args.label ?? "suite")
1022
+ .replace(/[^a-zA-Z0-9-_]/g, "-")
1023
+ .toLowerCase();
1024
+ const ts = new Date().toISOString().replace(/[:.]/g, "-");
1025
+ const collagePath = join(captureDir, `${slug}_qa_${ts}.png`);
1026
+ await sharpMod(collage).toFile(collagePath);
1027
+ const base64 = collage.toString("base64");
1028
+ const summary = jankFrames.length === 0
1029
+ ? `Visual QA: ${grade} (${score}/100). All ${ssimScores.length} transitions smooth (mean SSIM ${meanSsim.toFixed(4)}). No jank detected.`
1030
+ : `Visual QA: ${grade} (${score}/100). ${jankFrames.length} jank frame(s) at [${jankFrames.join(", ")}]. ` +
1031
+ `Min SSIM: ${minSsim.toFixed(4)}, Mean: ${meanSsim.toFixed(4)}. ` +
1032
+ `Red-bordered frames in collage below show where instability occurred.`;
1033
+ const content = [
1034
+ {
1035
+ type: "text",
1036
+ text: JSON.stringify({
1037
+ stabilityScore: score,
1038
+ stabilityGrade: grade,
1039
+ meanSsim: Math.round(meanSsim * 10000) / 10000,
1040
+ minSsim: Math.round(minSsim * 10000) / 10000,
1041
+ jankFrames,
1042
+ jankCount: jankFrames.length,
1043
+ effectiveFps,
1044
+ ssimScores,
1045
+ frameDeltasMs,
1046
+ threshold,
1047
+ frameCount,
1048
+ burstDir,
1049
+ collagePath,
1050
+ url: args.url,
1051
+ viewport: viewportName,
1052
+ summary,
1053
+ instruction: jankFrames.length > 0
1054
+ ? "Jank detected. Inspect the red-bordered frames in the collage below. " +
1055
+ "Consider running with reducedMotion:true to compare, or use " +
1056
+ "analyze_screenshot on the collage for AI analysis of the visual issues."
1057
+ : "No jank detected. Save the collage as a baseline for future regression detection " +
1058
+ "using diff_screenshots.",
1059
+ }),
1060
+ },
1061
+ { type: "image", data: base64, mimeType: "image/png" },
1062
+ ];
1063
+ return content;
1064
+ }
1065
+ catch (err) {
1066
+ if (browser) {
1067
+ try {
1068
+ await browser.close();
1069
+ }
1070
+ catch {
1071
+ /* ignore */
1072
+ }
1073
+ }
1074
+ return [
1075
+ {
1076
+ type: "text",
1077
+ text: JSON.stringify({
1078
+ error: true,
1079
+ message: `Visual QA suite failed: ${err.message}`,
1080
+ url: args.url,
1081
+ }),
1082
+ },
1083
+ ];
1084
+ }
1085
+ },
1086
+ },
1087
+ ];
1088
+ //# sourceMappingURL=visualQaTools.js.map