@matware/e2e-runner 1.5.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/mcp-tools.js CHANGED
@@ -23,11 +23,12 @@ import { lookupScreenshotHash, ensureProject, computeScreenshotHash, registerScr
23
23
  import { fetchIssue, checkCliAuth, detectProvider } from './issues.js';
24
24
  import { buildPrompt, hasApiKey, generateHindsightHint } from './ai-generate.js';
25
25
  import { verifyIssue } from './verify.js';
26
- import { listModules } from './module-resolver.js';
26
+ import { listModules, loadModuleRegistry } from './module-resolver.js';
27
27
  import { getLearningsSummary, getFlakySummary, getSelectorStability, getPageHealth, getApiHealth, getErrorPatterns, getTestTrends, getRunInsights, getTestHistory, getPageHistory, getSelectorHistory, getHealthSnapshot, getTestCreationContext, generateImprovements, getActionHealthScores } from './learner-sqlite.js';
28
28
  import { queryGraph } from './learner-neo4j.js';
29
29
  import { startNeo4j, stopNeo4j, getNeo4jStatus } from './neo4j-pool.js';
30
30
  import { getAppPoolStatus, isAppPoolEnabled } from './app-pool.js';
31
+ import { looksLikeBlankCapture } from './actions.js';
31
32
 
32
33
  /**
33
34
  * Resolves auth token from config: uses static authToken if set,
@@ -103,7 +104,7 @@ export const TOOLS = [
103
104
  },
104
105
  cwd: {
105
106
  type: 'string',
106
- description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
107
+ description: 'Project root directory (defaults to the current working directory).',
107
108
  },
108
109
  },
109
110
  },
@@ -117,7 +118,7 @@ export const TOOLS = [
117
118
  properties: {
118
119
  cwd: {
119
120
  type: 'string',
120
- description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
121
+ description: 'Project root directory (defaults to the current working directory).',
121
122
  },
122
123
  },
123
124
  },
@@ -125,66 +126,19 @@ export const TOOLS = [
125
126
  {
126
127
  name: 'e2e_create_test',
127
128
  description:
128
- `Create a new E2E test JSON file. IMPORTANT: prefer built-in actions over evaluate blocks.
129
-
130
- ## Action selection guide (use instead of evaluate)
131
-
132
- **Clicking elements by text** DON'T write evaluate to find+click elements:
133
- click: { type: "click", text: "Submit" } — searches button, a, [role=tab], span, etc.
134
- click_regex: { type: "click_regex", text: "save|guardar" } — regex match, case-insensitive
135
- click_menu_item: { type: "click_menu_item", text: "Delete" } — [role=menuitem], .MenuItem, etc.
136
- click_option: { type: "click_option", text: "Option A" } — [role=option] in dropdowns
137
- click_chip: { type: "click_chip", text: "Active" } — MUI Chip / tag elements
138
- click_icon: { type: "click_icon", value: "edit" } — SVG/icon by data-testid, aria-label, class
139
- click_in_context:{ type: "click_in_context", text: "Row text", selector: "button" } — child within container
140
- click (in dialog):{ type: "click", text: "Confirm", scope: "dialog", last: true } only [role=dialog]/MuiDialog; visible:true skips hidden; last:true picks last match
141
-
142
- **Selecting from a MUI Autocomplete/Select** — DON'T write evaluate to open+filter+pick:
143
- select_combobox: { type: "select_combobox", selector: "input[role='combobox']", filter: "cardio", text: "Cardiología" }
144
- — opens the combobox, types optional filter, clicks the matching option (role=option / MuiAutocomplete-option / MuiMenuItem)
145
-
146
- **Asserting text presence/absence** — DON'T write evaluate with body.includes():
147
- assert_text: { type: "assert_text", text: "Welcome" } — text IS on page (case-sensitive). Uses: text
148
- assert_no_text: { type: "assert_no_text", text: "Error" } — text is NOT on page. Uses: text
149
- assert_text_in: { type: "assert_text_in", selector: "[class*='Drawer']", text: "profesional|doctor" }
150
- — scoped regex in container (case-insensitive default). Uses: selector + text (+ value:"exact")
151
-
152
- **Asserting elements** — DON'T write evaluate to count or check visibility:
153
- assert_visible: { type: "assert_visible", selector: ".modal" } — Uses: selector (NOT text)
154
- assert_not_visible: { type: "assert_not_visible", selector: ".loader" } — Uses: selector (NOT text)
155
- assert_count: { type: "assert_count", selector: "input", value: ">= 2" } — Uses: selector + value
156
- assert_element_text: { type: "assert_element_text", selector: "h1", text: "Dashboard" } — Uses: selector + text
157
- assert_matches: { type: "assert_matches", selector: ".date", value: "\\\\d{2}/\\\\d{2}" } — Uses: selector + value (regex)
158
- assert_attribute: { type: "assert_attribute", selector: "button", value: "disabled" } — Uses: selector + value
159
- assert_url: { type: "assert_url", value: "/dashboard" } — Uses: value
160
- assert_input_value: { type: "assert_input_value", selector: "#email", value: "@" } — Uses: selector + value
161
-
162
- IMPORTANT field rules:
163
- - assert_text / assert_no_text: use "text" field only (checks full page body)
164
- - assert_visible / assert_not_visible: use "selector" field only (CSS selector, NOT text)
165
- - To verify text absence: use assert_no_text (NOT assert_not_visible with text)
166
-
167
- **Navigation & waiting** — DON'T write evaluate with setTimeout polling:
168
- goto: { type: "goto", value: "/login" } — full page navigation
169
- navigate: { type: "navigate", value: "/settings" } — SPA-friendly (won't fail if no page load)
170
- wait: { type: "wait", text: "Loading complete" } — wait for text to appear in body
171
- wait: { type: "wait", selector: ".results" } — wait for element to appear
172
- wait (gone): { type: "wait", gone: ".MuiBackdrop-root" } — wait until a selector disappears/hides (spinner, closing dialog)
173
- wait: { type: "wait", value: "2000" } — fixed delay (last resort — prefer gone/selector/text)
174
- wait_network_idle: { type: "wait_network_idle", value: "500" } — wait until no network for N ms
175
-
176
- **Form interaction** — DON'T write evaluate with native value setters (unless React):
177
- type: { type: "type", selector: "#email", value: "a@b.com" } — clears + types
178
- type_react: { type: "type_react", selector: "#email", value: "a@b.com", waitAfter: "400" } — React controlled inputs; optional blur:true / waitAfter ms
179
- select: { type: "select", selector: "select#country", value: "US" }
180
- clear: { type: "clear", selector: "#search" }
181
- press: { type: "press", value: "Enter" }
182
- focus_autocomplete: { type: "focus_autocomplete", text: "City" } — focus MUI Autocomplete by label
183
-
184
- **When evaluate IS appropriate**: computed styles, complex conditional logic, GraphQL via window.__e2eGql, math calculations, reading window/app state.
185
-
186
- ## Modules
187
- Use { "$use": "module-name", "params": {...} } to reference reusable modules from e2e/modules/. Modules compose — a module can $use other modules. Check e2e_list to see available modules for the project.`,
129
+ `Create a new E2E test JSON file. Prefer built-in actions over evaluate — more robust and readable. Full catalog: the e2e-testing skill / references/action-types.md.
130
+
131
+ Action cheat-sheet:
132
+ - Click: click (by text), click_regex, click_menu_item, click_option, click_chip, click_icon, click_in_context; in a dialog use click with scope:"dialog" (+ last/visible).
133
+ - Select (MUI): select_combobox (open+optional filter+pick), select, focus_autocomplete.
134
+ - Assert text: assert_text (present), assert_no_text (absent), assert_text_in (scoped regex), assert_element_text, assert_matches.
135
+ - Assert elements (selector, NOT text): assert_visible, assert_not_visible, assert_count, assert_attribute, assert_input_value, assert_url.
136
+ - Nav/wait: goto, navigate (SPA), wait {text|selector|gone|value(ms)}, wait_network_idle.
137
+ - Form: type, type_react (React inputs; optional blur/waitAfter), clear, press.
138
+
139
+ Field rules: assert_text/assert_no_text use "text" (whole page); assert_visible/assert_not_visible use "selector"; for text absence use assert_no_text. Use evaluate only for computed styles, complex logic, GraphQL (window.__e2eGql), or app state.
140
+
141
+ Modules: { "$use": "module-name", "params": {...} } references reusable modules in e2e/modules/ (they compose). Run e2e_list to see available modules.`,
188
142
  inputSchema: {
189
143
  type: 'object',
190
144
  properties: {
@@ -239,7 +193,7 @@ Use { "$use": "module-name", "params": {...} } to reference reusable modules fro
239
193
  },
240
194
  cwd: {
241
195
  type: 'string',
242
- description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
196
+ description: 'Project root directory (defaults to the current working directory).',
243
197
  },
244
198
  },
245
199
  required: ['name', 'tests'],
@@ -254,7 +208,7 @@ Use { "$use": "module-name", "params": {...} } to reference reusable modules fro
254
208
  properties: {
255
209
  cwd: {
256
210
  type: 'string',
257
- description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
211
+ description: 'Project root directory (defaults to the current working directory).',
258
212
  },
259
213
  },
260
214
  },
@@ -301,7 +255,7 @@ Use { "$use": "module-name", "params": {...} } to reference reusable modules fro
301
255
  },
302
256
  cwd: {
303
257
  type: 'string',
304
- description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
258
+ description: 'Project root directory (defaults to the current working directory).',
305
259
  },
306
260
  },
307
261
  },
@@ -327,7 +281,7 @@ Use { "$use": "module-name", "params": {...} } to reference reusable modules fro
327
281
  },
328
282
  cwd: {
329
283
  type: 'string',
330
- description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
284
+ description: 'Project root directory (defaults to the current working directory).',
331
285
  },
332
286
  },
333
287
  },
@@ -364,7 +318,7 @@ Use { "$use": "module-name", "params": {...} } to reference reusable modules fro
364
318
  },
365
319
  cwd: {
366
320
  type: 'string',
367
- description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
321
+ description: 'Project root directory (defaults to the current working directory).',
368
322
  },
369
323
  },
370
324
  required: ['url'],
@@ -412,7 +366,7 @@ Use { "$use": "module-name", "params": {...} } to reference reusable modules fro
412
366
  },
413
367
  cwd: {
414
368
  type: 'string',
415
- description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
369
+ description: 'Project root directory (defaults to the current working directory).',
416
370
  },
417
371
  },
418
372
  required: ['url'],
@@ -464,7 +418,7 @@ Use { "$use": "module-name", "params": {...} } to reference reusable modules fro
464
418
  },
465
419
  cwd: {
466
420
  type: 'string',
467
- description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
421
+ description: 'Project root directory (defaults to the current working directory).',
468
422
  },
469
423
  },
470
424
  required: ['url'],
@@ -539,7 +493,7 @@ Good module candidates: auth setup, page navigation, tab clicking, opening sideb
539
493
  },
540
494
  cwd: {
541
495
  type: 'string',
542
- description: 'Absolute path to the project root directory. Claude Code should pass its current working directory.',
496
+ description: 'Project root directory (defaults to the current working directory).',
543
497
  },
544
498
  },
545
499
  required: ['query'],
@@ -1010,6 +964,65 @@ async function handleCreateTest(args) {
1010
964
  }
1011
965
  } catch { /* modules dir may not exist */ }
1012
966
 
967
+ // ── #2/#3: nudge module reuse and flag extractable duplication ──
968
+ try {
969
+ const fullModules = [...loadModuleRegistry(config.modulesDir).values()];
970
+
971
+ // #2 — submitted actions already match an existing module verbatim
972
+ const matches = detectModuleMatches(args.tests, fullModules);
973
+ for (const h of matches) {
974
+ const req = h.params ? Object.entries(h.params).filter(([, d]) => d?.required).map(([n]) => n) : [];
975
+ const paramHint = req.length ? `, "params": { ${req.map(n => `"${n}": ...`).join(', ')} }` : '';
976
+ warnings.push(`♻️ Test "${h.test}" repeats the ${h.len} actions of existing module "${h.module}" inline. ` +
977
+ `Replace them with { "$use": "${h.module}"${paramHint} }.`);
978
+ }
979
+
980
+ // #3 — action sequences duplicated across tests that aren't a module yet
981
+ const matchedTests = new Set(matches.map(m => m.test));
982
+ const testsActions = args.tests
983
+ .filter(t => t.actions && !t.actions.some(a => a && a.$use) && !matchedTests.has(t.name))
984
+ .map(t => ({ name: t.name, sigs: actionsSigList(t.actions) }));
985
+
986
+ // include existing test files (excluding the one just written) for cross-file duplication
987
+ try {
988
+ if (fs.existsSync(config.testsDir)) {
989
+ for (const f of fs.readdirSync(config.testsDir).filter(x => x.endsWith('.json'))) {
990
+ const fp = path.join(config.testsDir, f);
991
+ if (fp === filePath) continue;
992
+ let parsed;
993
+ try { parsed = JSON.parse(fs.readFileSync(fp, 'utf-8')); } catch { continue; }
994
+ const list = Array.isArray(parsed) ? parsed : (parsed.tests || []);
995
+ for (const t of list) {
996
+ if (t && t.actions && !t.actions.some(a => a && a.$use)) {
997
+ testsActions.push({ name: `${f}:${t.name}`, sigs: actionsSigList(t.actions) });
998
+ }
999
+ }
1000
+ }
1001
+ }
1002
+ } catch { /* ignore unreadable test files */ }
1003
+
1004
+ // windows already covered by an existing module → don't re-suggest extracting them
1005
+ const moduleWindowSet = new Set();
1006
+ for (const m of fullModules) {
1007
+ const sigs = actionsSigList((m.actions || []).filter(a => a && !a.$use));
1008
+ for (const w of sigWindows(sigs, 2, 6)) moduleWindowSet.add(w);
1009
+ }
1010
+
1011
+ const extractable = detectExtractableSequences(testsActions, moduleWindowSet);
1012
+ for (const e of extractable) {
1013
+ const sample = e.tests.slice(0, 3).join(', ') + (e.tests.length > 3 ? '…' : '');
1014
+ warnings.push(`🧩 A sequence of ${e.len} actions appears in ${e.count} tests (${sample}) but isn't a module yet. ` +
1015
+ `Consider extracting it with e2e_create_module and referencing it via $use.`);
1016
+ }
1017
+ } catch { /* never fail test creation */ }
1018
+
1019
+ // ── Verification coverage: tests whose outcome is never checked ──
1020
+ try {
1021
+ let registry;
1022
+ try { registry = loadModuleRegistry(config.modulesDir); } catch { registry = new Map(); }
1023
+ warnings.push(...detectUnverifiedTests(args.tests, registry));
1024
+ } catch { /* never fail test creation */ }
1025
+
1013
1026
  const warningBlock = warnings.length > 0 ? '\n\n' + warnings.join('\n\n') : '';
1014
1027
 
1015
1028
  // Enrich with learnings context for smarter test authoring
@@ -1265,6 +1278,135 @@ function analyzeActionPatterns(tests) {
1265
1278
  return warnings;
1266
1279
  }
1267
1280
 
1281
+ // ── Module-reuse detection (#2 exact match, #3 extractable duplication) ──
1282
+
1283
/**
 * Builds the comparison signature for a single action.
 *
 * Anything that is not an object (including null/undefined) maps to '∅',
 * a module reference maps to `$use:<name>`, and every other action maps to
 * `type|selector|text`, with '?' standing in for a missing type and an
 * empty string for a missing selector or text. Other fields of the action
 * are not part of the signature.
 *
 * @param {object} a - Action entry taken from a test or a module.
 * @returns {string} Signature used when comparing action sequences.
 */
function actionSig(a) {
  const isObject = Boolean(a) && typeof a === 'object';
  if (!isObject) {
    return '∅';
  }
  if (a.$use) {
    return `$use:${a.$use}`;
  }
  const parts = [a.type || '?', a.selector || '', a.text || ''];
  return parts.join('|');
}
1289
/**
 * Converts a list of actions into their signatures, keeping the order.
 * A missing (falsy) list produces an empty array.
 *
 * @param {Array<object>} [actions] - Actions of a test or module.
 * @returns {string[]} One signature per action.
 */
function actionsSigList(actions) {
  if (!actions) {
    return [];
  }
  return actions.map((action) => actionSig(action));
}
1292
/**
 * Enumerates every contiguous run of signatures whose length lies between
 * minLen and maxLen inclusive (maxLen is capped at sigs.length). Runs are
 * emitted shortest length first and left to right within each length, and
 * each run is joined into a single string with '»'.
 *
 * @param {string[]} sigs - Signatures of one action sequence.
 * @param {number} minLen - Smallest window length to emit.
 * @param {number} maxLen - Largest window length to emit.
 * @returns {string[]} Joined window keys.
 */
function sigWindows(sigs, minLen, maxLen) {
  const total = sigs.length;
  const longest = Math.min(maxLen, total);
  const windows = [];
  let size = minLen;
  while (size <= longest) {
    const lastStart = total - size;
    for (let start = 0; start <= lastStart; start += 1) {
      windows.push(sigs.slice(start, start + size).join('»'));
    }
    size += 1;
  }
  return windows;
}
1303
+
1304
/**
 * #2 — compares one field of a module action with the same field of a
 * concrete test action.
 *
 * The module side is authoritative: a null/undefined or empty module value
 * places no constraint on the test, and a string containing '{{' is treated
 * as a parameter placeholder that accepts anything. Every other value must
 * be strictly equal to the test value.
 *
 * @param {*} modVal - Field value declared by the module action.
 * @param {*} testVal - Field value found in the test action.
 * @returns {boolean} True when the test value satisfies the module value.
 */
function moduleFieldMatch(modVal, testVal) {
  const unconstrained = modVal == null || modVal === '';
  const isPlaceholder = typeof modVal === 'string' && modVal.includes('{{');
  return unconstrained || isPlaceholder || modVal === testVal;
}
1310
/**
 * #2 — decides whether a concrete test action is an inline copy of a
 * module action.
 *
 * Module references ($use) on either side never match, and the action types
 * must be identical. selector, text and value are then compared through
 * moduleFieldMatch, so fields the module leaves empty or parameterizes with
 * a {{placeholder}} stay wildcards.
 *
 * value is part of the comparison because several action types carry their
 * whole meaning in it (goto, evaluate, press, fixed waits). Without it, a
 * module made of such steps would "match" any test that merely uses the
 * same action types in the same order.
 *
 * @param {object} modA - Leaf action from a module.
 * @param {object} testA - Action from the submitted test.
 * @returns {boolean} True when testA satisfies everything modA specifies.
 */
function moduleActionMatch(modA, testA) {
  if (!modA || !testA || modA.$use || testA.$use) return false;
  return modA.type === testA.type
    && moduleFieldMatch(modA.selector, testA.selector)
    && moduleFieldMatch(modA.text, testA.text)
    && moduleFieldMatch(modA.value, testA.value);
}
1316
/**
 * #2 — finds modules whose full leaf-action sequence (the module's own
 * actions with nested $use entries left out) appears as a contiguous run
 * inside a test that does not use modules itself.
 *
 * Modules with fewer than two leaf actions are ignored, and at most one hit
 * is reported per (module, test) pair. Malformed input — a missing list, a
 * null test entry, a non-array `actions` — is skipped rather than thrown on,
 * so one bad entry cannot suppress the hints for every other test.
 *
 * @param {Array<object>} tests - Submitted tests ({ name, actions }).
 * @param {Array<object>} fullModules - Module definitions ({ $module, params, actions }).
 * @returns {Array<{module: string, params: object, test: string, len: number}>}
 *   One entry per module that a test repeats inline.
 */
function detectModuleMatches(tests, fullModules) {
  const hits = [];
  for (const mod of fullModules || []) {
    const modActions = Array.isArray(mod?.actions)
      ? mod.actions.filter(a => a && !a.$use)
      : [];
    if (modActions.length < 2) continue;

    for (const test of tests || []) {
      if (!test || !Array.isArray(test.actions)) continue;
      const testActions = test.actions;
      if (testActions.some(a => a && a.$use)) continue; // already modular

      // Slide the module's sequence over the test and stop at the first fit.
      const lastStart = testActions.length - modActions.length;
      for (let start = 0; start <= lastStart; start++) {
        const fitsHere = modActions.every((modA, j) => moduleActionMatch(modA, testActions[start + j]));
        if (fitsHere) {
          hits.push({ module: mod.$module, params: mod.params, test: test.name, len: modActions.length });
          break;
        }
      }
    }
  }
  return hits;
}
1337
/**
 * #3 — finds contiguous action-signature windows that are shared by at
 * least two distinct tests and are not already covered by a module.
 *
 * Windows containing a module reference ('$use:') or present in
 * moduleWindowSet are ignored. Candidates are ranked longest first, then by
 * how many tests share them; a candidate that is a run inside an already
 * kept (longer) window is dropped, and at most `cap` results are returned.
 *
 * @param {Array<{name: string, sigs: string[]}>} testsActions - One entry per test.
 * @param {Set<string>} moduleWindowSet - Window keys already provided by modules.
 * @param {object} [options]
 * @param {number} [options.minLen=3] - Shortest window considered.
 * @param {number} [options.maxLen=6] - Longest window considered.
 * @param {number} [options.cap=2] - Maximum number of suggestions returned.
 * @returns {Array<{key: string, len: number, count: number, tests: string[]}>}
 */
function detectExtractableSequences(testsActions, moduleWindowSet, { minLen = 3, maxLen = 6, cap = 2 } = {}) {
  const owners = new Map(); // windowKey -> Set(testName), in first-seen order
  for (const t of testsActions || []) {
    for (const key of sigWindows(t.sigs, minLen, maxLen)) {
      if (key.includes('$use:') || moduleWindowSet?.has(key)) continue;
      if (!owners.has(key)) owners.set(key, new Set());
      owners.get(key).add(t.name);
    }
  }

  const cands = [];
  for (const [key, names] of owners) {
    if (names.size >= 2) {
      cands.push({ key, len: key.split('»').length, count: names.size, tests: [...names] });
    }
  }
  cands.sort((a, b) => b.len - a.len || b.count - a.count); // longest, then most frequent

  // Fence keys with the separator so containment is only detected on whole
  // signatures: a bare substring test would treat "click|…" as part of
  // "dblclick|…" and hide a genuinely different sequence.
  const fence = (key) => `»${key}»`;

  const kept = [];
  for (const c of cands) {
    if (kept.length >= cap) break; // checked first so cap = 0 yields nothing
    if (kept.some(k => fence(k.key).includes(fence(c.key)))) continue; // subsumed by a longer kept window
    kept.push(c);
  }
  return kept;
}
1363
+
1364
+ // Verification coverage — a test whose interactions are never followed by a check
1365
+ // can go green even when the flow silently breaks.
1366
/**
 * Tells whether an action checks something about the page.
 *
 * Counted as verifying: every assert_* action, evaluate and gql actions,
 * and a wait that targets a condition (selector, text or gone). A wait
 * without any of those fields is not a check.
 *
 * @param {object} a - Leaf action (must be an object).
 * @returns {boolean} True when the action verifies state.
 */
function isVerifyingAction(a) {
  const kind = a.type;
  const isAssertion = /^assert_/.test(kind || '');
  const runsInlineChecks = kind === 'evaluate' || kind === 'gql'; // strict semantics / inline assertions
  const isConditionWait = kind === 'wait' && Boolean(a.selector || a.text || a.gone); // condition waits fail if unmet
  return isAssertion || runsInlineChecks || isConditionWait;
}
1372
/**
 * Tells whether an action changes page or application state.
 *
 * The action type is matched by prefix, so variants such as click_regex,
 * select_combobox or type_react are covered by their base name.
 *
 * @param {object} a - Leaf action (must be an object).
 * @returns {boolean} True when the action type starts with an interacting prefix.
 */
function isInteractingAction(a) {
  const interactingPrefixes = [
    'click',
    'select',
    'type',
    'fill',
    'clear',
    'press',
    'hover',
    'scroll',
    'set_storage',
    'focus_autocomplete',
    'goto',
    'navigate',
  ];
  const kind = String(a.type || '');
  return interactingPrefixes.some((prefix) => kind.startsWith(prefix));
}
1375
/**
 * Flags tests whose outcome is never (or not finally) checked.
 *
 * For every test without an "expect" field the actions are flattened into
 * leaf actions, following $use references through the registry — including
 * modules that $use other modules, so assertions contributed by composed
 * modules count. A test is skipped (no warning) when a referenced module
 * cannot be resolved or the references form a cycle, because its real
 * action list is unknown. Tests with no interacting action are skipped too.
 *
 * Warnings produced:
 *  - no verifying action at all;
 *  - interactions continue after the last verifying action;
 *  - the closing checks are all page-wide assert_text.
 *
 * @param {Array<object>} tests - Submitted tests ({ name, actions, expect }).
 * @param {Map<string, object>} registry - Module name → module definition.
 * @returns {string[]} Human-readable warnings, one per affected test.
 */
function detectUnverifiedTests(tests, registry) {
  // Returns the flattened leaf actions, or null when they cannot be determined.
  const expand = (actions, inProgress) => {
    const leaf = [];
    for (const a of actions) {
      if (!a) continue;
      if (!a.$use) {
        leaf.push(a);
        continue;
      }
      const mod = registry?.get(a.$use);
      // Unknown module, malformed module, or a module that (indirectly) uses itself.
      if (!Array.isArray(mod?.actions) || inProgress.has(a.$use)) return null;
      inProgress.add(a.$use);
      const inner = expand(mod.actions, inProgress);
      inProgress.delete(a.$use);
      if (inner === null) return null;
      leaf.push(...inner);
    }
    return leaf;
  };

  const out = [];
  for (const t of tests || []) {
    if (!t || !Array.isArray(t.actions) || t.expect) continue; // "expect" verifies the end state visually

    const leaf = expand(t.actions, new Set());
    if (leaf === null) continue;

    let lastInteract = -1;
    leaf.forEach((a, i) => { if (isInteractingAction(a)) lastInteract = i; });
    if (lastInteract === -1) continue; // nothing happens — nothing to verify

    const closingChecks = leaf.slice(lastInteract + 1).filter(a => isVerifyingAction(a));
    if (!leaf.some(a => isVerifyingAction(a))) {
      out.push(`🔎 Test "${t.name}" has no assertions and no "expect" field — it can pass without verifying anything. ` +
        `Close with assert_* actions (assert_url, assert_text, assert_visible) or add an "expect" for visual verification.`);
    } else if (closingChecks.length === 0) {
      out.push(`🔎 Test "${t.name}" keeps interacting after its last check — the final steps are unverified. ` +
        `Close with an assert_* action so the end state is what passes the test.`);
    } else if (closingChecks.every(a => a.type === 'assert_text')) {
      out.push(`📌 Test "${t.name}" closes with page-wide assert_text only — it matches anywhere on the page. ` +
        `Scope the final check with assert_element_text or assert_text_in.`);
    }
  }
  return out;
}
1409
+
1268
1410
  async function handlePoolStatus(args) {
1269
1411
  const config = await loadConfig({}, args.cwd);
1270
1412
  const poolUrls = getPoolUrls(config);
@@ -1961,7 +2103,19 @@ async function handleAnalyze(args) {
1961
2103
  screenshotBase64 = data.toString('base64');
1962
2104
  }
1963
2105
 
1964
- const result = { meta, ...structure, suggestedTests };
2106
+ // Surface reusable modules at the decision point, so scaffolds can $use them
2107
+ // instead of duplicating action sequences.
2108
+ let availableModules = [];
2109
+ try {
2110
+ availableModules = listModules(config.modulesDir).map(m => ({
2111
+ name: m.name,
2112
+ params: m.params.map(p => (p.required ? p.name : `${p.name}?`)),
2113
+ description: m.description || undefined,
2114
+ hint: `{ "$use": "${m.name}"${m.params.some(p => p.required) ? ', "params": { ... }' : ''} }`,
2115
+ }));
2116
+ } catch { /* modules dir may not exist */ }
2117
+
2118
+ const result = { meta, ...structure, suggestedTests, availableModules };
1965
2119
  const content = [{ type: 'text', text: JSON.stringify(result, null, 2) }];
1966
2120
 
1967
2121
  if (screenshotBase64) {
@@ -2017,7 +2171,18 @@ async function handleCapture(args) {
2017
2171
  }
2018
2172
 
2019
2173
  const screenshotPath = path.join(config.screenshotsDir, filename);
2020
- await page.screenshot({ path: screenshotPath, fullPage: !!args.fullPage });
2174
+ const data = await page.screenshot({ fullPage: !!args.fullPage });
2175
+
2176
+ // Blank frame (uniform color — page never rendered): don't save it,
2177
+ // report what happened instead of returning a useless white PNG.
2178
+ if (looksLikeBlankCapture(data, 'png')) {
2179
+ return {
2180
+ content: [
2181
+ { type: 'text', text: `Capture skipped: ${args.url} rendered a blank (uniform-color) frame — nothing saved. The page likely failed to render (auth redirect, JS error, or slow load); try a longer delay or a selector to wait for.` },
2182
+ ],
2183
+ };
2184
+ }
2185
+ fs.writeFileSync(screenshotPath, data);
2021
2186
 
2022
2187
  // Register hash in SQLite
2023
2188
  const cwd = args.cwd || process.cwd();
@@ -2026,8 +2191,6 @@ async function handleCapture(args) {
2026
2191
  const hash = computeScreenshotHash(screenshotPath);
2027
2192
  registerScreenshotHash(hash, screenshotPath, projectId, null);
2028
2193
 
2029
- // Read image for response
2030
- const data = fs.readFileSync(screenshotPath);
2031
2194
  const base64 = data.toString('base64');
2032
2195
 
2033
2196
  return {
package/src/narrate.js CHANGED
@@ -59,6 +59,12 @@ export function narrateAction(action, result) {
59
59
  case 'assert_text':
60
60
  return `Verified text "${text}" is present on page${time}`;
61
61
 
62
+ case 'assert_no_text':
63
+ return `Verified text "${text}" is NOT present on page${time}`;
64
+
65
+ case 'assert_text_in':
66
+ return `Verified "${selector}" contains text "${text}"${value === 'exact' ? ' (exact)' : ''}${time}`;
67
+
62
68
  case 'assert_url':
63
69
  return `Verified URL contains "${value}"${time}`;
64
70
 
@@ -186,6 +192,15 @@ export function narrateAction(action, result) {
186
192
  return `Visual comparison against "${value}": ${pct}${time}`;
187
193
  }
188
194
 
195
+ case 'gql': {
196
+ const query = (value || '').replace(/\s+/g, ' ').trim();
197
+ const snippet = query.length > 60 ? query.slice(0, 57) + '...' : query;
198
+ return `Executed GraphQL: ${snippet}${selector ? ' (asserted response)' : ''}${time}`;
199
+ }
200
+
201
+ case 'wait_network_idle':
202
+ return `Waited for network idle (${value || 500}ms)${time}`;
203
+
189
204
  case 'open_tab':
190
205
  return `Opened new tab${text ? ` "${text}"` : ''} → ${value}${time}`;
191
206
 
@@ -223,6 +238,8 @@ function describeIntent(action) {
223
238
  return `Wait ${value}ms`;
224
239
  case 'screenshot': return 'Capture screenshot';
225
240
  case 'assert_text': return `Assert text "${text}" present`;
241
+ case 'assert_no_text': return `Assert text "${text}" NOT present`;
242
+ case 'assert_text_in': return `Assert "${selector}" contains "${text}"`;
226
243
  case 'assert_url': return `Assert URL contains "${value}"`;
227
244
  case 'assert_visible': return `Assert "${selector}" visible`;
228
245
  case 'assert_count': return `Assert "${selector}" count = ${value}`;
@@ -259,6 +276,8 @@ function describeIntent(action) {
259
276
  case 'click_in_context': return `Click "${selector}" in context of "${text}"`;
260
277
  case 'evaluate': return 'Execute JS';
261
278
  case 'assert_visual': return `Visual compare against "${value}"`;
279
+ case 'gql': return 'Execute GraphQL query';
280
+ case 'wait_network_idle': return 'Wait for network idle';
262
281
  case 'open_tab': return `Open new tab → ${value}`;
263
282
  case 'switch_tab': return `Switch to tab "${value}"`;
264
283
  case 'close_tab': return `Close tab${value ? ` "${value}"` : ''}`;
package/src/runner.js CHANGED
@@ -9,6 +9,7 @@ import fs from 'fs';
9
9
  import path from 'path';
10
10
  import http from 'http';
11
11
  import https from 'https';
12
+ import crypto from 'crypto';
12
13
  import { connectToPool, getCachedDriver, disconnectFromPool } from './pool.js';
13
14
  import { getPoolUrls, selectPool, releasePending, resolvePoolsForTest } from './pool-manager.js';
14
15
  import { forkAppInstance, destroyFork, isAppPoolEnabled } from './app-pool.js';
@@ -28,9 +29,17 @@ function sleep(ms) {
28
29
  * Captures once in memory, writes to disk AND returns base64 so callers
29
30
  * can stream the same frame through the live preview WebSocket.
30
31
  * Skips silently on any error so it never breaks a test run.
32
+ *
33
+ * Content dedup: when the captured frame is byte-identical to the previous
34
+ * step's frame (tracked per-test via dedupState), reuses the existing file
35
+ * instead of writing a duplicate, and skips re-streaming the live frame.
36
+ *
37
+ * Raw data responses (JSON/plain-text endpoints rendered by Chrome's viewer
38
+ * as a white page with a single <pre>) are NOT screenshotted — the body is
39
+ * saved as a minified .json sidecar instead and returned as { dataPath }.
31
40
  */
32
41
  const NO_AUTO_CAPTURE_TYPES = new Set(['screenshot', 'close_tab']);
33
- async function tryAutoCaptureStep(page, action, idx, testName, effectiveConfig, alreadyCaptured) {
42
+ async function tryAutoCaptureStep(page, action, idx, testName, effectiveConfig, alreadyCaptured, dedupState) {
34
43
  if (!effectiveConfig.autoCaptureSteps) return null;
35
44
  if (NO_AUTO_CAPTURE_TYPES.has(action?.type)) return null;
36
45
  if (alreadyCaptured) return null;
@@ -40,6 +49,31 @@ async function tryAutoCaptureStep(page, action, idx, testName, effectiveConfig,
40
49
  if (!(await pageHasRenderableContent(page))) return null;
41
50
  try {
42
51
  const safeName = String(testName).replace(/[^a-zA-Z0-9_\-. ]/g, '_');
52
+ // Raw JSON/text response? Save the body as data, not as a white JPEG.
53
+ const rawBody = await page.evaluate(() => {
54
+ const ct = document.contentType || '';
55
+ const b = document.body;
56
+ const lonePre = !!(b && b.children.length === 1 && b.children[0].tagName === 'PRE' && b.children[0].children.length === 0);
57
+ if ((ct && ct !== 'text/html') || lonePre) return (b && b.innerText) || '';
58
+ return null;
59
+ }).catch(() => null);
60
+ if (rawBody !== null) {
61
+ let text = rawBody.trim();
62
+ if (!text) return null;
63
+ try { text = JSON.stringify(JSON.parse(text)); } catch { /* not JSON — keep raw text */ }
64
+ const dataBuf = Buffer.from(text, 'utf8');
65
+ const dataHash = crypto.createHash('sha1').update(dataBuf).digest('hex');
66
+ if (dedupState && dedupState.hash === dataHash && dedupState.path) {
67
+ return { dataPath: dedupState.path, deduped: true };
68
+ }
69
+ const dataPath = path.join(effectiveConfig.screenshotsDir, `step-${safeName}-${String(idx).padStart(3, '0')}-${Date.now()}.json`);
70
+ fs.writeFileSync(dataPath, dataBuf);
71
+ if (dedupState) {
72
+ dedupState.hash = dataHash;
73
+ dedupState.path = dataPath;
74
+ }
75
+ return { dataPath };
76
+ }
43
77
  const filename = `step-${safeName}-${String(idx).padStart(3, '0')}-${Date.now()}.jpg`;
44
78
  const filepath = path.join(effectiveConfig.screenshotsDir, filename);
45
79
  const buf = await page.screenshot({
@@ -49,7 +83,16 @@ async function tryAutoCaptureStep(page, action, idx, testName, effectiveConfig,
49
83
  encoding: 'binary',
50
84
  });
51
85
  if (looksLikeBlankCapture(buf, 'jpeg')) return null;
86
+ const contentHash = crypto.createHash('sha1').update(buf).digest('hex');
87
+ if (dedupState && dedupState.hash === contentHash && dedupState.path) {
88
+ // Same frame as the previous step — reuse the file, don't re-stream
89
+ return { path: dedupState.path, base64: null, deduped: true };
90
+ }
52
91
  fs.writeFileSync(filepath, buf);
92
+ if (dedupState) {
93
+ dedupState.hash = contentHash;
94
+ dedupState.path = filepath;
95
+ }
53
96
  return { path: filepath, base64: buf.toString('base64') };
54
97
  } catch {
55
98
  return null;
@@ -356,16 +399,25 @@ export async function runTest(test, config, hooks = {}, progressFn = () => {}) {
356
399
  await executeHookActions(page, hooks.beforeEach, effectiveConfig);
357
400
  }
358
401
 
359
- // Auto-capture baseline screenshot if test has "expect" (BEFORE actions)
402
+ // Auto-capture baseline screenshot if test has "expect" (BEFORE actions).
403
+ // Blank frames (about:blank, white unrendered page) are not saved —
404
+ // they have no comparison value and pollute screenshotsDir.
360
405
  if (test.expect && page) {
361
406
  try {
362
- const safeName = test.name.replace(/[^a-zA-Z0-9_\-. ]/g, '_');
363
- const baselinePath = path.join(effectiveConfig.screenshotsDir, `baseline-${safeName}-${Date.now()}.png`);
364
- await page.screenshot({ path: baselinePath, fullPage: true });
365
- result.baselineScreenshot = baselinePath;
407
+ const baseBuf = await page.screenshot({ fullPage: true });
408
+ if (!looksLikeBlankCapture(baseBuf, 'png')) {
409
+ const safeName = test.name.replace(/[^a-zA-Z0-9_\-. ]/g, '_');
410
+ const baselinePath = path.join(effectiveConfig.screenshotsDir, `baseline-${safeName}-${Date.now()}.png`);
411
+ fs.writeFileSync(baselinePath, baseBuf);
412
+ result.baselineScreenshot = baselinePath;
413
+ }
366
414
  } catch { /* page may not be ready */ }
367
415
  }
368
416
 
417
+ // Tracks the last auto-captured frame (content hash + path) so identical
418
+ // consecutive step screenshots reuse the same file instead of duplicating
419
+ const stepCaptureState = { hash: null, path: null };
420
+
369
421
  for (let i = 0; i < test.actions.length; i++) {
370
422
  const action = test.actions[i];
371
423
  const maxActionRetries = action.retries ?? effectiveConfig.actionRetries ?? 0;
@@ -492,14 +544,15 @@ export async function runTest(test, config, hooks = {}, progressFn = () => {}) {
492
544
  actionResult = await executeAction(page, action, effectiveConfig);
493
545
  }
494
546
  const actionDuration = Date.now() - actionStart;
495
- const autoShot = await tryAutoCaptureStep(page, action, i, test.name, effectiveConfig, !!actionResult?.screenshot);
547
+ const autoShot = await tryAutoCaptureStep(page, action, i, test.name, effectiveConfig, !!actionResult?.screenshot, stepCaptureState);
496
548
  const actionEntry = {
497
549
  ...action,
498
550
  success: true,
499
551
  duration: actionDuration,
500
552
  result: actionResult,
501
553
  };
502
- if (autoShot) actionEntry.autoScreenshot = autoShot.path;
554
+ if (autoShot?.path) actionEntry.autoScreenshot = autoShot.path;
555
+ if (autoShot?.dataPath) actionEntry.dataCapture = autoShot.dataPath;
503
556
  if (attempt > 0) actionEntry.actionRetries = attempt;
504
557
  actionEntry.narrative = narrateAction(action, actionEntry);
505
558
  result.actions.push(actionEntry);
@@ -516,14 +569,15 @@ export async function runTest(test, config, hooks = {}, progressFn = () => {}) {
516
569
  continue;
517
570
  }
518
571
  const actionDuration = Date.now() - actionStart;
519
- const autoShot = await tryAutoCaptureStep(page, action, i, test.name, effectiveConfig, false);
572
+ const autoShot = await tryAutoCaptureStep(page, action, i, test.name, effectiveConfig, false, stepCaptureState);
520
573
  const failedEntry = {
521
574
  ...action,
522
575
  success: false,
523
576
  duration: actionDuration,
524
577
  error: error.message,
525
578
  };
526
- if (autoShot) failedEntry.autoScreenshot = autoShot.path;
579
+ if (autoShot?.path) failedEntry.autoScreenshot = autoShot.path;
580
+ if (autoShot?.dataPath) failedEntry.dataCapture = autoShot.dataPath;
527
581
  if (maxActionRetries > 0) failedEntry.actionRetries = attempt;
528
582
  failedEntry.narrative = narrateAction(action, failedEntry);
529
583
  result.actions.push(failedEntry);
@@ -540,14 +594,18 @@ export async function runTest(test, config, hooks = {}, progressFn = () => {}) {
540
594
  throw new Error(`Network errors detected (failOnNetworkError=true): ${result.networkErrors.length} error(s): ${summary}`);
541
595
  }
542
596
 
543
- // Auto-capture verification screenshot if test has "expect"
597
+ // Auto-capture verification screenshot if test has "expect".
598
+ // Blank frames are skipped (not saved) — same guard as the baseline.
544
599
  if (test.expect && page) {
545
600
  result.expect = test.expect;
546
601
  try {
547
602
  const safeName = test.name.replace(/[^a-zA-Z0-9_\-. ]/g, '_');
548
- const verifyPath = path.join(effectiveConfig.screenshotsDir, `verify-${safeName}-${Date.now()}.png`);
549
- await page.screenshot({ path: verifyPath, fullPage: true });
550
- result.verificationScreenshot = verifyPath;
603
+ const verifyBuf = await page.screenshot({ fullPage: true });
604
+ if (!looksLikeBlankCapture(verifyBuf, 'png')) {
605
+ const verifyPath = path.join(effectiveConfig.screenshotsDir, `verify-${safeName}-${Date.now()}.png`);
606
+ fs.writeFileSync(verifyPath, verifyBuf);
607
+ result.verificationScreenshot = verifyPath;
608
+ }
551
609
 
552
610
  // Auto visual comparison: compare baseline vs verification screenshot
553
611
  if (result.baselineScreenshot && result.verificationScreenshot) {