donobu 5.41.3 → 5.41.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -150,6 +150,7 @@ class PageAi {
150
150
  async ai(page, instruction, options) {
151
151
  const startedAt = Date.now();
152
152
  let cacheHit = false;
153
+ let cacheStored = false;
153
154
  let thrownError = undefined;
154
155
  try {
155
156
  const descriptor = this.buildDescriptor(page, instruction, options);
@@ -197,6 +198,7 @@ class PageAi {
197
198
  }, this.donobu.toolRegistry);
198
199
  const cacheEntry = cacheEntryBuilder_1.PageAiCacheEntryBuilder.fromMetadata(descriptor.key.pageUrl, runResult.donobuFlow.metadata, preparedToolCalls);
199
200
  await this.cache.put(cacheEntry);
201
+ cacheStored = true;
200
202
  }
201
203
  return runResult.parsedResult;
202
204
  }
@@ -212,6 +214,7 @@ class PageAi {
212
214
  startedAt,
213
215
  endedAt: Date.now(),
214
216
  cacheHit,
217
+ cacheStored,
215
218
  passed: thrownError === undefined,
216
219
  error: thrownError !== undefined
217
220
  ? { message: thrownError?.message }
@@ -488,10 +488,40 @@ export interface AiInvocationRecord {
488
488
  startedAt: number;
489
489
  endedAt: number;
490
490
  cacheHit: boolean;
491
+ /**
492
+ * For live (non-replay) invocations: `true` once this run successfully
493
+ * wrote an entry into the relevant page-AI cache, `false` if a write was
494
+ * attempted (or would have been) but didn't land. Combined with
495
+ * `cacheHit`, this gives the reporter a tri-state cache outcome — hit
496
+ * (replayed), stored (live + recorded), or miss (live + nothing cached).
497
+ * Always `false` when `cacheHit` is `true`.
498
+ */
499
+ cacheStored: boolean;
491
500
  passed: boolean;
492
501
  error?: {
493
502
  message?: string;
494
503
  };
504
+ /**
505
+ * For live `page.ai.assert` runs: metadata about the post-pass structured
506
+ * step verification. After the AI judges the assertion passed against a
507
+ * screenshot, AssertTool re-executes the AI-emitted Playwright `expect()`
508
+ * calls against the page to decide whether those structured steps are
509
+ * cache-worthy. When `failed: true`, the AI's visual verdict still stands
510
+ * — the tool returns success — but one of the structured `expect()` calls
511
+ * underneath threw. The reporter uses this to surface the divergence as a
512
+ * labelled signal rather than render the inner expect failure as a regular
513
+ * assertion failure.
514
+ *
515
+ * Undefined when verification didn't run (no structured steps emitted, AI
516
+ * verdict was failed, cached replay path, or AssertTool invoked outside
517
+ * the page.ai.assert wrapper).
518
+ */
519
+ verification?: {
520
+ startedAt: number;
521
+ endedAt: number;
522
+ failed: boolean;
523
+ errorMessage?: string;
524
+ };
495
525
  /**
496
526
  * For cached `page.ai.assert` invocations: the structured Playwright
497
527
  * assertion steps that were replayed. The reporter formats these back
@@ -220,8 +220,10 @@ Valid options:
220
220
  assert: async (assertion, options) => {
221
221
  const aiInvocationStartedAt = Date.now();
222
222
  let aiInvocationCacheHit = false;
223
+ let aiInvocationCacheStored = false;
223
224
  let aiInvocationError = undefined;
224
225
  let aiInvocationAssertSteps;
226
+ let aiInvocationVerification;
225
227
  try {
226
228
  const useCache = options?.cache !== false;
227
229
  const clearCache = sharedState.runtimeDirectives?.clearPageAiCache ?? false;
@@ -322,6 +324,7 @@ Valid options:
322
324
  finally {
323
325
  sharedState.envVals = previousEnvVals;
324
326
  }
327
+ aiInvocationVerification = result.outcome.metadata?.verification;
325
328
  if (!result.outcome.isSuccessful) {
326
329
  throw new ToolCallFailedException_1.ToolCallFailedException(AssertTool_1.AssertTool.NAME, result.outcome);
327
330
  }
@@ -333,6 +336,7 @@ Valid options:
333
336
  const cache = getOrInitPageAiCache();
334
337
  const pageUrl = (0, cacheLocator_1.extractCacheKeyHostname)(page.url());
335
338
  await cache.putAssert({ pageUrl, assertion, steps });
339
+ aiInvocationCacheStored = true;
336
340
  Logger_1.appLogger.debug(`Assert cache STORED for: "${assertion}"`);
337
341
  }
338
342
  catch (error) {
@@ -352,11 +356,13 @@ Valid options:
352
356
  startedAt: aiInvocationStartedAt,
353
357
  endedAt: Date.now(),
354
358
  cacheHit: aiInvocationCacheHit,
359
+ cacheStored: aiInvocationCacheStored,
355
360
  passed: aiInvocationError === undefined,
356
361
  error: aiInvocationError !== undefined
357
362
  ? { message: aiInvocationError?.message }
358
363
  : undefined,
359
364
  assertSteps: aiInvocationAssertSteps,
365
+ verification: aiInvocationVerification,
360
366
  });
361
367
  }
362
368
  },
@@ -434,6 +440,7 @@ Use this information to return an appropriate JSON object.`,
434
440
  locate: async (description, options) => {
435
441
  const aiInvocationStartedAt = Date.now();
436
442
  let aiInvocationCacheHit = false;
443
+ let aiInvocationCacheStored = false;
437
444
  let aiInvocationError = undefined;
438
445
  const useCache = options?.cache !== false;
439
446
  const clearCache = sharedState.runtimeDirectives?.clearPageAiCache ?? false;
@@ -525,6 +532,7 @@ Use this information to return an appropriate JSON object.`,
525
532
  try {
526
533
  const cache = getOrInitPageAiCache();
527
534
  await cache.putLocate({ pageUrl, description, result });
535
+ aiInvocationCacheStored = true;
528
536
  Logger_1.appLogger.debug(`Locate cache STORED for: "${description}"`);
529
537
  }
530
538
  catch (error) {
@@ -545,6 +553,7 @@ Use this information to return an appropriate JSON object.`,
545
553
  startedAt: aiInvocationStartedAt,
546
554
  endedAt: Date.now(),
547
555
  cacheHit: aiInvocationCacheHit,
556
+ cacheStored: aiInvocationCacheStored,
548
557
  passed: aiInvocationError === undefined,
549
558
  error: aiInvocationError !== undefined
550
559
  ? { message: aiInvocationError?.message }
@@ -577,18 +577,42 @@ function renderErrors(errors) {
577
577
  }
578
578
  return html;
579
579
  }
580
- function renderNativeStep(ns, childrenHtml) {
581
- const statusIcon = ns.passed
582
- ? '<span class="step-status-ok">&#10003;</span>'
583
- : '<span class="step-status-fail">&#10007;</span>';
584
- const categoryBadge = `<span class="native-step-badge native-step-badge--${ns.category}">${esc(ns.category)}</span>`;
580
+ function renderNativeStep(ns, childrenHtml, verifyContext = false) {
581
+ // Expects inside an assert tool's cache-worthiness verification window are
582
+ // not real assertion checks — they're AssertTool re-running its own
583
+ // AI-emitted structured `expect()` calls to decide whether to cache them.
584
+ // When one fails, the AI's screenshot-based verdict still stands; only the
585
+ // structured locator faithfulness is in question. Render those with a
586
+ // distinct status (passed → "verified", failed → "diverged") so they
587
+ // don't look like assertion failures sitting under a passing assertion.
588
+ const statusIcon = verifyContext
589
+ ? ns.passed
590
+ ? '<span class="step-status-verified" title="Cache-verify check passed">&#10003;</span>'
591
+ : '<span class="step-status-diverged" title="Cache-verify locator did not match the AI&#39;s visual verdict">&#10073;</span>'
592
+ : ns.passed
593
+ ? '<span class="step-status-ok">&#10003;</span>'
594
+ : '<span class="step-status-fail">&#10007;</span>';
595
+ const categoryLabel = verifyContext
596
+ ? ns.passed
597
+ ? 'verify-cache'
598
+ : 'verify-cache diverged'
599
+ : ns.category;
600
+ const categoryClass = verifyContext
601
+ ? ns.passed
602
+ ? 'native-step-badge--verify'
603
+ : 'native-step-badge--verify-diverged'
604
+ : `native-step-badge--${ns.category}`;
605
+ const categoryBadge = `<span class="native-step-badge ${categoryClass}">${esc(categoryLabel)}</span>`;
585
606
  const locationStr = ns.location?.file
586
607
  ? esc(`${ns.location.file.replace(/.*[/\\]/, '')}:${ns.location.line}`)
587
608
  : '';
588
609
  const snippet = ns.location?.file
589
610
  ? readSourceSnippet(ns.location.file, ns.location.line)
590
611
  : null;
591
- const hasError = !ns.passed && !!ns.error?.message;
612
+ // Cache-verify failures aren't surfaced as red errors; the message lives
613
+ // alongside the parent invocation's `cache · miss` pill instead. The step
614
+ // still gets a body when a snippet or children exist; only the error block is suppressed.
615
+ const hasError = !ns.passed && !!ns.error?.message && !verifyContext;
592
616
  const hasBody = !!snippet || hasError || !!childrenHtml;
593
617
  const renderHeader = (tag) => {
594
618
  let header = `<${tag} class="filmstrip-header">`;
@@ -609,9 +633,17 @@ function renderNativeStep(ns, childrenHtml) {
609
633
  // Failures always render expanded so the error is immediately visible.
610
634
  // test.step blocks with nested content also default open so users see
611
635
  // what's inside; bare passing expects with just a snippet collapse to
612
- // keep tests with many assertions scannable.
613
- const defaultOpen = !ns.passed || (ns.category === 'test.step' && !!childrenHtml);
614
- const passClass = ns.passed ? 'native-step--passed' : 'native-step--failed';
636
+ // keep tests with many assertions scannable. Cache-verify divergences
637
+ // are routine signal, so they start collapsed and don't dominate the view.
638
+ const defaultOpen = !verifyContext &&
639
+ (!ns.passed || (ns.category === 'test.step' && !!childrenHtml));
640
+ const passClass = verifyContext
641
+ ? ns.passed
642
+ ? 'native-step--verify'
643
+ : 'native-step--verify-diverged'
644
+ : ns.passed
645
+ ? 'native-step--passed'
646
+ : 'native-step--failed';
615
647
  let html = `<details class="filmstrip-step native-step expandable ${passClass}"${defaultOpen ? ' open' : ''}>`;
616
648
  html += renderHeader('summary');
617
649
  if (hasError) {
@@ -679,12 +711,31 @@ function renderAiInvocation(inv, childrenHtml) {
679
711
  ? '<span class="step-status-ok">&#10003;</span>'
680
712
  : '<span class="step-status-fail">&#10007;</span>';
681
713
  const kindBadge = `<span class="ai-invocation-badge ai-invocation-badge--${inv.kind}">${esc(AI_KIND_LABELS[inv.kind])}</span>`;
682
- const cachedBadge = inv.cacheHit
683
- ? '<span class="ai-cached-badge">cached</span>'
684
- : '';
714
+ const cacheState = inv.cacheHit
715
+ ? 'hit'
716
+ : inv.cacheStored
717
+ ? 'stored'
718
+ : 'miss';
719
+ const cacheLabel = {
720
+ hit: 'cache · hit',
721
+ stored: 'cache · stored',
722
+ miss: 'cache · miss',
723
+ };
724
+ const cacheTitle = {
725
+ hit: 'Replayed from the page-AI cache. No AI call this run.',
726
+ stored: 'Live AI run; the resulting locators/steps were recorded to the page-AI cache. The next run can replay them without calling the AI.',
727
+ miss: "Live AI run; nothing was recorded to the page-AI cache. The next run will hit the AI again. For asserts, this typically means the AI's structured Playwright locators didn't reproduce its screenshot verdict.",
728
+ };
729
+ const cacheBadge = `<span class="ai-cache-badge ai-cache-badge--${cacheState}" title="${esc(cacheTitle[cacheState])}">${cacheLabel[cacheState]}</span>`;
730
+ // For a passing assert whose structured-step verifier failed, surface
731
+ // *why* the cache outcome was `miss`. The header pill carries the
732
+ // at-a-glance signal; this body content is the technical detail.
733
+ // (When the assert itself failed, the regular failure path already
734
+ // covers it.)
735
+ const showVerifierDetail = inv.passed && inv.verification?.failed === true;
685
736
  const hasError = !inv.passed && !!inv.error?.message;
686
737
  const hasAssertSteps = !!inv.assertSteps && inv.assertSteps.length > 0;
687
- const hasBody = hasError || !!childrenHtml || hasAssertSteps;
738
+ const hasBody = hasError || !!childrenHtml || hasAssertSteps || showVerifierDetail;
688
739
  const renderHeader = (tag) => {
689
740
  let header = `<${tag} class="filmstrip-header">`;
690
741
  header +=
@@ -692,7 +743,7 @@ function renderAiInvocation(inv, childrenHtml) {
692
743
  header += statusIcon;
693
744
  header += `<span class="ai-invocation-title">${esc(inv.description)}</span>`;
694
745
  header += kindBadge;
695
- header += cachedBadge;
746
+ header += cacheBadge;
696
747
  header += `</${tag}>`;
697
748
  return header;
698
749
  };
@@ -706,13 +757,20 @@ function renderAiInvocation(inv, childrenHtml) {
706
757
  // by default so the contents are visible without an extra click.
707
758
  const defaultOpen = !inv.passed || !!childrenHtml || hasAssertSteps;
708
759
  const passClass = inv.passed
709
- ? 'ai-invocation--passed'
760
+ ? showVerifierDetail
761
+ ? 'ai-invocation--passed ai-invocation--cache-miss'
762
+ : 'ai-invocation--passed'
710
763
  : 'ai-invocation--failed';
711
764
  let html = `<details class="filmstrip-step ai-invocation expandable ${passClass}"${defaultOpen ? ' open' : ''}>`;
712
765
  html += renderHeader('summary');
713
766
  if (hasError) {
714
767
  html += `<pre class="native-step-error">${ansiToHtml(inv.error.message)}</pre>`;
715
768
  }
769
+ if (showVerifierDetail && inv.verification?.errorMessage) {
770
+ html +=
771
+ `<div class="ai-cache-miss-explainer">The AI&rsquo;s screenshot verdict (passed) is what counts. Its structured Playwright steps did not reproduce that verdict against the live page — most often an over-broad locator — so they were not cached. The diverging check is highlighted below.</div>` +
772
+ `<pre class="ai-cache-miss-detail">${ansiToHtml(inv.verification.errorMessage)}</pre>`;
773
+ }
716
774
  if (hasAssertSteps) {
717
775
  const lines = inv
718
776
  .assertSteps.map((s) => esc(formatAssertionStep(s)))
@@ -1070,6 +1128,28 @@ function renderSteps(steps, stepScreenshots, nativeSteps, aiInvocations, outputD
1070
1128
  }
1071
1129
  return c;
1072
1130
  };
1131
+ // A native step is part of an AssertTool cache-worthiness verification
1132
+ // (rather than a user-authored assertion) iff its time window falls
1133
+ // inside the `verification` window of some enclosing AI invocation.
1134
+ // `verifyWindows` is the ordered list of those windows; `inVerify`
1135
+ // checks membership without scanning the tree.
1136
+ const verifyWindows = [];
1137
+ for (const inv of aiInvocations) {
1138
+ if (inv.verification) {
1139
+ verifyWindows.push({
1140
+ start: inv.verification.startedAt,
1141
+ end: inv.verification.endedAt,
1142
+ });
1143
+ }
1144
+ }
1145
+ const inVerify = (t, tEnd) => {
1146
+ for (const w of verifyWindows) {
1147
+ if (t >= w.start && tEnd <= w.end) {
1148
+ return true;
1149
+ }
1150
+ }
1151
+ return false;
1152
+ };
1073
1153
  const renderNode = (node) => {
1074
1154
  if (node.kind === 'donobu') {
1075
1155
  return renderFilmstripStep(node.ss, outputDir);
@@ -1083,7 +1163,7 @@ function renderSteps(steps, stepScreenshots, nativeSteps, aiInvocations, outputD
1083
1163
  const childrenHtml = node.children.length > 0
1084
1164
  ? `<div class="native-step-children">${node.children.map(renderNode).join('')}</div>`
1085
1165
  : '';
1086
- return renderNativeStep(node.ns, childrenHtml);
1166
+ return renderNativeStep(node.ns, childrenHtml, inVerify(node.t, node.tEnd));
1087
1167
  };
1088
1168
  const stepCount = countNodes(roots);
1089
1169
  let html = '<details class="steps-section"><summary>Steps (' +
@@ -1781,6 +1861,8 @@ body::before{content:'';position:fixed;top:-750px;left:50%;transform:translateX(
1781
1861
  .filmstrip-summary{font-size:11px;color:var(--text-dim);margin-top:2px;padding-left:44px}
1782
1862
  .step-status-ok{color:var(--green);font-size:12px;font-weight:bold}
1783
1863
  .step-status-fail{color:var(--red);font-size:12px;font-weight:bold}
1864
+ .step-status-verified{color:#94a3b8;font-size:12px;font-weight:bold}
1865
+ .step-status-diverged{color:#fbbf24;font-size:14px;font-weight:bold;line-height:1}
1784
1866
  .filmstrip-detail{display:none;padding:8px 0 4px 44px;flex-direction:row;gap:12px;align-items:flex-start}
1785
1867
  .filmstrip-step.open .filmstrip-detail{display:flex}
1786
1868
  .filmstrip-detail>a{flex-shrink:0;max-width:50%}
@@ -1840,6 +1922,8 @@ details.native-step>summary::-webkit-details-marker{display:none}
1840
1922
  .native-step-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0}
1841
1923
  .native-step-badge--expect{background:rgba(99,102,241,.12);color:#818cf8}
1842
1924
  .native-step-badge--test\.step{background:rgba(16,185,129,.10);color:#34d399}
1925
+ .native-step-badge--verify{background:rgba(148,163,184,.12);color:#94a3b8}
1926
+ .native-step-badge--verify-diverged{background:rgba(245,158,11,.12);color:#fbbf24}
1843
1927
  .native-step-location{font-size:10px;color:var(--text-dim);font-family:var(--mono);margin-left:auto;flex-shrink:0;white-space:nowrap}
1844
1928
  details.native-step[open]>summary .native-step-chevron{transform:rotate(90deg)}
1845
1929
  .native-step-error{font-size:11px;font-family:var(--mono);padding:4px 0 2px 44px;margin:0;white-space:pre-wrap;word-break:break-word;color:var(--text-muted)}
@@ -1860,7 +1944,17 @@ details.ai-invocation>summary::-webkit-details-marker{display:none}
1860
1944
  .ai-invocation-badge--act{background:rgba(168,85,247,.12);color:#c084fc}
1861
1945
  .ai-invocation-badge--assert{background:rgba(236,72,153,.12);color:#f472b6}
1862
1946
  .ai-invocation-badge--locate{background:rgba(59,130,246,.12);color:#60a5fa}
1863
- .ai-cached-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0;background:rgba(245,158,11,.12);color:#fbbf24}
1947
+ .ai-cache-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0;font-family:var(--mono);cursor:help}
1948
+ .ai-cache-badge--hit{background:rgba(59,130,246,.12);color:#60a5fa}
1949
+ .ai-cache-badge--stored{background:rgba(52,211,153,.12);color:#34d399}
1950
+ .ai-cache-badge--miss{background:rgba(245,158,11,.12);color:#fbbf24}
1951
+ .ai-cache-miss-explainer{font-size:11px;color:var(--text-muted);padding:4px 0 2px 44px;line-height:1.45}
1952
+ .ai-cache-miss-detail{font-size:11px;font-family:var(--mono);padding:4px 0 2px 44px;margin:0;white-space:pre-wrap;word-break:break-word;color:var(--text-dim)}
1953
+ .ai-invocation--cache-miss>summary{box-shadow:inset 3px 0 0 0 rgba(245,158,11,.6)}
1954
+ .native-step--verify .snippet-line--target{background:rgba(148,163,184,.10)}
1955
+ .native-step--verify .snippet-line--target .snippet-linenum{color:#94a3b8}
1956
+ .native-step--verify-diverged .snippet-line--target{background:rgba(245,158,11,.10)}
1957
+ .native-step--verify-diverged .snippet-line--target .snippet-linenum{color:#fbbf24}
1864
1958
  details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)}
1865
1959
  .ai-assert-steps{font-size:11px;font-family:var(--mono);background:var(--bg);border:1px solid var(--border-subtle);border-radius:var(--radius);padding:8px 12px;margin:6px 0 2px 44px;color:var(--text-muted);white-space:pre-wrap;word-break:break-word;overflow-x:auto;max-height:240px;overflow-y:auto}
1866
1960
  .snippet-line{display:flex;padding:1px 8px;white-space:pre}
@@ -207,18 +207,34 @@ careful positioning lost, etc. A screenshot of the webpage has also been provide
207
207
  // When the AI assertion passes and structured steps were returned,
208
208
  // verify the steps against the live page before considering them
209
209
  // cacheable. If the steps fail, discard them but still return the
210
- // passing AI result.
210
+ // passing AI result. The verification window is recorded so the HTML
211
+ // reporter can label its `expect()` calls as cache-worthiness checks
212
+ // rather than treating an internal locator mismatch as an assertion
213
+ // failure.
211
214
  let verifiedSteps = assertionOutcome.output.playwrightAssertionSteps;
215
+ let verification;
212
216
  if (assertPassed &&
213
217
  Array.isArray(verifiedSteps) &&
214
218
  verifiedSteps.length > 0) {
219
+ const verifyStartedAt = Date.now();
215
220
  try {
216
221
  const executor = (0, assertCache_1.buildAssertExecutor)(verifiedSteps);
217
222
  await executor({ page: page, envData: context.envData });
223
+ verification = {
224
+ startedAt: verifyStartedAt,
225
+ endedAt: Date.now(),
226
+ failed: false,
227
+ };
218
228
  }
219
229
  catch (error) {
220
230
  Logger_1.appLogger.debug(`Structured assertion steps failed verification for: "${parameters.assertionToTestFor}" — discarding steps. Error: ${error.message}`);
221
231
  verifiedSteps = null;
232
+ verification = {
233
+ startedAt: verifyStartedAt,
234
+ endedAt: Date.now(),
235
+ failed: true,
236
+ errorMessage: error.message,
237
+ };
222
238
  }
223
239
  }
224
240
  const result = {
@@ -227,6 +243,7 @@ careful positioning lost, etc. A screenshot of the webpage has also been provide
227
243
  metadata: {
228
244
  ...assertionOutcome.output,
229
245
  playwrightAssertionSteps: verifiedSteps,
246
+ verification,
230
247
  attempt: attempt + 1,
231
248
  },
232
249
  };
@@ -150,6 +150,7 @@ class PageAi {
150
150
  async ai(page, instruction, options) {
151
151
  const startedAt = Date.now();
152
152
  let cacheHit = false;
153
+ let cacheStored = false;
153
154
  let thrownError = undefined;
154
155
  try {
155
156
  const descriptor = this.buildDescriptor(page, instruction, options);
@@ -197,6 +198,7 @@ class PageAi {
197
198
  }, this.donobu.toolRegistry);
198
199
  const cacheEntry = cacheEntryBuilder_1.PageAiCacheEntryBuilder.fromMetadata(descriptor.key.pageUrl, runResult.donobuFlow.metadata, preparedToolCalls);
199
200
  await this.cache.put(cacheEntry);
201
+ cacheStored = true;
200
202
  }
201
203
  return runResult.parsedResult;
202
204
  }
@@ -212,6 +214,7 @@ class PageAi {
212
214
  startedAt,
213
215
  endedAt: Date.now(),
214
216
  cacheHit,
217
+ cacheStored,
215
218
  passed: thrownError === undefined,
216
219
  error: thrownError !== undefined
217
220
  ? { message: thrownError?.message }
@@ -488,10 +488,40 @@ export interface AiInvocationRecord {
488
488
  startedAt: number;
489
489
  endedAt: number;
490
490
  cacheHit: boolean;
491
+ /**
492
+ * For live (non-replay) invocations: `true` once this run successfully
493
+ * wrote an entry into the relevant page-AI cache, `false` if a write was
494
+ * attempted (or would have been) but didn't land. Combined with
495
+ * `cacheHit`, this gives the reporter a tri-state cache outcome — hit
496
+ * (replayed), stored (live + recorded), or miss (live + nothing cached).
497
+ * Always `false` when `cacheHit` is `true`.
498
+ */
499
+ cacheStored: boolean;
491
500
  passed: boolean;
492
501
  error?: {
493
502
  message?: string;
494
503
  };
504
+ /**
505
+ * For live `page.ai.assert` runs: metadata about the post-pass structured
506
+ * step verification. After the AI judges the assertion passed against a
507
+ * screenshot, AssertTool re-executes the AI-emitted Playwright `expect()`
508
+ * calls against the page to decide whether those structured steps are
509
+ * cache-worthy. When `failed: true`, the AI's visual verdict still stands
510
+ * — the tool returns success — but one of the structured `expect()` calls
511
+ * underneath threw. The reporter uses this to surface the divergence as a
512
+ * labelled signal rather than render the inner expect failure as a regular
513
+ * assertion failure.
514
+ *
515
+ * Undefined when verification didn't run (no structured steps emitted, AI
516
+ * verdict was failed, cached replay path, or AssertTool invoked outside
517
+ * the page.ai.assert wrapper).
518
+ */
519
+ verification?: {
520
+ startedAt: number;
521
+ endedAt: number;
522
+ failed: boolean;
523
+ errorMessage?: string;
524
+ };
495
525
  /**
496
526
  * For cached `page.ai.assert` invocations: the structured Playwright
497
527
  * assertion steps that were replayed. The reporter formats these back
@@ -220,8 +220,10 @@ Valid options:
220
220
  assert: async (assertion, options) => {
221
221
  const aiInvocationStartedAt = Date.now();
222
222
  let aiInvocationCacheHit = false;
223
+ let aiInvocationCacheStored = false;
223
224
  let aiInvocationError = undefined;
224
225
  let aiInvocationAssertSteps;
226
+ let aiInvocationVerification;
225
227
  try {
226
228
  const useCache = options?.cache !== false;
227
229
  const clearCache = sharedState.runtimeDirectives?.clearPageAiCache ?? false;
@@ -322,6 +324,7 @@ Valid options:
322
324
  finally {
323
325
  sharedState.envVals = previousEnvVals;
324
326
  }
327
+ aiInvocationVerification = result.outcome.metadata?.verification;
325
328
  if (!result.outcome.isSuccessful) {
326
329
  throw new ToolCallFailedException_1.ToolCallFailedException(AssertTool_1.AssertTool.NAME, result.outcome);
327
330
  }
@@ -333,6 +336,7 @@ Valid options:
333
336
  const cache = getOrInitPageAiCache();
334
337
  const pageUrl = (0, cacheLocator_1.extractCacheKeyHostname)(page.url());
335
338
  await cache.putAssert({ pageUrl, assertion, steps });
339
+ aiInvocationCacheStored = true;
336
340
  Logger_1.appLogger.debug(`Assert cache STORED for: "${assertion}"`);
337
341
  }
338
342
  catch (error) {
@@ -352,11 +356,13 @@ Valid options:
352
356
  startedAt: aiInvocationStartedAt,
353
357
  endedAt: Date.now(),
354
358
  cacheHit: aiInvocationCacheHit,
359
+ cacheStored: aiInvocationCacheStored,
355
360
  passed: aiInvocationError === undefined,
356
361
  error: aiInvocationError !== undefined
357
362
  ? { message: aiInvocationError?.message }
358
363
  : undefined,
359
364
  assertSteps: aiInvocationAssertSteps,
365
+ verification: aiInvocationVerification,
360
366
  });
361
367
  }
362
368
  },
@@ -434,6 +440,7 @@ Use this information to return an appropriate JSON object.`,
434
440
  locate: async (description, options) => {
435
441
  const aiInvocationStartedAt = Date.now();
436
442
  let aiInvocationCacheHit = false;
443
+ let aiInvocationCacheStored = false;
437
444
  let aiInvocationError = undefined;
438
445
  const useCache = options?.cache !== false;
439
446
  const clearCache = sharedState.runtimeDirectives?.clearPageAiCache ?? false;
@@ -525,6 +532,7 @@ Use this information to return an appropriate JSON object.`,
525
532
  try {
526
533
  const cache = getOrInitPageAiCache();
527
534
  await cache.putLocate({ pageUrl, description, result });
535
+ aiInvocationCacheStored = true;
528
536
  Logger_1.appLogger.debug(`Locate cache STORED for: "${description}"`);
529
537
  }
530
538
  catch (error) {
@@ -545,6 +553,7 @@ Use this information to return an appropriate JSON object.`,
545
553
  startedAt: aiInvocationStartedAt,
546
554
  endedAt: Date.now(),
547
555
  cacheHit: aiInvocationCacheHit,
556
+ cacheStored: aiInvocationCacheStored,
548
557
  passed: aiInvocationError === undefined,
549
558
  error: aiInvocationError !== undefined
550
559
  ? { message: aiInvocationError?.message }
@@ -577,18 +577,42 @@ function renderErrors(errors) {
577
577
  }
578
578
  return html;
579
579
  }
580
- function renderNativeStep(ns, childrenHtml) {
581
- const statusIcon = ns.passed
582
- ? '<span class="step-status-ok">&#10003;</span>'
583
- : '<span class="step-status-fail">&#10007;</span>';
584
- const categoryBadge = `<span class="native-step-badge native-step-badge--${ns.category}">${esc(ns.category)}</span>`;
580
+ function renderNativeStep(ns, childrenHtml, verifyContext = false) {
581
+ // Expects inside an assert tool's cache-worthiness verification window are
582
+ // not real assertion checks — they're AssertTool re-running its own
583
+ // AI-emitted structured `expect()` calls to decide whether to cache them.
584
+ // When one fails, the AI's screenshot-based verdict still stands; only the
585
+ // structured locator faithfulness is in question. Render those with a
586
+ // distinct status (passed → "verified", failed → "diverged") so they
587
+ // don't look like assertion failures sitting under a passing assertion.
588
+ const statusIcon = verifyContext
589
+ ? ns.passed
590
+ ? '<span class="step-status-verified" title="Cache-verify check passed">&#10003;</span>'
591
+ : '<span class="step-status-diverged" title="Cache-verify locator did not match the AI&#39;s visual verdict">&#10073;</span>'
592
+ : ns.passed
593
+ ? '<span class="step-status-ok">&#10003;</span>'
594
+ : '<span class="step-status-fail">&#10007;</span>';
595
+ const categoryLabel = verifyContext
596
+ ? ns.passed
597
+ ? 'verify-cache'
598
+ : 'verify-cache diverged'
599
+ : ns.category;
600
+ const categoryClass = verifyContext
601
+ ? ns.passed
602
+ ? 'native-step-badge--verify'
603
+ : 'native-step-badge--verify-diverged'
604
+ : `native-step-badge--${ns.category}`;
605
+ const categoryBadge = `<span class="native-step-badge ${categoryClass}">${esc(categoryLabel)}</span>`;
585
606
  const locationStr = ns.location?.file
586
607
  ? esc(`${ns.location.file.replace(/.*[/\\]/, '')}:${ns.location.line}`)
587
608
  : '';
588
609
  const snippet = ns.location?.file
589
610
  ? readSourceSnippet(ns.location.file, ns.location.line)
590
611
  : null;
591
- const hasError = !ns.passed && !!ns.error?.message;
612
+ // Cache-verify failures aren't surfaced as red errors; the message lives
613
+ // alongside the parent invocation's `cache · miss` pill instead. The step
614
+ // still gets a body when a snippet or children exist; only the error block is suppressed.
615
+ const hasError = !ns.passed && !!ns.error?.message && !verifyContext;
592
616
  const hasBody = !!snippet || hasError || !!childrenHtml;
593
617
  const renderHeader = (tag) => {
594
618
  let header = `<${tag} class="filmstrip-header">`;
@@ -609,9 +633,17 @@ function renderNativeStep(ns, childrenHtml) {
609
633
  // Failures always render expanded so the error is immediately visible.
610
634
  // test.step blocks with nested content also default open so users see
611
635
  // what's inside; bare passing expects with just a snippet collapse to
612
- // keep tests with many assertions scannable.
613
- const defaultOpen = !ns.passed || (ns.category === 'test.step' && !!childrenHtml);
614
- const passClass = ns.passed ? 'native-step--passed' : 'native-step--failed';
636
+ // keep tests with many assertions scannable. Cache-verify divergences
637
+ // are routine signal, so they start collapsed and don't dominate the view.
638
+ const defaultOpen = !verifyContext &&
639
+ (!ns.passed || (ns.category === 'test.step' && !!childrenHtml));
640
+ const passClass = verifyContext
641
+ ? ns.passed
642
+ ? 'native-step--verify'
643
+ : 'native-step--verify-diverged'
644
+ : ns.passed
645
+ ? 'native-step--passed'
646
+ : 'native-step--failed';
615
647
  let html = `<details class="filmstrip-step native-step expandable ${passClass}"${defaultOpen ? ' open' : ''}>`;
616
648
  html += renderHeader('summary');
617
649
  if (hasError) {
@@ -679,12 +711,31 @@ function renderAiInvocation(inv, childrenHtml) {
679
711
  ? '<span class="step-status-ok">&#10003;</span>'
680
712
  : '<span class="step-status-fail">&#10007;</span>';
681
713
  const kindBadge = `<span class="ai-invocation-badge ai-invocation-badge--${inv.kind}">${esc(AI_KIND_LABELS[inv.kind])}</span>`;
682
- const cachedBadge = inv.cacheHit
683
- ? '<span class="ai-cached-badge">cached</span>'
684
- : '';
714
+ const cacheState = inv.cacheHit
715
+ ? 'hit'
716
+ : inv.cacheStored
717
+ ? 'stored'
718
+ : 'miss';
719
+ const cacheLabel = {
720
+ hit: 'cache · hit',
721
+ stored: 'cache · stored',
722
+ miss: 'cache · miss',
723
+ };
724
+ const cacheTitle = {
725
+ hit: 'Replayed from the page-AI cache. No AI call this run.',
726
+ stored: 'Live AI run; the resulting locators/steps were recorded to the page-AI cache. The next run can replay them without calling the AI.',
727
+ miss: "Live AI run; nothing was recorded to the page-AI cache. The next run will hit the AI again. For asserts, this typically means the AI's structured Playwright locators didn't reproduce its screenshot verdict.",
728
+ };
729
+ const cacheBadge = `<span class="ai-cache-badge ai-cache-badge--${cacheState}" title="${esc(cacheTitle[cacheState])}">${cacheLabel[cacheState]}</span>`;
730
+ // For a passing assert whose structured-step verifier failed, surface
731
+ // *why* the cache outcome was `miss`. The header pill carries the
732
+ // at-a-glance signal; this body content is the technical detail.
733
+ // (When the assert itself failed, the regular failure path already
734
+ // covers it.)
735
+ const showVerifierDetail = inv.passed && inv.verification?.failed === true;
685
736
  const hasError = !inv.passed && !!inv.error?.message;
686
737
  const hasAssertSteps = !!inv.assertSteps && inv.assertSteps.length > 0;
687
- const hasBody = hasError || !!childrenHtml || hasAssertSteps;
738
+ const hasBody = hasError || !!childrenHtml || hasAssertSteps || showVerifierDetail;
688
739
  const renderHeader = (tag) => {
689
740
  let header = `<${tag} class="filmstrip-header">`;
690
741
  header +=
@@ -692,7 +743,7 @@ function renderAiInvocation(inv, childrenHtml) {
692
743
  header += statusIcon;
693
744
  header += `<span class="ai-invocation-title">${esc(inv.description)}</span>`;
694
745
  header += kindBadge;
695
- header += cachedBadge;
746
+ header += cacheBadge;
696
747
  header += `</${tag}>`;
697
748
  return header;
698
749
  };
@@ -706,13 +757,20 @@ function renderAiInvocation(inv, childrenHtml) {
706
757
  // by default so the contents are visible without an extra click.
707
758
  const defaultOpen = !inv.passed || !!childrenHtml || hasAssertSteps;
708
759
  const passClass = inv.passed
709
- ? 'ai-invocation--passed'
760
+ ? showVerifierDetail
761
+ ? 'ai-invocation--passed ai-invocation--cache-miss'
762
+ : 'ai-invocation--passed'
710
763
  : 'ai-invocation--failed';
711
764
  let html = `<details class="filmstrip-step ai-invocation expandable ${passClass}"${defaultOpen ? ' open' : ''}>`;
712
765
  html += renderHeader('summary');
713
766
  if (hasError) {
714
767
  html += `<pre class="native-step-error">${ansiToHtml(inv.error.message)}</pre>`;
715
768
  }
769
+ if (showVerifierDetail && inv.verification?.errorMessage) {
770
+ html +=
771
+ `<div class="ai-cache-miss-explainer">The AI&rsquo;s screenshot verdict (passed) is what counts. Its structured Playwright steps did not reproduce that verdict against the live page — most often an over-broad locator — so they were not cached. The diverging check is highlighted below.</div>` +
772
+ `<pre class="ai-cache-miss-detail">${ansiToHtml(inv.verification.errorMessage)}</pre>`;
773
+ }
716
774
  if (hasAssertSteps) {
717
775
  const lines = inv
718
776
  .assertSteps.map((s) => esc(formatAssertionStep(s)))
@@ -1070,6 +1128,28 @@ function renderSteps(steps, stepScreenshots, nativeSteps, aiInvocations, outputD
1070
1128
  }
1071
1129
  return c;
1072
1130
  };
1131
+ // A native step is part of an AssertTool cache-worthiness verification
1132
+ // (rather than a user-authored assertion) iff its time window falls
1133
+ // inside the `verification` window of some enclosing AI invocation.
1134
+ // `verifyWindows` lists those windows in `aiInvocations` order; `inVerify`
1135
+ // checks membership without scanning the tree.
1136
+ const verifyWindows = [];
1137
+ for (const inv of aiInvocations) {
1138
+ if (inv.verification) {
1139
+ verifyWindows.push({
1140
+ start: inv.verification.startedAt,
1141
+ end: inv.verification.endedAt,
1142
+ });
1143
+ }
1144
+ }
1145
+ const inVerify = (t, tEnd) => {
1146
+ for (const w of verifyWindows) {
1147
+ if (t >= w.start && tEnd <= w.end) {
1148
+ return true;
1149
+ }
1150
+ }
1151
+ return false;
1152
+ };
1073
1153
  const renderNode = (node) => {
1074
1154
  if (node.kind === 'donobu') {
1075
1155
  return renderFilmstripStep(node.ss, outputDir);
@@ -1083,7 +1163,7 @@ function renderSteps(steps, stepScreenshots, nativeSteps, aiInvocations, outputD
1083
1163
  const childrenHtml = node.children.length > 0
1084
1164
  ? `<div class="native-step-children">${node.children.map(renderNode).join('')}</div>`
1085
1165
  : '';
1086
- return renderNativeStep(node.ns, childrenHtml);
1166
+ return renderNativeStep(node.ns, childrenHtml, inVerify(node.t, node.tEnd));
1087
1167
  };
1088
1168
  const stepCount = countNodes(roots);
1089
1169
  let html = '<details class="steps-section"><summary>Steps (' +
@@ -1781,6 +1861,8 @@ body::before{content:'';position:fixed;top:-750px;left:50%;transform:translateX(
1781
1861
  .filmstrip-summary{font-size:11px;color:var(--text-dim);margin-top:2px;padding-left:44px}
1782
1862
  .step-status-ok{color:var(--green);font-size:12px;font-weight:bold}
1783
1863
  .step-status-fail{color:var(--red);font-size:12px;font-weight:bold}
1864
+ .step-status-verified{color:#94a3b8;font-size:12px;font-weight:bold}
1865
+ .step-status-diverged{color:#fbbf24;font-size:14px;font-weight:bold;line-height:1}
1784
1866
  .filmstrip-detail{display:none;padding:8px 0 4px 44px;flex-direction:row;gap:12px;align-items:flex-start}
1785
1867
  .filmstrip-step.open .filmstrip-detail{display:flex}
1786
1868
  .filmstrip-detail>a{flex-shrink:0;max-width:50%}
@@ -1840,6 +1922,8 @@ details.native-step>summary::-webkit-details-marker{display:none}
1840
1922
  .native-step-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0}
1841
1923
  .native-step-badge--expect{background:rgba(99,102,241,.12);color:#818cf8}
1842
1924
  .native-step-badge--test\.step{background:rgba(16,185,129,.10);color:#34d399}
1925
+ .native-step-badge--verify{background:rgba(148,163,184,.12);color:#94a3b8}
1926
+ .native-step-badge--verify-diverged{background:rgba(245,158,11,.12);color:#fbbf24}
1843
1927
  .native-step-location{font-size:10px;color:var(--text-dim);font-family:var(--mono);margin-left:auto;flex-shrink:0;white-space:nowrap}
1844
1928
  details.native-step[open]>summary .native-step-chevron{transform:rotate(90deg)}
1845
1929
  .native-step-error{font-size:11px;font-family:var(--mono);padding:4px 0 2px 44px;margin:0;white-space:pre-wrap;word-break:break-word;color:var(--text-muted)}
@@ -1860,7 +1944,17 @@ details.ai-invocation>summary::-webkit-details-marker{display:none}
1860
1944
  .ai-invocation-badge--act{background:rgba(168,85,247,.12);color:#c084fc}
1861
1945
  .ai-invocation-badge--assert{background:rgba(236,72,153,.12);color:#f472b6}
1862
1946
  .ai-invocation-badge--locate{background:rgba(59,130,246,.12);color:#60a5fa}
1863
- .ai-cached-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0;background:rgba(245,158,11,.12);color:#fbbf24}
1947
+ .ai-cache-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0;font-family:var(--mono);cursor:help}
1948
+ .ai-cache-badge--hit{background:rgba(59,130,246,.12);color:#60a5fa}
1949
+ .ai-cache-badge--stored{background:rgba(52,211,153,.12);color:#34d399}
1950
+ .ai-cache-badge--miss{background:rgba(245,158,11,.12);color:#fbbf24}
1951
+ .ai-cache-miss-explainer{font-size:11px;color:var(--text-muted);padding:4px 0 2px 44px;line-height:1.45}
1952
+ .ai-cache-miss-detail{font-size:11px;font-family:var(--mono);padding:4px 0 2px 44px;margin:0;white-space:pre-wrap;word-break:break-word;color:var(--text-dim)}
1953
+ .ai-invocation--cache-miss>summary{box-shadow:inset 3px 0 0 0 rgba(245,158,11,.6)}
1954
+ .native-step--verify .snippet-line--target{background:rgba(148,163,184,.10)}
1955
+ .native-step--verify .snippet-line--target .snippet-linenum{color:#94a3b8}
1956
+ .native-step--verify-diverged .snippet-line--target{background:rgba(245,158,11,.10)}
1957
+ .native-step--verify-diverged .snippet-line--target .snippet-linenum{color:#fbbf24}
1864
1958
  details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)}
1865
1959
  .ai-assert-steps{font-size:11px;font-family:var(--mono);background:var(--bg);border:1px solid var(--border-subtle);border-radius:var(--radius);padding:8px 12px;margin:6px 0 2px 44px;color:var(--text-muted);white-space:pre-wrap;word-break:break-word;overflow-x:auto;max-height:240px;overflow-y:auto}
1866
1960
  .snippet-line{display:flex;padding:1px 8px;white-space:pre}
@@ -207,18 +207,34 @@ careful positioning lost, etc. A screenshot of the webpage has also been provide
207
207
  // When the AI assertion passes and structured steps were returned,
208
208
  // verify the steps against the live page before considering them
209
209
  // cacheable. If the steps fail, discard them but still return the
210
- // passing AI result.
210
+ // passing AI result. The verification window is recorded so the HTML
211
+ // reporter can label its `expect()` calls as cache-worthiness checks
212
+ // rather than treating an internal locator mismatch as an assertion
213
+ // failure.
211
214
  let verifiedSteps = assertionOutcome.output.playwrightAssertionSteps;
215
+ let verification;
212
216
  if (assertPassed &&
213
217
  Array.isArray(verifiedSteps) &&
214
218
  verifiedSteps.length > 0) {
219
+ const verifyStartedAt = Date.now();
215
220
  try {
216
221
  const executor = (0, assertCache_1.buildAssertExecutor)(verifiedSteps);
217
222
  await executor({ page: page, envData: context.envData });
223
+ verification = {
224
+ startedAt: verifyStartedAt,
225
+ endedAt: Date.now(),
226
+ failed: false,
227
+ };
218
228
  }
219
229
  catch (error) {
220
230
  Logger_1.appLogger.debug(`Structured assertion steps failed verification for: "${parameters.assertionToTestFor}" — discarding steps. Error: ${error.message}`);
221
231
  verifiedSteps = null;
232
+ verification = {
233
+ startedAt: verifyStartedAt,
234
+ endedAt: Date.now(),
235
+ failed: true,
236
+ errorMessage: error.message,
237
+ };
222
238
  }
223
239
  }
224
240
  const result = {
@@ -227,6 +243,7 @@ careful positioning lost, etc. A screenshot of the webpage has also been provide
227
243
  metadata: {
228
244
  ...assertionOutcome.output,
229
245
  playwrightAssertionSteps: verifiedSteps,
246
+ verification,
230
247
  attempt: attempt + 1,
231
248
  },
232
249
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "donobu",
3
- "version": "5.41.3",
3
+ "version": "5.41.4",
4
4
  "description": "Create browser automations with an LLM agent and replay them as Playwright scripts.",
5
5
  "main": "dist/main.js",
6
6
  "module": "dist/esm/main.js",