npm - donobu - Versions diffs - 5.41.3 → 5.41.4 - Mend

donobu 5.41.3 → 5.41.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/dist/esm/lib/ai/PageAi.js +3 -0
package/dist/esm/lib/page/DonobuExtendedPage.d.ts +30 -0
package/dist/esm/lib/page/extendPage.js +9 -0
package/dist/esm/reporter/render.js +111 -17
package/dist/esm/tools/AssertTool.js +18 -1
package/dist/lib/ai/PageAi.js +3 -0
package/dist/lib/page/DonobuExtendedPage.d.ts +30 -0
package/dist/lib/page/extendPage.js +9 -0
package/dist/reporter/render.js +111 -17
package/dist/tools/AssertTool.js +18 -1
package/package.json +1 -1

package/dist/esm/lib/ai/PageAi.js CHANGED Viewed

@@ -150,6 +150,7 @@ class PageAi {
     async ai(page, instruction, options) {
         const startedAt = Date.now();
         let cacheHit = false;
+        let cacheStored = false;
         let thrownError = undefined;
         try {
             const descriptor = this.buildDescriptor(page, instruction, options);
@@ -197,6 +198,7 @@ class PageAi {
                     }, this.donobu.toolRegistry);
                     const cacheEntry = cacheEntryBuilder_1.PageAiCacheEntryBuilder.fromMetadata(descriptor.key.pageUrl, runResult.donobuFlow.metadata, preparedToolCalls);
                     await this.cache.put(cacheEntry);
+                    cacheStored = true;
                 }
                 return runResult.parsedResult;
             }
@@ -212,6 +214,7 @@ class PageAi {
                 startedAt,
                 endedAt: Date.now(),
                 cacheHit,
+                cacheStored,
                 passed: thrownError === undefined,
                 error: thrownError !== undefined
                     ? { message: thrownError?.message }

package/dist/esm/lib/page/DonobuExtendedPage.d.ts CHANGED Viewed

@@ -488,10 +488,40 @@ export interface AiInvocationRecord {
     startedAt: number;
     endedAt: number;
     cacheHit: boolean;
+    /**
+     * For live (non-replay) invocations: `true` once this run successfully
+     * wrote an entry into the relevant page-AI cache, `false` if a write was
+     * attempted (or would have been) but didn't land. Combined with
+     * `cacheHit`, this gives the reporter a tri-state cache outcome — hit
+     * (replayed), stored (live + recorded), or miss (live + nothing cached).
+     * Always `false` when `cacheHit` is `true`.
+     */
+    cacheStored: boolean;
     passed: boolean;
     error?: {
         message?: string;
     };
+    /**
+     * For live `page.ai.assert` runs: metadata about the post-pass structured
+     * step verification. After the AI judges the assertion passed against a
+     * screenshot, AssertTool re-executes the AI-emitted Playwright `expect()`
+     * calls against the page to decide whether those structured steps are
+     * cache-worthy. When `failed: true`, the AI's visual verdict still stands
+     * — the tool returns success — but one of the structured `expect()` calls
+     * underneath threw. The reporter uses this to surface the divergence as a
+     * labelled signal rather than render the inner expect failure as a regular
+     * assertion failure.
+     *
+     * Undefined when verification didn't run (no structured steps emitted, AI
+     * verdict was failed, cached replay path, or AssertTool invoked outside
+     * the page.ai.assert wrapper).
+     */
+    verification?: {
+        startedAt: number;
+        endedAt: number;
+        failed: boolean;
+        errorMessage?: string;
+    };
     /**
      * For cached `page.ai.assert` invocations: the structured Playwright
      * assertion steps that were replayed. The reporter formats these back

package/dist/esm/lib/page/extendPage.js CHANGED Viewed

@@ -220,8 +220,10 @@ Valid options:
         assert: async (assertion, options) => {
             const aiInvocationStartedAt = Date.now();
             let aiInvocationCacheHit = false;
+            let aiInvocationCacheStored = false;
             let aiInvocationError = undefined;
             let aiInvocationAssertSteps;
+            let aiInvocationVerification;
             try {
                 const useCache = options?.cache !== false;
                 const clearCache = sharedState.runtimeDirectives?.clearPageAiCache ?? false;
@@ -322,6 +324,7 @@ Valid options:
                 finally {
                     sharedState.envVals = previousEnvVals;
                 }
+                aiInvocationVerification = result.outcome.metadata?.verification;
                 if (!result.outcome.isSuccessful) {
                     throw new ToolCallFailedException_1.ToolCallFailedException(AssertTool_1.AssertTool.NAME, result.outcome);
                 }
@@ -333,6 +336,7 @@ Valid options:
                             const cache = getOrInitPageAiCache();
                             const pageUrl = (0, cacheLocator_1.extractCacheKeyHostname)(page.url());
                             await cache.putAssert({ pageUrl, assertion, steps });
+                            aiInvocationCacheStored = true;
                             Logger_1.appLogger.debug(`Assert cache STORED for: "${assertion}"`);
                         }
                         catch (error) {
@@ -352,11 +356,13 @@ Valid options:
                     startedAt: aiInvocationStartedAt,
                     endedAt: Date.now(),
                     cacheHit: aiInvocationCacheHit,
+                    cacheStored: aiInvocationCacheStored,
                     passed: aiInvocationError === undefined,
                     error: aiInvocationError !== undefined
                         ? { message: aiInvocationError?.message }
                         : undefined,
                     assertSteps: aiInvocationAssertSteps,
+                    verification: aiInvocationVerification,
                 });
             }
         },
@@ -434,6 +440,7 @@ Use this information to return an appropriate JSON object.`,
         locate: async (description, options) => {
             const aiInvocationStartedAt = Date.now();
             let aiInvocationCacheHit = false;
+            let aiInvocationCacheStored = false;
             let aiInvocationError = undefined;
             const useCache = options?.cache !== false;
             const clearCache = sharedState.runtimeDirectives?.clearPageAiCache ?? false;
@@ -525,6 +532,7 @@ Use this information to return an appropriate JSON object.`,
                     try {
                         const cache = getOrInitPageAiCache();
                         await cache.putLocate({ pageUrl, description, result });
+                        aiInvocationCacheStored = true;
                         Logger_1.appLogger.debug(`Locate cache STORED for: "${description}"`);
                     }
                     catch (error) {
@@ -545,6 +553,7 @@ Use this information to return an appropriate JSON object.`,
                     startedAt: aiInvocationStartedAt,
                     endedAt: Date.now(),
                     cacheHit: aiInvocationCacheHit,
+                    cacheStored: aiInvocationCacheStored,
                     passed: aiInvocationError === undefined,
                     error: aiInvocationError !== undefined
                         ? { message: aiInvocationError?.message }

package/dist/esm/reporter/render.js CHANGED Viewed

@@ -577,18 +577,42 @@ function renderErrors(errors) {
     }
     return html;
 }
-function renderNativeStep(ns, childrenHtml) {
-    const statusIcon = ns.passed
-        ? '<span class="step-status-ok">&#10003;</span>'
-        : '<span class="step-status-fail">&#10007;</span>';
-    const categoryBadge = `<span class="native-step-badge native-step-badge--${ns.category}">${esc(ns.category)}</span>`;
+function renderNativeStep(ns, childrenHtml, verifyContext = false) {
+    // Expects inside an assert tool's cache-worthiness verification window are
+    // not real assertion checks — they're AssertTool re-running its own
+    // AI-emitted structured `expect()` calls to decide whether to cache them.
+    // When one fails, the AI's screenshot-based verdict still stands; only the
+    // structured locator faithfulness is in question. Render those with a
+    // distinct status (passed → "verified", failed → "diverged") so they
+    // don't look like assertion failures sitting under a passing assertion.
+    const statusIcon = verifyContext
+        ? ns.passed
+            ? '<span class="step-status-verified" title="Cache-verify check passed">&#10003;</span>'
+            : '<span class="step-status-diverged" title="Cache-verify locator did not match the AI&#39;s visual verdict">&#10073;</span>'
+        : ns.passed
+            ? '<span class="step-status-ok">&#10003;</span>'
+            : '<span class="step-status-fail">&#10007;</span>';
+    const categoryLabel = verifyContext
+        ? ns.passed
+            ? 'verify-cache'
+            : 'verify-cache diverged'
+        : ns.category;
+    const categoryClass = verifyContext
+        ? ns.passed
+            ? 'native-step-badge--verify'
+            : 'native-step-badge--verify-diverged'
+        : `native-step-badge--${ns.category}`;
+    const categoryBadge = `<span class="native-step-badge ${categoryClass}">${esc(categoryLabel)}</span>`;
     const locationStr = ns.location?.file
         ? esc(`${ns.location.file.replace(/.*[/\\]/, '')}:${ns.location.line}`)
         : '';
     const snippet = ns.location?.file
         ? readSourceSnippet(ns.location.file, ns.location.line)
         : null;
-    const hasError = !ns.passed && !!ns.error?.message;
+    // Cache-verify failures aren't surfaced as red errors; the message lives
+    // alongside the parent invocation's `cache · miss` pill instead, so the
+    // step's own error block is suppressed whenever `verifyContext` is set.
+    const hasError = !ns.passed && !!ns.error?.message && !verifyContext;
     const hasBody = !!snippet || hasError || !!childrenHtml;
     const renderHeader = (tag) => {
         let header = `<${tag} class="filmstrip-header">`;
@@ -609,9 +633,17 @@ function renderNativeStep(ns, childrenHtml) {
     // Failures always render expanded so the error is immediately visible.
     // test.step blocks with nested content also default open so users see
     // what's inside; bare passing expects with just a snippet collapse to
-    // keep tests with many assertions scannable.
-    const defaultOpen = !ns.passed || (ns.category === 'test.step' && !!childrenHtml);
-    const passClass = ns.passed ? 'native-step--passed' : 'native-step--failed';
+    // keep tests with many assertions scannable. Cache-verify divergences
+    // are routine signal — start collapsed so they don't dominate the view.
+    const defaultOpen = !verifyContext &&
+        (!ns.passed || (ns.category === 'test.step' && !!childrenHtml));
+    const passClass = verifyContext
+        ? ns.passed
+            ? 'native-step--verify'
+            : 'native-step--verify-diverged'
+        : ns.passed
+            ? 'native-step--passed'
+            : 'native-step--failed';
     let html = `<details class="filmstrip-step native-step expandable ${passClass}"${defaultOpen ? ' open' : ''}>`;
     html += renderHeader('summary');
     if (hasError) {
@@ -679,12 +711,31 @@ function renderAiInvocation(inv, childrenHtml) {
         ? '<span class="step-status-ok">&#10003;</span>'
         : '<span class="step-status-fail">&#10007;</span>';
     const kindBadge = `<span class="ai-invocation-badge ai-invocation-badge--${inv.kind}">${esc(AI_KIND_LABELS[inv.kind])}</span>`;
-    const cachedBadge = inv.cacheHit
-        ? '<span class="ai-cached-badge">cached</span>'
-        : '';
+    const cacheState = inv.cacheHit
+        ? 'hit'
+        : inv.cacheStored
+            ? 'stored'
+            : 'miss';
+    const cacheLabel = {
+        hit: 'cache · hit',
+        stored: 'cache · stored',
+        miss: 'cache · miss',
+    };
+    const cacheTitle = {
+        hit: 'Replayed from the page-AI cache. No AI call this run.',
+        stored: 'Live AI run; the resulting locators/steps were recorded to the page-AI cache. The next run can replay them without calling the AI.',
+        miss: "Live AI run; nothing was recorded to the page-AI cache. The next run will hit the AI again. For asserts, this typically means the AI's structured Playwright locators didn't reproduce its screenshot verdict.",
+    };
+    const cacheBadge = `<span class="ai-cache-badge ai-cache-badge--${cacheState}" title="${esc(cacheTitle[cacheState])}">${cacheLabel[cacheState]}</span>`;
+    // For a passing assert whose structured-step verifier failed, surface
+    // *why* the cache outcome was `miss`. The header pill carries the
+    // at-a-glance signal; this body content is the technical detail.
+    // (When the assert itself failed, the regular failure path already
+    // covers it.)
+    const showVerifierDetail = inv.passed && inv.verification?.failed === true;
     const hasError = !inv.passed && !!inv.error?.message;
     const hasAssertSteps = !!inv.assertSteps && inv.assertSteps.length > 0;
-    const hasBody = hasError || !!childrenHtml || hasAssertSteps;
+    const hasBody = hasError || !!childrenHtml || hasAssertSteps || showVerifierDetail;
     const renderHeader = (tag) => {
         let header = `<${tag} class="filmstrip-header">`;
         header +=
@@ -692,7 +743,7 @@ function renderAiInvocation(inv, childrenHtml) {
         header += statusIcon;
         header += `<span class="ai-invocation-title">${esc(inv.description)}</span>`;
         header += kindBadge;
-        header += cachedBadge;
+        header += cacheBadge;
         header += `</${tag}>`;
         return header;
     };
@@ -706,13 +757,20 @@ function renderAiInvocation(inv, childrenHtml) {
     // by default so the contents are visible without an extra click.
     const defaultOpen = !inv.passed || !!childrenHtml || hasAssertSteps;
     const passClass = inv.passed
-        ? 'ai-invocation--passed'
+        ? showVerifierDetail
+            ? 'ai-invocation--passed ai-invocation--cache-miss'
+            : 'ai-invocation--passed'
         : 'ai-invocation--failed';
     let html = `<details class="filmstrip-step ai-invocation expandable ${passClass}"${defaultOpen ? ' open' : ''}>`;
     html += renderHeader('summary');
     if (hasError) {
         html += `<pre class="native-step-error">${ansiToHtml(inv.error.message)}</pre>`;
     }
+    if (showVerifierDetail && inv.verification?.errorMessage) {
+        html +=
+            `<div class="ai-cache-miss-explainer">The AI&rsquo;s screenshot verdict (passed) is what counts. Its structured Playwright steps did not reproduce that verdict against the live page — most often an over-broad locator — so they were not cached. The diverging check is highlighted below.</div>` +
+                `<pre class="ai-cache-miss-detail">${ansiToHtml(inv.verification.errorMessage)}</pre>`;
+    }
     if (hasAssertSteps) {
         const lines = inv
             .assertSteps.map((s) => esc(formatAssertionStep(s)))
@@ -1070,6 +1128,28 @@ function renderSteps(steps, stepScreenshots, nativeSteps, aiInvocations, outputD
             }
             return c;
         };
+        // A native step is part of an AssertTool cache-worthiness verification
+        // (rather than a user-authored assertion) iff its time window falls
+        // inside the `verification` window of some enclosing AI invocation.
+        // `verifyWindows` is the ordered list of those windows; `inVerify`
+        // checks membership without scanning the tree.
+        const verifyWindows = [];
+        for (const inv of aiInvocations) {
+            if (inv.verification) {
+                verifyWindows.push({
+                    start: inv.verification.startedAt,
+                    end: inv.verification.endedAt,
+                });
+            }
+        }
+        const inVerify = (t, tEnd) => {
+            for (const w of verifyWindows) {
+                if (t >= w.start && tEnd <= w.end) {
+                    return true;
+                }
+            }
+            return false;
+        };
         const renderNode = (node) => {
             if (node.kind === 'donobu') {
                 return renderFilmstripStep(node.ss, outputDir);
@@ -1083,7 +1163,7 @@ function renderSteps(steps, stepScreenshots, nativeSteps, aiInvocations, outputD
             const childrenHtml = node.children.length > 0
                 ? `<div class="native-step-children">${node.children.map(renderNode).join('')}</div>`
                 : '';
-            return renderNativeStep(node.ns, childrenHtml);
+            return renderNativeStep(node.ns, childrenHtml, inVerify(node.t, node.tEnd));
         };
         const stepCount = countNodes(roots);
         let html = '<details class="steps-section"><summary>Steps (' +
@@ -1781,6 +1861,8 @@ body::before{content:'';position:fixed;top:-750px;left:50%;transform:translateX(
 .filmstrip-summary{font-size:11px;color:var(--text-dim);margin-top:2px;padding-left:44px}
 .step-status-ok{color:var(--green);font-size:12px;font-weight:bold}
 .step-status-fail{color:var(--red);font-size:12px;font-weight:bold}
+.step-status-verified{color:#94a3b8;font-size:12px;font-weight:bold}
+.step-status-diverged{color:#fbbf24;font-size:14px;font-weight:bold;line-height:1}
 .filmstrip-detail{display:none;padding:8px 0 4px 44px;flex-direction:row;gap:12px;align-items:flex-start}
 .filmstrip-step.open .filmstrip-detail{display:flex}
 .filmstrip-detail>a{flex-shrink:0;max-width:50%}
@@ -1840,6 +1922,8 @@ details.native-step>summary::-webkit-details-marker{display:none}
 .native-step-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0}
 .native-step-badge--expect{background:rgba(99,102,241,.12);color:#818cf8}
 .native-step-badge--test\.step{background:rgba(16,185,129,.10);color:#34d399}
+.native-step-badge--verify{background:rgba(148,163,184,.12);color:#94a3b8}
+.native-step-badge--verify-diverged{background:rgba(245,158,11,.12);color:#fbbf24}
 .native-step-location{font-size:10px;color:var(--text-dim);font-family:var(--mono);margin-left:auto;flex-shrink:0;white-space:nowrap}
 details.native-step[open]>summary .native-step-chevron{transform:rotate(90deg)}
 .native-step-error{font-size:11px;font-family:var(--mono);padding:4px 0 2px 44px;margin:0;white-space:pre-wrap;word-break:break-word;color:var(--text-muted)}
@@ -1860,7 +1944,17 @@ details.ai-invocation>summary::-webkit-details-marker{display:none}
 .ai-invocation-badge--act{background:rgba(168,85,247,.12);color:#c084fc}
 .ai-invocation-badge--assert{background:rgba(236,72,153,.12);color:#f472b6}
 .ai-invocation-badge--locate{background:rgba(59,130,246,.12);color:#60a5fa}
-.ai-cached-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0;background:rgba(245,158,11,.12);color:#fbbf24}
+.ai-cache-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0;font-family:var(--mono);cursor:help}
+.ai-cache-badge--hit{background:rgba(59,130,246,.12);color:#60a5fa}
+.ai-cache-badge--stored{background:rgba(52,211,153,.12);color:#34d399}
+.ai-cache-badge--miss{background:rgba(245,158,11,.12);color:#fbbf24}
+.ai-cache-miss-explainer{font-size:11px;color:var(--text-muted);padding:4px 0 2px 44px;line-height:1.45}
+.ai-cache-miss-detail{font-size:11px;font-family:var(--mono);padding:4px 0 2px 44px;margin:0;white-space:pre-wrap;word-break:break-word;color:var(--text-dim)}
+.ai-invocation--cache-miss>summary{box-shadow:inset 3px 0 0 0 rgba(245,158,11,.6)}
+.native-step--verify .snippet-line--target{background:rgba(148,163,184,.10)}
+.native-step--verify .snippet-line--target .snippet-linenum{color:#94a3b8}
+.native-step--verify-diverged .snippet-line--target{background:rgba(245,158,11,.10)}
+.native-step--verify-diverged .snippet-line--target .snippet-linenum{color:#fbbf24}
 details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)}
 .ai-assert-steps{font-size:11px;font-family:var(--mono);background:var(--bg);border:1px solid var(--border-subtle);border-radius:var(--radius);padding:8px 12px;margin:6px 0 2px 44px;color:var(--text-muted);white-space:pre-wrap;word-break:break-word;overflow-x:auto;max-height:240px;overflow-y:auto}
 .snippet-line{display:flex;padding:1px 8px;white-space:pre}

package/dist/esm/tools/AssertTool.js CHANGED Viewed

@@ -207,18 +207,34 @@ careful positioning lost, etc. A screenshot of the webpage has also been provide
             // When the AI assertion passes and structured steps were returned,
             // verify the steps against the live page before considering them
             // cacheable. If the steps fail, discard them but still return the
-            // passing AI result.
+            // passing AI result. The verification window is recorded so the HTML
+            // reporter can label its `expect()` calls as cache-worthiness checks
+            // rather than treating an internal locator mismatch as an assertion
+            // failure.
             let verifiedSteps = assertionOutcome.output.playwrightAssertionSteps;
+            let verification;
             if (assertPassed &&
                 Array.isArray(verifiedSteps) &&
                 verifiedSteps.length > 0) {
+                const verifyStartedAt = Date.now();
                 try {
                     const executor = (0, assertCache_1.buildAssertExecutor)(verifiedSteps);
                     await executor({ page: page, envData: context.envData });
+                    verification = {
+                        startedAt: verifyStartedAt,
+                        endedAt: Date.now(),
+                        failed: false,
+                    };
                 }
                 catch (error) {
                     Logger_1.appLogger.debug(`Structured assertion steps failed verification for: "${parameters.assertionToTestFor}" — discarding steps. Error: ${error.message}`);
                     verifiedSteps = null;
+                    verification = {
+                        startedAt: verifyStartedAt,
+                        endedAt: Date.now(),
+                        failed: true,
+                        errorMessage: error.message,
+                    };
                 }
             }
             const result = {
@@ -227,6 +243,7 @@ careful positioning lost, etc. A screenshot of the webpage has also been provide
                 metadata: {
                     ...assertionOutcome.output,
                     playwrightAssertionSteps: verifiedSteps,
+                    verification,
                     attempt: attempt + 1,
                 },
             };

package/dist/lib/ai/PageAi.js CHANGED Viewed

@@ -150,6 +150,7 @@ class PageAi {
     async ai(page, instruction, options) {
         const startedAt = Date.now();
         let cacheHit = false;
+        let cacheStored = false;
         let thrownError = undefined;
         try {
             const descriptor = this.buildDescriptor(page, instruction, options);
@@ -197,6 +198,7 @@ class PageAi {
                     }, this.donobu.toolRegistry);
                     const cacheEntry = cacheEntryBuilder_1.PageAiCacheEntryBuilder.fromMetadata(descriptor.key.pageUrl, runResult.donobuFlow.metadata, preparedToolCalls);
                     await this.cache.put(cacheEntry);
+                    cacheStored = true;
                 }
                 return runResult.parsedResult;
             }
@@ -212,6 +214,7 @@ class PageAi {
                 startedAt,
                 endedAt: Date.now(),
                 cacheHit,
+                cacheStored,
                 passed: thrownError === undefined,
                 error: thrownError !== undefined
                     ? { message: thrownError?.message }

package/dist/lib/page/DonobuExtendedPage.d.ts CHANGED Viewed

@@ -488,10 +488,40 @@ export interface AiInvocationRecord {
     startedAt: number;
     endedAt: number;
     cacheHit: boolean;
+    /**
+     * For live (non-replay) invocations: `true` once this run successfully
+     * wrote an entry into the relevant page-AI cache, `false` if a write was
+     * attempted (or would have been) but didn't land. Combined with
+     * `cacheHit`, this gives the reporter a tri-state cache outcome — hit
+     * (replayed), stored (live + recorded), or miss (live + nothing cached).
+     * Always `false` when `cacheHit` is `true`.
+     */
+    cacheStored: boolean;
     passed: boolean;
     error?: {
         message?: string;
     };
+    /**
+     * For live `page.ai.assert` runs: metadata about the post-pass structured
+     * step verification. After the AI judges the assertion passed against a
+     * screenshot, AssertTool re-executes the AI-emitted Playwright `expect()`
+     * calls against the page to decide whether those structured steps are
+     * cache-worthy. When `failed: true`, the AI's visual verdict still stands
+     * — the tool returns success — but one of the structured `expect()` calls
+     * underneath threw. The reporter uses this to surface the divergence as a
+     * labelled signal rather than render the inner expect failure as a regular
+     * assertion failure.
+     *
+     * Undefined when verification didn't run (no structured steps emitted, AI
+     * verdict was failed, cached replay path, or AssertTool invoked outside
+     * the page.ai.assert wrapper).
+     */
+    verification?: {
+        startedAt: number;
+        endedAt: number;
+        failed: boolean;
+        errorMessage?: string;
+    };
     /**
      * For cached `page.ai.assert` invocations: the structured Playwright
      * assertion steps that were replayed. The reporter formats these back

package/dist/lib/page/extendPage.js CHANGED Viewed

@@ -220,8 +220,10 @@ Valid options:
         assert: async (assertion, options) => {
             const aiInvocationStartedAt = Date.now();
             let aiInvocationCacheHit = false;
+            let aiInvocationCacheStored = false;
             let aiInvocationError = undefined;
             let aiInvocationAssertSteps;
+            let aiInvocationVerification;
             try {
                 const useCache = options?.cache !== false;
                 const clearCache = sharedState.runtimeDirectives?.clearPageAiCache ?? false;
@@ -322,6 +324,7 @@ Valid options:
                 finally {
                     sharedState.envVals = previousEnvVals;
                 }
+                aiInvocationVerification = result.outcome.metadata?.verification;
                 if (!result.outcome.isSuccessful) {
                     throw new ToolCallFailedException_1.ToolCallFailedException(AssertTool_1.AssertTool.NAME, result.outcome);
                 }
@@ -333,6 +336,7 @@ Valid options:
                             const cache = getOrInitPageAiCache();
                             const pageUrl = (0, cacheLocator_1.extractCacheKeyHostname)(page.url());
                             await cache.putAssert({ pageUrl, assertion, steps });
+                            aiInvocationCacheStored = true;
                             Logger_1.appLogger.debug(`Assert cache STORED for: "${assertion}"`);
                         }
                         catch (error) {
@@ -352,11 +356,13 @@ Valid options:
                     startedAt: aiInvocationStartedAt,
                     endedAt: Date.now(),
                     cacheHit: aiInvocationCacheHit,
+                    cacheStored: aiInvocationCacheStored,
                     passed: aiInvocationError === undefined,
                     error: aiInvocationError !== undefined
                         ? { message: aiInvocationError?.message }
                         : undefined,
                     assertSteps: aiInvocationAssertSteps,
+                    verification: aiInvocationVerification,
                 });
             }
         },
@@ -434,6 +440,7 @@ Use this information to return an appropriate JSON object.`,
         locate: async (description, options) => {
             const aiInvocationStartedAt = Date.now();
             let aiInvocationCacheHit = false;
+            let aiInvocationCacheStored = false;
             let aiInvocationError = undefined;
             const useCache = options?.cache !== false;
             const clearCache = sharedState.runtimeDirectives?.clearPageAiCache ?? false;
@@ -525,6 +532,7 @@ Use this information to return an appropriate JSON object.`,
                     try {
                         const cache = getOrInitPageAiCache();
                         await cache.putLocate({ pageUrl, description, result });
+                        aiInvocationCacheStored = true;
                         Logger_1.appLogger.debug(`Locate cache STORED for: "${description}"`);
                     }
                     catch (error) {
@@ -545,6 +553,7 @@ Use this information to return an appropriate JSON object.`,
                     startedAt: aiInvocationStartedAt,
                     endedAt: Date.now(),
                     cacheHit: aiInvocationCacheHit,
+                    cacheStored: aiInvocationCacheStored,
                     passed: aiInvocationError === undefined,
                     error: aiInvocationError !== undefined
                         ? { message: aiInvocationError?.message }

package/dist/reporter/render.js CHANGED Viewed

@@ -577,18 +577,42 @@ function renderErrors(errors) {
     }
     return html;
 }
-function renderNativeStep(ns, childrenHtml) {
-    const statusIcon = ns.passed
-        ? '<span class="step-status-ok">&#10003;</span>'
-        : '<span class="step-status-fail">&#10007;</span>';
-    const categoryBadge = `<span class="native-step-badge native-step-badge--${ns.category}">${esc(ns.category)}</span>`;
+function renderNativeStep(ns, childrenHtml, verifyContext = false) {
+    // Expects inside an assert tool's cache-worthiness verification window are
+    // not real assertion checks — they're AssertTool re-running its own
+    // AI-emitted structured `expect()` calls to decide whether to cache them.
+    // When one fails, the AI's screenshot-based verdict still stands; only the
+    // structured locator faithfulness is in question. Render those with a
+    // distinct status (passed → "verified", failed → "diverged") so they
+    // don't look like assertion failures sitting under a passing assertion.
+    const statusIcon = verifyContext
+        ? ns.passed
+            ? '<span class="step-status-verified" title="Cache-verify check passed">&#10003;</span>'
+            : '<span class="step-status-diverged" title="Cache-verify locator did not match the AI&#39;s visual verdict">&#10073;</span>'
+        : ns.passed
+            ? '<span class="step-status-ok">&#10003;</span>'
+            : '<span class="step-status-fail">&#10007;</span>';
+    const categoryLabel = verifyContext
+        ? ns.passed
+            ? 'verify-cache'
+            : 'verify-cache diverged'
+        : ns.category;
+    const categoryClass = verifyContext
+        ? ns.passed
+            ? 'native-step-badge--verify'
+            : 'native-step-badge--verify-diverged'
+        : `native-step-badge--${ns.category}`;
+    const categoryBadge = `<span class="native-step-badge ${categoryClass}">${esc(categoryLabel)}</span>`;
     const locationStr = ns.location?.file
         ? esc(`${ns.location.file.replace(/.*[/\\]/, '')}:${ns.location.line}`)
         : '';
     const snippet = ns.location?.file
         ? readSourceSnippet(ns.location.file, ns.location.line)
         : null;
-    const hasError = !ns.passed && !!ns.error?.message;
+    // Cache-verify failures aren't surfaced as red errors; the message lives
+    // alongside the parent invocation's `cache · miss` pill instead, so the
+    // step's own error block is suppressed whenever `verifyContext` is set.
+    const hasError = !ns.passed && !!ns.error?.message && !verifyContext;
     const hasBody = !!snippet || hasError || !!childrenHtml;
     const renderHeader = (tag) => {
         let header = `<${tag} class="filmstrip-header">`;
@@ -609,9 +633,17 @@ function renderNativeStep(ns, childrenHtml) {
     // Failures always render expanded so the error is immediately visible.
     // test.step blocks with nested content also default open so users see
     // what's inside; bare passing expects with just a snippet collapse to
-    // keep tests with many assertions scannable.
-    const defaultOpen = !ns.passed || (ns.category === 'test.step' && !!childrenHtml);
-    const passClass = ns.passed ? 'native-step--passed' : 'native-step--failed';
+    // keep tests with many assertions scannable. Cache-verify divergences
+    // are routine signal — start collapsed so they don't dominate the view.
+    const defaultOpen = !verifyContext &&
+        (!ns.passed || (ns.category === 'test.step' && !!childrenHtml));
+    const passClass = verifyContext
+        ? ns.passed
+            ? 'native-step--verify'
+            : 'native-step--verify-diverged'
+        : ns.passed
+            ? 'native-step--passed'
+            : 'native-step--failed';
     let html = `<details class="filmstrip-step native-step expandable ${passClass}"${defaultOpen ? ' open' : ''}>`;
     html += renderHeader('summary');
     if (hasError) {
@@ -679,12 +711,31 @@ function renderAiInvocation(inv, childrenHtml) {
         ? '<span class="step-status-ok">&#10003;</span>'
         : '<span class="step-status-fail">&#10007;</span>';
     const kindBadge = `<span class="ai-invocation-badge ai-invocation-badge--${inv.kind}">${esc(AI_KIND_LABELS[inv.kind])}</span>`;
-    const cachedBadge = inv.cacheHit
-        ? '<span class="ai-cached-badge">cached</span>'
-        : '';
+    const cacheState = inv.cacheHit
+        ? 'hit'
+        : inv.cacheStored
+            ? 'stored'
+            : 'miss';
+    const cacheLabel = {
+        hit: 'cache · hit',
+        stored: 'cache · stored',
+        miss: 'cache · miss',
+    };
+    const cacheTitle = {
+        hit: 'Replayed from the page-AI cache. No AI call this run.',
+        stored: 'Live AI run; the resulting locators/steps were recorded to the page-AI cache. The next run can replay them without calling the AI.',
+        miss: "Live AI run; nothing was recorded to the page-AI cache. The next run will hit the AI again. For asserts, this typically means the AI's structured Playwright locators didn't reproduce its screenshot verdict.",
+    };
+    const cacheBadge = `<span class="ai-cache-badge ai-cache-badge--${cacheState}" title="${esc(cacheTitle[cacheState])}">${cacheLabel[cacheState]}</span>`;
+    // For a passing assert whose structured-step verifier failed, surface
+    // *why* the cache outcome was `miss`. The header pill carries the
+    // at-a-glance signal; this body content is the technical detail.
+    // (When the assert itself failed, the regular failure path already
+    // covers it.)
+    const showVerifierDetail = inv.passed && inv.verification?.failed === true;
     const hasError = !inv.passed && !!inv.error?.message;
     const hasAssertSteps = !!inv.assertSteps && inv.assertSteps.length > 0;
-    const hasBody = hasError || !!childrenHtml || hasAssertSteps;
+    const hasBody = hasError || !!childrenHtml || hasAssertSteps || showVerifierDetail;
     const renderHeader = (tag) => {
         let header = `<${tag} class="filmstrip-header">`;
         header +=
@@ -692,7 +743,7 @@ function renderAiInvocation(inv, childrenHtml) {
         header += statusIcon;
         header += `<span class="ai-invocation-title">${esc(inv.description)}</span>`;
         header += kindBadge;
-        header += cachedBadge;
+        header += cacheBadge;
         header += `</${tag}>`;
         return header;
     };
@@ -706,13 +757,20 @@ function renderAiInvocation(inv, childrenHtml) {
     // by default so the contents are visible without an extra click.
     const defaultOpen = !inv.passed || !!childrenHtml || hasAssertSteps;
     const passClass = inv.passed
-        ? 'ai-invocation--passed'
+        ? showVerifierDetail
+            ? 'ai-invocation--passed ai-invocation--cache-miss'
+            : 'ai-invocation--passed'
         : 'ai-invocation--failed';
     let html = `<details class="filmstrip-step ai-invocation expandable ${passClass}"${defaultOpen ? ' open' : ''}>`;
     html += renderHeader('summary');
     if (hasError) {
         html += `<pre class="native-step-error">${ansiToHtml(inv.error.message)}</pre>`;
     }
+    if (showVerifierDetail && inv.verification?.errorMessage) {
+        html +=
+            `<div class="ai-cache-miss-explainer">The AI&rsquo;s screenshot verdict (passed) is what counts. Its structured Playwright steps did not reproduce that verdict against the live page — most often an over-broad locator — so they were not cached. The diverging check is highlighted below.</div>` +
+                `<pre class="ai-cache-miss-detail">${ansiToHtml(inv.verification.errorMessage)}</pre>`;
+    }
     if (hasAssertSteps) {
         const lines = inv
             .assertSteps.map((s) => esc(formatAssertionStep(s)))
@@ -1070,6 +1128,28 @@ function renderSteps(steps, stepScreenshots, nativeSteps, aiInvocations, outputD
             }
             return c;
         };
+        // A native step is part of an AssertTool cache-worthiness verification
+        // (rather than a user-authored assertion) iff its time window falls
+        // inside the `verification` window of some enclosing AI invocation.
+        // `verifyWindows` is the ordered list of those windows; `inVerify`
+        // checks membership without scanning the tree.
+        const verifyWindows = [];
+        for (const inv of aiInvocations) {
+            if (inv.verification) {
+                verifyWindows.push({
+                    start: inv.verification.startedAt,
+                    end: inv.verification.endedAt,
+                });
+            }
+        }
+        const inVerify = (t, tEnd) => {
+            for (const w of verifyWindows) {
+                if (t >= w.start && tEnd <= w.end) {
+                    return true;
+                }
+            }
+            return false;
+        };
         const renderNode = (node) => {
             if (node.kind === 'donobu') {
                 return renderFilmstripStep(node.ss, outputDir);
@@ -1083,7 +1163,7 @@ function renderSteps(steps, stepScreenshots, nativeSteps, aiInvocations, outputD
             const childrenHtml = node.children.length > 0
                 ? `<div class="native-step-children">${node.children.map(renderNode).join('')}</div>`
                 : '';
-            return renderNativeStep(node.ns, childrenHtml);
+            return renderNativeStep(node.ns, childrenHtml, inVerify(node.t, node.tEnd));
         };
         const stepCount = countNodes(roots);
         let html = '<details class="steps-section"><summary>Steps (' +
@@ -1781,6 +1861,8 @@ body::before{content:'';position:fixed;top:-750px;left:50%;transform:translateX(
 .filmstrip-summary{font-size:11px;color:var(--text-dim);margin-top:2px;padding-left:44px}
 .step-status-ok{color:var(--green);font-size:12px;font-weight:bold}
 .step-status-fail{color:var(--red);font-size:12px;font-weight:bold}
+.step-status-verified{color:#94a3b8;font-size:12px;font-weight:bold}
+.step-status-diverged{color:#fbbf24;font-size:14px;font-weight:bold;line-height:1}
 .filmstrip-detail{display:none;padding:8px 0 4px 44px;flex-direction:row;gap:12px;align-items:flex-start}
 .filmstrip-step.open .filmstrip-detail{display:flex}
 .filmstrip-detail>a{flex-shrink:0;max-width:50%}
@@ -1840,6 +1922,8 @@ details.native-step>summary::-webkit-details-marker{display:none}
 .native-step-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0}
 .native-step-badge--expect{background:rgba(99,102,241,.12);color:#818cf8}
 .native-step-badge--test\.step{background:rgba(16,185,129,.10);color:#34d399}
+.native-step-badge--verify{background:rgba(148,163,184,.12);color:#94a3b8}
+.native-step-badge--verify-diverged{background:rgba(245,158,11,.12);color:#fbbf24}
 .native-step-location{font-size:10px;color:var(--text-dim);font-family:var(--mono);margin-left:auto;flex-shrink:0;white-space:nowrap}
 details.native-step[open]>summary .native-step-chevron{transform:rotate(90deg)}
 .native-step-error{font-size:11px;font-family:var(--mono);padding:4px 0 2px 44px;margin:0;white-space:pre-wrap;word-break:break-word;color:var(--text-muted)}
@@ -1860,7 +1944,17 @@ details.ai-invocation>summary::-webkit-details-marker{display:none}
 .ai-invocation-badge--act{background:rgba(168,85,247,.12);color:#c084fc}
 .ai-invocation-badge--assert{background:rgba(236,72,153,.12);color:#f472b6}
 .ai-invocation-badge--locate{background:rgba(59,130,246,.12);color:#60a5fa}
-.ai-cached-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0;background:rgba(245,158,11,.12);color:#fbbf24}
+.ai-cache-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0;font-family:var(--mono);cursor:help}
+.ai-cache-badge--hit{background:rgba(59,130,246,.12);color:#60a5fa}
+.ai-cache-badge--stored{background:rgba(52,211,153,.12);color:#34d399}
+.ai-cache-badge--miss{background:rgba(245,158,11,.12);color:#fbbf24}
+.ai-cache-miss-explainer{font-size:11px;color:var(--text-muted);padding:4px 0 2px 44px;line-height:1.45}
+.ai-cache-miss-detail{font-size:11px;font-family:var(--mono);padding:4px 0 2px 44px;margin:0;white-space:pre-wrap;word-break:break-word;color:var(--text-dim)}
+.ai-invocation--cache-miss>summary{box-shadow:inset 3px 0 0 0 rgba(245,158,11,.6)}
+.native-step--verify .snippet-line--target{background:rgba(148,163,184,.10)}
+.native-step--verify .snippet-line--target .snippet-linenum{color:#94a3b8}
+.native-step--verify-diverged .snippet-line--target{background:rgba(245,158,11,.10)}
+.native-step--verify-diverged .snippet-line--target .snippet-linenum{color:#fbbf24}
 details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)}
 .ai-assert-steps{font-size:11px;font-family:var(--mono);background:var(--bg);border:1px solid var(--border-subtle);border-radius:var(--radius);padding:8px 12px;margin:6px 0 2px 44px;color:var(--text-muted);white-space:pre-wrap;word-break:break-word;overflow-x:auto;max-height:240px;overflow-y:auto}
 .snippet-line{display:flex;padding:1px 8px;white-space:pre}

package/dist/tools/AssertTool.js CHANGED Viewed

@@ -207,18 +207,34 @@ careful positioning lost, etc. A screenshot of the webpage has also been provide
             // When the AI assertion passes and structured steps were returned,
             // verify the steps against the live page before considering them
             // cacheable. If the steps fail, discard them but still return the
-            // passing AI result.
+            // passing AI result. The verification window is recorded so the HTML
+            // reporter can label its `expect()` calls as cache-worthiness checks
+            // rather than treating an internal locator mismatch as an assertion
+            // failure.
             let verifiedSteps = assertionOutcome.output.playwrightAssertionSteps;
+            let verification;
             if (assertPassed &&
                 Array.isArray(verifiedSteps) &&
                 verifiedSteps.length > 0) {
+                const verifyStartedAt = Date.now();
                 try {
                     const executor = (0, assertCache_1.buildAssertExecutor)(verifiedSteps);
                     await executor({ page: page, envData: context.envData });
+                    verification = {
+                        startedAt: verifyStartedAt,
+                        endedAt: Date.now(),
+                        failed: false,
+                    };
                 }
                 catch (error) {
                     Logger_1.appLogger.debug(`Structured assertion steps failed verification for: "${parameters.assertionToTestFor}" — discarding steps. Error: ${error.message}`);
                     verifiedSteps = null;
+                    verification = {
+                        startedAt: verifyStartedAt,
+                        endedAt: Date.now(),
+                        failed: true,
+                        errorMessage: error.message,
+                    };
                 }
             }
             const result = {
@@ -227,6 +243,7 @@ careful positioning lost, etc. A screenshot of the webpage has also been provide
                 metadata: {
                     ...assertionOutcome.output,
                     playwrightAssertionSteps: verifiedSteps,
+                    verification,
                     attempt: attempt + 1,
                 },
             };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "donobu",
-  "version": "5.41.3",
+  "version": "5.41.4",
   "description": "Create browser automations with an LLM agent and replay them as Playwright scripts.",
   "main": "dist/main.js",
   "module": "dist/esm/main.js",