donobu 5.41.3 → 5.41.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/lib/ai/PageAi.js +3 -0
- package/dist/esm/lib/page/DonobuExtendedPage.d.ts +30 -0
- package/dist/esm/lib/page/extendPage.js +9 -0
- package/dist/esm/reporter/render.js +111 -17
- package/dist/esm/tools/AssertTool.js +18 -1
- package/dist/lib/ai/PageAi.js +3 -0
- package/dist/lib/page/DonobuExtendedPage.d.ts +30 -0
- package/dist/lib/page/extendPage.js +9 -0
- package/dist/reporter/render.js +111 -17
- package/dist/tools/AssertTool.js +18 -1
- package/package.json +1 -1
|
@@ -150,6 +150,7 @@ class PageAi {
|
|
|
150
150
|
async ai(page, instruction, options) {
|
|
151
151
|
const startedAt = Date.now();
|
|
152
152
|
let cacheHit = false;
|
|
153
|
+
let cacheStored = false;
|
|
153
154
|
let thrownError = undefined;
|
|
154
155
|
try {
|
|
155
156
|
const descriptor = this.buildDescriptor(page, instruction, options);
|
|
@@ -197,6 +198,7 @@ class PageAi {
|
|
|
197
198
|
}, this.donobu.toolRegistry);
|
|
198
199
|
const cacheEntry = cacheEntryBuilder_1.PageAiCacheEntryBuilder.fromMetadata(descriptor.key.pageUrl, runResult.donobuFlow.metadata, preparedToolCalls);
|
|
199
200
|
await this.cache.put(cacheEntry);
|
|
201
|
+
cacheStored = true;
|
|
200
202
|
}
|
|
201
203
|
return runResult.parsedResult;
|
|
202
204
|
}
|
|
@@ -212,6 +214,7 @@ class PageAi {
|
|
|
212
214
|
startedAt,
|
|
213
215
|
endedAt: Date.now(),
|
|
214
216
|
cacheHit,
|
|
217
|
+
cacheStored,
|
|
215
218
|
passed: thrownError === undefined,
|
|
216
219
|
error: thrownError !== undefined
|
|
217
220
|
? { message: thrownError?.message }
|
|
@@ -488,10 +488,40 @@ export interface AiInvocationRecord {
|
|
|
488
488
|
startedAt: number;
|
|
489
489
|
endedAt: number;
|
|
490
490
|
cacheHit: boolean;
|
|
491
|
+
/**
|
|
492
|
+
* For live (non-replay) invocations: `true` once this run successfully
|
|
493
|
+
* wrote an entry into the relevant page-AI cache, `false` if a write was
|
|
494
|
+
* attempted (or would have been) but didn't land. Combined with
|
|
495
|
+
* `cacheHit`, this gives the reporter a tri-state cache outcome — hit
|
|
496
|
+
* (replayed), stored (live + recorded), or miss (live + nothing cached).
|
|
497
|
+
* Always `false` when `cacheHit` is `true`.
|
|
498
|
+
*/
|
|
499
|
+
cacheStored: boolean;
|
|
491
500
|
passed: boolean;
|
|
492
501
|
error?: {
|
|
493
502
|
message?: string;
|
|
494
503
|
};
|
|
504
|
+
/**
|
|
505
|
+
* For live `page.ai.assert` runs: metadata about the post-pass structured
|
|
506
|
+
* step verification. After the AI judges the assertion passed against a
|
|
507
|
+
* screenshot, AssertTool re-executes the AI-emitted Playwright `expect()`
|
|
508
|
+
* calls against the page to decide whether those structured steps are
|
|
509
|
+
* cache-worthy. When `failed: true`, the AI's visual verdict still stands
|
|
510
|
+
* — the tool returns success — but one of the structured `expect()` calls
|
|
511
|
+
* underneath threw. The reporter uses this to surface the divergence as a
|
|
512
|
+
* labelled signal rather than render the inner expect failure as a regular
|
|
513
|
+
* assertion failure.
|
|
514
|
+
*
|
|
515
|
+
* Undefined when verification didn't run (no structured steps emitted, AI
|
|
516
|
+
* verdict was failed, cached replay path, or AssertTool invoked outside
|
|
517
|
+
* the page.ai.assert wrapper).
|
|
518
|
+
*/
|
|
519
|
+
verification?: {
|
|
520
|
+
startedAt: number;
|
|
521
|
+
endedAt: number;
|
|
522
|
+
failed: boolean;
|
|
523
|
+
errorMessage?: string;
|
|
524
|
+
};
|
|
495
525
|
/**
|
|
496
526
|
* For cached `page.ai.assert` invocations: the structured Playwright
|
|
497
527
|
* assertion steps that were replayed. The reporter formats these back
|
|
@@ -220,8 +220,10 @@ Valid options:
|
|
|
220
220
|
assert: async (assertion, options) => {
|
|
221
221
|
const aiInvocationStartedAt = Date.now();
|
|
222
222
|
let aiInvocationCacheHit = false;
|
|
223
|
+
let aiInvocationCacheStored = false;
|
|
223
224
|
let aiInvocationError = undefined;
|
|
224
225
|
let aiInvocationAssertSteps;
|
|
226
|
+
let aiInvocationVerification;
|
|
225
227
|
try {
|
|
226
228
|
const useCache = options?.cache !== false;
|
|
227
229
|
const clearCache = sharedState.runtimeDirectives?.clearPageAiCache ?? false;
|
|
@@ -322,6 +324,7 @@ Valid options:
|
|
|
322
324
|
finally {
|
|
323
325
|
sharedState.envVals = previousEnvVals;
|
|
324
326
|
}
|
|
327
|
+
aiInvocationVerification = result.outcome.metadata?.verification;
|
|
325
328
|
if (!result.outcome.isSuccessful) {
|
|
326
329
|
throw new ToolCallFailedException_1.ToolCallFailedException(AssertTool_1.AssertTool.NAME, result.outcome);
|
|
327
330
|
}
|
|
@@ -333,6 +336,7 @@ Valid options:
|
|
|
333
336
|
const cache = getOrInitPageAiCache();
|
|
334
337
|
const pageUrl = (0, cacheLocator_1.extractCacheKeyHostname)(page.url());
|
|
335
338
|
await cache.putAssert({ pageUrl, assertion, steps });
|
|
339
|
+
aiInvocationCacheStored = true;
|
|
336
340
|
Logger_1.appLogger.debug(`Assert cache STORED for: "${assertion}"`);
|
|
337
341
|
}
|
|
338
342
|
catch (error) {
|
|
@@ -352,11 +356,13 @@ Valid options:
|
|
|
352
356
|
startedAt: aiInvocationStartedAt,
|
|
353
357
|
endedAt: Date.now(),
|
|
354
358
|
cacheHit: aiInvocationCacheHit,
|
|
359
|
+
cacheStored: aiInvocationCacheStored,
|
|
355
360
|
passed: aiInvocationError === undefined,
|
|
356
361
|
error: aiInvocationError !== undefined
|
|
357
362
|
? { message: aiInvocationError?.message }
|
|
358
363
|
: undefined,
|
|
359
364
|
assertSteps: aiInvocationAssertSteps,
|
|
365
|
+
verification: aiInvocationVerification,
|
|
360
366
|
});
|
|
361
367
|
}
|
|
362
368
|
},
|
|
@@ -434,6 +440,7 @@ Use this information to return an appropriate JSON object.`,
|
|
|
434
440
|
locate: async (description, options) => {
|
|
435
441
|
const aiInvocationStartedAt = Date.now();
|
|
436
442
|
let aiInvocationCacheHit = false;
|
|
443
|
+
let aiInvocationCacheStored = false;
|
|
437
444
|
let aiInvocationError = undefined;
|
|
438
445
|
const useCache = options?.cache !== false;
|
|
439
446
|
const clearCache = sharedState.runtimeDirectives?.clearPageAiCache ?? false;
|
|
@@ -525,6 +532,7 @@ Use this information to return an appropriate JSON object.`,
|
|
|
525
532
|
try {
|
|
526
533
|
const cache = getOrInitPageAiCache();
|
|
527
534
|
await cache.putLocate({ pageUrl, description, result });
|
|
535
|
+
aiInvocationCacheStored = true;
|
|
528
536
|
Logger_1.appLogger.debug(`Locate cache STORED for: "${description}"`);
|
|
529
537
|
}
|
|
530
538
|
catch (error) {
|
|
@@ -545,6 +553,7 @@ Use this information to return an appropriate JSON object.`,
|
|
|
545
553
|
startedAt: aiInvocationStartedAt,
|
|
546
554
|
endedAt: Date.now(),
|
|
547
555
|
cacheHit: aiInvocationCacheHit,
|
|
556
|
+
cacheStored: aiInvocationCacheStored,
|
|
548
557
|
passed: aiInvocationError === undefined,
|
|
549
558
|
error: aiInvocationError !== undefined
|
|
550
559
|
? { message: aiInvocationError?.message }
|
|
@@ -577,18 +577,42 @@ function renderErrors(errors) {
|
|
|
577
577
|
}
|
|
578
578
|
return html;
|
|
579
579
|
}
|
|
580
|
-
function renderNativeStep(ns, childrenHtml) {
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
580
|
+
function renderNativeStep(ns, childrenHtml, verifyContext = false) {
|
|
581
|
+
// Expects inside an assert tool's cache-worthiness verification window are
|
|
582
|
+
// not real assertion checks — they're AssertTool re-running its own
|
|
583
|
+
// AI-emitted structured `expect()` calls to decide whether to cache them.
|
|
584
|
+
// When one fails, the AI's screenshot-based verdict still stands; only the
|
|
585
|
+
// structured locator faithfulness is in question. Render those with a
|
|
586
|
+
// distinct status (passed → "verified", failed → "diverged") so they
|
|
587
|
+
// don't look like assertion failures sitting under a passing assertion.
|
|
588
|
+
const statusIcon = verifyContext
|
|
589
|
+
? ns.passed
|
|
590
|
+
? '<span class="step-status-verified" title="Cache-verify check passed">✓</span>'
|
|
591
|
+
: '<span class="step-status-diverged" title="Cache-verify locator did not match the AI&#39;s visual verdict">❙</span>'
|
|
592
|
+
: ns.passed
|
|
593
|
+
? '<span class="step-status-ok">✓</span>'
|
|
594
|
+
: '<span class="step-status-fail">✗</span>';
|
|
595
|
+
const categoryLabel = verifyContext
|
|
596
|
+
? ns.passed
|
|
597
|
+
? 'verify-cache'
|
|
598
|
+
: 'verify-cache diverged'
|
|
599
|
+
: ns.category;
|
|
600
|
+
const categoryClass = verifyContext
|
|
601
|
+
? ns.passed
|
|
602
|
+
? 'native-step-badge--verify'
|
|
603
|
+
: 'native-step-badge--verify-diverged'
|
|
604
|
+
: `native-step-badge--${ns.category}`;
|
|
605
|
+
const categoryBadge = `<span class="native-step-badge ${categoryClass}">${esc(categoryLabel)}</span>`;
|
|
585
606
|
const locationStr = ns.location?.file
|
|
586
607
|
? esc(`${ns.location.file.replace(/.*[/\\]/, '')}:${ns.location.line}`)
|
|
587
608
|
: '';
|
|
588
609
|
const snippet = ns.location?.file
|
|
589
610
|
? readSourceSnippet(ns.location.file, ns.location.line)
|
|
590
611
|
: null;
|
|
591
|
-
|
|
612
|
+
// Cache-verify failures aren't surfaced as red errors; the message lives
|
|
613
|
+
// alongside the parent invocation's `cache · miss` pill instead. We still
|
|
614
|
+
// want the body open so the locator's call log is visible at a glance.
|
|
615
|
+
const hasError = !ns.passed && !!ns.error?.message && !verifyContext;
|
|
592
616
|
const hasBody = !!snippet || hasError || !!childrenHtml;
|
|
593
617
|
const renderHeader = (tag) => {
|
|
594
618
|
let header = `<${tag} class="filmstrip-header">`;
|
|
@@ -609,9 +633,17 @@ function renderNativeStep(ns, childrenHtml) {
|
|
|
609
633
|
// Failures always render expanded so the error is immediately visible.
|
|
610
634
|
// test.step blocks with nested content also default open so users see
|
|
611
635
|
// what's inside; bare passing expects with just a snippet collapse to
|
|
612
|
-
// keep tests with many assertions scannable.
|
|
613
|
-
|
|
614
|
-
const
|
|
636
|
+
// keep tests with many assertions scannable. Cache-verify divergences
|
|
637
|
+
// are routine signal — start collapsed so they don't dominate the view.
|
|
638
|
+
const defaultOpen = !verifyContext &&
|
|
639
|
+
(!ns.passed || (ns.category === 'test.step' && !!childrenHtml));
|
|
640
|
+
const passClass = verifyContext
|
|
641
|
+
? ns.passed
|
|
642
|
+
? 'native-step--verify'
|
|
643
|
+
: 'native-step--verify-diverged'
|
|
644
|
+
: ns.passed
|
|
645
|
+
? 'native-step--passed'
|
|
646
|
+
: 'native-step--failed';
|
|
615
647
|
let html = `<details class="filmstrip-step native-step expandable ${passClass}"${defaultOpen ? ' open' : ''}>`;
|
|
616
648
|
html += renderHeader('summary');
|
|
617
649
|
if (hasError) {
|
|
@@ -679,12 +711,31 @@ function renderAiInvocation(inv, childrenHtml) {
|
|
|
679
711
|
? '<span class="step-status-ok">✓</span>'
|
|
680
712
|
: '<span class="step-status-fail">✗</span>';
|
|
681
713
|
const kindBadge = `<span class="ai-invocation-badge ai-invocation-badge--${inv.kind}">${esc(AI_KIND_LABELS[inv.kind])}</span>`;
|
|
682
|
-
const
|
|
683
|
-
? '
|
|
684
|
-
:
|
|
714
|
+
const cacheState = inv.cacheHit
|
|
715
|
+
? 'hit'
|
|
716
|
+
: inv.cacheStored
|
|
717
|
+
? 'stored'
|
|
718
|
+
: 'miss';
|
|
719
|
+
const cacheLabel = {
|
|
720
|
+
hit: 'cache · hit',
|
|
721
|
+
stored: 'cache · stored',
|
|
722
|
+
miss: 'cache · miss',
|
|
723
|
+
};
|
|
724
|
+
const cacheTitle = {
|
|
725
|
+
hit: 'Replayed from the page-AI cache. No AI call this run.',
|
|
726
|
+
stored: 'Live AI run; the resulting locators/steps were recorded to the page-AI cache. The next run can replay them without calling the AI.',
|
|
727
|
+
miss: "Live AI run; nothing was recorded to the page-AI cache. The next run will hit the AI again. For asserts, this typically means the AI's structured Playwright locators didn't reproduce its screenshot verdict.",
|
|
728
|
+
};
|
|
729
|
+
const cacheBadge = `<span class="ai-cache-badge ai-cache-badge--${cacheState}" title="${esc(cacheTitle[cacheState])}">${cacheLabel[cacheState]}</span>`;
|
|
730
|
+
// For a passing assert whose structured-step verifier failed, surface
|
|
731
|
+
// *why* the cache outcome was `miss`. The header pill carries the
|
|
732
|
+
// at-a-glance signal; this body content is the technical detail.
|
|
733
|
+
// (When the assert itself failed, the regular failure path already
|
|
734
|
+
// covers it.)
|
|
735
|
+
const showVerifierDetail = inv.passed && inv.verification?.failed === true;
|
|
685
736
|
const hasError = !inv.passed && !!inv.error?.message;
|
|
686
737
|
const hasAssertSteps = !!inv.assertSteps && inv.assertSteps.length > 0;
|
|
687
|
-
const hasBody = hasError || !!childrenHtml || hasAssertSteps;
|
|
738
|
+
const hasBody = hasError || !!childrenHtml || hasAssertSteps || showVerifierDetail;
|
|
688
739
|
const renderHeader = (tag) => {
|
|
689
740
|
let header = `<${tag} class="filmstrip-header">`;
|
|
690
741
|
header +=
|
|
@@ -692,7 +743,7 @@ function renderAiInvocation(inv, childrenHtml) {
|
|
|
692
743
|
header += statusIcon;
|
|
693
744
|
header += `<span class="ai-invocation-title">${esc(inv.description)}</span>`;
|
|
694
745
|
header += kindBadge;
|
|
695
|
-
header +=
|
|
746
|
+
header += cacheBadge;
|
|
696
747
|
header += `</${tag}>`;
|
|
697
748
|
return header;
|
|
698
749
|
};
|
|
@@ -706,13 +757,20 @@ function renderAiInvocation(inv, childrenHtml) {
|
|
|
706
757
|
// by default so the contents are visible without an extra click.
|
|
707
758
|
const defaultOpen = !inv.passed || !!childrenHtml || hasAssertSteps;
|
|
708
759
|
const passClass = inv.passed
|
|
709
|
-
?
|
|
760
|
+
? showVerifierDetail
|
|
761
|
+
? 'ai-invocation--passed ai-invocation--cache-miss'
|
|
762
|
+
: 'ai-invocation--passed'
|
|
710
763
|
: 'ai-invocation--failed';
|
|
711
764
|
let html = `<details class="filmstrip-step ai-invocation expandable ${passClass}"${defaultOpen ? ' open' : ''}>`;
|
|
712
765
|
html += renderHeader('summary');
|
|
713
766
|
if (hasError) {
|
|
714
767
|
html += `<pre class="native-step-error">${ansiToHtml(inv.error.message)}</pre>`;
|
|
715
768
|
}
|
|
769
|
+
if (showVerifierDetail && inv.verification?.errorMessage) {
|
|
770
|
+
html +=
|
|
771
|
+
`<div class="ai-cache-miss-explainer">The AI’s screenshot verdict (passed) is what counts. Its structured Playwright steps did not reproduce that verdict against the live page — most often an over-broad locator — so they were not cached. The diverging check is highlighted below.</div>` +
|
|
772
|
+
`<pre class="ai-cache-miss-detail">${ansiToHtml(inv.verification.errorMessage)}</pre>`;
|
|
773
|
+
}
|
|
716
774
|
if (hasAssertSteps) {
|
|
717
775
|
const lines = inv
|
|
718
776
|
.assertSteps.map((s) => esc(formatAssertionStep(s)))
|
|
@@ -1070,6 +1128,28 @@ function renderSteps(steps, stepScreenshots, nativeSteps, aiInvocations, outputD
|
|
|
1070
1128
|
}
|
|
1071
1129
|
return c;
|
|
1072
1130
|
};
|
|
1131
|
+
// A native step is part of an AssertTool cache-worthiness verification
|
|
1132
|
+
// (rather than a user-authored assertion) iff its time window falls
|
|
1133
|
+
// inside the `verification` window of some enclosing AI invocation.
|
|
1134
|
+
// `verifyWindows` is the ordered list of those windows; `inVerify`
|
|
1135
|
+
// checks membership without scanning the tree.
|
|
1136
|
+
const verifyWindows = [];
|
|
1137
|
+
for (const inv of aiInvocations) {
|
|
1138
|
+
if (inv.verification) {
|
|
1139
|
+
verifyWindows.push({
|
|
1140
|
+
start: inv.verification.startedAt,
|
|
1141
|
+
end: inv.verification.endedAt,
|
|
1142
|
+
});
|
|
1143
|
+
}
|
|
1144
|
+
}
|
|
1145
|
+
const inVerify = (t, tEnd) => {
|
|
1146
|
+
for (const w of verifyWindows) {
|
|
1147
|
+
if (t >= w.start && tEnd <= w.end) {
|
|
1148
|
+
return true;
|
|
1149
|
+
}
|
|
1150
|
+
}
|
|
1151
|
+
return false;
|
|
1152
|
+
};
|
|
1073
1153
|
const renderNode = (node) => {
|
|
1074
1154
|
if (node.kind === 'donobu') {
|
|
1075
1155
|
return renderFilmstripStep(node.ss, outputDir);
|
|
@@ -1083,7 +1163,7 @@ function renderSteps(steps, stepScreenshots, nativeSteps, aiInvocations, outputD
|
|
|
1083
1163
|
const childrenHtml = node.children.length > 0
|
|
1084
1164
|
? `<div class="native-step-children">${node.children.map(renderNode).join('')}</div>`
|
|
1085
1165
|
: '';
|
|
1086
|
-
return renderNativeStep(node.ns, childrenHtml);
|
|
1166
|
+
return renderNativeStep(node.ns, childrenHtml, inVerify(node.t, node.tEnd));
|
|
1087
1167
|
};
|
|
1088
1168
|
const stepCount = countNodes(roots);
|
|
1089
1169
|
let html = '<details class="steps-section"><summary>Steps (' +
|
|
@@ -1781,6 +1861,8 @@ body::before{content:'';position:fixed;top:-750px;left:50%;transform:translateX(
|
|
|
1781
1861
|
.filmstrip-summary{font-size:11px;color:var(--text-dim);margin-top:2px;padding-left:44px}
|
|
1782
1862
|
.step-status-ok{color:var(--green);font-size:12px;font-weight:bold}
|
|
1783
1863
|
.step-status-fail{color:var(--red);font-size:12px;font-weight:bold}
|
|
1864
|
+
.step-status-verified{color:#94a3b8;font-size:12px;font-weight:bold}
|
|
1865
|
+
.step-status-diverged{color:#fbbf24;font-size:14px;font-weight:bold;line-height:1}
|
|
1784
1866
|
.filmstrip-detail{display:none;padding:8px 0 4px 44px;flex-direction:row;gap:12px;align-items:flex-start}
|
|
1785
1867
|
.filmstrip-step.open .filmstrip-detail{display:flex}
|
|
1786
1868
|
.filmstrip-detail>a{flex-shrink:0;max-width:50%}
|
|
@@ -1840,6 +1922,8 @@ details.native-step>summary::-webkit-details-marker{display:none}
|
|
|
1840
1922
|
.native-step-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0}
|
|
1841
1923
|
.native-step-badge--expect{background:rgba(99,102,241,.12);color:#818cf8}
|
|
1842
1924
|
.native-step-badge--test\.step{background:rgba(16,185,129,.10);color:#34d399}
|
|
1925
|
+
.native-step-badge--verify{background:rgba(148,163,184,.12);color:#94a3b8}
|
|
1926
|
+
.native-step-badge--verify-diverged{background:rgba(245,158,11,.12);color:#fbbf24}
|
|
1843
1927
|
.native-step-location{font-size:10px;color:var(--text-dim);font-family:var(--mono);margin-left:auto;flex-shrink:0;white-space:nowrap}
|
|
1844
1928
|
details.native-step[open]>summary .native-step-chevron{transform:rotate(90deg)}
|
|
1845
1929
|
.native-step-error{font-size:11px;font-family:var(--mono);padding:4px 0 2px 44px;margin:0;white-space:pre-wrap;word-break:break-word;color:var(--text-muted)}
|
|
@@ -1860,7 +1944,17 @@ details.ai-invocation>summary::-webkit-details-marker{display:none}
|
|
|
1860
1944
|
.ai-invocation-badge--act{background:rgba(168,85,247,.12);color:#c084fc}
|
|
1861
1945
|
.ai-invocation-badge--assert{background:rgba(236,72,153,.12);color:#f472b6}
|
|
1862
1946
|
.ai-invocation-badge--locate{background:rgba(59,130,246,.12);color:#60a5fa}
|
|
1863
|
-
.ai-
|
|
1947
|
+
.ai-cache-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0;font-family:var(--mono);cursor:help}
|
|
1948
|
+
.ai-cache-badge--hit{background:rgba(59,130,246,.12);color:#60a5fa}
|
|
1949
|
+
.ai-cache-badge--stored{background:rgba(52,211,153,.12);color:#34d399}
|
|
1950
|
+
.ai-cache-badge--miss{background:rgba(245,158,11,.12);color:#fbbf24}
|
|
1951
|
+
.ai-cache-miss-explainer{font-size:11px;color:var(--text-muted);padding:4px 0 2px 44px;line-height:1.45}
|
|
1952
|
+
.ai-cache-miss-detail{font-size:11px;font-family:var(--mono);padding:4px 0 2px 44px;margin:0;white-space:pre-wrap;word-break:break-word;color:var(--text-dim)}
|
|
1953
|
+
.ai-invocation--cache-miss>summary{box-shadow:inset 3px 0 0 0 rgba(245,158,11,.6)}
|
|
1954
|
+
.native-step--verify .snippet-line--target{background:rgba(148,163,184,.10)}
|
|
1955
|
+
.native-step--verify .snippet-line--target .snippet-linenum{color:#94a3b8}
|
|
1956
|
+
.native-step--verify-diverged .snippet-line--target{background:rgba(245,158,11,.10)}
|
|
1957
|
+
.native-step--verify-diverged .snippet-line--target .snippet-linenum{color:#fbbf24}
|
|
1864
1958
|
details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)}
|
|
1865
1959
|
.ai-assert-steps{font-size:11px;font-family:var(--mono);background:var(--bg);border:1px solid var(--border-subtle);border-radius:var(--radius);padding:8px 12px;margin:6px 0 2px 44px;color:var(--text-muted);white-space:pre-wrap;word-break:break-word;overflow-x:auto;max-height:240px;overflow-y:auto}
|
|
1866
1960
|
.snippet-line{display:flex;padding:1px 8px;white-space:pre}
|
|
@@ -207,18 +207,34 @@ careful positioning lost, etc. A screenshot of the webpage has also been provide
|
|
|
207
207
|
// When the AI assertion passes and structured steps were returned,
|
|
208
208
|
// verify the steps against the live page before considering them
|
|
209
209
|
// cacheable. If the steps fail, discard them but still return the
|
|
210
|
-
// passing AI result.
|
|
210
|
+
// passing AI result. The verification window is recorded so the HTML
|
|
211
|
+
// reporter can label its `expect()` calls as cache-worthiness checks
|
|
212
|
+
// rather than treating an internal locator mismatch as an assertion
|
|
213
|
+
// failure.
|
|
211
214
|
let verifiedSteps = assertionOutcome.output.playwrightAssertionSteps;
|
|
215
|
+
let verification;
|
|
212
216
|
if (assertPassed &&
|
|
213
217
|
Array.isArray(verifiedSteps) &&
|
|
214
218
|
verifiedSteps.length > 0) {
|
|
219
|
+
const verifyStartedAt = Date.now();
|
|
215
220
|
try {
|
|
216
221
|
const executor = (0, assertCache_1.buildAssertExecutor)(verifiedSteps);
|
|
217
222
|
await executor({ page: page, envData: context.envData });
|
|
223
|
+
verification = {
|
|
224
|
+
startedAt: verifyStartedAt,
|
|
225
|
+
endedAt: Date.now(),
|
|
226
|
+
failed: false,
|
|
227
|
+
};
|
|
218
228
|
}
|
|
219
229
|
catch (error) {
|
|
220
230
|
Logger_1.appLogger.debug(`Structured assertion steps failed verification for: "${parameters.assertionToTestFor}" — discarding steps. Error: ${error.message}`);
|
|
221
231
|
verifiedSteps = null;
|
|
232
|
+
verification = {
|
|
233
|
+
startedAt: verifyStartedAt,
|
|
234
|
+
endedAt: Date.now(),
|
|
235
|
+
failed: true,
|
|
236
|
+
errorMessage: error.message,
|
|
237
|
+
};
|
|
222
238
|
}
|
|
223
239
|
}
|
|
224
240
|
const result = {
|
|
@@ -227,6 +243,7 @@ careful positioning lost, etc. A screenshot of the webpage has also been provide
|
|
|
227
243
|
metadata: {
|
|
228
244
|
...assertionOutcome.output,
|
|
229
245
|
playwrightAssertionSteps: verifiedSteps,
|
|
246
|
+
verification,
|
|
230
247
|
attempt: attempt + 1,
|
|
231
248
|
},
|
|
232
249
|
};
|
package/dist/lib/ai/PageAi.js
CHANGED
|
@@ -150,6 +150,7 @@ class PageAi {
|
|
|
150
150
|
async ai(page, instruction, options) {
|
|
151
151
|
const startedAt = Date.now();
|
|
152
152
|
let cacheHit = false;
|
|
153
|
+
let cacheStored = false;
|
|
153
154
|
let thrownError = undefined;
|
|
154
155
|
try {
|
|
155
156
|
const descriptor = this.buildDescriptor(page, instruction, options);
|
|
@@ -197,6 +198,7 @@ class PageAi {
|
|
|
197
198
|
}, this.donobu.toolRegistry);
|
|
198
199
|
const cacheEntry = cacheEntryBuilder_1.PageAiCacheEntryBuilder.fromMetadata(descriptor.key.pageUrl, runResult.donobuFlow.metadata, preparedToolCalls);
|
|
199
200
|
await this.cache.put(cacheEntry);
|
|
201
|
+
cacheStored = true;
|
|
200
202
|
}
|
|
201
203
|
return runResult.parsedResult;
|
|
202
204
|
}
|
|
@@ -212,6 +214,7 @@ class PageAi {
|
|
|
212
214
|
startedAt,
|
|
213
215
|
endedAt: Date.now(),
|
|
214
216
|
cacheHit,
|
|
217
|
+
cacheStored,
|
|
215
218
|
passed: thrownError === undefined,
|
|
216
219
|
error: thrownError !== undefined
|
|
217
220
|
? { message: thrownError?.message }
|
|
@@ -488,10 +488,40 @@ export interface AiInvocationRecord {
|
|
|
488
488
|
startedAt: number;
|
|
489
489
|
endedAt: number;
|
|
490
490
|
cacheHit: boolean;
|
|
491
|
+
/**
|
|
492
|
+
* For live (non-replay) invocations: `true` once this run successfully
|
|
493
|
+
* wrote an entry into the relevant page-AI cache, `false` if a write was
|
|
494
|
+
* attempted (or would have been) but didn't land. Combined with
|
|
495
|
+
* `cacheHit`, this gives the reporter a tri-state cache outcome — hit
|
|
496
|
+
* (replayed), stored (live + recorded), or miss (live + nothing cached).
|
|
497
|
+
* Always `false` when `cacheHit` is `true`.
|
|
498
|
+
*/
|
|
499
|
+
cacheStored: boolean;
|
|
491
500
|
passed: boolean;
|
|
492
501
|
error?: {
|
|
493
502
|
message?: string;
|
|
494
503
|
};
|
|
504
|
+
/**
|
|
505
|
+
* For live `page.ai.assert` runs: metadata about the post-pass structured
|
|
506
|
+
* step verification. After the AI judges the assertion passed against a
|
|
507
|
+
* screenshot, AssertTool re-executes the AI-emitted Playwright `expect()`
|
|
508
|
+
* calls against the page to decide whether those structured steps are
|
|
509
|
+
* cache-worthy. When `failed: true`, the AI's visual verdict still stands
|
|
510
|
+
* — the tool returns success — but one of the structured `expect()` calls
|
|
511
|
+
* underneath threw. The reporter uses this to surface the divergence as a
|
|
512
|
+
* labelled signal rather than render the inner expect failure as a regular
|
|
513
|
+
* assertion failure.
|
|
514
|
+
*
|
|
515
|
+
* Undefined when verification didn't run (no structured steps emitted, AI
|
|
516
|
+
* verdict was failed, cached replay path, or AssertTool invoked outside
|
|
517
|
+
* the page.ai.assert wrapper).
|
|
518
|
+
*/
|
|
519
|
+
verification?: {
|
|
520
|
+
startedAt: number;
|
|
521
|
+
endedAt: number;
|
|
522
|
+
failed: boolean;
|
|
523
|
+
errorMessage?: string;
|
|
524
|
+
};
|
|
495
525
|
/**
|
|
496
526
|
* For cached `page.ai.assert` invocations: the structured Playwright
|
|
497
527
|
* assertion steps that were replayed. The reporter formats these back
|
|
@@ -220,8 +220,10 @@ Valid options:
|
|
|
220
220
|
assert: async (assertion, options) => {
|
|
221
221
|
const aiInvocationStartedAt = Date.now();
|
|
222
222
|
let aiInvocationCacheHit = false;
|
|
223
|
+
let aiInvocationCacheStored = false;
|
|
223
224
|
let aiInvocationError = undefined;
|
|
224
225
|
let aiInvocationAssertSteps;
|
|
226
|
+
let aiInvocationVerification;
|
|
225
227
|
try {
|
|
226
228
|
const useCache = options?.cache !== false;
|
|
227
229
|
const clearCache = sharedState.runtimeDirectives?.clearPageAiCache ?? false;
|
|
@@ -322,6 +324,7 @@ Valid options:
|
|
|
322
324
|
finally {
|
|
323
325
|
sharedState.envVals = previousEnvVals;
|
|
324
326
|
}
|
|
327
|
+
aiInvocationVerification = result.outcome.metadata?.verification;
|
|
325
328
|
if (!result.outcome.isSuccessful) {
|
|
326
329
|
throw new ToolCallFailedException_1.ToolCallFailedException(AssertTool_1.AssertTool.NAME, result.outcome);
|
|
327
330
|
}
|
|
@@ -333,6 +336,7 @@ Valid options:
|
|
|
333
336
|
const cache = getOrInitPageAiCache();
|
|
334
337
|
const pageUrl = (0, cacheLocator_1.extractCacheKeyHostname)(page.url());
|
|
335
338
|
await cache.putAssert({ pageUrl, assertion, steps });
|
|
339
|
+
aiInvocationCacheStored = true;
|
|
336
340
|
Logger_1.appLogger.debug(`Assert cache STORED for: "${assertion}"`);
|
|
337
341
|
}
|
|
338
342
|
catch (error) {
|
|
@@ -352,11 +356,13 @@ Valid options:
|
|
|
352
356
|
startedAt: aiInvocationStartedAt,
|
|
353
357
|
endedAt: Date.now(),
|
|
354
358
|
cacheHit: aiInvocationCacheHit,
|
|
359
|
+
cacheStored: aiInvocationCacheStored,
|
|
355
360
|
passed: aiInvocationError === undefined,
|
|
356
361
|
error: aiInvocationError !== undefined
|
|
357
362
|
? { message: aiInvocationError?.message }
|
|
358
363
|
: undefined,
|
|
359
364
|
assertSteps: aiInvocationAssertSteps,
|
|
365
|
+
verification: aiInvocationVerification,
|
|
360
366
|
});
|
|
361
367
|
}
|
|
362
368
|
},
|
|
@@ -434,6 +440,7 @@ Use this information to return an appropriate JSON object.`,
|
|
|
434
440
|
locate: async (description, options) => {
|
|
435
441
|
const aiInvocationStartedAt = Date.now();
|
|
436
442
|
let aiInvocationCacheHit = false;
|
|
443
|
+
let aiInvocationCacheStored = false;
|
|
437
444
|
let aiInvocationError = undefined;
|
|
438
445
|
const useCache = options?.cache !== false;
|
|
439
446
|
const clearCache = sharedState.runtimeDirectives?.clearPageAiCache ?? false;
|
|
@@ -525,6 +532,7 @@ Use this information to return an appropriate JSON object.`,
|
|
|
525
532
|
try {
|
|
526
533
|
const cache = getOrInitPageAiCache();
|
|
527
534
|
await cache.putLocate({ pageUrl, description, result });
|
|
535
|
+
aiInvocationCacheStored = true;
|
|
528
536
|
Logger_1.appLogger.debug(`Locate cache STORED for: "${description}"`);
|
|
529
537
|
}
|
|
530
538
|
catch (error) {
|
|
@@ -545,6 +553,7 @@ Use this information to return an appropriate JSON object.`,
|
|
|
545
553
|
startedAt: aiInvocationStartedAt,
|
|
546
554
|
endedAt: Date.now(),
|
|
547
555
|
cacheHit: aiInvocationCacheHit,
|
|
556
|
+
cacheStored: aiInvocationCacheStored,
|
|
548
557
|
passed: aiInvocationError === undefined,
|
|
549
558
|
error: aiInvocationError !== undefined
|
|
550
559
|
? { message: aiInvocationError?.message }
|
package/dist/reporter/render.js
CHANGED
|
@@ -577,18 +577,42 @@ function renderErrors(errors) {
|
|
|
577
577
|
}
|
|
578
578
|
return html;
|
|
579
579
|
}
|
|
580
|
-
function renderNativeStep(ns, childrenHtml) {
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
580
|
+
function renderNativeStep(ns, childrenHtml, verifyContext = false) {
|
|
581
|
+
// Expects inside an assert tool's cache-worthiness verification window are
|
|
582
|
+
// not real assertion checks — they're AssertTool re-running its own
|
|
583
|
+
// AI-emitted structured `expect()` calls to decide whether to cache them.
|
|
584
|
+
// When one fails, the AI's screenshot-based verdict still stands; only the
|
|
585
|
+
// structured locator faithfulness is in question. Render those with a
|
|
586
|
+
// distinct status (passed → "verified", failed → "diverged") so they
|
|
587
|
+
// don't look like assertion failures sitting under a passing assertion.
|
|
588
|
+
const statusIcon = verifyContext
|
|
589
|
+
? ns.passed
|
|
590
|
+
? '<span class="step-status-verified" title="Cache-verify check passed">✓</span>'
|
|
591
|
+
: '<span class="step-status-diverged" title="Cache-verify locator did not match the AI&#39;s visual verdict">❙</span>'
|
|
592
|
+
: ns.passed
|
|
593
|
+
? '<span class="step-status-ok">✓</span>'
|
|
594
|
+
: '<span class="step-status-fail">✗</span>';
|
|
595
|
+
const categoryLabel = verifyContext
|
|
596
|
+
? ns.passed
|
|
597
|
+
? 'verify-cache'
|
|
598
|
+
: 'verify-cache diverged'
|
|
599
|
+
: ns.category;
|
|
600
|
+
const categoryClass = verifyContext
|
|
601
|
+
? ns.passed
|
|
602
|
+
? 'native-step-badge--verify'
|
|
603
|
+
: 'native-step-badge--verify-diverged'
|
|
604
|
+
: `native-step-badge--${ns.category}`;
|
|
605
|
+
const categoryBadge = `<span class="native-step-badge ${categoryClass}">${esc(categoryLabel)}</span>`;
|
|
585
606
|
const locationStr = ns.location?.file
|
|
586
607
|
? esc(`${ns.location.file.replace(/.*[/\\]/, '')}:${ns.location.line}`)
|
|
587
608
|
: '';
|
|
588
609
|
const snippet = ns.location?.file
|
|
589
610
|
? readSourceSnippet(ns.location.file, ns.location.line)
|
|
590
611
|
: null;
|
|
591
|
-
|
|
612
|
+
// Cache-verify failures aren't surfaced as red errors; the message lives
|
|
613
|
+
// alongside the parent invocation's `cache · miss` pill instead. We still
|
|
614
|
+
// want the body open so the locator's call log is visible at a glance.
|
|
615
|
+
const hasError = !ns.passed && !!ns.error?.message && !verifyContext;
|
|
592
616
|
const hasBody = !!snippet || hasError || !!childrenHtml;
|
|
593
617
|
const renderHeader = (tag) => {
|
|
594
618
|
let header = `<${tag} class="filmstrip-header">`;
|
|
@@ -609,9 +633,17 @@ function renderNativeStep(ns, childrenHtml) {
|
|
|
609
633
|
// Failures always render expanded so the error is immediately visible.
|
|
610
634
|
// test.step blocks with nested content also default open so users see
|
|
611
635
|
// what's inside; bare passing expects with just a snippet collapse to
|
|
612
|
-
// keep tests with many assertions scannable.
|
|
613
|
-
|
|
614
|
-
const
|
|
636
|
+
// keep tests with many assertions scannable. Cache-verify divergences
|
|
637
|
+
// are routine signal — start collapsed so they don't dominate the view.
|
|
638
|
+
const defaultOpen = !verifyContext &&
|
|
639
|
+
(!ns.passed || (ns.category === 'test.step' && !!childrenHtml));
|
|
640
|
+
const passClass = verifyContext
|
|
641
|
+
? ns.passed
|
|
642
|
+
? 'native-step--verify'
|
|
643
|
+
: 'native-step--verify-diverged'
|
|
644
|
+
: ns.passed
|
|
645
|
+
? 'native-step--passed'
|
|
646
|
+
: 'native-step--failed';
|
|
615
647
|
let html = `<details class="filmstrip-step native-step expandable ${passClass}"${defaultOpen ? ' open' : ''}>`;
|
|
616
648
|
html += renderHeader('summary');
|
|
617
649
|
if (hasError) {
|
|
@@ -679,12 +711,31 @@ function renderAiInvocation(inv, childrenHtml) {
|
|
|
679
711
|
? '<span class="step-status-ok">✓</span>'
|
|
680
712
|
: '<span class="step-status-fail">✗</span>';
|
|
681
713
|
const kindBadge = `<span class="ai-invocation-badge ai-invocation-badge--${inv.kind}">${esc(AI_KIND_LABELS[inv.kind])}</span>`;
|
|
682
|
-
const
|
|
683
|
-
? '
|
|
684
|
-
:
|
|
714
|
+
const cacheState = inv.cacheHit
|
|
715
|
+
? 'hit'
|
|
716
|
+
: inv.cacheStored
|
|
717
|
+
? 'stored'
|
|
718
|
+
: 'miss';
|
|
719
|
+
const cacheLabel = {
|
|
720
|
+
hit: 'cache · hit',
|
|
721
|
+
stored: 'cache · stored',
|
|
722
|
+
miss: 'cache · miss',
|
|
723
|
+
};
|
|
724
|
+
const cacheTitle = {
|
|
725
|
+
hit: 'Replayed from the page-AI cache. No AI call this run.',
|
|
726
|
+
stored: 'Live AI run; the resulting locators/steps were recorded to the page-AI cache. The next run can replay them without calling the AI.',
|
|
727
|
+
miss: "Live AI run; nothing was recorded to the page-AI cache. The next run will hit the AI again. For asserts, this typically means the AI's structured Playwright locators didn't reproduce its screenshot verdict.",
|
|
728
|
+
};
|
|
729
|
+
const cacheBadge = `<span class="ai-cache-badge ai-cache-badge--${cacheState}" title="${esc(cacheTitle[cacheState])}">${cacheLabel[cacheState]}</span>`;
|
|
730
|
+
// For a passing assert whose structured-step verifier failed, surface
|
|
731
|
+
// *why* the cache outcome was `miss`. The header pill carries the
|
|
732
|
+
// at-a-glance signal; this body content is the technical detail.
|
|
733
|
+
// (When the assert itself failed, the regular failure path already
|
|
734
|
+
// covers it.)
|
|
735
|
+
const showVerifierDetail = inv.passed && inv.verification?.failed === true;
|
|
685
736
|
const hasError = !inv.passed && !!inv.error?.message;
|
|
686
737
|
const hasAssertSteps = !!inv.assertSteps && inv.assertSteps.length > 0;
|
|
687
|
-
const hasBody = hasError || !!childrenHtml || hasAssertSteps;
|
|
738
|
+
const hasBody = hasError || !!childrenHtml || hasAssertSteps || showVerifierDetail;
|
|
688
739
|
const renderHeader = (tag) => {
|
|
689
740
|
let header = `<${tag} class="filmstrip-header">`;
|
|
690
741
|
header +=
|
|
@@ -692,7 +743,7 @@ function renderAiInvocation(inv, childrenHtml) {
|
|
|
692
743
|
header += statusIcon;
|
|
693
744
|
header += `<span class="ai-invocation-title">${esc(inv.description)}</span>`;
|
|
694
745
|
header += kindBadge;
|
|
695
|
-
header +=
|
|
746
|
+
header += cacheBadge;
|
|
696
747
|
header += `</${tag}>`;
|
|
697
748
|
return header;
|
|
698
749
|
};
|
|
@@ -706,13 +757,20 @@ function renderAiInvocation(inv, childrenHtml) {
|
|
|
706
757
|
// by default so the contents are visible without an extra click.
|
|
707
758
|
const defaultOpen = !inv.passed || !!childrenHtml || hasAssertSteps;
|
|
708
759
|
const passClass = inv.passed
|
|
709
|
-
?
|
|
760
|
+
? showVerifierDetail
|
|
761
|
+
? 'ai-invocation--passed ai-invocation--cache-miss'
|
|
762
|
+
: 'ai-invocation--passed'
|
|
710
763
|
: 'ai-invocation--failed';
|
|
711
764
|
let html = `<details class="filmstrip-step ai-invocation expandable ${passClass}"${defaultOpen ? ' open' : ''}>`;
|
|
712
765
|
html += renderHeader('summary');
|
|
713
766
|
if (hasError) {
|
|
714
767
|
html += `<pre class="native-step-error">${ansiToHtml(inv.error.message)}</pre>`;
|
|
715
768
|
}
|
|
769
|
+
if (showVerifierDetail && inv.verification?.errorMessage) {
|
|
770
|
+
html +=
|
|
771
|
+
`<div class="ai-cache-miss-explainer">The AI’s screenshot verdict (passed) is what counts. Its structured Playwright steps did not reproduce that verdict against the live page — most often an over-broad locator — so they were not cached. The diverging check is highlighted below.</div>` +
|
|
772
|
+
`<pre class="ai-cache-miss-detail">${ansiToHtml(inv.verification.errorMessage)}</pre>`;
|
|
773
|
+
}
|
|
716
774
|
if (hasAssertSteps) {
|
|
717
775
|
const lines = inv
|
|
718
776
|
.assertSteps.map((s) => esc(formatAssertionStep(s)))
|
|
@@ -1070,6 +1128,28 @@ function renderSteps(steps, stepScreenshots, nativeSteps, aiInvocations, outputD
|
|
|
1070
1128
|
}
|
|
1071
1129
|
return c;
|
|
1072
1130
|
};
|
|
1131
|
+
// A native step is part of an AssertTool cache-worthiness verification
|
|
1132
|
+
// (rather than a user-authored assertion) iff its time window falls
|
|
1133
|
+
// inside the `verification` window of some enclosing AI invocation.
|
|
1134
|
+
// `verifyWindows` is the ordered list of those windows; `inVerify`
|
|
1135
|
+
// checks membership without scanning the tree.
|
|
1136
|
+
const verifyWindows = [];
|
|
1137
|
+
for (const inv of aiInvocations) {
|
|
1138
|
+
if (inv.verification) {
|
|
1139
|
+
verifyWindows.push({
|
|
1140
|
+
start: inv.verification.startedAt,
|
|
1141
|
+
end: inv.verification.endedAt,
|
|
1142
|
+
});
|
|
1143
|
+
}
|
|
1144
|
+
}
|
|
1145
|
+
const inVerify = (t, tEnd) => {
|
|
1146
|
+
for (const w of verifyWindows) {
|
|
1147
|
+
if (t >= w.start && tEnd <= w.end) {
|
|
1148
|
+
return true;
|
|
1149
|
+
}
|
|
1150
|
+
}
|
|
1151
|
+
return false;
|
|
1152
|
+
};
|
|
1073
1153
|
const renderNode = (node) => {
|
|
1074
1154
|
if (node.kind === 'donobu') {
|
|
1075
1155
|
return renderFilmstripStep(node.ss, outputDir);
|
|
@@ -1083,7 +1163,7 @@ function renderSteps(steps, stepScreenshots, nativeSteps, aiInvocations, outputD
|
|
|
1083
1163
|
const childrenHtml = node.children.length > 0
|
|
1084
1164
|
? `<div class="native-step-children">${node.children.map(renderNode).join('')}</div>`
|
|
1085
1165
|
: '';
|
|
1086
|
-
return renderNativeStep(node.ns, childrenHtml);
|
|
1166
|
+
return renderNativeStep(node.ns, childrenHtml, inVerify(node.t, node.tEnd));
|
|
1087
1167
|
};
|
|
1088
1168
|
const stepCount = countNodes(roots);
|
|
1089
1169
|
let html = '<details class="steps-section"><summary>Steps (' +
|
|
@@ -1781,6 +1861,8 @@ body::before{content:'';position:fixed;top:-750px;left:50%;transform:translateX(
|
|
|
1781
1861
|
.filmstrip-summary{font-size:11px;color:var(--text-dim);margin-top:2px;padding-left:44px}
|
|
1782
1862
|
.step-status-ok{color:var(--green);font-size:12px;font-weight:bold}
|
|
1783
1863
|
.step-status-fail{color:var(--red);font-size:12px;font-weight:bold}
|
|
1864
|
+
.step-status-verified{color:#94a3b8;font-size:12px;font-weight:bold}
|
|
1865
|
+
.step-status-diverged{color:#fbbf24;font-size:14px;font-weight:bold;line-height:1}
|
|
1784
1866
|
.filmstrip-detail{display:none;padding:8px 0 4px 44px;flex-direction:row;gap:12px;align-items:flex-start}
|
|
1785
1867
|
.filmstrip-step.open .filmstrip-detail{display:flex}
|
|
1786
1868
|
.filmstrip-detail>a{flex-shrink:0;max-width:50%}
|
|
@@ -1840,6 +1922,8 @@ details.native-step>summary::-webkit-details-marker{display:none}
|
|
|
1840
1922
|
.native-step-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0}
|
|
1841
1923
|
.native-step-badge--expect{background:rgba(99,102,241,.12);color:#818cf8}
|
|
1842
1924
|
.native-step-badge--test\.step{background:rgba(16,185,129,.10);color:#34d399}
|
|
1925
|
+
.native-step-badge--verify{background:rgba(148,163,184,.12);color:#94a3b8}
|
|
1926
|
+
.native-step-badge--verify-diverged{background:rgba(245,158,11,.12);color:#fbbf24}
|
|
1843
1927
|
.native-step-location{font-size:10px;color:var(--text-dim);font-family:var(--mono);margin-left:auto;flex-shrink:0;white-space:nowrap}
|
|
1844
1928
|
details.native-step[open]>summary .native-step-chevron{transform:rotate(90deg)}
|
|
1845
1929
|
.native-step-error{font-size:11px;font-family:var(--mono);padding:4px 0 2px 44px;margin:0;white-space:pre-wrap;word-break:break-word;color:var(--text-muted)}
|
|
@@ -1860,7 +1944,17 @@ details.ai-invocation>summary::-webkit-details-marker{display:none}
|
|
|
1860
1944
|
.ai-invocation-badge--act{background:rgba(168,85,247,.12);color:#c084fc}
|
|
1861
1945
|
.ai-invocation-badge--assert{background:rgba(236,72,153,.12);color:#f472b6}
|
|
1862
1946
|
.ai-invocation-badge--locate{background:rgba(59,130,246,.12);color:#60a5fa}
|
|
1863
|
-
.ai-
|
|
1947
|
+
.ai-cache-badge{font-size:10px;font-weight:600;padding:1px 5px;border-radius:3px;white-space:nowrap;flex-shrink:0;font-family:var(--mono);cursor:help}
|
|
1948
|
+
.ai-cache-badge--hit{background:rgba(59,130,246,.12);color:#60a5fa}
|
|
1949
|
+
.ai-cache-badge--stored{background:rgba(52,211,153,.12);color:#34d399}
|
|
1950
|
+
.ai-cache-badge--miss{background:rgba(245,158,11,.12);color:#fbbf24}
|
|
1951
|
+
.ai-cache-miss-explainer{font-size:11px;color:var(--text-muted);padding:4px 0 2px 44px;line-height:1.45}
|
|
1952
|
+
.ai-cache-miss-detail{font-size:11px;font-family:var(--mono);padding:4px 0 2px 44px;margin:0;white-space:pre-wrap;word-break:break-word;color:var(--text-dim)}
|
|
1953
|
+
.ai-invocation--cache-miss>summary{box-shadow:inset 3px 0 0 0 rgba(245,158,11,.6)}
|
|
1954
|
+
.native-step--verify .snippet-line--target{background:rgba(148,163,184,.10)}
|
|
1955
|
+
.native-step--verify .snippet-line--target .snippet-linenum{color:#94a3b8}
|
|
1956
|
+
.native-step--verify-diverged .snippet-line--target{background:rgba(245,158,11,.10)}
|
|
1957
|
+
.native-step--verify-diverged .snippet-line--target .snippet-linenum{color:#fbbf24}
|
|
1864
1958
|
details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)}
|
|
1865
1959
|
.ai-assert-steps{font-size:11px;font-family:var(--mono);background:var(--bg);border:1px solid var(--border-subtle);border-radius:var(--radius);padding:8px 12px;margin:6px 0 2px 44px;color:var(--text-muted);white-space:pre-wrap;word-break:break-word;overflow-x:auto;max-height:240px;overflow-y:auto}
|
|
1866
1960
|
.snippet-line{display:flex;padding:1px 8px;white-space:pre}
|
package/dist/tools/AssertTool.js
CHANGED
|
@@ -207,18 +207,34 @@ careful positioning lost, etc. A screenshot of the webpage has also been provide
|
|
|
207
207
|
// When the AI assertion passes and structured steps were returned,
|
|
208
208
|
// verify the steps against the live page before considering them
|
|
209
209
|
// cacheable. If the steps fail, discard them but still return the
|
|
210
|
-
// passing AI result.
|
|
210
|
+
// passing AI result. The verification window is recorded so the HTML
|
|
211
|
+
// reporter can label its `expect()` calls as cache-worthiness checks
|
|
212
|
+
// rather than treating an internal locator mismatch as an assertion
|
|
213
|
+
// failure.
|
|
211
214
|
let verifiedSteps = assertionOutcome.output.playwrightAssertionSteps;
|
|
215
|
+
let verification;
|
|
212
216
|
if (assertPassed &&
|
|
213
217
|
Array.isArray(verifiedSteps) &&
|
|
214
218
|
verifiedSteps.length > 0) {
|
|
219
|
+
const verifyStartedAt = Date.now();
|
|
215
220
|
try {
|
|
216
221
|
const executor = (0, assertCache_1.buildAssertExecutor)(verifiedSteps);
|
|
217
222
|
await executor({ page: page, envData: context.envData });
|
|
223
|
+
verification = {
|
|
224
|
+
startedAt: verifyStartedAt,
|
|
225
|
+
endedAt: Date.now(),
|
|
226
|
+
failed: false,
|
|
227
|
+
};
|
|
218
228
|
}
|
|
219
229
|
catch (error) {
|
|
220
230
|
Logger_1.appLogger.debug(`Structured assertion steps failed verification for: "${parameters.assertionToTestFor}" — discarding steps. Error: ${error.message}`);
|
|
221
231
|
verifiedSteps = null;
|
|
232
|
+
verification = {
|
|
233
|
+
startedAt: verifyStartedAt,
|
|
234
|
+
endedAt: Date.now(),
|
|
235
|
+
failed: true,
|
|
236
|
+
errorMessage: error.message,
|
|
237
|
+
};
|
|
222
238
|
}
|
|
223
239
|
}
|
|
224
240
|
const result = {
|
|
@@ -227,6 +243,7 @@ careful positioning lost, etc. A screenshot of the webpage has also been provide
|
|
|
227
243
|
metadata: {
|
|
228
244
|
...assertionOutcome.output,
|
|
229
245
|
playwrightAssertionSteps: verifiedSteps,
|
|
246
|
+
verification,
|
|
230
247
|
attempt: attempt + 1,
|
|
231
248
|
},
|
|
232
249
|
};
|