donobu 5.46.0 → 5.48.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/donobu-cli.js +90 -34
- package/dist/esm/cli/donobu-cli.js +90 -34
- package/dist/esm/lib/test/testExtension.js +38 -0
- package/dist/esm/lib/test/utils/triageTestFailure.d.ts +27 -5
- package/dist/esm/lib/test/utils/triageTestFailure.js +80 -37
- package/dist/esm/reporter/render.js +108 -15
- package/dist/lib/test/testExtension.js +38 -0
- package/dist/lib/test/utils/triageTestFailure.d.ts +27 -5
- package/dist/lib/test/utils/triageTestFailure.js +80 -37
- package/dist/reporter/render.js +108 -15
- package/package.json +1 -1
|
@@ -37,6 +37,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
37
37
|
};
|
|
38
38
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
39
|
exports.TRIAGE_PERSISTENCE_FILE_IDS = exports.TreatmentPlan = exports.RemediationStepSchema = exports.FailureReasonSchema = exports.AdditionalDataRequestSchema = void 0;
|
|
40
|
+
exports.captureLivePageScreenshot = captureLivePageScreenshot;
|
|
40
41
|
exports.deriveHeuristicAssessment = deriveHeuristicAssessment;
|
|
41
42
|
exports.deriveHistoricalSignals = deriveHistoricalSignals;
|
|
42
43
|
exports.gatherTestFailureEvidence = gatherTestFailureEvidence;
|
|
@@ -79,9 +80,10 @@ const cacheLocator_1 = require("../../ai/cache/cacheLocator");
|
|
|
79
80
|
* history from the persistence layer.
|
|
80
81
|
* 3. Fetches **historical runs** of the same flow (by name) from the flows manager to
|
|
81
82
|
* detect flakiness, regression patterns, and prior self-heal success.
|
|
82
|
-
* 4. Captures the **failure screenshot** (
|
|
83
|
-
*
|
|
84
|
-
*
|
|
83
|
+
* 4. Captures the **failure screenshot** (a live screenshot taken at triage time, while
|
|
84
|
+
* the page is still open during teardown, so it reflects the true final state) and the
|
|
85
|
+
* **baseline screenshot** (last tool call screenshot from the most recent successful
|
|
86
|
+
* historical run) for visual comparison.
|
|
85
87
|
* 5. Reads the source of the failing test case for contextual grounding.
|
|
86
88
|
* 6. Runs the **heuristic classifier** (`deriveHeuristicAssessment`) which uses
|
|
87
89
|
* rule-based pattern matching over errors, tool calls, stale-cache indicators,
|
|
@@ -121,7 +123,7 @@ const cacheLocator_1 = require("../../ai/cache/cacheLocator");
|
|
|
121
123
|
* | Flow metadata | `DonobuExtendedPage._dnb` | Run mode, objective, allowed tools, timing |
|
|
122
124
|
* | Stale cache indicators | Derived from above | Whether page.ai cache staleness is the root cause |
|
|
123
125
|
* | Historical flow runs | `DonobuFlowsManager.getFlows` | Flakiness, regression patterns, prior self-heal |
|
|
124
|
-
* | Failure screenshot |
|
|
126
|
+
* | Failure screenshot | Live capture at triage time | True final visual state of the page when it failed |
|
|
125
127
|
* | Baseline screenshot | Last successful run's screenshot | Visual reference for what the page *should* look like |
|
|
126
128
|
* | Test source snippet | TypeScript AST parsing | The test's expectations and structure |
|
|
127
129
|
*
|
|
@@ -331,6 +333,14 @@ const TRIAGE_PERSISTENCE_FILE_IDS = {
|
|
|
331
333
|
evidence: 'triage-evidence.json',
|
|
332
334
|
failureScreenshot: 'triage-failure-screenshot.png',
|
|
333
335
|
baselineScreenshot: 'triage-baseline-screenshot.png',
|
|
336
|
+
/**
|
|
337
|
+
* Live screenshot of a flow's final visual state, captured at teardown while
|
|
338
|
+
* the page is still open. Persisted on successful runs so that a *later*
|
|
339
|
+
* failing run can use it as a true final-state baseline — symmetric with the
|
|
340
|
+
* failure screenshot, which is also a live end-of-test capture. Keyed per
|
|
341
|
+
* flow, like browser state.
|
|
342
|
+
*/
|
|
343
|
+
finalStateScreenshot: 'triage-final-state-screenshot.png',
|
|
334
344
|
};
|
|
335
345
|
exports.TRIAGE_PERSISTENCE_FILE_IDS = TRIAGE_PERSISTENCE_FILE_IDS;
|
|
336
346
|
/**
|
|
@@ -554,41 +564,59 @@ async function fetchFlowHistory(page) {
|
|
|
554
564
|
}
|
|
555
565
|
}
|
|
556
566
|
/**
|
|
557
|
-
*
|
|
558
|
-
*
|
|
559
|
-
*
|
|
567
|
+
* Captures a fresh screenshot of the page's current visual state. Called at
|
|
568
|
+
* teardown (failure triage and successful-run baseline capture) while the
|
|
569
|
+
* page/context is still open, so it reflects the true *end state* of the test.
|
|
570
|
+
*
|
|
571
|
+
* This is deliberately preferred over the last Donobu tool-call screenshot:
|
|
572
|
+
* Playwright `expect`/`waitFor` are not tool calls, so the last tool-call image
|
|
573
|
+
* can predate the failing assertion and capture a transient state (e.g. a
|
|
574
|
+
* loading spinner that has since resolved), which misleads the vision model.
|
|
575
|
+
* Fails open — returns null if the page is gone or unresponsive (crash, closed
|
|
576
|
+
* context, hang), in which case the caller proceeds without a screenshot.
|
|
560
577
|
*/
|
|
561
|
-
async function
|
|
562
|
-
const flowId = page._dnb?.donobuFlowMetadata?.id;
|
|
563
|
-
const persistence = page._dnb?.persistence;
|
|
564
|
-
if (!flowId || !persistence) {
|
|
565
|
-
return null;
|
|
566
|
-
}
|
|
578
|
+
async function captureLivePageScreenshot(page) {
|
|
567
579
|
try {
|
|
568
|
-
|
|
569
|
-
if (toolCalls.length === 0) {
|
|
570
|
-
return null;
|
|
571
|
-
}
|
|
572
|
-
// Walk backwards to find the last tool call with a screenshot
|
|
573
|
-
for (let i = toolCalls.length - 1; i >= 0; i--) {
|
|
574
|
-
const screenshotId = toolCalls[i].postCallImageId;
|
|
575
|
-
if (screenshotId) {
|
|
576
|
-
return await persistence.getScreenShot(flowId, screenshotId);
|
|
577
|
-
}
|
|
578
|
-
}
|
|
579
|
-
return null;
|
|
580
|
+
return await page.screenshot({ animations: 'disabled', timeout: 10000 });
|
|
580
581
|
}
|
|
581
582
|
catch (error) {
|
|
582
|
-
Logger_1.appLogger.debug(
|
|
583
|
+
Logger_1.appLogger.debug('Failed to capture live page screenshot; proceeding without it.', error);
|
|
583
584
|
return null;
|
|
584
585
|
}
|
|
585
586
|
}
|
|
586
587
|
/**
|
|
587
|
-
*
|
|
588
|
-
*
|
|
589
|
-
*
|
|
590
|
-
*
|
|
591
|
-
|
|
588
|
+
* The failure screenshot for the current run. Prefers the final-state
|
|
589
|
+
* screenshot persisted at teardown (the single source of truth shared with
|
|
590
|
+
* baselines), and falls back to a live capture when it is missing — e.g. triage
|
|
591
|
+
* invoked outside the standard teardown, or the teardown capture failed.
|
|
592
|
+
*/
|
|
593
|
+
async function fetchCurrentRunFinalStateScreenshot(page) {
|
|
594
|
+
const flowId = page._dnb?.donobuFlowMetadata?.id;
|
|
595
|
+
const persistence = page._dnb?.persistence;
|
|
596
|
+
if (flowId && persistence) {
|
|
597
|
+
try {
|
|
598
|
+
const persisted = await persistence.getFlowFile(flowId, TRIAGE_PERSISTENCE_FILE_IDS.finalStateScreenshot);
|
|
599
|
+
if (persisted) {
|
|
600
|
+
return persisted;
|
|
601
|
+
}
|
|
602
|
+
}
|
|
603
|
+
catch (error) {
|
|
604
|
+
Logger_1.appLogger.debug(`Failed to read persisted final-state screenshot for flow ${flowId}; falling back to a live capture.`, error);
|
|
605
|
+
}
|
|
606
|
+
}
|
|
607
|
+
return captureLivePageScreenshot(page);
|
|
608
|
+
}
|
|
609
|
+
/**
|
|
610
|
+
* Loads a baseline screenshot from a historical successful run so the GPT
|
|
611
|
+
* triage agent can compare "what the page looked like when it last worked" vs
|
|
612
|
+
* "what it looks like now" to detect redesigns and stale-cache scenarios.
|
|
613
|
+
*
|
|
614
|
+
* Prefers the persisted final-state screenshot (a live end-of-test capture
|
|
615
|
+
* written on successful runs) so the baseline is symmetric with the live
|
|
616
|
+
* failure screenshot — both true end states. Falls back to the last tool-call
|
|
617
|
+
* image for runs that predate final-state capture; that image can be a
|
|
618
|
+
* mid-flow frame, so callers should treat such baselines as approximate.
|
|
619
|
+
* Fails open — returns null if no screenshot can be retrieved.
|
|
592
620
|
*/
|
|
593
621
|
async function fetchBaselineScreenshot(page, historicalFlowId) {
|
|
594
622
|
const persistence = page._dnb?.persistence;
|
|
@@ -596,10 +624,12 @@ async function fetchBaselineScreenshot(page, historicalFlowId) {
|
|
|
596
624
|
return null;
|
|
597
625
|
}
|
|
598
626
|
try {
|
|
599
|
-
const
|
|
600
|
-
if (
|
|
601
|
-
return
|
|
627
|
+
const finalState = await persistence.getFlowFile(historicalFlowId, TRIAGE_PERSISTENCE_FILE_IDS.finalStateScreenshot);
|
|
628
|
+
if (finalState) {
|
|
629
|
+
return finalState;
|
|
602
630
|
}
|
|
631
|
+
// Fallback for runs predating final-state capture: last tool-call image.
|
|
632
|
+
const toolCalls = await persistence.getToolCalls(historicalFlowId);
|
|
603
633
|
for (let i = toolCalls.length - 1; i >= 0; i--) {
|
|
604
634
|
const screenshotId = toolCalls[i].postCallImageId;
|
|
605
635
|
if (screenshotId) {
|
|
@@ -1601,7 +1631,7 @@ async function gatherTestFailureEvidence(testInfo, page, options = {}) {
|
|
|
1601
1631
|
// Capture screenshots for visual triage: current failure + baseline from last success
|
|
1602
1632
|
const lastSuccessfulRunId = failureContext.flowHistory?.lastSuccessfulRunId ?? null;
|
|
1603
1633
|
const [screenshotBuffer, baselineBuffer] = await Promise.all([
|
|
1604
|
-
|
|
1634
|
+
fetchCurrentRunFinalStateScreenshot(page),
|
|
1605
1635
|
lastSuccessfulRunId
|
|
1606
1636
|
? fetchBaselineScreenshot(page, lastSuccessfulRunId)
|
|
1607
1637
|
: Promise.resolve(null),
|
|
@@ -1805,10 +1835,20 @@ passed to each tool invocation. Use these to improve diagnosis:
|
|
|
1805
1835
|
|
|
1806
1836
|
SCREENSHOT EVIDENCE:
|
|
1807
1837
|
You may receive one or two screenshots:
|
|
1808
|
-
1. "FAILURE SCREENSHOT" —
|
|
1838
|
+
1. "FAILURE SCREENSHOT" — a live screenshot captured at triage time, immediately after the test
|
|
1839
|
+
failed and while the page was still open. It reflects the true FINAL visual state of the page.
|
|
1809
1840
|
2. "BASELINE SCREENSHOT" — the state of the page at the end of the most recent successful run of
|
|
1810
1841
|
this same flow. This serves as a visual reference for what the page *should* look like.
|
|
1811
1842
|
|
|
1843
|
+
IMPORTANT — a screenshot is a single moment in time, not a recording:
|
|
1844
|
+
- Describe only what the frame shows. Do NOT assert that a state persisted for a duration — e.g.
|
|
1845
|
+
"stuck on a loading spinner THROUGHOUT the test", "the page never loaded", "remained on X the
|
|
1846
|
+
whole time". A single frame cannot establish how long anything lasted.
|
|
1847
|
+
- Only claim a persistent or temporal condition when it is corroborated by NON-visual evidence:
|
|
1848
|
+
tool-call outcomes/durations, error messages, or timeouts in failureContext. Absent that, state
|
|
1849
|
+
the end condition factually (e.g. "the final screenshot shows a loading spinner") and let the
|
|
1850
|
+
other evidence determine duration and cause.
|
|
1851
|
+
|
|
1812
1852
|
When both screenshots are provided, compare them to:
|
|
1813
1853
|
- Detect UI changes (redesigns, layout shifts, new modals) that would explain selector or cache failures.
|
|
1814
1854
|
- Identify whether the failure screenshot shows a fundamentally different page state (error page, login wall)
|
|
@@ -1837,7 +1877,10 @@ When only the failure screenshot is provided (no baseline available), use it to:
|
|
|
1837
1877
|
if (evidence.failureScreenshotPath) {
|
|
1838
1878
|
try {
|
|
1839
1879
|
const failureBytes = await fs.readFile(evidence.failureScreenshotPath);
|
|
1840
|
-
userItems.push({
|
|
1880
|
+
userItems.push({
|
|
1881
|
+
type: 'text',
|
|
1882
|
+
text: 'FAILURE SCREENSHOT (live capture at triage time — true final state of the page):',
|
|
1883
|
+
}, { type: 'png', bytes: new Uint8Array(failureBytes) });
|
|
1841
1884
|
}
|
|
1842
1885
|
catch (screenshotError) {
|
|
1843
1886
|
Logger_1.appLogger.debug('Failed to load failure screenshot for GPT triage, proceeding with text only.', screenshotError);
|
|
@@ -449,6 +449,33 @@ const REASON_LABELS = {
|
|
|
449
449
|
function reasonCfg(reason) {
|
|
450
450
|
return REASON_LABELS[reason] ?? REASON_LABELS['UNKNOWN'];
|
|
451
451
|
}
|
|
452
|
+
// Triage-detail flags derived from a treatment plan. A test can carry several
|
|
453
|
+
// at once, so these form a multi-valued filter dimension (OR semantics).
|
|
454
|
+
// Declaration order is the display order in the filter menu and chips, and
|
|
455
|
+
// mirrors the flag order in `renderTriageCard`. Colors match `.triage-flag`.
|
|
456
|
+
const TRIAGE_LABELS = {
|
|
457
|
+
retryable: { label: 'Retryable', color: '#10b981' },
|
|
458
|
+
code: { label: 'Needs Code Change', color: '#f59e0b' },
|
|
459
|
+
product: { label: 'Needs Product Fix', color: '#ef4444' },
|
|
460
|
+
};
|
|
461
|
+
/** The triage-flag keys present on a test's treatment plan, in display order. */
|
|
462
|
+
function triageKeysOf(test) {
|
|
463
|
+
if (!test.plan) {
|
|
464
|
+
return [];
|
|
465
|
+
}
|
|
466
|
+
const p = test.plan.plan;
|
|
467
|
+
const keys = [];
|
|
468
|
+
if (p.shouldRetryAutomation) {
|
|
469
|
+
keys.push('retryable');
|
|
470
|
+
}
|
|
471
|
+
if (p.requiresCodeChange) {
|
|
472
|
+
keys.push('code');
|
|
473
|
+
}
|
|
474
|
+
if (p.requiresProductFix) {
|
|
475
|
+
keys.push('product');
|
|
476
|
+
}
|
|
477
|
+
return keys;
|
|
478
|
+
}
|
|
452
479
|
function renderAttachments(attachments, outputDir, stepScreenshots = []) {
|
|
453
480
|
const rendered = [];
|
|
454
481
|
for (const att of attachments) {
|
|
@@ -1680,7 +1707,7 @@ function renderHtml(report, triage, outputDir) {
|
|
|
1680
1707
|
? `<div class="flow-id-detail"><span class="detail-label">Flow ID</span><span class="flow-id-value">${esc(test.flowId)}<button class="copy-flow-id" data-flow-id="${esc(test.flowId)}" title="Copy flow ID"><svg viewBox="0 0 24 24"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"/><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/></svg></button></span></div>`
|
|
1681
1708
|
: '';
|
|
1682
1709
|
testSectionsHtml += `
|
|
1683
|
-
<div class="test-card ${sc.label.toLowerCase().replace(/ /g, '')} ${expandableClass}" id="${testId}" data-status="${test.status}" data-file="${esc(test.file)}" data-search="${esc((displayFilePath + ' ' + test.specTitle).toLowerCase())}" data-tags="${esc(JSON.stringify(test.tags))}"${test.plan ? ` data-reason="${esc(test.plan.plan.failureReason)}"` : ''} ${hasDetails ? `data-detail="${testId}"` : ''}>
|
|
1710
|
+
<div class="test-card ${sc.label.toLowerCase().replace(/ /g, '')} ${expandableClass}" id="${testId}" data-status="${test.status}" data-file="${esc(test.file)}" data-search="${esc((displayFilePath + ' ' + test.specTitle).toLowerCase())}" data-tags="${esc(JSON.stringify(test.tags))}"${test.plan ? ` data-reason="${esc(test.plan.plan.failureReason)}"` : ''}${triageKeysOf(test).length ? ` data-triage="${esc(triageKeysOf(test).join(','))}"` : ''} ${hasDetails ? `data-detail="${testId}"` : ''}>
|
|
1684
1711
|
<div class="test-summary">
|
|
1685
1712
|
${chevron}
|
|
1686
1713
|
<span class="status-dot" style="background:${sc.color}" title="${sc.label}"></span>
|
|
@@ -1801,14 +1828,15 @@ body::before{content:'';position:fixed;top:-750px;left:50%;transform:translateX(
|
|
|
1801
1828
|
.add-tag-filter .add-tag-plus{font-size:15px;line-height:1}
|
|
1802
1829
|
.add-tag-filter:hover{background:var(--surface-raised);border-color:var(--text-dim);color:var(--text)}
|
|
1803
1830
|
.add-tag-filter.active{background:var(--accent);border-color:var(--accent);color:#fff}
|
|
1804
|
-
.tag-menu{position:absolute;top:calc(100% + 6px);
|
|
1831
|
+
.tag-menu{position:absolute;top:calc(100% + 6px);right:0;min-width:200px;max-width:320px;max-height:280px;overflow-y:auto;background:var(--surface-raised);border:1px solid var(--border);border-radius:var(--radius);box-shadow:0 8px 24px rgba(0,0,0,.4);z-index:20;padding:4px;display:none}
|
|
1805
1832
|
.tag-menu:not([hidden]){display:block}
|
|
1806
1833
|
.tag-menu-item{display:flex;align-items:center;justify-content:space-between;gap:8px;padding:6px 10px;font-size:12px;font-family:var(--mono);color:var(--text);background:transparent;border:none;border-radius:4px;cursor:pointer;text-align:left;width:100%;transition:background .15s}
|
|
1807
1834
|
.tag-menu-item:hover{background:var(--surface)}
|
|
1808
1835
|
.tag-menu-item .tag-menu-count{color:var(--text-muted);font-size:11px;font-family:var(--mono)}
|
|
1809
1836
|
.tag-menu-empty{padding:8px 10px;font-size:12px;color:var(--text-muted);font-style:italic}
|
|
1810
|
-
.tag-menu-section{padding:8px 10px
|
|
1837
|
+
.tag-menu-section{padding:8px 10px 2px;font-size:10px;font-weight:700;letter-spacing:.05em;text-transform:uppercase;color:var(--text-dim);font-family:inherit}
|
|
1811
1838
|
.tag-menu-section:not(:first-child){margin-top:4px;border-top:1px solid var(--border)}
|
|
1839
|
+
.tag-menu-hint{padding:0 10px 6px;font-size:11px;line-height:1.35;color:var(--text-muted);font-family:inherit;max-width:300px}
|
|
1812
1840
|
.active-tag-filters{display:inline-flex;align-items:center;gap:6px;flex-wrap:wrap}
|
|
1813
1841
|
.tag-chip{display:inline-flex;align-items:center;gap:6px;background:rgba(255,127,58,.12);border:1px solid rgba(255,127,58,.3);color:var(--accent);font-size:11px;font-family:var(--mono);padding:3px 4px 3px 8px;border-radius:4px}
|
|
1814
1842
|
.tag-chip-remove{background:transparent;border:none;color:inherit;cursor:pointer;font-size:14px;line-height:1;padding:0 4px;font-family:inherit;opacity:.7;transition:opacity .15s}
|
|
@@ -2163,7 +2191,7 @@ details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)
|
|
|
2163
2191
|
</label>
|
|
2164
2192
|
<div class="tag-filter-controls" data-tag-filter-controls hidden>
|
|
2165
2193
|
<div class="tag-filter-trigger-wrap">
|
|
2166
|
-
<button class="add-tag-filter" data-add-tag-filter title="Filter by tag or
|
|
2194
|
+
<button class="add-tag-filter" data-add-tag-filter title="Filter by tag, diagnosis, or triage"><span class="add-tag-plus">+</span> Filter</button>
|
|
2167
2195
|
<div class="tag-menu" data-tag-menu hidden></div>
|
|
2168
2196
|
</div>
|
|
2169
2197
|
<div class="active-tag-filters" data-active-tag-filters></div>
|
|
@@ -2191,16 +2219,22 @@ details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)
|
|
|
2191
2219
|
// tags — multi-select AND; card must carry every active tag.
|
|
2192
2220
|
// reasons — multi-select OR; card.data-reason must match any active reason
|
|
2193
2221
|
// (a card has at most one diagnosis, so AND would always be 0/1).
|
|
2194
|
-
//
|
|
2222
|
+
// triage — multi-select OR; card.data-triage (a comma list) must contain
|
|
2223
|
+
// any active flag (a card can carry several triage flags).
|
|
2224
|
+
// "Clear Filters" wipes all of them.
|
|
2195
2225
|
var activeStatus=null;
|
|
2196
2226
|
var activeTags=new Set();
|
|
2197
2227
|
var activeReasons=new Set();
|
|
2228
|
+
var activeTriage=new Set();
|
|
2198
2229
|
var activeSearch=''; // lowercase substring match against data-search
|
|
2199
2230
|
var allTags=[];
|
|
2200
2231
|
var allReasons=[]; // ordered list of REASON keys present in the report
|
|
2232
|
+
var allTriage=[]; // ordered list of TRIAGE keys present in the report
|
|
2201
2233
|
var REASON_LABELS=${JSON.stringify(REASON_LABELS)};
|
|
2234
|
+
var TRIAGE_LABELS=${JSON.stringify(TRIAGE_LABELS)};
|
|
2202
2235
|
|
|
2203
2236
|
function cardTags(card){var raw=card.getAttribute('data-tags');if(!raw)return [];try{var v=JSON.parse(raw);return Array.isArray(v)?v:[]}catch(_){return []}}
|
|
2237
|
+
function cardTriage(card){var raw=card.getAttribute('data-triage');return raw?raw.split(','):[]}
|
|
2204
2238
|
|
|
2205
2239
|
// Faceted-search counts. Each filter option's badge shows "how many tests
|
|
2206
2240
|
// would this option contribute given the rest of the filters." The semantics
|
|
@@ -2208,8 +2242,9 @@ details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)
|
|
|
2208
2242
|
// - Status pills (single-select replace): ignore current activeStatus.
|
|
2209
2243
|
// - Tag menu items (multi-select AND): use ALL current filters.
|
|
2210
2244
|
// - Reason menu items (multi-select OR): ignore current activeReasons.
|
|
2245
|
+
// - Triage menu items (multi-select OR): ignore current activeTriage.
|
|
2211
2246
|
// Search is free-form and not counted.
|
|
2212
|
-
function cardsMatching(ignoreStatus,ignoreTags,ignoreReasons){
|
|
2247
|
+
function cardsMatching(ignoreStatus,ignoreTags,ignoreReasons,ignoreTriage){
|
|
2213
2248
|
var out=[];
|
|
2214
2249
|
document.querySelectorAll('.test-card').forEach(function(card){
|
|
2215
2250
|
var statusOk=ignoreStatus||activeStatus===null||card.getAttribute('data-status')===activeStatus;
|
|
@@ -2219,19 +2254,23 @@ details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)
|
|
|
2219
2254
|
activeTags.forEach(function(w){if(t.indexOf(w)===-1)tagsOk=false});
|
|
2220
2255
|
}
|
|
2221
2256
|
var reasonOk=ignoreReasons||activeReasons.size===0||activeReasons.has(card.getAttribute('data-reason')||'');
|
|
2257
|
+
var triageOk=true;
|
|
2258
|
+
if(!ignoreTriage&&activeTriage.size>0){
|
|
2259
|
+
var ct=cardTriage(card);triageOk=ct.some(function(k){return activeTriage.has(k)});
|
|
2260
|
+
}
|
|
2222
2261
|
var searchOk=activeSearch.length===0||(card.getAttribute('data-search')||'').indexOf(activeSearch)!==-1;
|
|
2223
|
-
if(statusOk&&tagsOk&&reasonOk&&searchOk)out.push(card);
|
|
2262
|
+
if(statusOk&&tagsOk&&reasonOk&&triageOk&&searchOk)out.push(card);
|
|
2224
2263
|
});
|
|
2225
2264
|
return out;
|
|
2226
2265
|
}
|
|
2227
2266
|
function tagCount(t){
|
|
2228
|
-
var pool=cardsMatching(false,false,false);
|
|
2267
|
+
var pool=cardsMatching(false,false,false,false);
|
|
2229
2268
|
var n=0;for(var i=0;i<pool.length;i++){if(cardTags(pool[i]).indexOf(t)!==-1)n++}
|
|
2230
2269
|
return n;
|
|
2231
2270
|
}
|
|
2232
2271
|
|
|
2233
2272
|
function applyFilters(){
|
|
2234
|
-
var anyActive=activeStatus!==null||activeTags.size>0||activeReasons.size>0||activeSearch.length>0;
|
|
2273
|
+
var anyActive=activeStatus!==null||activeTags.size>0||activeReasons.size>0||activeTriage.size>0||activeSearch.length>0;
|
|
2235
2274
|
document.querySelector('.clear-filter').classList.toggle('visible',anyActive);
|
|
2236
2275
|
var visibleTests=0;
|
|
2237
2276
|
var visibleFiles=Object.create(null);
|
|
@@ -2247,12 +2286,16 @@ details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)
|
|
|
2247
2286
|
var r=card.getAttribute('data-reason')||'';
|
|
2248
2287
|
reasonOk=activeReasons.has(r);
|
|
2249
2288
|
}
|
|
2289
|
+
var triageOk=true;
|
|
2290
|
+
if(activeTriage.size>0){
|
|
2291
|
+
var ct=cardTriage(card);triageOk=ct.some(function(k){return activeTriage.has(k)});
|
|
2292
|
+
}
|
|
2250
2293
|
var searchOk=true;
|
|
2251
2294
|
if(activeSearch.length>0){
|
|
2252
2295
|
var hay=card.getAttribute('data-search')||'';
|
|
2253
2296
|
searchOk=hay.indexOf(activeSearch)!==-1;
|
|
2254
2297
|
}
|
|
2255
|
-
var hide=!(statusOk&&tagsOk&&reasonOk&&searchOk);
|
|
2298
|
+
var hide=!(statusOk&&tagsOk&&reasonOk&&triageOk&&searchOk);
|
|
2256
2299
|
card.classList.toggle('hidden-by-filter',hide);
|
|
2257
2300
|
if(!hide){
|
|
2258
2301
|
visibleTests++;
|
|
@@ -2295,6 +2338,7 @@ details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)
|
|
|
2295
2338
|
if(activeStatus)p.set('status',activeStatus);
|
|
2296
2339
|
activeTags.forEach(function(t){p.append('tag',t)});
|
|
2297
2340
|
activeReasons.forEach(function(r){p.append('reason',r)});
|
|
2341
|
+
activeTriage.forEach(function(t){p.append('triage',t)});
|
|
2298
2342
|
if(activeSearch)p.set('q',activeSearch);
|
|
2299
2343
|
var qs=p.toString();
|
|
2300
2344
|
var next=location.pathname+(qs?'?'+qs:'')+(location.hash||'');
|
|
@@ -2340,19 +2384,37 @@ details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)
|
|
|
2340
2384
|
chip.appendChild(label);chip.appendChild(btn);
|
|
2341
2385
|
c.appendChild(chip);
|
|
2342
2386
|
});
|
|
2387
|
+
activeTriage.forEach(function(t){
|
|
2388
|
+
var meta=TRIAGE_LABELS[t];if(!meta)return;
|
|
2389
|
+
var chip=document.createElement('span');chip.className='tag-chip reason-chip';
|
|
2390
|
+
chip.style.background=hexToRgba(meta.color,0.14);
|
|
2391
|
+
chip.style.borderColor=hexToRgba(meta.color,0.4);
|
|
2392
|
+
chip.style.color=meta.color;
|
|
2393
|
+
var label=document.createElement('span');label.textContent=meta.label;
|
|
2394
|
+
var btn=document.createElement('button');btn.className='tag-chip-remove';btn.setAttribute('data-remove-triage',t);btn.setAttribute('title','Remove filter');btn.textContent='×';
|
|
2395
|
+
chip.appendChild(label);chip.appendChild(btn);
|
|
2396
|
+
c.appendChild(chip);
|
|
2397
|
+
});
|
|
2343
2398
|
}
|
|
2344
2399
|
function addTag(t){if(!t||activeTags.has(t))return;activeTags.add(t);renderActiveChips();applyFilters()}
|
|
2345
2400
|
function removeTag(t){if(!activeTags.delete(t))return;renderActiveChips();applyFilters()}
|
|
2346
2401
|
function addReason(r){if(!r||activeReasons.has(r))return;activeReasons.add(r);renderActiveChips();applyFilters()}
|
|
2347
2402
|
function removeReason(r){if(!activeReasons.delete(r))return;renderActiveChips();applyFilters()}
|
|
2403
|
+
function addTriage(t){if(!t||activeTriage.has(t))return;activeTriage.add(t);renderActiveChips();applyFilters()}
|
|
2404
|
+
function removeTriage(t){if(!activeTriage.delete(t))return;renderActiveChips();applyFilters()}
|
|
2348
2405
|
|
|
2349
2406
|
function reasonCount(r){
|
|
2350
|
-
var pool=cardsMatching(false,false,true);
|
|
2407
|
+
var pool=cardsMatching(false,false,true,false);
|
|
2351
2408
|
var n=0;for(var i=0;i<pool.length;i++){if(pool[i].getAttribute('data-reason')===r)n++}
|
|
2352
2409
|
return n;
|
|
2353
2410
|
}
|
|
2411
|
+
function triageCount(t){
|
|
2412
|
+
var pool=cardsMatching(false,false,false,true);
|
|
2413
|
+
var n=0;for(var i=0;i<pool.length;i++){if(cardTriage(pool[i]).indexOf(t)!==-1)n++}
|
|
2414
|
+
return n;
|
|
2415
|
+
}
|
|
2354
2416
|
function updateStatPillCounts(){
|
|
2355
|
-
var pool=cardsMatching(true,false,false);
|
|
2417
|
+
var pool=cardsMatching(true,false,false,false);
|
|
2356
2418
|
var counts=Object.create(null);
|
|
2357
2419
|
for(var i=0;i<pool.length;i++){var s=pool[i].getAttribute('data-status');counts[s]=(counts[s]||0)+1}
|
|
2358
2420
|
document.querySelectorAll('.stat-pill[data-filter]').forEach(function(pill){
|
|
@@ -2372,9 +2434,11 @@ details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)
|
|
|
2372
2434
|
// to an empty view, so they're not useful to offer.
|
|
2373
2435
|
var tagsWithCounts=allTags.filter(function(t){return !activeTags.has(t)}).map(function(t){return {key:t,count:tagCount(t)}}).filter(function(x){return x.count>0});
|
|
2374
2436
|
var reasonsWithCounts=allReasons.filter(function(r){return !activeReasons.has(r)}).map(function(r){return {key:r,count:reasonCount(r)}}).filter(function(x){return x.count>0});
|
|
2437
|
+
var triageWithCounts=allTriage.filter(function(t){return !activeTriage.has(t)}).map(function(t){return {key:t,count:triageCount(t)}}).filter(function(x){return x.count>0});
|
|
2375
2438
|
var added=false;
|
|
2376
2439
|
if(allTags.length>0){
|
|
2377
2440
|
var hT=document.createElement('div');hT.className='tag-menu-section';hT.textContent='Tags';menu.appendChild(hT);
|
|
2441
|
+
var hintT=document.createElement('div');hintT.className='tag-menu-hint';hintT.textContent='Labels you put on tests in code (e.g. @smoke). Match all selected.';menu.appendChild(hintT);
|
|
2378
2442
|
if(tagsWithCounts.length===0){
|
|
2379
2443
|
var emptyT=document.createElement('div');emptyT.className='tag-menu-empty';emptyT.textContent=allTags.length===activeTags.size?'All tags selected':'No matching tags';menu.appendChild(emptyT);
|
|
2380
2444
|
}else{
|
|
@@ -2390,6 +2454,7 @@ details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)
|
|
|
2390
2454
|
}
|
|
2391
2455
|
if(allReasons.length>0){
|
|
2392
2456
|
var hR=document.createElement('div');hR.className='tag-menu-section';hR.textContent='Diagnoses';menu.appendChild(hR);
|
|
2457
|
+
var hintR=document.createElement('div');hintR.className='tag-menu-hint';hintR.textContent='Why a test failed — the AI\\'s single root-cause assessment.';menu.appendChild(hintR);
|
|
2393
2458
|
if(reasonsWithCounts.length===0){
|
|
2394
2459
|
var emptyR=document.createElement('div');emptyR.className='tag-menu-empty';emptyR.textContent=allReasons.length===activeReasons.size?'All diagnoses selected':'No matching diagnoses';menu.appendChild(emptyR);
|
|
2395
2460
|
}else{
|
|
@@ -2404,6 +2469,23 @@ details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)
|
|
|
2404
2469
|
}
|
|
2405
2470
|
added=true;
|
|
2406
2471
|
}
|
|
2472
|
+
if(allTriage.length>0){
|
|
2473
|
+
var hTr=document.createElement('div');hTr.className='tag-menu-section';hTr.textContent='Triage';menu.appendChild(hTr);
|
|
2474
|
+
var hintTr=document.createElement('div');hintTr.className='tag-menu-hint';hintTr.textContent='What the failure calls for — a test can need more than one.';menu.appendChild(hintTr);
|
|
2475
|
+
if(triageWithCounts.length===0){
|
|
2476
|
+
var emptyTr=document.createElement('div');emptyTr.className='tag-menu-empty';emptyTr.textContent=allTriage.length===activeTriage.size?'All triage flags selected':'No matching triage flags';menu.appendChild(emptyTr);
|
|
2477
|
+
}else{
|
|
2478
|
+
triageWithCounts.forEach(function(x){
|
|
2479
|
+
var meta=TRIAGE_LABELS[x.key];if(!meta)return;
|
|
2480
|
+
var item=document.createElement('button');item.className='tag-menu-item';item.setAttribute('data-triage-menu-item',x.key);
|
|
2481
|
+
var label=document.createElement('span');label.textContent=meta.label;label.style.color=meta.color;
|
|
2482
|
+
var count=document.createElement('span');count.className='tag-menu-count';count.textContent=x.count;
|
|
2483
|
+
item.appendChild(label);item.appendChild(count);
|
|
2484
|
+
menu.appendChild(item);
|
|
2485
|
+
});
|
|
2486
|
+
}
|
|
2487
|
+
added=true;
|
|
2488
|
+
}
|
|
2407
2489
|
if(!added){
|
|
2408
2490
|
var empty=document.createElement('div');empty.className='tag-menu-empty';empty.textContent='No filters available';menu.appendChild(empty);
|
|
2409
2491
|
}
|
|
@@ -2421,6 +2503,7 @@ details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)
|
|
|
2421
2503
|
activeStatus=null;
|
|
2422
2504
|
activeTags.clear();
|
|
2423
2505
|
activeReasons.clear();
|
|
2506
|
+
activeTriage.clear();
|
|
2424
2507
|
activeSearch='';
|
|
2425
2508
|
document.querySelectorAll('.stat-pill').forEach(function(p){p.classList.remove('active')});
|
|
2426
2509
|
var searchInput=document.querySelector('[data-filter-search]');
|
|
@@ -2470,10 +2553,14 @@ details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)
|
|
|
2470
2553
|
if(tagItem){addTag(tagItem.getAttribute('data-tag-menu-item'));closeTagMenu();return}
|
|
2471
2554
|
var reasonItem=e.target.closest('[data-reason-menu-item]');
|
|
2472
2555
|
if(reasonItem){addReason(reasonItem.getAttribute('data-reason-menu-item'));closeTagMenu();return}
|
|
2556
|
+
var triageItem=e.target.closest('[data-triage-menu-item]');
|
|
2557
|
+
if(triageItem){addTriage(triageItem.getAttribute('data-triage-menu-item'));closeTagMenu();return}
|
|
2473
2558
|
var tagRemove=e.target.closest('[data-remove-tag]');
|
|
2474
2559
|
if(tagRemove){removeTag(tagRemove.getAttribute('data-remove-tag'));return}
|
|
2475
2560
|
var reasonRemove=e.target.closest('[data-remove-reason]');
|
|
2476
2561
|
if(reasonRemove){removeReason(reasonRemove.getAttribute('data-remove-reason'));return}
|
|
2562
|
+
var triageRemove=e.target.closest('[data-remove-triage]');
|
|
2563
|
+
if(triageRemove){removeTriage(triageRemove.getAttribute('data-remove-triage'));return}
|
|
2477
2564
|
// Stat pill filter
|
|
2478
2565
|
var pill=e.target.closest('.stat-pill[data-filter]');
|
|
2479
2566
|
if(pill){toggleStatus(pill.getAttribute('data-filter'));return}
|
|
@@ -2523,18 +2610,22 @@ details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)
|
|
|
2523
2610
|
(function(){
|
|
2524
2611
|
var seenTags=Object.create(null);
|
|
2525
2612
|
var seenReasons=Object.create(null);
|
|
2613
|
+
var seenTriage=Object.create(null);
|
|
2526
2614
|
document.querySelectorAll('.test-card').forEach(function(card){
|
|
2527
2615
|
var raw=card.getAttribute('data-tags');
|
|
2528
2616
|
if(raw){try{var tags=JSON.parse(raw);if(Array.isArray(tags)){tags.forEach(function(t){if(typeof t==='string'&&t)seenTags[t]=true})}}catch(_){}}
|
|
2529
2617
|
var r=card.getAttribute('data-reason');
|
|
2530
2618
|
if(r)seenReasons[r]=true;
|
|
2619
|
+
cardTriage(card).forEach(function(t){if(t)seenTriage[t]=true});
|
|
2531
2620
|
});
|
|
2532
2621
|
allTags=Object.keys(seenTags).sort();
|
|
2533
2622
|
// Preserve the REASON_LABELS declaration order rather than alphabetical —
|
|
2534
2623
|
// they're already arranged from most-frequent/specific to UNKNOWN catch-all.
|
|
2535
2624
|
allReasons=Object.keys(REASON_LABELS).filter(function(r){return seenReasons[r]});
|
|
2625
|
+
// Preserve TRIAGE_LABELS declaration order (retryable → code → product).
|
|
2626
|
+
allTriage=Object.keys(TRIAGE_LABELS).filter(function(t){return seenTriage[t]});
|
|
2536
2627
|
var controls=document.querySelector('[data-tag-filter-controls]');
|
|
2537
|
-
if(controls&&(allTags.length>0||allReasons.length>0))controls.hidden=false;
|
|
2628
|
+
if(controls&&(allTags.length>0||allReasons.length>0||allTriage.length>0))controls.hidden=false;
|
|
2538
2629
|
})();
|
|
2539
2630
|
|
|
2540
2631
|
// Seed filter state from ?status=...&tag=...&reason=... so shared URLs
|
|
@@ -2554,6 +2645,8 @@ details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)
|
|
|
2554
2645
|
p.getAll('tag').forEach(function(t){if(tagSet[t])activeTags.add(t)});
|
|
2555
2646
|
var reasonSet={};allReasons.forEach(function(r){reasonSet[r]=true});
|
|
2556
2647
|
p.getAll('reason').forEach(function(r){if(reasonSet[r])activeReasons.add(r)});
|
|
2648
|
+
var triageSet={};allTriage.forEach(function(t){triageSet[t]=true});
|
|
2649
|
+
p.getAll('triage').forEach(function(t){if(triageSet[t])activeTriage.add(t)});
|
|
2557
2650
|
var q=p.get('q');
|
|
2558
2651
|
var searchInput=document.querySelector('[data-filter-search]');
|
|
2559
2652
|
if(q){
|
|
@@ -2566,8 +2659,8 @@ details.ai-invocation[open]>summary .native-step-chevron{transform:rotate(90deg)
|
|
|
2566
2659
|
applyFilters();
|
|
2567
2660
|
});
|
|
2568
2661
|
}
|
|
2569
|
-
if(activeTags.size>0||activeReasons.size>0)renderActiveChips();
|
|
2570
|
-
if(activeStatus!==null||activeTags.size>0||activeReasons.size>0||activeSearch.length>0)applyFilters();
|
|
2662
|
+
if(activeTags.size>0||activeReasons.size>0||activeTriage.size>0)renderActiveChips();
|
|
2663
|
+
if(activeStatus!==null||activeTags.size>0||activeReasons.size>0||activeTriage.size>0||activeSearch.length>0)applyFilters();
|
|
2571
2664
|
})();
|
|
2572
2665
|
|
|
2573
2666
|
// Open #?testId=<id> deep links to the matching test card. Used by the
|
|
@@ -843,6 +843,40 @@ async function attachStepScreenshots(sharedState, testInfo) {
|
|
|
843
843
|
contentType: 'application/json',
|
|
844
844
|
});
|
|
845
845
|
}
|
|
846
|
+
/**
|
|
847
|
+
* Capture a live screenshot of the flow's final visual state at teardown (page
|
|
848
|
+
* still open) and persist it as a per-flow file — the single source of truth
|
|
849
|
+
* for "what the page looked like when this run ended." It is read both as the
|
|
850
|
+
* current run's failure screenshot (when this run failed) and as the baseline
|
|
851
|
+
* for a later failing run (when this run succeeded), keeping the two symmetric.
|
|
852
|
+
* See `fetchBaselineScreenshot` / `gatherTestFailureEvidence` in
|
|
853
|
+
* triageTestFailure.ts.
|
|
854
|
+
*
|
|
855
|
+
* Runs for any meaningful end state; skipped only for `skipped` tests (no real
|
|
856
|
+
* page state), when triage is disabled, or for V1 (legacy self-heal) tests.
|
|
857
|
+
* Best-effort and fails open.
|
|
858
|
+
*/
|
|
859
|
+
async function captureAndPersistFinalState(page, testInfo) {
|
|
860
|
+
if (testInfo.status === 'skipped' ||
|
|
861
|
+
process.env.DONOBU_TRIAGE_DISABLED === '1' ||
|
|
862
|
+
isV1Test(testInfo)) {
|
|
863
|
+
return;
|
|
864
|
+
}
|
|
865
|
+
const flowId = page._dnb?.donobuFlowMetadata?.id;
|
|
866
|
+
const persistence = page._dnb?.persistence;
|
|
867
|
+
if (!flowId || !persistence) {
|
|
868
|
+
return;
|
|
869
|
+
}
|
|
870
|
+
try {
|
|
871
|
+
const screenshot = await (0, triageTestFailure_1.captureLivePageScreenshot)(page);
|
|
872
|
+
if (screenshot) {
|
|
873
|
+
await persistence.setFlowFile(flowId, triageTestFailure_1.TRIAGE_PERSISTENCE_FILE_IDS.finalStateScreenshot, screenshot);
|
|
874
|
+
}
|
|
875
|
+
}
|
|
876
|
+
catch (error) {
|
|
877
|
+
Logger_1.appLogger.error(`Failed to persist final-state screenshot for flow ${flowId}.`, error);
|
|
878
|
+
}
|
|
879
|
+
}
|
|
846
880
|
async function finalizeTest(page, testInfo, logBuffer, videoOption) {
|
|
847
881
|
const sharedState = page._dnb;
|
|
848
882
|
// Kick off video persistence early in teardown. The actual file copy is
|
|
@@ -919,6 +953,10 @@ async function finalizeTest(page, testInfo, logBuffer, videoOption) {
|
|
|
919
953
|
catch (error) {
|
|
920
954
|
Logger_1.appLogger.error(`Error during cleanup for test ${testInfo.title}:`, error);
|
|
921
955
|
}
|
|
956
|
+
// Capture the flow's final visual state before the status-specific handling
|
|
957
|
+
// below: triage (failed branch) reads it as the failure screenshot, and a
|
|
958
|
+
// future failing run reads a successful run's copy as its baseline.
|
|
959
|
+
await captureAndPersistFinalState(page, testInfo);
|
|
922
960
|
if (testInfo.status === 'failed') {
|
|
923
961
|
if (isV1Test(testInfo)) {
|
|
924
962
|
if (isV1SelfHealingEnabled(testInfo) &&
|
|
@@ -28,9 +28,10 @@ import type { DonobuExtendedPage } from '../../page/DonobuExtendedPage';
|
|
|
28
28
|
* history from the persistence layer.
|
|
29
29
|
* 3. Fetches **historical runs** of the same flow (by name) from the flows manager to
|
|
30
30
|
* detect flakiness, regression patterns, and prior self-heal success.
|
|
31
|
-
* 4. Captures the **failure screenshot** (
|
|
32
|
-
*
|
|
33
|
-
*
|
|
31
|
+
* 4. Captures the **failure screenshot** (a live screenshot taken at triage time, while
|
|
32
|
+
* the page is still open during teardown, so it reflects the true final state) and the
|
|
33
|
+
* **baseline screenshot** (last tool call screenshot from the most recent successful
|
|
34
|
+
* historical run) for visual comparison.
|
|
34
35
|
* 5. Reads the source of the failing test case for contextual grounding.
|
|
35
36
|
* 6. Runs the **heuristic classifier** (`deriveHeuristicAssessment`) which uses
|
|
36
37
|
* rule-based pattern matching over errors, tool calls, stale-cache indicators,
|
|
@@ -70,7 +71,7 @@ import type { DonobuExtendedPage } from '../../page/DonobuExtendedPage';
|
|
|
70
71
|
* | Flow metadata | `DonobuExtendedPage._dnb` | Run mode, objective, allowed tools, timing |
|
|
71
72
|
* | Stale cache indicators | Derived from above | Whether page.ai cache staleness is the root cause |
|
|
72
73
|
* | Historical flow runs | `DonobuFlowsManager.getFlows` | Flakiness, regression patterns, prior self-heal |
|
|
73
|
-
* | Failure screenshot |
|
|
74
|
+
* | Failure screenshot | Live capture at triage time | True final visual state of the page when it failed |
|
|
74
75
|
* | Baseline screenshot | Last successful run's screenshot | Visual reference for what the page *should* look like |
|
|
75
76
|
* | Test source snippet | TypeScript AST parsing | The test's expectations and structure |
|
|
76
77
|
*
|
|
@@ -408,6 +409,14 @@ declare const TRIAGE_PERSISTENCE_FILE_IDS: {
|
|
|
408
409
|
readonly evidence: "triage-evidence.json";
|
|
409
410
|
readonly failureScreenshot: "triage-failure-screenshot.png";
|
|
410
411
|
readonly baselineScreenshot: "triage-baseline-screenshot.png";
|
|
412
|
+
/**
|
|
413
|
+
* Live screenshot of a flow's final visual state, captured at teardown while
|
|
414
|
+
* the page is still open. Persisted on successful runs so that a *later*
|
|
415
|
+
* failing run can use it as a true final-state baseline — symmetric with the
|
|
416
|
+
* failure screenshot, which is also a live end-of-test capture. Keyed per
|
|
417
|
+
* flow, like browser state.
|
|
418
|
+
*/
|
|
419
|
+
readonly finalStateScreenshot: "triage-final-state-screenshot.png";
|
|
411
420
|
};
|
|
412
421
|
/**
|
|
413
422
|
* Compresses a set of historical flow runs into an aggregate summary compact
|
|
@@ -420,6 +429,19 @@ declare function summarizeFlowHistory(flowName: string, flows: FlowMetadata[]):
|
|
|
420
429
|
* success, and whether the page.ai cache was recently validated.
|
|
421
430
|
*/
|
|
422
431
|
declare function deriveHistoricalSignals(history: FlowHistorySummary): HistoricalSignals;
|
|
432
|
+
/**
|
|
433
|
+
* Captures a fresh screenshot of the page's current visual state. Called at
|
|
434
|
+
* teardown (failure triage and successful-run baseline capture) while the
|
|
435
|
+
* page/context is still open, so it reflects the true *end state* of the test.
|
|
436
|
+
*
|
|
437
|
+
* This is deliberately preferred over the last Donobu tool-call screenshot:
|
|
438
|
+
* Playwright `expect`/`waitFor` are not tool calls, so the last tool-call image
|
|
439
|
+
* can predate the failing assertion and capture a transient state (e.g. a
|
|
440
|
+
* loading spinner that has since resolved), which misleads the vision model.
|
|
441
|
+
* Fails open — returns null if the page is gone or unresponsive (crash, closed
|
|
442
|
+
* context, hang), in which case the caller proceeds without a screenshot.
|
|
443
|
+
*/
|
|
444
|
+
declare function captureLivePageScreenshot(page: DonobuExtendedPage): Promise<Buffer | null>;
|
|
423
445
|
/**
|
|
424
446
|
* Builds the heuristic triage assessment by combining rule-based inference,
|
|
425
447
|
* contextual flags, and derived remediation guidance ahead of GPT enrichment.
|
|
@@ -432,5 +454,5 @@ declare function deriveHeuristicAssessment(testInfo: TestInfo, errorSummaries: E
|
|
|
432
454
|
declare function reconcileTreatmentPlan(plan: z.infer<typeof TreatmentPlan>, heuristics: HeuristicAssessment): z.infer<typeof TreatmentPlan>;
|
|
433
455
|
declare function gatherTestFailureEvidence(testInfo: TestInfo, page: DonobuExtendedPage, options?: GatherTestFailureEvidenceOptions): Promise<GatherTestFailureEvidenceResult | null>;
|
|
434
456
|
declare function generateTreatmentPlanFromEvidence(gptClient: GptClient, evidence: FailureEvidenceRecord): Promise<z.infer<typeof TreatmentPlan>>;
|
|
435
|
-
export { type AdditionalDataRequest, AdditionalDataRequestSchema, type AutomationDirectives, deriveHeuristicAssessment, deriveHistoricalSignals, type ErrorSummary, type FailureEvidenceRecord, type FailureReason, FailureReasonSchema, type FlowHistorySummary, gatherTestFailureEvidence, type GatherTestFailureEvidenceOptions, type GatherTestFailureEvidenceResult, generateTreatmentPlanFromEvidence, type HeuristicAssessment, type HistoricalFlowRun, type HistoricalSignals, reconcileTreatmentPlan, type RemediationCategory, type RemediationStep, RemediationStepSchema, type SanitizedFlowMetadata, type SummarizedToolCall, summarizeFlowHistory, TreatmentPlan, TRIAGE_PERSISTENCE_FILE_IDS, };
|
|
457
|
+
export { type AdditionalDataRequest, AdditionalDataRequestSchema, type AutomationDirectives, captureLivePageScreenshot, deriveHeuristicAssessment, deriveHistoricalSignals, type ErrorSummary, type FailureEvidenceRecord, type FailureReason, FailureReasonSchema, type FlowHistorySummary, gatherTestFailureEvidence, type GatherTestFailureEvidenceOptions, type GatherTestFailureEvidenceResult, generateTreatmentPlanFromEvidence, type HeuristicAssessment, type HistoricalFlowRun, type HistoricalSignals, reconcileTreatmentPlan, type RemediationCategory, type RemediationStep, RemediationStepSchema, type SanitizedFlowMetadata, type SummarizedToolCall, summarizeFlowHistory, TreatmentPlan, TRIAGE_PERSISTENCE_FILE_IDS, };
|
|
436
458
|
//# sourceMappingURL=triageTestFailure.d.ts.map
|