npm - donobu - Versions diffs - 2.46.6 → 2.47.0 - Mend

donobu 2.46.6 → 2.47.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

package/dist/assets/generated/version +1 -1
package/dist/cli/donobu-cli.d.ts +3 -0
package/dist/cli/donobu-cli.d.ts.map +1 -0
package/dist/cli/donobu-cli.js +1493 -0
package/dist/cli/donobu-cli.js.map +1 -0
package/dist/cli/playwright-json-to-markdown.js +43 -22
package/dist/cli/playwright-json-to-markdown.js.map +1 -1
package/dist/envVars.d.ts +23 -0
package/dist/envVars.d.ts.map +1 -1
package/dist/envVars.js +13 -0
package/dist/envVars.js.map +1 -1
package/dist/esm/assets/generated/version +1 -1
package/dist/esm/cli/donobu-cli.d.ts +3 -0
package/dist/esm/cli/donobu-cli.d.ts.map +1 -0
package/dist/esm/cli/donobu-cli.js +1493 -0
package/dist/esm/cli/donobu-cli.js.map +1 -0
package/dist/esm/cli/playwright-json-to-markdown.js +43 -22
package/dist/esm/cli/playwright-json-to-markdown.js.map +1 -1
package/dist/esm/envVars.d.ts +23 -0
package/dist/esm/envVars.d.ts.map +1 -1
package/dist/esm/envVars.js +13 -0
package/dist/esm/envVars.js.map +1 -1
package/dist/esm/lib/DonobuExtendedPage.d.ts +3 -0
package/dist/esm/lib/DonobuExtendedPage.d.ts.map +1 -1
package/dist/esm/lib/PageAi.d.ts.map +1 -1
package/dist/esm/lib/PageAi.js +7 -1
package/dist/esm/lib/PageAi.js.map +1 -1
package/dist/esm/lib/testExtension.d.ts.map +1 -1
package/dist/esm/lib/testExtension.js +53 -9
package/dist/esm/lib/testExtension.js.map +1 -1
package/dist/esm/lib/utils/triageTestFailure.d.ts +231 -0
package/dist/esm/lib/utils/triageTestFailure.d.ts.map +1 -0
package/dist/esm/lib/utils/triageTestFailure.js +1267 -0
package/dist/esm/lib/utils/triageTestFailure.js.map +1 -0
package/dist/lib/DonobuExtendedPage.d.ts +3 -0
package/dist/lib/DonobuExtendedPage.d.ts.map +1 -1
package/dist/lib/PageAi.d.ts.map +1 -1
package/dist/lib/PageAi.js +7 -1
package/dist/lib/PageAi.js.map +1 -1
package/dist/lib/testExtension.d.ts.map +1 -1
package/dist/lib/testExtension.js +53 -9
package/dist/lib/testExtension.js.map +1 -1
package/dist/lib/utils/triageTestFailure.d.ts +231 -0
package/dist/lib/utils/triageTestFailure.d.ts.map +1 -0
package/dist/lib/utils/triageTestFailure.js +1267 -0
package/dist/lib/utils/triageTestFailure.js.map +1 -0
package/package.json +2 -1

package/dist/esm/lib/utils/triageTestFailure.js ADDED Viewed

@@ -0,0 +1,1267 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // compiler-emitted interop helper: exposes m[k] on o under the name k2; an existing this.__createBinding is reused when present
+    if (k2 === undefined) k2 = k; // the target name defaults to the source name
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // no own descriptor, an accessor on a non-__esModule object, or a writable/configurable data property: replace it with a getter
+      desc = { enumerable: true, get: function() { return m[k]; } }; // the getter reads m[k] on each access instead of copying the value once
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) { // variant chosen when Object.create is falsy: plain one-time value copy
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // compiler-emitted interop helper: stores v as the "default" property of o
+    Object.defineProperty(o, "default", { enumerable: true, value: v }); // writable/configurable are not specified here, so defineProperty's defaults apply
+}) : function(o, v) { // variant chosen when Object.create is falsy: plain assignment
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () { // compiler-emitted interop helper backing the `__importStar(require(...))` calls below (fs, path)
+    var ownKeys = function(o) { // on first call, swaps itself for the real key lister and then delegates to it
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; // own enumerable keys only
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod; // objects flagged __esModule are returned untouched
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); // bind every own key except "default" onto the wrapper
+        __setModuleDefault(result, mod); // the original module object itself becomes result.default
+        return result;
+    };
+})();
+var __importDefault = (this && this.__importDefault) || function (mod) { // compiler-emitted interop helper backing `__importDefault(require("typescript"))` below
+    return (mod && mod.__esModule) ? mod : { "default": mod }; // non-__esModule values (including falsy ones) are wrapped as { default: mod }
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.TreatmentPlan = exports.AdditionalDataRequestSchema = exports.RemediationStepSchema = exports.FailureReasonSchema = void 0;
+exports.buildTreatmentPlanFromHeuristics = buildTreatmentPlanFromHeuristics;
+exports.gatherTestFailureEvidence = gatherTestFailureEvidence;
+exports.generateTreatmentPlanFromEvidence = generateTreatmentPlanFromEvidence;
+const fs = __importStar(require("fs"));
+const path = __importStar(require("path"));
+const crypto_1 = require("crypto");
+const Logger_1 = require("../../utils/Logger");
+const AnalyzePageTextTool_1 = require("../../tools/AnalyzePageTextTool");
+const MarkObjectiveCompleteTool_1 = require("../../tools/MarkObjectiveCompleteTool");
+const MarkObjectiveNotCompletableTool_1 = require("../../tools/MarkObjectiveNotCompletableTool");
+const SummarizeLearningsTool_1 = require("../../tools/SummarizeLearningsTool");
+const v4_1 = require("zod/v4");
+const typescript_1 = __importDefault(require("typescript"));
+/**
+ * Utilities for transforming a Playwright test failure into a structured treatment plan
+ * that Donobu Studio can surface to engineers or automation. The flow pivots between:
+ *  1. Gathering heuristics from the failing run (errors, attachments, Donobu AI history).
+ *  2. Asking the GPT-based triage agent for a canonical plan compliant with the schema.
+ *  3. Falling back to heuristics when the LLM cannot respond, so automation still receives
+ *     next steps. The helper functions below exist to keep these stages composable.
+ */
+exports.FailureReasonSchema = v4_1.z // exported zod enum of root-cause classifications; the describe() text that follows explains each member
+    .enum([
+    'UNKNOWN', // also the reason inferFailureReason returns when there is no diagnostic text at all
+    'AUTOMATION_SCRIPT_ISSUE',
+    'SELECTOR_REGRESSION',
+    'TIMING_OR_SYNCHRONISATION',
+    'ASSERTION_DRIFT',
+    'APPLICATION_DEFECT',
+    'AUTHENTICATION_FAILURE',
+    'ENVIRONMENT_CONFIGURATION',
+    'TEST_DATA_UNAVAILABLE',
+    'NETWORK_OR_DEPENDENCY',
+])
+    .describe(`UNKNOWN: Triggered when no concrete root cause can be inferred.
+AUTOMATION_SCRIPT_ISSUE: The scripted steps are incorrect or incomplete.
+SELECTOR_REGRESSION: UI element locators have changed and need updates.
+TIMING_OR_SYNCHRONISATION: Wait conditions or timing assumptions failed.
+ASSERTION_DRIFT: The expected outcomes in assertions no longer match reality.
+APPLICATION_DEFECT: The product behaviour is broken and must be fixed upstream.
+AUTHENTICATION_FAILURE: Login, MFA, or session preconditions were not met.
+ENVIRONMENT_CONFIGURATION: Test infra or env vars are misconfigured.
+TEST_DATA_UNAVAILABLE: Seed data or fixtures are missing or expired.
+NETWORK_OR_DEPENDENCY: External services or network connectivity failed.`);
+const RemediationCategorySchema = v4_1.z // module-private zod enum; used as the `category` field of RemediationStepSchema
+    .enum([
+    'RETRY_AUTOMATION',
+    'UPDATE_TEST_LOGIC',
+    'UPDATE_SELECTORS',
+    'ADJUST_TIMING',
+    'REFINE_ASSERTIONS',
+    'FIX_APPLICATION',
+    'VALIDATE_AUTHENTICATION',
+    'CHECK_ENVIRONMENT',
+    'REFRESH_TEST_DATA',
+    'STABILIZE_DEPENDENCIES',
+    'ESCALATE_MANUAL_REVIEW',
+    'UNKNOWN', // declared here, but none of the steps built by remediationStepsForReason use it
+])
+    .describe('Categorises the type of remediation that should be attempted so that downstream systems can pick appropriate playbooks.');
+exports.RemediationStepSchema = v4_1.z.object({ // exported schema for one remediation step; all three fields are required
+    category: RemediationCategorySchema, // must be one of the RemediationCategorySchema members
+    summary: v4_1.z
+        .string()
+        .describe('Short actionable label describing what must happen next.'),
+    details: v4_1.z
+        .string()
+        .describe('Specific guidance for performing the remediation step.'),
+});
+exports.AdditionalDataRequestSchema = v4_1.z.object({ // exported schema for one request for extra diagnostic context
+    description: v4_1.z
+        .string()
+        .describe('Information that would materially help confirm the root cause.'),
+    suggestedSources: v4_1.z
+        .array(v4_1.z.string())
+        .describe('Where to look for the requested data.')
+        .default([]), // an omitted suggestedSources is filled in as an empty array
+});
+const AutomationDirectivesSchema = v4_1.z // module-private schema of optional orchestration hints; attached to TreatmentPlan.automationDirectives
+    .object({
+    clearPageAiCache: v4_1.z
+        .boolean()
+        .describe('When true, clear cached Page.AI selectors before attempting an automated retry.')
+        .optional(),
+    targetTestFile: v4_1.z
+        .string()
+        .describe('Relative path to the Playwright spec that should be re-run when applying this plan.')
+        .optional(),
+    targetProject: v4_1.z
+        .string()
+        .describe('Playwright project name that should be used when re-running automation for this failure.')
+        .optional(),
+    additionalPlaywrightArgs: v4_1.z
+        .array(v4_1.z.string())
+        .describe('Extra Playwright CLI arguments the orchestrator should append when applying this plan.')
+        .optional(),
+})
+    .partial(); // NOTE(review): every field above is already .optional(), so this .partial() looks redundant - confirm before removing
+exports.TreatmentPlan = v4_1.z.object({ // exported top-level schema of a triage result
+    failureSummary: v4_1.z
+        .string()
+        .describe('A summary of the nature of the test failure'),
+    failureReason: exports.FailureReasonSchema, // one of the FailureReasonSchema enum members
+    confidence: v4_1.z
+        .number()
+        .min(0)
+        .max(1) // both bounds are inclusive
+        .describe('Confidence score between 0 and 1 estimating how likely the failureReason classification is correct.'),
+    observedIndicators: v4_1.z
+        .array(v4_1.z.string())
+        .describe('Signals and observations that led to the triage conclusion.')
+        .default([]), // the three array fields default to [] when omitted
+    remediationSteps: v4_1.z
+        .array(exports.RemediationStepSchema)
+        .describe('Ordered remediation steps to attempt.')
+        .default([]),
+    additionalDataRequests: v4_1.z
+        .array(exports.AdditionalDataRequestSchema)
+        .describe('Extra context that would help if remediation stalls.')
+        .default([]),
+    shouldRetryAutomation: v4_1.z
+        .boolean() // the three boolean flags are required: no default and no .optional()
+        .describe('True if the automation framework should attempt another run.'),
+    requiresCodeChange: v4_1.z
+        .boolean()
+        .describe('True when the Playwright test code likely needs updates.'),
+    requiresProductFix: v4_1.z
+        .boolean()
+        .describe('True when an application-level defect is suspected.'),
+    notes: v4_1.z
+        .string()
+        .describe('Optional free-form notes that do not fit the structured fields.')
+        .optional(),
+    automationDirectives: AutomationDirectivesSchema.optional(),
+});
+const MAX_SERIALIZED_STRING_LENGTH = 10000; // character cap; in the code visible here it bounds the snippet returned by extractTestCaseSnippet
+const MAX_TOOL_CALLS_TO_INCLUDE = 12; // summarizeToolCalls keeps only this many trailing tool calls
+const TRIAGE_EVIDENCE_SCHEMA_VERSION = 1; // not referenced in the portion of the file shown here - presumably stamped onto gathered evidence; confirm at the use site
+/**
+ * Caps a string at `maxLength` UTF-16 code units so text captured from tool calls or
+ * error messages fits within size limits. When the string is cut and there is room for
+ * it, the last three characters of the result are an ellipsis ("...") marking the cut.
+ *
+ * @param value String to cap; it is returned unchanged when it already fits.
+ * @param maxLength Maximum length of the result, ellipsis included.
+ * @returns `value` itself, or a shortened copy.
+ *
+ * NOTE(review): a negative `maxLength` reaches `slice(0, maxLength)`, which counts from the
+ * end of the string, so the result can be longer than intended - callers in this file pass
+ * positive literals, but confirm before reusing elsewhere.
+ * NOTE(review): slicing by code unit can cut a surrogate pair in half.
+ */
+function truncateString(value, maxLength) {
+    if (value.length <= maxLength) {
+        return value;
+    }
+    if (maxLength <= 3) { // no room for the three-character ellipsis, so hard-cut instead
+        return value.slice(0, maxLength);
+    }
+    return `${value.slice(0, maxLength - 3)}...`; // reserve three characters for the ellipsis
+}
+/**
+ * Serialises an arbitrary value as pretty-printed JSON (two-space indent) under two size
+ * limits: every string value visited during serialisation is cut to `stringMaxLength`, and
+ * the finished JSON text is then cut to `maxLength`. This keeps GPT prompts and persisted
+ * payloads bounded. Property names are not shortened; only string values pass through the
+ * replacer.
+ *
+ * @param value Value to serialise.
+ * @param maxLength Limit applied to the finished JSON text.
+ * @param stringMaxLength Limit applied to each string value (default 500).
+ * @returns The bounded JSON text, or `Failed to stringify: <name>: <message>` when an error
+ *   is thrown inside the try block (that fallback text is not length-limited).
+ *
+ * NOTE(review): JSON.stringify returns undefined for inputs such as `undefined` or a
+ * function; truncateString then throws on `.length`, so those inputs come back as the
+ * "Failed to stringify: TypeError: ..." fallback rather than a representation of the value.
+ */
+function safeStringify(value, maxLength, stringMaxLength = 500) {
+    try {
+        const serialized = JSON.stringify(value, (_, innerValue) => { // replacer runs for every visited value; the key argument is ignored
+            if (typeof innerValue === 'string') {
+                return truncateString(innerValue, stringMaxLength);
+            }
+            return innerValue;
+        }, 2);
+        return truncateString(serialized, maxLength); // second cap: the JSON text as a whole
+    }
+    catch (error) {
+        const err = error; // alias with no runtime effect (looks like a leftover of a TypeScript cast)
+        return `Failed to stringify: ${err.name}: ${err.message}`;
+    }
+}
+/**
+ * Builds a reduced copy of the persisted flow metadata containing only the fields listed
+ * below, so the triage agent is not handed the full metadata object. Field values are
+ * copied by reference; only `result` is transformed (into a JSON string).
+ *
+ * @param metadata Flow metadata object, or a falsy value when none is available.
+ * @returns `null` when `metadata` is falsy, otherwise a new object with the selected fields
+ *   plus `resultSummary` (JSON text of `metadata.result`, or `null` when `result` is falsy).
+ *
+ * NOTE(review): `envVars` is passed through as-is - confirm it holds variable names only and
+ * not secret values, since this helper is described as stripping sensitive data.
+ * NOTE(review): `JSON.stringify(metadata.result)` is not length-limited and would throw on a
+ * cyclic structure; safeStringify in this file would bound both - confirm whether that matters.
+ */
+function sanitizeFlowMetadata(metadata) {
+    if (!metadata) {
+        return null;
+    }
+    return {
+        id: metadata.id,
+        name: metadata.name,
+        runMode: metadata.runMode,
+        state: metadata.state,
+        targetWebsite: metadata.targetWebsite,
+        overallObjective: metadata.overallObjective,
+        allowedTools: metadata.allowedTools,
+        envVars: metadata.envVars,
+        startedAt: metadata.startedAt,
+        completedAt: metadata.completedAt,
+        maxToolCalls: metadata.maxToolCalls,
+        gptConfigName: metadata.gptConfigName,
+        defaultMessageDuration: metadata.defaultMessageDuration,
+        resultSummary: metadata.result ? JSON.stringify(metadata.result) : null, // the property is renamed from `result` and holds JSON text
+    };
+}
+/**
+ * Condenses the Donobu tool invocation history: keeps at most the last
+ * MAX_TOOL_CALLS_TO_INCLUDE entries (original order preserved) and maps each one to a flat
+ * summary so recent calls and their outcomes can inform triage without flooding the prompt.
+ *
+ * @param toolCalls Array of tool call records; each is read for `id`, `toolName`, `outcome`
+ *   (`isSuccessful`, `forLlm`), `page`, `startedAt` and `completedAt`.
+ * @returns New array of `{ id, toolName, success, outcomeSummary, durationMs, page,
+ *   startedAtIso, completedAtIso }` objects.
+ *
+ * NOTE(review): assumes every record has an `outcome` object and valid `startedAt` /
+ * `completedAt` timestamps - a missing outcome throws on property access, and
+ * `toISOString()` throws a RangeError for an invalid date. Confirm that unfinished tool
+ * calls can never reach this function.
+ */
+function summarizeToolCalls(toolCalls) {
+    return toolCalls.slice(-MAX_TOOL_CALLS_TO_INCLUDE).map((toolCall) => ({ // negative slice start = take from the end
+        id: toolCall.id,
+        toolName: toolCall.toolName,
+        success: toolCall.outcome.isSuccessful,
+        outcomeSummary: toolCall.outcome.forLlm,
+        durationMs: toolCall.completedAt - toolCall.startedAt, // presumably epoch milliseconds, given the Date construction below - confirm
+        page: toolCall.page,
+        startedAtIso: new Date(toolCall.startedAt).toISOString(),
+        completedAtIso: new Date(toolCall.completedAt).toISOString(),
+    }));
+}
+/**
+ * Reads the failing Playwright test file, parses it with the TypeScript compiler API, and
+ * returns the source text of the first call statement whose title matches `testName`, so
+ * the triage agent can compare the test's expectations with the failure.
+ *
+ * A statement qualifies when it is an expression statement calling `test(...)`, `it(...)`
+ * or any `test.<member>(...)`, and its first argument is a plain string literal. The title
+ * matches when it equals `testName` or when either string contains the other. The tree is
+ * walked depth-first and the first match wins.
+ *
+ * @param testFilePath Path of the spec file; a falsy value short-circuits to `null`.
+ * @param testName Title of the failing test to look for.
+ * @returns The matched statement's text (trimmed, capped at MAX_SERIALIZED_STRING_LENGTH),
+ *   or `null` when there is no path, no match, or reading/parsing fails (the failure is
+ *   logged as a warning, not rethrown).
+ *
+ * NOTE(review): `test.describe('...')` also satisfies the callee check, so a describe block
+ * whose title overlaps `testName` would be returned in full before any inner test is
+ * visited - confirm whether `testName` can contain the suite title.
+ * NOTE(review): an empty-string title matches every `testName`, and titles written as
+ * template literals are never matched.
+ */
+async function extractTestCaseSnippet(testFilePath, testName) {
+    if (!testFilePath) {
+        return null;
+    }
+    try {
+        const sourceCode = await fs.promises.readFile(testFilePath, 'utf8');
+        const sourceFile = typescript_1.default.createSourceFile(testFilePath, sourceCode, typescript_1.default.ScriptTarget.Latest, true); // parse only, no type-checking
+        let snippet = null;
+        const visit = (node) => {
+            if (snippet) { // a match was already found; stop descending
+                return;
+            }
+            if (typescript_1.default.isExpressionStatement(node) &&
+                typescript_1.default.isCallExpression(node.expression)) {
+                const expression = node.expression.expression; // the callee of the call
+                if ((typescript_1.default.isIdentifier(expression) &&
+                    (expression.text === 'test' || expression.text === 'it')) ||
+                    (typescript_1.default.isPropertyAccessExpression(expression) &&
+                        typescript_1.default.isIdentifier(expression.expression) &&
+                        expression.expression.text === 'test')) { // any member of `test`: the member name itself is not checked
+                    const args = node.expression.arguments;
+                    if (args.length > 0 && typescript_1.default.isStringLiteral(args[0])) {
+                        const title = args[0].text;
+                        if (title === testName ||
+                            testName.includes(title) ||
+                            title.includes(testName)) {
+                            snippet = sourceCode.substring(node.pos, node.end).trim(); // NOTE(review): node.pos appears to include leading comments/whitespace, so comments directly above the call can be part of the snippet - confirm
+                            return;
+                        }
+                    }
+                }
+            }
+            typescript_1.default.forEachChild(node, visit);
+        };
+        visit(sourceFile);
+        if (!snippet) {
+            return null;
+        }
+        return truncateString(snippet, MAX_SERIALIZED_STRING_LENGTH);
+    }
+    catch (error) {
+        Logger_1.appLogger.warn(`Failed to extract test case snippet from ${testFilePath}`, error); // best effort: a missing snippet must not fail triage
+        return null;
+    }
+}
+/**
+ * Normalises the error objects attached to a test result into compact, size-limited
+ * summaries that the LLM can rank and cross-reference with the tool history.
+ *
+ * Source of errors: `testInfo.errors` when it is non-empty, otherwise `testInfo.error`
+ * wrapped in an array, otherwise nothing.
+ *
+ * @param testInfo Object exposing `errors` and/or `error` (presumably Playwright's
+ *   TestInfo - confirm at the call site).
+ * @returns One summary object per error. Each may carry `message`, `stack`, `name`,
+ *   `value`, `actual`, `expected`, `location` and `snippet`; a field is present only when
+ *   the source error has it, and every field is a length-limited string.
+ */
+function buildErrorSummaries(testInfo) {
+    const rawErrors = (testInfo.errors ?? []).length > 0
+        ? testInfo.errors
+        : testInfo.error
+            ? [testInfo.error]
+            : [];
+    return rawErrors.map((err) => {
+        const summary = {};
+        if (typeof err?.message === 'string') {
+            summary.message = truncateString(err.message, 2000);
+        }
+        if (typeof err?.stack === 'string') {
+            summary.stack = truncateString(err.stack, 2000);
+        }
+        if (err?.name) {
+            summary.name = String(err.name);
+        }
+        if (err?.value !== undefined) { // value/actual/expected may be any type, so they are serialised rather than copied
+            summary.value = safeStringify(err.value, 2000);
+        }
+        if (err?.actual !== undefined) {
+            summary.actual = safeStringify(err.actual, 1000);
+        }
+        if (err?.expected !== undefined) {
+            summary.expected = safeStringify(err.expected, 1000);
+        }
+        if (err?.location) {
+            summary.location = safeStringify(err.location, 500); // stored as JSON text, not as an object
+        }
+        if (typeof err?.snippet === 'string') {
+            summary.snippet = truncateString(err.snippet, 1000);
+        }
+        return summary;
+    });
+}
+/**
+ * Translates an inferred failure reason into an ordered list of remediation steps so
+ * downstream automation and humans receive concrete next actions.
+ *
+ * @param reason Failure reason string (see FailureReasonSchema). 'UNKNOWN' and any
+ *   unrecognised value share the manual-review fallback.
+ * @param context Optional hints. Only `occurredDuringPageAi` is read, and only for
+ *   'SELECTOR_REGRESSION', where it selects cache deletion + retry ahead of manual
+ *   selector edits.
+ * @returns A freshly built array of `{ category, summary, details }` objects. Every branch
+ *   includes a 'RETRY_AUTOMATION' step; it comes last except in the page.ai selector case,
+ *   where the retry is attempted first.
+ */
+function remediationStepsForReason(reason, context = {}) {
+    switch (reason) {
+        case 'AUTOMATION_SCRIPT_ISSUE':
+            return [
+                {
+                    category: 'UPDATE_TEST_LOGIC',
+                    summary: 'Inspect the failing automation logic.',
+                    details: `Review the Playwright test and any Donobu tool invocations around the failure.
+Align the scripted steps with the intended business flow.`,
+                },
+                {
+                    category: 'RETRY_AUTOMATION',
+                    summary: 'Retry after updating the automation.',
+                    details: 'Re-run the test or Donobu flow once the script adjustments are in place to validate the fix.',
+                },
+            ];
+        case 'SELECTOR_REGRESSION':
+            if (context.occurredDuringPageAi) { // page.ai failure: try dropping the cache and retrying before touching selectors
+                return [
+                    {
+                        category: 'RETRY_AUTOMATION',
+                        summary: 'Delete the test cache and retry the test.',
+                        details: `Delete the cached donobu.json entry for this test so page.ai recalculates selectors against the live DOM,
+then rerun the automation to verify recovery.`,
+                    },
+                    {
+                        category: 'UPDATE_SELECTORS',
+                        summary: 'Update selectors if the autonomous retry still fails.',
+                        details: 'If cache invalidation and autonomous retry still fail, fall back to manually adjusting the selector strategy.',
+                    },
+                ];
+            }
+            else { // selector failure outside page.ai: fix the selectors first, then validate
+                return [
+                    {
+                        category: 'UPDATE_SELECTORS',
+                        summary: 'Refresh selectors for the affected elements.',
+                        details: 'Use page.find failovers or Playwright locators to update the targeting strategy for the broken element.',
+                    },
+                    {
+                        category: 'RETRY_AUTOMATION',
+                        summary: 'Validate selectors by re-running the test.',
+                        details: 'Execute the test or self-healing run to confirm the new selectors resolve the regression.',
+                    },
+                ];
+            }
+        case 'TIMING_OR_SYNCHRONISATION':
+            return [
+                {
+                    category: 'ADJUST_TIMING',
+                    summary: 'Stabilise async waits and retry logic.',
+                    details: 'Add explicit waits, polling, or guard conditions so the automation aligns with the application response times.',
+                },
+                {
+                    category: 'RETRY_AUTOMATION',
+                    summary: 'Run the test after timing adjustments.',
+                    details: 'Execute an automation retry to ensure the timing changes eliminate the flake.',
+                },
+            ];
+        case 'ASSERTION_DRIFT':
+            return [
+                {
+                    category: 'REFINE_ASSERTIONS',
+                    summary: 'Revisit expected outcomes and test assertions.',
+                    details: 'Cross-check the assertion expectations against the latest product behaviour and update the checks accordingly.',
+                },
+                {
+                    category: 'RETRY_AUTOMATION',
+                    summary: 'Confirm updated assertions.',
+                    details: 'Execute the test again once assertions have been updated to verify alignment with the application.',
+                },
+            ];
+        case 'APPLICATION_DEFECT': // the only branch with three steps: fix, manual verification, then retry
+            return [
+                {
+                    category: 'FIX_APPLICATION',
+                    summary: 'Log and prioritise the suspected product defect.',
+                    details: 'Capture reproduction steps using the failing automation and escalate to the owning development team.',
+                },
+                {
+                    category: 'ESCALATE_MANUAL_REVIEW',
+                    summary: 'Coordinate QA verification of the fix.',
+                    details: 'Have QA validate the defect manually and confirm once the product change is deployed.',
+                },
+                {
+                    category: 'RETRY_AUTOMATION',
+                    summary: 'Re-run automation after the product fix.',
+                    details: 'Execute the test to confirm the application change resolves the failure.',
+                },
+            ];
+        case 'AUTHENTICATION_FAILURE':
+            return [
+                {
+                    category: 'VALIDATE_AUTHENTICATION',
+                    summary: 'Verify credentials and auth flows.',
+                    details: 'Check login secrets, MFA configuration, and session state preconditions for the test environment.',
+                },
+                {
+                    category: 'RETRY_AUTOMATION',
+                    summary: 'Run after auth prerequisites are restored.',
+                    details: 'Execute the test once authentication is confirmed to be working.',
+                },
+            ];
+        case 'ENVIRONMENT_CONFIGURATION':
+            return [
+                {
+                    category: 'CHECK_ENVIRONMENT',
+                    summary: 'Inspect environment and configuration.',
+                    details: 'Validate environment variables, feature flags, and infrastructure dependencies referenced by the test.',
+                },
+                {
+                    category: 'RETRY_AUTOMATION',
+                    summary: 'Re-run once environment is stable.',
+                    details: 'Execute automation after configuration corrections to confirm stability.',
+                },
+            ];
+        case 'TEST_DATA_UNAVAILABLE':
+            return [
+                {
+                    category: 'REFRESH_TEST_DATA',
+                    summary: 'Restore or seed required test data.',
+                    details: 'Populate fixtures, reset accounts, or refresh records relied upon by the test.',
+                },
+                {
+                    category: 'RETRY_AUTOMATION',
+                    summary: 'Run after data restoration.',
+                    details: 'Execute automation with the refreshed data to ensure the flow passes.',
+                },
+            ];
+        case 'NETWORK_OR_DEPENDENCY':
+            return [
+                {
+                    category: 'STABILIZE_DEPENDENCIES',
+                    summary: 'Check external services or network health.',
+                    details: 'Verify the availability and latency of downstream services, APIs, or network connections.',
+                },
+                {
+                    category: 'RETRY_AUTOMATION',
+                    summary: 'Retry once dependencies recover.',
+                    details: 'Re-run the test when network conditions or dependency status return to normal.',
+                },
+            ];
+        case 'UNKNOWN': // falls through: 'UNKNOWN' and unrecognised reasons share the fallback below
+        default:
+            return [
+                {
+                    category: 'ESCALATE_MANUAL_REVIEW',
+                    summary: 'Perform deeper manual triage.',
+                    details: 'Inspect Playwright traces, Donobu tool history, and application logs to narrow down the root cause.',
+                },
+                {
+                    category: 'RETRY_AUTOMATION',
+                    summary: 'Retry once additional context is gathered.',
+                    details: 'After manual analysis, attempt another automation run to see if the issue reproduces consistently.',
+                },
+            ];
+    }
+}
+/**
+ * Lists the follow-up context worth collecting for a given failure reason, keeping human
+ * responders focused on the data most likely to unblock a fix when evidence is thin.
+ *
+ * @param reason Failure reason string (see FailureReasonSchema). Reasons without a
+ *   dedicated case - 'AUTOMATION_SCRIPT_ISSUE', 'ASSERTION_DRIFT', 'UNKNOWN' and any
+ *   unrecognised value - receive the generic trace/metadata/console request.
+ * @param _context Accepted for signature parity with remediationStepsForReason; never read
+ *   in this function.
+ * @returns A freshly built single-element array of `{ description, suggestedSources }`.
+ */
+function additionalDataRequestsForReason(reason, _context = {}) {
+    switch (reason) {
+        case 'SELECTOR_REGRESSION':
+            return [
+                {
+                    description: 'Collect DOM snapshots or screenshots around the failing selector.',
+                    suggestedSources: [
+                        'Playwright trace viewer',
+                        'Donobu tool call screenshots',
+                    ],
+                },
+            ];
+        case 'TIMING_OR_SYNCHRONISATION':
+            return [
+                {
+                    description: 'Gather network and performance timings for the affected actions.',
+                    suggestedSources: [
+                        'Browser devtools performance logs',
+                        'Backend request metrics',
+                    ],
+                },
+            ];
+        case 'APPLICATION_DEFECT':
+            return [
+                {
+                    description: 'Capture backend logs or Sentry events around the failure window.',
+                    suggestedSources: ['Application logging platform', 'APM traces'],
+                },
+            ];
+        case 'AUTHENTICATION_FAILURE':
+            return [
+                {
+                    description: 'Validate authentication tokens and secrets used by the test.',
+                    suggestedSources: ['Secret manager', 'Identity provider logs'],
+                },
+            ];
+        case 'ENVIRONMENT_CONFIGURATION':
+            return [
+                {
+                    description: 'Review environment variable values and feature flag states.',
+                    suggestedSources: ['Deployment configuration', 'Infra dashboards'],
+                },
+            ];
+        case 'TEST_DATA_UNAVAILABLE':
+            return [
+                {
+                    description: 'Check the lifecycle of the test accounts or fixtures.',
+                    suggestedSources: [
+                        'Test data management system',
+                        'Database snapshots',
+                    ],
+                },
+            ];
+        case 'NETWORK_OR_DEPENDENCY':
+            return [
+                {
+                    description: 'Inspect dependency uptime and recent incidents.',
+                    suggestedSources: ['Status pages', 'Network monitoring dashboards'],
+                },
+            ];
+        default: // generic request for every reason without a dedicated case above
+            return [
+                {
+                    description: 'Review Playwright trace, Donobu flow metadata, and browser console logs.',
+                    suggestedSources: [
+                        'Playwright trace viewer',
+                        'Donobu persistence layer',
+                    ],
+                },
+            ];
+    }
+}
+/**
+ * Applies keyword heuristics to the Playwright error text and Donobu tool outcomes to
+ * produce a first-pass failure classification with a one-line evidence note.
+ *
+ * All error messages, all error stacks and all tool outcome summaries are joined with
+ * newlines and lower-cased into one text. The checks below run against that text in a
+ * fixed order and the first one that matches decides the result, so the ORDER OF THE
+ * CHECKS IS PART OF THE BEHAVIOUR: selector, timing, assertion, authentication,
+ * environment, test data, network, application defect, then the generic fallbacks.
+ *
+ * @param errorSummaries Summaries as produced by buildErrorSummaries (`message` / `stack`
+ *   are read; missing values count as empty).
+ * @param toolCalls Summaries as produced by summarizeToolCalls (`outcomeSummary` is read).
+ * @returns `{ reason, evidence, confidence }` where `evidence` holds exactly one sentence
+ *   and `confidence` is a fixed number per branch (0.2 to 0.65).
+ *
+ * NOTE(review): the text is already lower-cased, so the `i` flags are redundant but harmless.
+ * NOTE(review): `.` does not cross newlines, so two-part patterns such as
+ * `(selector|...).*(not found|...)` only match when both parts sit on the same line.
+ * NOTE(review): numeric patterns (401, 403, 500, 502...) are unanchored and would also
+ * match inside longer numbers such as line/column offsets in a stack trace; `config` would
+ * match a path like playwright.config.ts. Confirm whether these false positives matter.
+ */
+function inferFailureReason(errorSummaries, toolCalls) {
+    const combinedText = [
+        ...errorSummaries.map((err) => err.message ?? ''),
+        ...errorSummaries.map((err) => err.stack ?? ''),
+        ...toolCalls.map((tc) => tc.outcomeSummary),
+    ]
+        .filter(Boolean) // drop empty or missing fragments before joining
+        .join('\n')
+        .toLowerCase();
+    const evidence = [];
+    const matches = (pattern) => pattern.test(combinedText); // the patterns carry no g/y flag, so test() keeps no state between calls
+    if (matches(/(selector|locator|element|node).*(not found|failed|undefined)/i)) {
+        evidence.push('Automation reported a missing selector or locator.');
+        return {
+            reason: 'SELECTOR_REGRESSION',
+            evidence,
+            confidence: 0.65,
+        };
+    }
+    if (matches(/timed out|timeout|wait.*exceeded|waiting for/i) ||
+        matches(/promise.*did not resolve/i)) {
+        evidence.push('Timeout or waiting condition was detected in the failure.');
+        return {
+            reason: 'TIMING_OR_SYNCHRONISATION',
+            evidence,
+            confidence: 0.6,
+        };
+    }
+    if (matches(/expect(ed)?|AssertionError|toEqual|toBe|received|expected/i) &&
+        !matches(/network|timeout/)) { // "timeout" can no longer occur here (the timing check above already returned); only "network" still vetoes
+        evidence.push('Assertion mismatch detected in error details.');
+        return {
+            reason: 'ASSERTION_DRIFT',
+            evidence,
+            confidence: 0.55,
+        };
+    }
+    if (matches(/401|403|unauthori[sz]ed|forbidden|login|credential|token/i)) {
+        evidence.push('Authentication-related error message detected.');
+        return {
+            reason: 'AUTHENTICATION_FAILURE',
+            evidence,
+            confidence: 0.6,
+        };
+    }
+    if (matches(/env(var|iron)|environment variable|configuration|config/i) ||
+        matches(/missing .*config|misconfig/i)) { // this second test is subsumed by the bare `config` alternative above
+        evidence.push('Environment configuration issue referenced in failure text.');
+        return {
+            reason: 'ENVIRONMENT_CONFIGURATION',
+            evidence,
+            confidence: 0.55,
+        };
+    }
+    if (matches(/test data|fixture|seed data|record not found|no data/i) ||
+        matches(/entity.*not found/)) {
+        evidence.push('Missing or stale test data referenced.');
+        return {
+            reason: 'TEST_DATA_UNAVAILABLE',
+            evidence,
+            confidence: 0.55,
+        };
+    }
+    if (matches(/ECONN|ENOTFOUND|EAI_AGAIN|network|socket hang up|connection/i) ||
+        matches(/502|503|504|gateway|dns/i)) {
+        evidence.push('Network or dependency outage detected.');
+        return {
+            reason: 'NETWORK_OR_DEPENDENCY',
+            evidence,
+            confidence: 0.6,
+        };
+    }
+    if (matches(/500|internal server error|TypeError|ReferenceError|Unhandled/i)) {
+        evidence.push('Application-side error or exception detected.');
+        return {
+            reason: 'APPLICATION_DEFECT',
+            evidence,
+            confidence: 0.6,
+        };
+    }
+    if (combinedText.trim().length > 0) { // some text exists but no keyword matched: blame the script, with low confidence
+        evidence.push('Falling back to automation script issue from generic error content.');
+        return {
+            reason: 'AUTOMATION_SCRIPT_ISSUE',
+            evidence,
+            confidence: 0.4,
+        };
+    }
+    evidence.push('No diagnostic text available, marking as unknown.');
+    return {
+        reason: 'UNKNOWN',
+        evidence,
+        confidence: 0.2,
+    };
+}
+const PAGE_AI_STACK_MARKERS = [ // substrings searched for in lower-cased error message/stack text, hence written in lower case
+    'page.ai',
+    'pageairunner',
+    'pageaiexception',
+    'donobuflow',
+    'donobuextendedpage.ai',
+];
+const PAGE_AI_TOOL_MARKERS = new Set([ // tool names whose presence in the tool history is treated as a sign that page.ai ran; NOTE(review): whether these tools run only under page.ai is not visible from this file
+    AnalyzePageTextTool_1.AnalyzePageTextTool.NAME,
+    SummarizeLearningsTool_1.SummarizeLearningsTool.NAME,
+    MarkObjectiveCompleteTool_1.MarkObjectiveCompleteTool.NAME,
+    MarkObjectiveNotCompletableTool_1.MarkObjectiveNotCompletableTool.NAME,
+]);
+/**
+ * Detects whether the failure happened during Donobu's autonomous page.ai routines, which
+ * signals that cached selectors or AI-driven steps may need to be reset.
+ *
+ * Two independent signals are checked, and either one is sufficient:
+ *  1. any error's message or stack contains one of PAGE_AI_STACK_MARKERS (compared
+ *     case-insensitively by lower-casing the text);
+ *  2. any summarised tool call has a `toolName` listed in PAGE_AI_TOOL_MARKERS.
+ *
+ * @param errorSummaries Error summaries; `message` and `stack` are read, missing values
+ *   count as empty strings.
+ * @param toolCalls Tool call summaries; only `toolName` is read.
+ * @returns `true` when either signal is present, otherwise `false`.
+ */
+function didFailureOccurDuringPageAi(errorSummaries, toolCalls) {
+    const stackIndicator = errorSummaries.some((err) => {
+        const blob = `${err.message ?? ''}\n${err.stack ?? ''}`.toLowerCase(); // message and stack are searched together
+        return PAGE_AI_STACK_MARKERS.some((marker) => blob.includes(marker));
+    });
+    if (stackIndicator) {
+        return true;
+    }
+    return toolCalls.some((tc) => PAGE_AI_TOOL_MARKERS.has(tc.toolName)); // exact, case-sensitive name match against the Set
+}
+/**
+ * Analyzes multiple signals to determine if the failure is likely caused by stale
+ * page.ai instruction cache versus a legitimate test failure. This nuanced detection
+ * helps differentiate between:
+ *   - Stale cache: cached actions succeeded but were semantically wrong (clicked wrong elements)
+ *   - Legitimate failure: cache was correct, but assertions reveal real issues
+ *
+ * The hardest case: page.ai uses stale cache, actions succeed (selectors still exist),
+ * but the page was redesigned so we're interacting with wrong elements. This manifests
+ * as successful page.ai execution followed by assertion failures about unexpected state.
+ */
+function analyzeStaleCacheIndicators(testInfo, errorSummaries, toolCalls, flowMetadata) {
+    // Check if the flow ran in DETERMINISTIC mode (meaning cache was used)
+    const usedDeterministicMode = flowMetadata?.runMode === 'DETERMINISTIC';
+    // Check if this is a retry attempt (cache would have been invalidated)
+    const isRetryAttempt = testInfo.retry > 0;
+    // Check if selector issues occurred during page.ai tool execution
+    const selectorFailedDuringPageAi = didFailureOccurDuringPageAi(errorSummaries, toolCalls) &&
+        toolCalls.some((tc) => {
+            const isPageAiTool = PAGE_AI_TOOL_MARKERS.has(tc.toolName);
+            const hasSelectorIssue = !tc.success &&
+                /(selector|locator|element|node).*(not found|failed|undefined)/i.test(tc.outcomeSummary);
+            return isPageAiTool && hasSelectorIssue;
+        });
+    // Check if tool calls show selector issues (more broadly)
+    const toolCallsShowSelectorIssues = toolCalls.some((tc) => !tc.success &&
+        /(selector|locator|element|node).*(not found|failed|undefined|visible|attached)/i.test(tc.outcomeSummary));
+    // Check for quick failure pattern (DETERMINISTIC mode failures are typically fast)
+    // When cache is stale, the first cached action often fails quickly
+    const quickFailurePattern = usedDeterministicMode &&
+        testInfo.duration < 5000 && // Failed in less than 5 seconds
+        toolCalls.length > 0 &&
+        toolCalls.length < 5; // Few tool calls before failure
+    // Check if page.ai completed successfully but subsequent assertions failed
+    const pageAiToolCalls = toolCalls.filter((tc) => PAGE_AI_TOOL_MARKERS.has(tc.toolName));
+    const hasPageAiCalls = pageAiToolCalls.length > 0;
+    const allPageAiCallsSucceeded = hasPageAiCalls && pageAiToolCalls.every((tc) => tc.success);
+    const hasPostPageAiFailure = errorSummaries.length > 0 &&
+        errorSummaries.some((err) => {
+            const blob = `${err.message ?? ''}\n${err.stack ?? ''}`.toLowerCase();
+            return (/expect(ed)?|assertion/i.test(blob) &&
+                !PAGE_AI_STACK_MARKERS.some((marker) => blob.includes(marker)));
+        });
+    const assertionsFailedAfterSuccessfulPageAi = allPageAiCallsSucceeded && hasPostPageAiFailure;
+    // Check if failure occurred after page.ai completed (not during)
+    const failedAfterPageAiCompleted = hasPageAiCalls &&
+        !didFailureOccurDuringPageAi(errorSummaries, toolCalls) &&
+        allPageAiCallsSucceeded;
+    return {
+        usedDeterministicMode,
+        selectorFailedDuringPageAi,
+        failedAfterPageAiCompleted,
+        isRetryAttempt,
+        quickFailurePattern,
+        toolCallsShowSelectorIssues,
+        assertionsFailedAfterSuccessfulPageAi,
+    };
+}
+/**
+ * Maps each failure reason to downstream orchestration attributes and adapts
+ * them when the failure happened during page.ai execution.
+ */
+function reasonAttributesFor(reason, context) {
+    const { occurredDuringPageAi } = context;
+    const base = {
+        UNKNOWN: {
+            shouldRetry: true,
+            requiresCodeChange: false,
+            requiresProductFix: false,
+        },
+        AUTOMATION_SCRIPT_ISSUE: {
+            shouldRetry: false,
+            requiresCodeChange: true,
+            requiresProductFix: false,
+        },
+        SELECTOR_REGRESSION: {
+            shouldRetry: false,
+            requiresCodeChange: true,
+            requiresProductFix: false,
+        },
+        TIMING_OR_SYNCHRONISATION: {
+            shouldRetry: false,
+            requiresCodeChange: true,
+            requiresProductFix: false,
+        },
+        ASSERTION_DRIFT: {
+            shouldRetry: false,
+            requiresCodeChange: true,
+            requiresProductFix: false,
+        },
+        APPLICATION_DEFECT: {
+            shouldRetry: false,
+            requiresCodeChange: false,
+            requiresProductFix: true,
+        },
+        AUTHENTICATION_FAILURE: {
+            shouldRetry: false,
+            requiresCodeChange: false,
+            requiresProductFix: false,
+        },
+        ENVIRONMENT_CONFIGURATION: {
+            shouldRetry: false,
+            requiresCodeChange: false,
+            requiresProductFix: false,
+        },
+        TEST_DATA_UNAVAILABLE: {
+            shouldRetry: false,
+            requiresCodeChange: false,
+            requiresProductFix: false,
+        },
+        NETWORK_OR_DEPENDENCY: {
+            shouldRetry: true,
+            requiresCodeChange: false,
+            requiresProductFix: false,
+        },
+    };
+    const attributes = { ...base[reason] };
+    if (reason === 'SELECTOR_REGRESSION' && occurredDuringPageAi) {
+        attributes.shouldRetry = true;
+        attributes.requiresCodeChange = false;
+    }
+    return attributes;
+}
+/**
+ * Builds the heuristic triage assessment by combining rule-based inference,
+ * contextual flags, and derived remediation guidance ahead of GPT enrichment.
+ *
+ * @param testInfo Test info; `retry`, `duration`, `title` and `status` are read
+ *   here or by the helpers called below.
+ * @param errorSummaries Error records with optional `message` / `stack`.
+ * @param toolCalls Summarised tool calls for the flow.
+ * @param flowMetadata Flow metadata passed through to the stale-cache analysis
+ *   and the remediation / data-request builders.
+ * @returns The assessment: failure reason, evidence, confidence, summary,
+ *   retry / code-change / product-fix flags, remediation steps, additional
+ *   data requests, optional notes, and the raw stale-cache indicators.
+ */
+function deriveHeuristicAssessment(testInfo, errorSummaries, toolCalls, flowMetadata) {
+    const inference = inferFailureReason(errorSummaries, toolCalls);
+    const occurredDuringPageAi = didFailureOccurDuringPageAi(errorSummaries, toolCalls);
+    const staleCacheIndicators = analyzeStaleCacheIndicators(testInfo, errorSummaries, toolCalls, flowMetadata);
+    // First error with a non-empty message wins; otherwise a generic sentence.
+    const primaryErrorMessage = errorSummaries.find((err) => err.message)?.message ??
+        `Test "${testInfo.title}" failed without an explicit error message.`;
+    // Compute a stale cache likelihood score based on multiple indicators
+    const staleCacheScore = computeStaleCacheScore(staleCacheIndicators);
+    // Adjust the initial inference based on stale cache analysis
+    let finalReason = inference.reason;
+    let finalConfidence = inference.confidence;
+    // Copy so the inference result's evidence array is not mutated by the pushes below.
+    const evidence = [...inference.evidence];
+    const notes = [];
+    // If we have strong evidence of stale cache, upgrade to SELECTOR_REGRESSION
+    // with higher confidence, or adjust existing SELECTOR_REGRESSION assessment
+    if (staleCacheScore.isLikelyStaleCache &&
+        inference.reason === 'SELECTOR_REGRESSION') {
+        // Boost confidence when multiple stale cache indicators align
+        finalConfidence = Math.min(0.85, inference.confidence + 0.2);
+        evidence.push(`Strong stale cache indicators: ${staleCacheScore.supportingEvidence.join(', ')}`);
+        notes.push(`High confidence stale cache scenario detected (score: ${staleCacheScore.score.toFixed(2)}). Cache invalidation + autonomous retry strongly recommended.`);
+    }
+    else if (staleCacheScore.isLikelyStaleCache &&
+        staleCacheIndicators.toolCallsShowSelectorIssues) {
+        // Even if not initially classified as SELECTOR_REGRESSION, upgrade if stale cache is likely
+        finalReason = 'SELECTOR_REGRESSION';
+        finalConfidence = 0.75;
+        evidence.push('Reclassified as SELECTOR_REGRESSION based on stale cache analysis.');
+        evidence.push(...staleCacheScore.supportingEvidence);
+        notes.push(`Stale cache detected (score: ${staleCacheScore.score.toFixed(2)}). The cached page.ai instructions are likely outdated.`);
+    }
+    else if (staleCacheScore.isLikelyLegitimateFailure) {
+        // We have strong evidence this is NOT a stale cache issue
+        evidence.push(`Stale cache unlikely: ${staleCacheScore.contradictingEvidence.join(', ')}`);
+        if (inference.reason === 'SELECTOR_REGRESSION' && occurredDuringPageAi) {
+            // Downgrade confidence if we think cache is NOT the issue
+            finalConfidence = Math.max(0.4, inference.confidence - 0.15);
+            notes.push('Selector regression detected, but stale cache is unlikely. This may be a legitimate test failure requiring manual review.');
+        }
+    }
+    if (testInfo.status === 'timedOut') {
+        notes.push('Playwright marked the test as timed out.');
+    }
+    // Legacy note for backward compatibility
+    // NOTE: keyed on the original inference.reason, not finalReason, so a
+    // failure reclassified above does not receive this note.
+    if (inference.reason === 'SELECTOR_REGRESSION' &&
+        occurredDuringPageAi &&
+        !staleCacheScore.isLikelyLegitimateFailure) {
+        notes.push('Selector failure occurred while executing page.ai steps. Deleting the cached tool calls enables a fresh autonomous attempt.');
+        evidence.push('Stack trace or tool history indicates selector failure inside a page.ai call.');
+    }
+    // Flags are derived from the final (possibly reclassified) reason.
+    const attributes = reasonAttributesFor(finalReason, {
+        occurredDuringPageAi,
+    });
+    return {
+        failureReason: finalReason,
+        evidence,
+        confidence: finalConfidence,
+        failureSummary: primaryErrorMessage,
+        shouldRetryAutomation: attributes.shouldRetry,
+        requiresCodeChange: attributes.requiresCodeChange,
+        requiresProductFix: attributes.requiresProductFix,
+        remediationSteps: remediationStepsForReason(finalReason, {
+            flowMetadata,
+            occurredDuringPageAi,
+        }),
+        additionalDataRequests: additionalDataRequestsForReason(finalReason, {
+            flowMetadata,
+            occurredDuringPageAi,
+        }),
+        // Notes are joined into one string; undefined when there are none.
+        notes: notes.length > 0 ? notes.join(' ') : undefined,
+        occurredDuringPageAi,
+        staleCacheIndicators,
+    };
+}
+/**
+ * Computes a composite score indicating the likelihood that a failure is due to
+ * stale cache versus a legitimate test issue. Returns both the score and supporting
+ * evidence for transparency.
+ *
+ * KEY INSIGHT: The hardest case to detect is when stale cache causes semantic errors:
+ * - Cached selectors still exist (no selector errors thrown)
+ * - Actions execute "successfully" (clicks, inputs work)
+ * - BUT we're interacting with WRONG elements due to page redesign
+ * - This manifests as: page.ai succeeds + assertions fail with unexpected state
+ *
+ * This is ambiguous! Could be stale cache OR legitimate test failure. We use additional
+ * context like retry status and DETERMINISTIC mode to increase confidence.
+ */
+function computeStaleCacheScore(indicators) {
+    let score = 0;
+    const supportingEvidence = [];
+    const contradictingEvidence = [];
+    // STRONG positive indicator: Used DETERMINISTIC mode
+    // This is a prerequisite - if no cache was used, can't be stale cache issue
+    if (indicators.usedDeterministicMode) {
+        score += 0.4;
+        supportingEvidence.push('Used DETERMINISTIC mode (cache was active and could be stale)');
+    }
+    else {
+        // Strong negative - if no cache used, cannot be stale cache issue
+        score -= 0.5;
+        contradictingEvidence.push('Did not use DETERMINISTIC mode (no cache was active)');
+    }
+    // Obvious stale cache: selector fails during execution
+    if (indicators.selectorFailedDuringPageAi) {
+        score += 0.3;
+        supportingEvidence.push('Selector failed during page.ai execution (cached selector no longer exists)');
+    }
+    if (indicators.quickFailurePattern) {
+        score += 0.1;
+        supportingEvidence.push('Quick failure pattern (typical of deterministic replay issues)');
+    }
+    if (indicators.toolCallsShowSelectorIssues && !indicators.isRetryAttempt) {
+        score += 0.15;
+        supportingEvidence.push('Tool calls show selector issues on first attempt');
+    }
+    // THE CRITICAL CASE: page.ai succeeded but assertions failed
+    // This is AMBIGUOUS - could be stale cache OR legitimate failure
+    // We treat it as MILD evidence of stale cache when combined with DETERMINISTIC mode
+    if (indicators.assertionsFailedAfterSuccessfulPageAi &&
+        indicators.usedDeterministicMode &&
+        !indicators.isRetryAttempt) {
+        score += 0.25;
+        supportingEvidence.push('CRITICAL: Page.ai succeeded but assertions failed (cached actions may have interacted with wrong elements due to page redesign)');
+        supportingEvidence.push('This is ambiguous - could be stale cache OR legitimate test failure. Retry with fresh cache is low-cost way to rule out staleness.');
+    }
+    // STRONG negative indicator: Already retried (cache was invalidated)
+    // If we already ran with fresh cache and still failed, NOT a stale cache issue
+    if (indicators.isRetryAttempt) {
+        score -= 0.8;
+        contradictingEvidence.push('STRONG: Already on retry attempt (cache was invalidated, this is NOT a stale cache issue)');
+    }
+    // Normalize score to 0-1 range
+    const normalizedScore = Math.max(0, Math.min(1, score));
+    // Adjust thresholds based on the ambiguous nature of some scenarios
+    // We want to be conservative - if uncertain, recommend retry (low cost, high value)
+    return {
+        score: normalizedScore,
+        isLikelyStaleCache: normalizedScore >= 0.5, // Lower threshold for retry recommendation
+        isLikelyLegitimateFailure: normalizedScore <= 0.2, // High threshold for ruling out cache
+        supportingEvidence,
+        contradictingEvidence,
+    };
+}
+/**
+ * Provides a canonical retry instruction that flushes page.ai caches before
+ * automation proceeds, ensuring we do not stack manual work on stale selectors.
+ */
+function buildPageAiSelectorRetryStep() {
+    return {
+        category: 'RETRY_AUTOMATION',
+        summary: 'Delete the test cache and retry the test.',
+        details: `Remove the cached donobu.json entry so page.ai regenerates selectors against the live DOM,
+then rerun the automation to confirm recovery before escalating.`,
+    };
+}
+function deriveAutomationDirectives(base, heuristics, resolvedFailureReason) {
+    const directives = {
+        ...(base ?? {}),
+    };
+    const staleSelectorRegression = heuristics.occurredDuringPageAi &&
+        (resolvedFailureReason === 'SELECTOR_REGRESSION' ||
+            heuristics.failureReason === 'SELECTOR_REGRESSION');
+    if (staleSelectorRegression) {
+        directives.clearPageAiCache = true;
+    }
+    return Object.keys(directives).length > 0 ? directives : undefined;
+}
+/**
+ * Aligns the GPT-authored treatment plan with heuristic safeguards, especially
+ * for page.ai regressions where we prefer automated retries over manual toil.
+ */
+function reconcileTreatmentPlan(plan, heuristics) {
+    const adjusted = {
+        ...plan,
+        remediationSteps: [...plan.remediationSteps],
+        additionalDataRequests: [...plan.additionalDataRequests],
+        observedIndicators: [...plan.observedIndicators],
+    };
+    if (heuristics.occurredDuringPageAi &&
+        plan.failureReason === 'SELECTOR_REGRESSION') {
+        adjusted.shouldRetryAutomation = true;
+        adjusted.requiresCodeChange = false;
+        const retryStep = buildPageAiSelectorRetryStep();
+        const hasRetryStep = adjusted.remediationSteps.some((step) => step.summary.toLowerCase() === retryStep.summary.toLowerCase() ||
+            step.details.toLowerCase().includes('donobu') ||
+            step.details.toLowerCase().includes('page.ai'));
+        if (!hasRetryStep) {
+            adjusted.remediationSteps = [retryStep, ...adjusted.remediationSteps];
+        }
+        else {
+            adjusted.remediationSteps = adjusted.remediationSteps.map((step) => step.summary.toLowerCase() === retryStep.summary.toLowerCase()
+                ? { ...retryStep }
+                : step);
+        }
+    }
+    adjusted.automationDirectives = deriveAutomationDirectives(plan.automationDirectives, heuristics, adjusted.failureReason);
+    return adjusted;
+}
+/**
+ * Ensures callers always receive a complete treatment plan even when the GPT
+ * orchestration fails, leaning entirely on the heuristic assessment data.
+ */
+function buildFallbackTreatmentPlan(heuristics) {
+    return {
+        failureSummary: heuristics.failureSummary,
+        failureReason: heuristics.failureReason,
+        confidence: heuristics.confidence,
+        observedIndicators: heuristics.evidence,
+        remediationSteps: heuristics.remediationSteps,
+        additionalDataRequests: heuristics.additionalDataRequests,
+        shouldRetryAutomation: heuristics.shouldRetryAutomation,
+        requiresCodeChange: heuristics.requiresCodeChange,
+        requiresProductFix: heuristics.requiresProductFix,
+        notes: heuristics.notes,
+        automationDirectives: deriveAutomationDirectives(undefined, heuristics, heuristics.failureReason),
+    };
+}
+function sanitizeFilenameSegment(value, maxLength = 48) {
+    const sanitized = value
+        .toLowerCase()
+        .replace(/[^a-z0-9]+/gi, '-')
+        .replace(/-+/g, '-')
+        .replace(/^-|-$/g, '')
+        .slice(0, maxLength);
+    return sanitized.length > 0 ? sanitized : 'test';
+}
+function resolveRunDirectory(testInfo, options) {
+    if (options?.runDirectory) {
+        return options.runDirectory;
+    }
+    const envRunDir = process.env.DONOBU_TRIAGE_RUN_DIR;
+    if (envRunDir && envRunDir.trim().length > 0) {
+        return envRunDir;
+    }
+    const envBaseDir = process.env.DONOBU_TRIAGE_OUTPUT_BASE_DIR;
+    if (envBaseDir && envBaseDir.trim().length > 0) {
+        const runId = process.env.DONOBU_TRIAGE_RUN_ID ?? 'adhoc-run';
+        return path.join(envBaseDir, runId);
+    }
+    const configOutputDir = typeof testInfo.config?.outputDir === 'string'
+        ? testInfo.config.outputDir
+        : undefined;
+    const fallbackBase = configOutputDir
+        ? path.resolve(configOutputDir)
+        : path.resolve(process.cwd(), 'test-results');
+    return path.join(fallbackBase, 'donobu-triage');
+}
+async function collectFailureContext(testInfo, page) {
+    const attachments = testInfo.attachments?.map((attachment) => ({
+        name: attachment.name,
+        contentType: attachment.contentType,
+        path: 'path' in attachment ? (attachment.path ?? null) : null,
+    })) ?? [];
+    const errorSummaries = buildErrorSummaries(testInfo);
+    let flowMetadata = null;
+    let recentToolCalls = [];
+    const flowId = page._dnb?.donobuFlowMetadata?.id;
+    const persistence = page._dnb?.persistence;
+    if (flowId && persistence) {
+        try {
+            flowMetadata = await persistence.getFlowMetadataById(flowId);
+        }
+        catch (error) {
+            Logger_1.appLogger.warn(`Failed to load persisted flow metadata for ${flowId}, using in-memory snapshot.`, error);
+            flowMetadata = page._dnb?.donobuFlowMetadata ?? null;
+        }
+        try {
+            const toolCalls = await persistence.getToolCalls(flowId);
+            recentToolCalls = summarizeToolCalls(toolCalls);
+        }
+        catch (error) {
+            Logger_1.appLogger.warn(`Failed to fetch tool call history for flow ${flowId}.`, error);
+            recentToolCalls = [];
+        }
+    }
+    else {
+        flowMetadata = page._dnb?.donobuFlowMetadata ?? null;
+    }
+    const sanitizedMetadata = sanitizeFlowMetadata(flowMetadata);
+    const heuristics = deriveHeuristicAssessment(testInfo, errorSummaries, recentToolCalls, sanitizedMetadata);
+    const testSnippet = await extractTestCaseSnippet(testInfo.file, testInfo.title);
+    return {
+        testCase: {
+            title: testInfo.title,
+            file: testInfo.file,
+            projectName: testInfo.project.name,
+            status: testInfo.status,
+            expectedStatus: testInfo.expectedStatus,
+            retry: testInfo.retry,
+            repeatEachIndex: testInfo.repeatEachIndex,
+            workerIndex: testInfo.workerIndex,
+            timeout: testInfo.timeout,
+            duration: testInfo.duration,
+            annotations: testInfo.annotations,
+        },
+        failure: {
+            errors: errorSummaries,
+            attachments: attachments,
+        },
+        donobuFlow: {
+            metadata: sanitizedMetadata,
+            recentToolCalls: recentToolCalls,
+        },
+        testSnippet,
+        heuristics,
+    };
+}
+function createEvidenceFilePath(testInfo, runDirectory, evidenceId) {
+    const projectSegment = sanitizeFilenameSegment(testInfo.project.name);
+    const titleSegment = sanitizeFilenameSegment(testInfo.title);
+    const workerSegment = `w${testInfo.workerIndex}-r${testInfo.retry}-e${testInfo.repeatEachIndex}`;
+    const filename = `failure-evidence-${projectSegment}-${titleSegment}-${workerSegment}-${evidenceId}.json`;
+    return path.join(runDirectory, filename);
+}
+function buildTreatmentPlanFromHeuristics(evidence) {
+    return buildFallbackTreatmentPlan(evidence.failureContext.heuristics);
+}
+async function gatherTestFailureEvidence(testInfo, page, options = {}) {
+    if (!options.force && process.env.DONOBU_TRIAGE_DISABLED === '1') {
+        Logger_1.appLogger.debug('Skipping Donobu triage evidence gathering because DONOBU_TRIAGE_DISABLED=1.');
+        return null;
+    }
+    const failureContext = await collectFailureContext(testInfo, page);
+    const runDirectory = resolveRunDirectory(testInfo, options);
+    const evidenceId = (0, crypto_1.randomUUID)();
+    const evidence = {
+        schemaVersion: TRIAGE_EVIDENCE_SCHEMA_VERSION,
+        evidenceId,
+        runId: process.env.DONOBU_TRIAGE_RUN_ID ?? null,
+        runDirectory,
+        collectedAtIso: new Date().toISOString(),
+        failureContext,
+    };
+    const persistToDisk = options.persistToDisk ?? true;
+    let filePath = null;
+    if (persistToDisk) {
+        try {
+            await fs.promises.mkdir(runDirectory, { recursive: true });
+            filePath = createEvidenceFilePath(testInfo, runDirectory, evidenceId);
+            await fs.promises.writeFile(filePath, JSON.stringify(evidence, null, 2), 'utf8');
+            try {
+                await testInfo.attach('donobu-triage-evidence', {
+                    path: filePath,
+                    contentType: 'application/json',
+                });
+            }
+            catch (attachError) {
+                Logger_1.appLogger.debug('Failed to attach Donobu triage evidence to Playwright report.', attachError);
+            }
+            Logger_1.appLogger.debug(`Persisted Donobu triage evidence for "${testInfo.title}" to ${filePath}.`);
+        }
+        catch (error) {
+            Logger_1.appLogger.error(`Failed to persist Donobu triage evidence for "${testInfo.title}" to ${runDirectory}.`, error);
+            filePath = null;
+        }
+    }
+    return {
+        evidence,
+        filePath,
+    };
+}
+/**
+ * Asks the GPT client for a structured treatment plan based on gathered
+ * evidence, then reconciles the result with the heuristic assessment.
+ *
+ * The system prompt embeds the failure-reason definitions and the stale-cache
+ * guidance; the user message is the JSON-serialised failure context. If the
+ * request or schema parsing throws, a warning is logged and the heuristic
+ * fallback plan is returned instead, so this function does not reject for
+ * those failures.
+ *
+ * @param gptClient Client exposing `getStructuredOutput(messages, schema)`.
+ * @param evidence Evidence record containing `failureContext` and `evidenceId`.
+ * @returns The reconciled treatment plan, or the heuristic fallback plan.
+ */
+async function generateTreatmentPlanFromEvidence(gptClient, evidence) {
+    const { failureContext } = evidence;
+    const heuristics = failureContext.heuristics;
+    const systemPrompt = `You are a specialist for triaging website test failures.
+Analyse Playwright test failures that may mix deterministic code and autonomous web test agents.
+Use the provided heuristics as hints, but override them when the evidence indicates a better diagnosis.
+Produce a rigorous treatment plan following the schema.
+Failure reason definitions:
+${exports.FailureReasonSchema.description}
+Instructions:
+- Return a JSON object that satisfies the TreatmentPlan schema.
+- Select failureReason from the enumerated options ONLY.
+- Populate remediationSteps in priority order, keeping descriptions concise but actionable.
+- If evidence is thin, lower confidence rather than guessing.
+- Set shouldRetryAutomation to true only when a retry is likely to succeed without code or environment changes.
+- requiresCodeChange should capture updates needed to Playwright or Donobu automation.
+- requiresProductFix should be true only when the application itself is broken.
+- Use additionalDataRequests to call out missing data that would materially help.
+CRITICAL - Stale Cache Detection:
+The heuristics include staleCacheIndicators that help distinguish between:
+  (A) Stale cache scenario: page.ai used cached instructions that are semantically wrong → need cache deletion + retry
+  (B) Legitimate failure: page.ai cache was correct, but assertions reveal real issues → need code/expectation fixes
+THE HARDEST CASE TO DETECT:
+When page.ai uses stale cache, actions may SUCCEED (selectors exist, clicks work) BUT interact with WRONG elements
+due to page redesign. This manifests as: page.ai completes successfully + assertions fail with unexpected state.
+This is AMBIGUOUS - could be stale cache OR legitimate test failure!
+Key indicators for stale cache (scenario A):
+  - heuristics.staleCacheIndicators.usedDeterministicMode === true (cache was active)
+  - heuristics.staleCacheIndicators.assertionsFailedAfterSuccessfulPageAi === true + usedDeterministicMode === true + isRetryAttempt === false
+    (CRITICAL: page.ai succeeded but assertions failed - may have clicked wrong elements)
+  - heuristics.staleCacheIndicators.selectorFailedDuringPageAi === true (cached selector no longer exists)
+  - heuristics.staleCacheIndicators.quickFailurePattern === true (fast failure typical of stale cache)
+Key indicators AGAINST stale cache (scenario B - legitimate failure):
+  - heuristics.staleCacheIndicators.isRetryAttempt === true (STRONGEST: already retried with fresh cache, NOT stale cache)
+  - heuristics.staleCacheIndicators.usedDeterministicMode === false (no cache was used at all)
+RECOMMENDATION STRATEGY:
+When assertionsFailedAfterSuccessfulPageAi + usedDeterministicMode + !isRetryAttempt:
+  → Recommend cache deletion + retry as FIRST step (low cost, rules out staleness)
+  → Even though ambiguous, retry is safe and informative
+When isRetryAttempt === true:
+  → Do NOT recommend cache deletion (already done)
+  → Focus on code review and legitimate failure investigation`;
+    // The whole failure context (heuristics included) is sent as the user message.
+    const instructions = JSON.stringify(failureContext);
+    try {
+        const response = await gptClient.getStructuredOutput([
+            {
+                type: 'system',
+                text: systemPrompt,
+            },
+            {
+                type: 'user',
+                items: [
+                    {
+                        type: 'text',
+                        text: instructions,
+                    },
+                ],
+            },
+        ], exports.TreatmentPlan);
+        // Validate the model output against the schema before trusting it.
+        const plan = exports.TreatmentPlan.parse(response.output);
+        return reconcileTreatmentPlan(plan, heuristics);
+    }
+    catch (error) {
+        // Any failure above (request or schema validation) degrades to heuristics.
+        Logger_1.appLogger.warn(`GPT-driven triage failed for evidence ${evidence.evidenceId}, returning heuristic fallback treatment plan.`, error);
+        return buildFallbackTreatmentPlan(heuristics);
+    }
+}
+//# sourceMappingURL=triageTestFailure.js.map