npm - @skyramp/mcp - Versions diffs - 0.2.1-rc.1 → 0.2.2 - Mend

@skyramp/mcp 0.2.1-rc.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/build/tools/test-management/actionsTool.js CHANGED Viewed

@@ -2,6 +2,7 @@ import { z } from "zod";
 import { logger } from "../../utils/logger.js";
 import { StateManager, } from "../../utils/AnalysisStateManager.js";
 import { TestSource, DriftAction, RecommendationPriority, EstimatedWork } from "../../types/TestAnalysis.js";
+import { TestType } from "../../types/TestTypes.js";
 import * as fs from "fs";
 import * as path from "path";
 import { AnalyticsService } from "../../services/AnalyticsService.js";
@@ -43,6 +44,22 @@ export function computeRenamedTestFile(testFile, renames) {
     }
     return newFilePath;
 }
+/**
+ * Select test types to generate based on HTTP method.
+ *
+ * The method name is upper-cased before matching, so "post" and "POST" are
+ * treated the same:
+ *   - POST / PUT / PATCH -> integration + contract
+ *   - DELETE             -> integration + smoke
+ *   - GET, and any method not listed above -> contract + smoke
+ *
+ * @param {string} method - HTTP method name. It must be a string:
+ *   `toUpperCase()` is called on it with no null/undefined guard, so a
+ *   missing method throws a TypeError.
+ * @returns {Array} Two `TestType` members, in the order listed above.
+ */
+function selectTestTypesForEndpoint(method) {
+    switch (method.toUpperCase()) { // case-insensitive match on the method name
+        case "POST":
+        case "PUT":
+        case "PATCH":
+            return [TestType.INTEGRATION, TestType.CONTRACT];
+        case "DELETE":
+            return [TestType.INTEGRATION, TestType.SMOKE];
+        case "GET":
+        default: // GET and every unrecognised method share this mapping
+            return [TestType.CONTRACT, TestType.SMOKE];
+    }
+}
 const recommendationSchema = z.object({
     testFile: z
         .string()
@@ -100,17 +117,20 @@ export function registerActionsTool(server) {
             idempotentHint: false,
             openWorldHint: true,
         },
-        description: `Execute test maintenance actions — final step of the unified Test Health Analysis Flow.
+        description: `Execute test maintenance and generation actions — final step of the unified Test Health Analysis Flow.
 **PREREQUISITE:** Call \`skyramp_analyze_changes\` (produces the stateFile), then \`skyramp_analyze_test_health\` (runs the drift assessment). This tool reads the stateFile from \`skyramp_analyze_changes\`.
+Call this tool after completing the drift assessment. It executes maintenance actions automatically from the stateFile — no user confirmation required.
 **EXECUTING ACTIONS:**
-- UPDATE: Reads each test file and emits targeted per-file edit instructions driven by \`updateInstructions\` and \`renamedEndpoints\`
-- REGENERATE: Reads the existing file for context (endpoint, auth, test type), then instructs the LLM to call the appropriate generation tool (e.g. \`skyramp_integration_test_generation\`) with \`outputDir\` + \`output\` matching the existing file to overwrite it
-- IGNORE / VERIFY / DELETE: Passed through and summarised — no file reads, no automated edits
+- UPDATE: Tests with drift — emits targeted per-file edit instructions driven by updateInstructions and renamedEndpoints
+- REGENERATE: Emits file-level summary; follow up by calling the appropriate generation tool (e.g. skyramp_integration_test_generation) with the same filename to overwrite
+- VERIFY: Emits file-level summary for human review — no automated edits
+- ADD: Auto-generates tests for new endpoints via LLM instructions
 **OUTPUT:**
-Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM_INSTRUCTIONS\` block for automated execution.
+Comprehensive report with executed actions, summary, and instructions for ADD recommendations
 `,
         inputSchema: actionsSchema,
     }, async (args) => {
@@ -126,22 +146,17 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
                 errorResult = toolError(`State file is empty or invalid: ${args.stateFile}. Call skyramp_analyze_changes first to generate a valid state file.`);
                 return errorResult;
             }
+            // External tests must not be candidates for UPDATE/REGENERATE/DELETE actions.
+            // Default source to Skyramp for backwards compat with state files created before the source field existed.
+            const testAnalysisResults = (stateData.existingTests || []).filter((t) => (t.source ?? TestSource.Skyramp) !== TestSource.External);
+            const newEndpoints = stateData.newEndpoints || [];
             // Resolve repo root for path normalization and security checks.
             const repoRoot = repositoryPath ? path.resolve(repositoryPath) : "";
-            // Relevant external (user-written) tests: UPDATE is permitted; REGENERATE/DELETE
-            // are report-only (the LLM may recommend them but this tool will not apply them).
-            // Paths are stored relative to repositoryPath in the state file — re-absolutize.
-            const relevantExternalPaths = new Set((stateData.repositoryAnalysis?.relevantExternalTestPaths ?? []).map((p) => path.isAbsolute(p) ? p : path.resolve(repoRoot, p)));
-            // Allowlist: Skyramp-generated tests + relevant external tests.
-            // Using an allowlist (not a blocklist) catches hallucinated paths the LLM
-            // may supply that are not in the scanned catalog at all.
-            const testAnalysisResults = (stateData.existingTests || []);
-            const skyrampTestFiles = new Set(testAnalysisResults
-                .filter((t) => (t.source ?? TestSource.Skyramp) !== TestSource.External)
-                .map((t) => t.testFile));
-            const externalTestFiles = new Set(testAnalysisResults
-                .filter((t) => (t.source ?? TestSource.Skyramp) === TestSource.External)
-                .map((t) => t.testFile));
+            // Set of non-external (Skyramp-generated) test file paths — the only files
+            // that may receive UPDATE/REGENERATE/DELETE actions. Using the allowlist rather
+            // than a blocklist catches both external tests AND hallucinated paths the LLM
+            // may supply that are not present in the scanned catalog at all.
+            const skyrampTestFiles = new Set(testAnalysisResults.map((t) => t.testFile));
             // ── Build recommendations from LLM-supplied drift assessment ──
             // The LLM performs the drift assessment in context after skyramp_analyze_test_health
             // and passes results here directly — analyzeTestHealthTool never writes assessment
@@ -150,55 +165,23 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
             (args.recommendations ?? []).forEach((rec) => {
                 // Schema requires absolute paths; resolve any relative paths defensively
                 // against repoRoot in case the LLM sends a relative path despite the schema.
-                // Normalize via path.resolve to collapse any `..` segments before the
-                // traversal guard — otherwise "/repo/../etc/passwd" would pass startsWith.
-                const rawFile = path.isAbsolute(rec.testFile)
+                const resolvedFile = path.isAbsolute(rec.testFile)
                     ? rec.testFile
                     : repoRoot
                         ? path.resolve(repoRoot, rec.testFile)
                         : rec.testFile;
-                const resolvedFile = path.resolve(rawFile);
                 // Reject files outside the repo root (path-traversal guard).
-                // Exception: files already in the scanned test catalog (externalTestFiles / skyrampTestFiles)
-                // may legitimately live in a separate testsRepoDir outside repositoryPath — catalog
-                // membership is a sufficient provenance check for those paths.
-                const isInCatalog = skyrampTestFiles.has(resolvedFile) || skyrampTestFiles.has(rec.testFile)
-                    || externalTestFiles.has(resolvedFile) || externalTestFiles.has(rec.testFile);
-                if (repoRoot && !isInCatalog && !resolvedFile.startsWith(repoRoot + path.sep) && resolvedFile !== repoRoot) {
+                if (repoRoot && !resolvedFile.startsWith(repoRoot + path.sep) && resolvedFile !== repoRoot) {
                     logger.warning(`Skipping recommendation for path outside repo root: ${rec.testFile}`);
                     return;
                 }
-                // Guard: only files present in the scanned test catalog may receive any
-                // recommendation. Hallucinated paths (not in either set) are rejected for
-                // all actions, including VERIFY and IGNORE, to keep the report consistent
-                // with what was actually discovered.
-                const isSkyramp = skyrampTestFiles.has(resolvedFile) || skyrampTestFiles.has(rec.testFile);
-                const isRelevantExternal = (externalTestFiles.has(resolvedFile) || externalTestFiles.has(rec.testFile)) &&
-                    (relevantExternalPaths.has(resolvedFile) || relevantExternalPaths.has(rec.testFile));
-                const isInAnyKnownCatalog = isSkyramp || isRelevantExternal
-                    || externalTestFiles.has(resolvedFile) || externalTestFiles.has(rec.testFile);
-                if (!isInAnyKnownCatalog) {
-                    logger.warning(`Skipping ${rec.action} for unknown test (not in scanned catalog): ${rec.testFile}`);
-                    return;
-                }
+                // Guard: only Skyramp-generated tests may receive UPDATE/REGENERATE/DELETE.
+                // Using an allowlist (skyrampTestFiles) rather than a blocklist catches both
+                // external tests and hallucinated paths the LLM may supply that are not in
+                // the scanned catalog. IGNORE/VERIFY are informational and pass through.
                 const isActionable = [DriftAction.Update, DriftAction.Regenerate, DriftAction.Delete].includes(rec.action);
-                if (isActionable && !isSkyramp && !isRelevantExternal) {
-                    logger.warning(`Skipping ${rec.action} for irrelevant external test: ${rec.testFile}`);
-                    return;
-                }
-                // REGENERATE and DELETE on external tests are report-only — convert to VERIFY so
-                // the finding surfaces to the developer without touching the file.
-                if (isRelevantExternal && !isSkyramp &&
-                    (rec.action === DriftAction.Regenerate || rec.action === DriftAction.Delete)) {
-                    recommendations.push({
-                        testFile: resolvedFile,
-                        action: DriftAction.Verify,
-                        priority: rec.priority ?? RecommendationPriority.Medium,
-                        rationale: `[external test — needs manual review] ${rec.rationale ?? ""}`.trimEnd(),
-                        estimatedWork: rec.estimatedWork ?? EstimatedWork.Small,
-                        updateInstructions: "",
-                        renamedEndpoints: [],
-                    });
+                if (isActionable && !skyrampTestFiles.has(resolvedFile) && !skyrampTestFiles.has(rec.testFile)) {
+                    logger.warning(`Skipping ${rec.action} for non-Skyramp or unknown test: ${rec.testFile}`);
                     return;
                 }
                 recommendations.push({
@@ -211,7 +194,7 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
                     renamedEndpoints: rec.renamedEndpoints ?? [],
                 });
             });
-            // ── Process UPDATE and REGENERATE recommendations ──
+            // ── Process UPDATE recommendations ──
             // Deduplicate by testFile — keep the highest-priority entry when the LLM
             // repeats a file. Priority order: high > medium > low.
             const priorityRank = {
@@ -219,44 +202,28 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
                 [RecommendationPriority.Medium]: 1,
                 [RecommendationPriority.Low]: 0,
             };
-            // Build per-file winner maps. REGENERATE beats UPDATE for the same file —
-            // if the LLM emits both, keep REGENERATE (higher severity) and drop UPDATE.
             const updateByFile = new Map();
-            const regenerateByFile = new Map();
             for (const rec of recommendations) {
-                if (rec.action === DriftAction.Regenerate) {
-                    const existing = regenerateByFile.get(rec.testFile);
-                    if (!existing || priorityRank[rec.priority] > priorityRank[existing.priority]) {
-                        regenerateByFile.set(rec.testFile, rec);
-                    }
-                }
-                else if (rec.action === DriftAction.Update) {
-                    // Only add to updateByFile if no REGENERATE exists for this file.
-                    if (!regenerateByFile.has(rec.testFile)) {
-                        const existing = updateByFile.get(rec.testFile);
-                        if (!existing || priorityRank[rec.priority] > priorityRank[existing.priority]) {
-                            updateByFile.set(rec.testFile, rec);
-                        }
-                    }
+                if (rec.action !== DriftAction.Update)
+                    continue;
+                const existing = updateByFile.get(rec.testFile);
+                if (!existing || priorityRank[rec.priority] > priorityRank[existing.priority]) {
+                    updateByFile.set(rec.testFile, rec);
                 }
             }
-            // Second pass: drop any UPDATE entries for files that ended up with REGENERATE
-            // (handles ordering where UPDATE was inserted before REGENERATE was seen).
-            for (const file of regenerateByFile.keys()) {
-                updateByFile.delete(file);
-            }
             const updateRecommendations = Array.from(updateByFile.values());
-            const regenerateRecommendations = Array.from(regenerateByFile.values());
             const fileInstructions = [];
             const testFilesToUpdate = [];
             const testFileContentMap = new Map();
-            // ── UPDATE: read file, emit targeted edit instructions ──
             for (const rec of updateRecommendations) {
                 if (!rec.testFile) {
                     logger.warning("Recommendation missing testFile", rec);
                     continue;
                 }
                 testFilesToUpdate.push(rec.testFile);
+                const testData = testAnalysisResults.find((t) => t.testFile === rec.testFile);
+                const driftData = testData?.drift;
+                const driftChanges = driftData?.changes || [];
                 let testFileContent = "";
                 try {
                     testFileContent = fs.readFileSync(rec.testFile, "utf-8");
@@ -269,6 +236,9 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
                 const renames = rec.renamedEndpoints || [];
                 const isRenameUpdate = renames.length > 0;
                 let instruction = `\n### ${rec.testFile}\n\n`;
+                instruction += `**Priority:** ${rec.priority} | `;
+                instruction += `**Estimated Effort:** ${rec.estimatedWork || EstimatedWork.Small}\n\n`;
+                instruction += `**Why Update Needed:** ${rec.rationale}\n\n`;
                 if (isRenameUpdate) {
                     instruction += `**Endpoint Rename Detected — Path Substitution Required:**\n\n`;
                     instruction += `| Old Path | New Path | Method |\n`;
@@ -294,41 +264,79 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
                     instruction += `Preserve all existing test logic — only add or adjust what is described above.\n\n`;
                 }
                 else if (!isRenameUpdate) {
-                    const fallbackRationale = rec.rationale ?? "";
-                    if (fallbackRationale) {
-                        instruction += `**Why:** ${fallbackRationale}\n\n`;
-                    }
-                    instruction += `**Action:** Update this test file based on the rationale above. `;
+                    instruction += `**Action:** Update this test file per the rationale above. `;
                     instruction += `Match the assertion style already used in the file. `;
                     instruction += `Preserve all existing test logic — only add or adjust the minimum required assertions.\n\n`;
                 }
-                fileInstructions.push(instruction);
-            }
-            // ── REGENERATE: read file for context, emit overwrite instructions ──
-            const regenerateInstructions = [];
-            const testFilesToRegenerate = [];
-            const regenerateContentMap = new Map();
-            for (const rec of regenerateRecommendations) {
-                if (!rec.testFile) {
-                    logger.warning("Recommendation missing testFile", rec);
-                    continue;
-                }
-                testFilesToRegenerate.push(rec.testFile);
-                let existingContent = "";
-                try {
-                    existingContent = fs.readFileSync(rec.testFile, "utf-8");
-                    regenerateContentMap.set(rec.testFile, existingContent);
+                if (driftData) {
+                    instruction += `**Analysis:**\n`;
+                    instruction += `- Changes Detected: ${driftData.changes?.length || 0}\n`;
+                    instruction += `- Affected Files: ${driftData.affectedFiles.files?.length || 0}\n\n`;
                 }
-                catch (error) {
-                    logger.warning(`Could not read file for REGENERATE context ${rec.testFile}: ${error.message}`);
+                if (driftChanges.length > 0) {
+                    instruction += `**Changes Detected:**\n`;
+                    driftChanges.forEach((change) => {
+                        instruction += `**${change.type}** (Severity: ${change.severity}): ${change.description}\n`;
+                        if (change.details) {
+                            instruction += `   └─ ${change.details}\n`;
+                        }
+                        if (change.file) {
+                            instruction += `   └─ In: \`${change.file}\`\n`;
+                        }
+                    });
+                    instruction += `\n`;
                 }
-                let instruction = `\n### ${rec.testFile}\n\n`;
-                instruction += `**Action: REGENERATE** — the response shape changed too drastically for targeted edits.\n\n`;
-                if (rec.updateInstructions) {
-                    instruction += `**What changed:**\n\n${rec.updateInstructions}\n\n`;
+                // File content is provided in LLM_INSTRUCTIONS.update_context.current_content — omit here to avoid duplication.
+                fileInstructions.push(instruction);
+            }
+            // ── Build ADD section for new endpoints ──
+            const wsBaseUrl = stateData.repositoryAnalysis?.wsBaseUrl || "";
+            const wsSchemaPath = stateData.repositoryAnalysis?.wsSchemaPath || "";
+            const primaryLanguage = stateData.repositoryAnalysis?.projectMeta?.primaryLanguage ||
+                "python";
+            const primaryFramework = stateData.repositoryAnalysis?.projectMeta?.primaryFramework ||
+                "pytest";
+            // Determine output directory from workspace config or repo path
+            const outputDir = repositoryPath
+                ? path.join(repositoryPath, "tests", "skyramp")
+                : "./tests/skyramp";
+            const addSummaryLines = [];
+            const llmToolCalls = [];
+            for (const ep of newEndpoints) {
+                const testTypes = selectTestTypesForEndpoint(ep.method);
+                const endpointURL = wsBaseUrl
+                    ? wsBaseUrl.replace(/\/$/, "") + ep.path
+                    : ep.path;
+                addSummaryLines.push(`- ${ep.method} ${ep.path} → ${testTypes.join(", ")} tests`);
+                for (const testType of testTypes) {
+                    let toolName = "";
+                    switch (testType) {
+                        case TestType.CONTRACT:
+                            toolName = "skyramp_contract_test_generation";
+                            break;
+                        case TestType.INTEGRATION:
+                            toolName = "skyramp_integration_test_generation";
+                            break;
+                        case TestType.SMOKE:
+                            toolName = "skyramp_smoke_test_generation";
+                            break;
+                        default:
+                            toolName = "skyramp_contract_test_generation";
+                    }
+                    llmToolCalls.push({
+                        tool: toolName,
+                        params: {
+                            endpointURL,
+                            method: ep.method,
+                            language: primaryLanguage,
+                            framework: primaryFramework,
+                            outputDir,
+                            ...(wsSchemaPath ? { apiSchema: wsSchemaPath } : {}),
+                        },
+                        endpoint: `${ep.method} ${ep.path}`,
+                        testType: testType,
+                    });
                 }
-                instruction += `Call the appropriate generation tool (e.g. \`skyramp_integration_test_generation\`, \`skyramp_contract_test_generation\`) with \`outputDir: "${path.dirname(rec.testFile)}"\` and \`output: "${path.basename(rec.testFile)}"\` to overwrite this file from scratch. Use the existing file content in \`LLM_INSTRUCTIONS.regenerate_context\` for context on the endpoint, auth pattern, and test structure — replicate the test type and language.\n\n`;
-                regenerateInstructions.push(instruction);
             }
             // ── Build response text ──
             let responseText = `# Test Actions Report\n\n`;
@@ -340,26 +348,37 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
                 responseText += `\n---\n`;
                 responseText += fileInstructions.join("\n---\n");
             }
-            if (regenerateRecommendations.length > 0) {
-                responseText += `\n## Tests Requiring Regeneration (${regenerateRecommendations.length})\n\n`;
-                testFilesToRegenerate.forEach((file, idx) => {
-                    responseText += `${idx + 1}. \`${file}\`\n`;
+            if (newEndpoints.length > 0) {
+                responseText += `\n## New Endpoint Tests to Generate (${newEndpoints.length} endpoints)\n\n`;
+                addSummaryLines.forEach((line) => {
+                    responseText += `${line}\n`;
                 });
-                responseText += `\n---\n`;
-                responseText += regenerateInstructions.join("\n---\n");
+                responseText += `\nThe following tests will be generated automatically.\n`;
             }
-            const otherRecs = recommendations.filter((rec) => rec.action !== DriftAction.Update && rec.action !== DriftAction.Regenerate);
-            if (otherRecs.length > 0) {
-                responseText += `\n## Other Findings (${otherRecs.length})\n\n`;
-                otherRecs.forEach((rec) => {
-                    responseText += `- **${rec.testFile}** — Action: ${rec.action}, Priority: ${rec.priority}`;
-                    if (rec.rationale)
-                        responseText += ` — ${rec.rationale}`;
-                    responseText += `\n`;
-                });
+            if (updateRecommendations.length === 0 && newEndpoints.length === 0) {
+                const otherRecs = recommendations.filter((rec) => rec.action !== DriftAction.Update);
+                if (otherRecs.length > 0) {
+                    responseText += `## Recommendations (${otherRecs.length})\n\n`;
+                    otherRecs.forEach((rec) => {
+                        responseText += `- **${rec.testFile}** — Action: ${rec.action}, Priority: ${rec.priority}\n`;
+                        responseText += `  ${rec.rationale}\n`;
+                    });
+                }
+                else {
+                    responseText += `No action required. All existing tests appear healthy.\n`;
+                }
+            }
+            responseText += `\n\n## Next Steps\n\n`;
+            responseText += `The AI assistant will:\n`;
+            let stepNumber = 1;
+            if (updateRecommendations.length > 0) {
+                responseText += `${stepNumber++}. Review the changes and issues for each test\n`;
+                responseText += `${stepNumber++}. Update test files to fix compatibility issues\n`;
+                responseText += `${stepNumber++}. Preserve original test logic and structure\n`;
+                responseText += `${stepNumber++}. Show you the changes made\n`;
             }
-            else if (updateRecommendations.length === 0 && regenerateRecommendations.length === 0) {
-                responseText += `No action required. All existing tests appear healthy.\n`;
+            if (newEndpoints.length > 0) {
+                responseText += `${stepNumber++}. Generate new tests for new endpoints\n`;
             }
             responseText += `\n**This tool is currently in Early Preview stage. Please verify the results.**\n`;
             // ── Build LLM instructions for UPDATE ──
@@ -378,8 +397,6 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
                 auto_proceed: true,
                 files_to_update: testFilesToUpdate,
                 update_count: updateRecommendations.length,
-                files_to_regenerate: testFilesToRegenerate,
-                regenerate_count: regenerateRecommendations.length,
             };
             if (uniqueRenames.length > 0) {
                 llmInstructionsObj.endpoint_renames = uniqueRenames;
@@ -411,22 +428,6 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
                 llmInstructionsObj.update_strategy =
                     "For each file in update_context, apply the changes described in context to the provided current_content. Write the result using the Edit tool. Do NOT re-read the file first. Match the assertion style already used in the file. Preserve all existing test logic. After applying all edits, call skyramp_enhance_assertions with each updated file path to strengthen the assertions.";
             }
-            // REGENERATE context: existing file content gives the generation tool the
-            // endpoint URL, auth pattern, test type, and language to replicate.
-            const regenerateContextFiles = [];
-            for (const rec of regenerateRecommendations) {
-                const existing_content = regenerateContentMap.get(rec.testFile);
-                regenerateContextFiles.push({
-                    file: rec.testFile,
-                    rationale: rec.updateInstructions || rec.rationale,
-                    ...(existing_content !== undefined && { existing_content }),
-                });
-            }
-            if (regenerateContextFiles.length > 0) {
-                llmInstructionsObj.regenerate_context = regenerateContextFiles;
-                llmInstructionsObj.regenerate_strategy =
-                    "For each file in regenerate_context, call the appropriate generation tool (skyramp_integration_test_generation or skyramp_contract_test_generation) with outputDir set to the file's directory and output set to the filename. Use existing_content to determine the test type, endpoint, auth pattern, and language. The generation tool will overwrite the file. Do NOT use skyramp_ui_test_generation here — UI test regeneration requires a recorded trace (playwrightInput) and must be handled separately.";
-            }
             const llmInstructions = `<!-- LLM_INSTRUCTIONS:\n${JSON.stringify(llmInstructionsObj, null, 2)}\n-->\n`;
             const contentBlocks = [
                 {
@@ -444,6 +445,23 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
                     },
                 },
             ];
+            // ── Build ADD instructions for new endpoints ──
+            if (newEndpoints.length > 0 && llmToolCalls.length > 0) {
+                const addInstructionsObj = {
+                    workflow: "add_tests_for_new_endpoints",
+                    auto_proceed: true,
+                    instruction: "Call each tool in tool_calls immediately and in order. Do NOT ask for confirmation. If endpointURL is a bare path, read .skyramp/workspace.yml for baseUrl and prepend it.",
+                    tool_calls: llmToolCalls,
+                };
+                const addInstructions = `<!-- LLM_INSTRUCTIONS:\n${JSON.stringify(addInstructionsObj, null, 2)}\n-->\n`;
+                contentBlocks.push({
+                    type: "text",
+                    text: addInstructions,
+                    annotations: {
+                        audience: ["assistant"],
+                    },
+                });
+            }
             return {
                 content: contentBlocks,
             };

package/build/tools/test-management/analyzeChangesTool.js CHANGED Viewed

@@ -10,7 +10,7 @@ import { parseWorkspaceAuthType, getDefaultAuthHeader, WorkspaceAuthType, readWo
 import { AnalyticsService } from "../../services/AnalyticsService.js";
 import { StateManager, registerSession, storeSessionData, setTestsRepoDir, } from "../../utils/AnalysisStateManager.js";
 import { buildRecommendationPrompt } from "../../prompts/test-recommendation/test-recommendation-prompt.js";
-import { isFrontendFile, isTestFile } from "../../prompts/test-recommendation/scopeAssessment.js";
+import { hasFlutterSdkDep, isFrontendFile, isTestFile } from "../../prompts/test-recommendation/scopeAssessment.js";
 import { enumerateCandidateUiPages } from "../../utils/uiPageEnumerator.js";
 import { MAX_RECOMMENDATIONS, MAX_TESTS_TO_GENERATE } from "../../prompts/test-recommendation/recommendationSections.js";
 import { TestDiscoveryService } from "../../services/TestDiscoveryService.js";
@@ -538,14 +538,6 @@ Combines API endpoint scanning, branch diff computation, and test discovery into
             }
             await sendProgress(50, 100, "Discovering existing tests...");
             // ── Step 3: Discover existing tests ──
-            // Resolve testDir to scope the file scan — prefer explicit testsRepoDir param,
-            // then workspace.yml testDirectory. repositoryPath remains the repo root for
-            // git operations; testDir only limits which files are classified as tests.
-            const wsConfigEarly = await readWorkspaceConfigRaw(params.repositoryPath);
-            const wsTestDir = wsConfigEarly?.services?.[0]?.testDirectory;
-            const testDir = params.testsRepoDir
-                ?? (params.testDirectory ? path.resolve(params.repositoryPath, params.testDirectory) : undefined)
-                ?? (wsTestDir ? path.resolve(params.repositoryPath, wsTestDir) : undefined);
             // Compute changedResources from classified endpoints for test discovery filtering.
             // undefined    → full-repo mode (no diff context)
             // []           → PR mode, no endpoints found → skip external tests
@@ -585,7 +577,8 @@ Combines API endpoint scanning, branch diff computation, and test discovery into
             try {
                 const testDiscoveryService = new TestDiscoveryService();
                 setTestsRepoDir(params.testsRepoDir);
-                const discoveryResult = await testDiscoveryService.discoverTests(testDir ?? params.repositoryPath, { changedResources });
+                const testScanPath = params.testsRepoDir ?? params.repositoryPath;
+                const discoveryResult = await testDiscoveryService.discoverTests(testScanPath, { changedResources });
                 existingTests = discoveryResult.tests.map((test) => ({
                     testFile: test.testFile,
                     testType: test.testType,
@@ -1130,7 +1123,13 @@ Combines API endpoint scanning, branch diff computation, and test discovery into
                 // pass isFrontendFile (any .ts under a frontend directory matches
                 // the tier-3 rule) but aren't UI source we'd want to ground page
                 // enumeration in.
-                const frontendFiles = changedFiles.filter((f) => isFrontendFile(f) && !isTestFile(f));
+                // Compute hasFlutterSdkDep once and pass it through so .dart files
+                // in a Flutter project are recognised as frontend. See Confluence
+                // "Flutter support in Testbot" — this is the second budget-driving
+                // call site that must thread the flag (the other is
+                // uiAnalyzeChangesTool). Both must agree to avoid silent divergence.
+                const flutterSdk = hasFlutterSdkDep(params.repositoryPath);
+                const frontendFiles = changedFiles.filter((f) => isFrontendFile(f, { hasFlutterSdkDep: flutterSdk }) && !isTestFile(f));
                 if (frontendFiles.length === 0)
                     return undefined;
                 const candidateUiPages = await enumerateCandidateUiPages(params.repositoryPath, frontendFiles);
@@ -1189,7 +1188,6 @@ Combines API endpoint scanning, branch diff computation, and test discovery into
                     sessionId,
                     routerMountContext,
                     candidateRouteFiles,
-                    relevantExternalTestPaths,
                 },
             };
             // Clean up old state files (>24 hours) before creating new one

package/build/tools/test-management/analyzeTestHealthTool.js CHANGED Viewed

@@ -15,17 +15,16 @@ export function registerAnalyzeTestHealthTool(server) {
             idempotentHint: true,
             openWorldHint: false,
         },
-        description: `Generate drift assessment instructions for existing tests — second step of the unified Test Health Analysis Flow.
+        description: `Generate drift and health assessment instructions for existing tests — second step of the unified Test Health Analysis Flow.
 **PREREQUISITE:** Call \`skyramp_analyze_changes\` first to get a stateFile.
-Returns a structured prompt for the LLM to assess each existing test against the branch diff and assign one of: UPDATE / REGENERATE / VERIFY / DELETE / IGNORE.
+This tool reads existing tests, the branch diff, and scanned endpoints from the stateFile,
+then returns a structured prompt for the LLM to assess each test for drift and health.
-Includes both Skyramp-generated tests and user-written (external) tests that are relevant to the PR's changed endpoints. For external tests, UPDATE is applied automatically; REGENERATE and DELETE are surfaced as report-only findings for the developer.
+The LLM follows the returned prompt to assign drift details and actions (UPDATE / REGENERATE / VERIFY / DELETE / IGNORE) for each test, then calls \`skyramp_actions\`.
-The LLM follows the returned prompt (Action Decision Tree pre-scan → Endpoint Existence → Response Shape → Additive Fields → Auth/AuthZ → Behavioral Contract → Assign Action → Update Execution Rules), then calls \`skyramp_actions\` with its \`recommendations[]\`.
-(Optional) Execute tests using \`skyramp_execute_test\` with \`stateFile\` before \`skyramp_actions\` to validate live.`,
+(Optional) Execute tests using \`skyramp_execute_test\` with \`stateFile\` parameter before \`skyramp_actions\` to validate tests live.`,
         inputSchema: {
             stateFile: z
                 .string()
@@ -46,24 +45,14 @@ The LLM follows the returned prompt (Action Decision Tree pre-scan → Endpoint
             if (!stateData) {
                 return toolError(`State file is empty or invalid: ${args.stateFile}. Call skyramp_analyze_changes first to generate a valid state file.`);
             }
+            // Only Skyramp tests are candidates for drift analysis and maintenance actions.
+            // External (user-written) tests are used only for recommendation deduplication.
+            // Default source to Skyramp for backwards compat with state files created before the source field existed.
+            const existingTests = (stateData.existingTests || []).filter((t) => (t.source ?? TestSource.Skyramp) !== TestSource.External);
+            logger.info(`Loaded ${existingTests.length} existing Skyramp tests from state file (excluded external)`);
             if (!repositoryPath || typeof repositoryPath !== "string") {
                 return toolError(`repositoryPath not found in state file metadata. The state file was likely created by an older version — re-run skyramp_analyze_changes to regenerate it.`);
             }
-            // Skyramp tests: full drift analysis + all actions permitted.
-            // Relevant external tests (user-written, relevant to this PR's endpoints): drift analysis
-            //   + UPDATE only — REGENERATE and DELETE are report-only (enforced in skyramp_actions).
-            // Other external tests: excluded entirely (deduplication only, not analysed).
-            // relevantExternalTestPaths are stored relative to repositoryPath in the state file.
-            // Re-absolutize here so has() comparisons against t.testFile (absolute) work correctly.
-            const relevantExternalPaths = new Set((stateData.repositoryAnalysis?.relevantExternalTestPaths ?? []).map((p) => path.isAbsolute(p) ? p : path.resolve(repositoryPath, p)));
-            const existingTests = (stateData.existingTests || []).filter((t) => {
-                if ((t.source ?? TestSource.Skyramp) !== TestSource.External)
-                    return true;
-                return relevantExternalPaths.has(t.testFile);
-            });
-            const skyrampCount = existingTests.filter((t) => (t.source ?? TestSource.Skyramp) !== TestSource.External).length;
-            const externalCount = existingTests.length - skyrampCount;
-            logger.info(`Loaded ${skyrampCount} Skyramp + ${externalCount} relevant external tests from state file`);
             const absoluteRepoPath = path.resolve(repositoryPath);
             const scannedEndpoints = stateData.repositoryAnalysis?.skeletonEndpoints || [];
             const routerMountContext = stateData.repositoryAnalysis?.routerMountContext;
@@ -87,7 +76,6 @@ The LLM follows the returned prompt (Action Decision Tree pre-scan → Endpoint
                 routerMountContext,
                 candidateRouteFiles,
                 diffFilePath,
-                relevantExternalTestPaths: [...relevantExternalPaths],
             });
             return {
                 structuredContent: { prompt: promptText },

package/build/tools/test-management/uiAnalyzeChangesTool.js CHANGED Viewed

@@ -3,7 +3,7 @@ import * as path from "path";
 import { z } from "zod";
 import { logger } from "../../utils/logger.js";
 import { enumerateCandidateUiPages } from "../../utils/uiPageEnumerator.js";
-import { isFrontendFile, isTestFile } from "../../prompts/test-recommendation/scopeAssessment.js";
+import { hasFlutterSdkDep, isFrontendFile, isTestFile } from "../../prompts/test-recommendation/scopeAssessment.js";
 import { parseChangedFilesFromDiff } from "../../utils/branchDiff.js";
 import { toolText } from "../../utils/utils.js";
 import { isTestbotEnabled } from "../../utils/featureFlags.js";
@@ -49,7 +49,13 @@ export async function runUiAnalyzeChanges(params) {
             instructions: DIFF_FILE_MISSING_INSTRUCTIONS,
         };
     }
-    const frontendFiles = changedFiles.filter((f) => isFrontendFile(f) && !isTestFile(f));
+    // Compute hasFlutterSdkDep once at the tool boundary; pass into isFrontendFile
+    // so .dart files are recognised as frontend in Flutter repos. See Confluence
+    // "Flutter support in Testbot" — this is one of two budget-driving call sites
+    // that must thread the flag (the other is analyzeChangesTool). Without it,
+    // a Flutter PR shows zero frontend files and never enters the UI pipeline.
+    const flutterSdk = hasFlutterSdkDep(repoPath);
+    const frontendFiles = changedFiles.filter((f) => isFrontendFile(f, { hasFlutterSdkDep: flutterSdk }) && !isTestFile(f));
     if (frontendFiles.length === 0) {
         const uiContext = {
             changedFrontendFiles: [],