npm - @skyramp/mcp - Versions diffs - 0.1.0 → 0.1.1 - Mend

@skyramp/mcp 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/build/prompts/test-recommendation/test-recommendation-prompt.js +146 -27
package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +202 -5
package/build/prompts/testbot/testbot-prompts.js +10 -9
package/build/services/TestDiscoveryService.js +417 -58
package/build/services/TestDiscoveryService.test.js +361 -0
package/build/tools/test-management/actionsTool.js +4 -1
package/build/tools/test-management/analyzeChangesTool.js +76 -9
package/build/tools/test-management/analyzeTestHealthTool.js +6 -2
package/build/types/RepositoryAnalysis.js +1 -0
package/build/types/TestAnalysis.js +6 -1
package/build/utils/routeParsers.js +7 -0
package/build/utils/routeParsers.test.js +29 -1
package/package.json +1 -1

package/build/prompts/test-recommendation/test-recommendation-prompt.js CHANGED

@@ -2,6 +2,7 @@ import * as crypto from "crypto";
 import { AnalysisScope, isDiff, } from "../../types/RepositoryAnalysis.js";
 import { WorkspaceAuthType, getDefaultAuthHeader, AUTH_MIDDLEWARE_PATTERNS_STR } from "../../utils/workspaceAuth.js";
 import { logger } from "../../utils/logger.js";
+import { extractResourceFromPath } from "../../utils/routeParsers.js";
 import { buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildToolWorkflows, buildTestPatternGuidelines, buildTestQualityCriteria, buildFewShotExamples, buildVerificationChecklist, buildGenerationRules, getAuthSnippets, MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, } from "./recommendationSections.js";
 import { CATEGORY_PRIORITY, TEST_CATEGORIES } from "../../types/TestRecommendation.js";
 import { buildScopeAssessmentSection, isFrontendFile } from "./scopeAssessment.js";
@@ -10,15 +11,16 @@ function formatTestLocations(locs) {
     if (entries.length === 0)
         return "";
     const rows = entries.map(([type, files]) => `| ${type.padEnd(12)} | ${files} |`).join("\n");
-    return ("\n**Existing Skyramp test coverage:**\n" +
+    return ("\n**Existing test coverage (Skyramp + external):**\n" +
         "| Test type    | File (covers: endpoints)                                |\n" +
         "|--------------|---------------------------------------------------------|\n" +
         rows + "\n\n" +
         "**Deduplication rule (apply this table before generating anything):**\n" +
-        "- Contract test: if the HTTP method + path already appears in a `covers:` entry of type `contract` → UPDATE that file, do NOT create a new one.\n" +
-        "- Integration test: if the primary (last mutating) step's method + path already appears in a `covers:` entry of type `integration` → UPDATE, do NOT create a new one.\n" +
+        "- `[external]` tests: if a resource is covered by an `[external]` test, do NOT create a new test for the same HTTP method + resource + test type (e.g. an external integration test covering `POST /orders` blocks any new `POST` integration test on the `orders` resource). Do NOT attempt to UPDATE, REGENERATE, or DELETE external tests — they are user-maintained.\n" +
+        "- `[skyramp]` contract test: if the HTTP method + path already appears in a `[skyramp]` `covers:` entry of type `contract` → UPDATE that file, do NOT create a new one.\n" +
+        "- `[skyramp]` integration test: if the primary (last mutating) step's method + path already appears in a `[skyramp]` `covers:` entry of type `integration` → UPDATE, do NOT create a new one.\n" +
         "- UI/E2E test: always create a new file — traces are distinct recordings.\n" +
-        "For contract and integration tests: if in doubt, prefer UPDATE over creating a duplicate.");
+        "For `[skyramp]` contract and integration tests: if in doubt, prefer UPDATE over creating a duplicate.");
 }
 // ── Priority-tier ordering (replaces numeric CATEGORY_WEIGHTS) ──
 // Categories map to HIGH / MEDIUM / LOW tiers.
@@ -49,24 +51,87 @@ function computeTiebreakerSeed(endpoints, diffFiles) {
     return crypto.createHash("sha256").update(canonical).digest("hex").slice(0, 8);
 }
 // ── Helpers ──
-const SKIP_SEGMENTS_SET = new Set(["api", "v1", "v2", "v3", "public"]);
-function extractResourceFromPath(path) {
-    const segments = path.split("/").filter(Boolean);
-    const nonParam = segments.filter(s => !s.startsWith("{") && !SKIP_SEGMENTS_SET.has(s));
-    return nonParam[nonParam.length - 1] || "unknown";
-}
-function scenarioCoverageKey(scenario) {
+/** Resolve the primary step and inferred test type for a scenario. */
+function resolvePrimaryStep(scenario) {
     const testType = scenario.testType ?? (scenario.steps.length === 1 ? "contract" : "integration");
     const mutatingSteps = scenario.steps.filter(st => ["POST", "PUT", "PATCH", "DELETE"].includes(st.method));
     // Use the last mutating step — earlier steps are typically prerequisite setup
     // (e.g. POST /products before PATCH /orders), while the final mutation is the
     // primary action under test.
     const primaryStep = mutatingSteps[mutatingSteps.length - 1] ?? scenario.steps[scenario.steps.length - 1];
+    return { primaryStep, testType };
+}
+function scenarioCoverageKey(scenario) {
+    const { primaryStep, testType } = resolvePrimaryStep(scenario);
     const resource = extractResourceFromPath(primaryStep?.path ?? "");
     return `${resource}::${testType}`;
 }
+/**
+ * Method-aware coverage key for external test dedup.
+ * Unlike scenarioCoverageKey (resource::testType), this includes the HTTP method
+ * so that e.g. an external test covering "GET /orders" doesn't block generating
+ * a test for "PUT /orders" — a different operation on the same resource.
+ */
+function externalDedupKey(scenario) {
+    const { primaryStep, testType } = resolvePrimaryStep(scenario);
+    const method = primaryStep?.method ?? "GET";
+    const resource = extractResourceFromPath(primaryStep?.path ?? "");
+    return `${method}::${resource}::${testType}`;
+}
+/**
+ * Build a set of coverage keys from external (non-Skyramp) tests.
+ * Parses `testLocations` entries tagged with `[external]` to extract the
+ * method-aware `METHOD::resource::testType` keys they cover. This allows
+ * programmatic filtering of scenarios that duplicate external test coverage
+ * while preserving distinct operations on the same resource (for example,
+ * `GET::orders::integration` vs `PUT::orders::integration`) — complementing
+ * the prompt-level Step 0 dedup instructions with an algorithmic guarantee.
+ *
+ * Format of testLocations: Record<testType, "file1 [external] (covers: GET /api/v1/orders, POST /api/v1/orders), file2 (covers: ...)">
+ */
+function buildExternalCoverageSet(testLocations) {
+    const coverage = new Set();
+    let externalWithoutCoverage = 0;
+    for (const [testType, fileList] of Object.entries(testLocations)) {
+        // Count external files with no covers clause — these fall back to prompt-level dedup only
+        const externalCount = (fileList.match(/\[external\]/g) || []).length;
+        const coveredCount = (fileList.match(/\[external\]\s*\(covers:/g) || []).length;
+        externalWithoutCoverage += externalCount - coveredCount;
+        // Match all "[external] (covers: ...)" segments in the file list string.
+        // Each match captures the covers clause for one external test file.
+        for (const m of fileList.matchAll(/\[external\]\s*\(covers:\s*([^)]+)\)/g)) {
+            const endpoints = m[1].split(",").map(e => e.trim());
+            for (const ep of endpoints) {
+                // ep is "METHOD /path" e.g. "GET /api/v1/orders/{order_id}"
+                const spaceIdx = ep.indexOf(" ");
+                if (spaceIdx < 0)
+                    continue;
+                const method = ep.slice(0, spaceIdx).toUpperCase();
+                const epPath = ep.slice(spaceIdx + 1);
+                const resource = extractResourceFromPath(epPath);
+                if (resource !== "unknown") {
+                    // Method-aware key: "GET::orders::integration" — matches externalDedupKey() format.
+                    // When testType is "unknown" (heuristic failed), emit keys for both integration and
+                    // contract to avoid silent misses — conservative over-blocking is preferable.
+                    if (testType === "unknown") {
+                        coverage.add(`${method}::${resource}::integration`);
+                        coverage.add(`${method}::${resource}::contract`);
+                    }
+                    else {
+                        coverage.add(`${method}::${resource}::${testType}`);
+                    }
+                }
+            }
+        }
+    }
+    if (externalWithoutCoverage > 0) {
+        logger.info(`${externalWithoutCoverage} external test file(s) have no extractable endpoint coverage — ` +
+            `programmatic dedup skipped for these; Step 0 semantic check is the fallback.`);
+    }
+    return coverage;
+}
 // ── Execution Plan (replaces pre-ranked + scenarios + heuristic sections) ──
-function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject = false, isFrontendOnlyProject = false) {
+function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject = false, isFrontendOnlyProject = false, externalCoverage = new Set()) {
     // Full-repo mode only — percentage-based UI/E2E slot targets (15% each, floor 1).
     const rawE2E = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
     const rawUI = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
@@ -90,11 +155,22 @@ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, au
     const TYPE_LABEL = {
         e2e: "E2E", ui: "UI", integration: "Integration", contract: "Contract",
     };
+    // Filter out scenarios already covered by external tests before slicing.
+    const scoredFiltered = externalCoverage.size > 0
+        ? scored.filter(item => {
+            const key = externalDedupKey(item.scenario);
+            if (externalCoverage.has(key)) {
+                logger.info(`External dedup (full-repo): skipping "${item.scenario.scenarioName}" (${key})`);
+                return false;
+            }
+            return true;
+        })
+        : scored;
     // For full-stack repos, carve out E2E and UI slots before filling with backend tests.
     const backendSlotCount = isFrontendProject
         ? Math.max(0, topN - minE2ESlots - minUISlots)
         : topN;
-    const allItems = scored.slice(0, backendSlotCount);
+    const allItems = scoredFiltered.slice(0, backendSlotCount);
     const byType = new Map();
     for (const t of TYPE_ORDER)
         byType.set(t, []);
@@ -304,7 +380,7 @@ ${repoSupplementNote}
 Cross-check every endpoint path against the Router Mounting / Nesting section in the analysis above. Sub-routers may be mounted at nested prefixes — e.g. a reviews router with \`@router.get("/")\` may actually be \`GET /api/v1/products/{product_id}/reviews\` if mounted under that prefix. Always use the fully-qualified nested path in tool calls, not the path as it appears in the route file alone.
 **Existing test files (check before assigning output filenames):**
-See the Existing Tests section above. If a recommendation's primary resource already has a test file listed there, prefer passing an explicit \`output\` filename (e.g. \`output: "orders_integration_test.py"\`) to update the existing file rather than creating a duplicate.
+See the Existing Tests section above. If a recommendation's primary resource already has a \`[skyramp]\` test file listed there, prefer passing an explicit \`output\` filename (e.g. \`output: "orders_integration_test.py"\`) to update the existing file rather than creating a duplicate. Do NOT update \`[external]\` test files — they are user-maintained.
 Before filling in tool call parameters for each item, use the analysis data already provided above (endpoint interactions, source context) first. Only read the route handler source code directly when the analysis data does not contain the specific value you need:
 - Required request body fields (POST/PUT/PATCH) — use field names from the analysis interactions; read source only if they show \`{}\` or are missing
@@ -324,7 +400,7 @@ ${buildTestQualityCriteria()}
 | Code Insight | Derived from actual implementation (spotted middleware pattern, N+1 risk, unique constraint)? → bump up one tier |
 </enrichment_notes>`;
 }
-function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false) {
+function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false, externalCoverage = new Set(), relevantExternalTestPaths = []) {
     const frontendUrl = "<frontend_url>";
     // Slot allocation:
     // - UI-only PR: all GENERATE slots are UI placeholders (no pre-ranked backend scenarios)
@@ -335,8 +411,21 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
         : hasFrontendChanges
             ? Math.max(0, maxGen - 1)
             : maxGen;
-    const generateItems = scored.slice(0, Math.min(backendGenerateCount, scored.length));
-    const rawAdditionalItems = scored.slice(backendGenerateCount, topN);
+    // Filter out scenarios whose primary method + resource + test type is already covered by external tests.
+    // Method-aware: an external test covering GET /orders won't block PUT /orders scenarios.
+    // This is the programmatic complement to the prompt-level Step 0 dedup instructions.
+    const scoredAfterExternalDedup = externalCoverage.size > 0
+        ? scored.filter(item => {
+            const key = externalDedupKey(item.scenario);
+            if (externalCoverage.has(key)) {
+                logger.info(`External dedup: skipping "${item.scenario.scenarioName}" (${key}) — covered by external test`);
+                return false;
+            }
+            return true;
+        })
+        : scored;
+    const generateItems = scoredAfterExternalDedup.slice(0, Math.min(backendGenerateCount, scoredAfterExternalDedup.length));
+    const rawAdditionalItems = scoredAfterExternalDedup.slice(backendGenerateCount, topN);
     // Filter additional items whose primary resource + test type already appear in GENERATE
     const generatedCoverage = new Set(generateItems.map(item => scenarioCoverageKey(item.scenario)));
     const additionalItems = rawAdditionalItems.filter(item => !generatedCoverage.has(scenarioCoverageKey(item.scenario)));
@@ -485,20 +574,24 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
 Derive scenario names and steps from the actual changed frontend files. If your Budget Plan calls for 0% UI/E2E, omit this entirely.` : "";
     const supplementNote = `\n**If your Budget Plan total exceeds the pre-ranked items listed above:** draft additional tests from source-code enrichment (Step 1). For each new or changed endpoint, identify boundary or variation scenarios — formula parameters, search/filter constraints, required field validation. Only after exhausting PR-specific scenarios, add generic patterns (auth boundary → 401, non-existent ID → 404). Do NOT supplement with tests whose endpoint + test type match a GENERATE item.`;
     // ── PR / branch-diff mode: execution plan ────────────────────────────────
+    const externalTestFilesList = relevantExternalTestPaths.length > 0
+        ? `**Read these external test files first** (paths are relative to the \`repositoryPath\` you passed to \`skyramp_analyze_changes\` — prepend it to get the absolute path). Determine exactly which HTTP methods + paths each one covers. This is the definitive source of truth for external coverage:\n${relevantExternalTestPaths.map(p => `- \`${p}\``).join("\n")}\n\n`
+        : "";
     return `## Execution Plan
 Seed: ${seed} | Endpoints: ${endpointCount} | Max: ${maxGen} generate + up to ${Math.max(topN - maxGen, 0)} additional (your Budget Plan determines the exact count)
 ${buildScopeAssessmentSection(topN, maxGen, isUIOnlyPR)}
-**Step 0 — Existing-test cross-check (before executing anything)**
-For every GENERATE item below, check its endpoint path and test type against the Existing Tests list (further down in the prompt).
-- **Contract tests**: If an existing contract test already covers that resource path → UPDATE the existing file instead of creating a new one. This does NOT count toward \`newTestsCreated\` — backfill from ADDITIONAL candidates to fill the open ADD slot using this priority order:
+**Step 0 — External test coverage verification (before executing anything)**
+${externalTestFilesList}For every GENERATE item below, check its endpoint path and test type against the Existing Tests list (further down in the prompt).
+- **\`[external]\` tests**: If the endpoint is already covered by an \`[external]\` test of the same type → skip the resource entirely (do NOT create or update). Backfill from ADDITIONAL using the priority order below:
   1. **BUG-CATCHING TESTS FIRST (CRITICAL)**: If source code analysis revealed a bug, logic error, or incorrect formula (e.g. discount math adding instead of subtracting, off-by-one errors, missing validation), CREATE A TEST THAT EXPOSES IT. The test SHOULD FAIL — that's the point. Document the bug. Example: if discount formula is wrong, test with discount=20% and assert correct math. If no bug found, skip to #2.
   2. **PR-endpoint edge cases**: Look for integration test candidates covering error paths, boundary values, or alternative scenarios for the SAME endpoints changed in the PR diff. If no suitable candidate exists in ADDITIONAL, derive one from your source-code enrichment findings.
   3. **Same-resource other scenarios**: Other HTTP methods or flows on the same resource group touched by the PR.
   4. **Cross-resource workflows involving the PR endpoint**: Integration scenarios that include the PR's changed endpoint as one of the steps.
-  5. **Unrelated endpoint coverage (last resort)**: Tests for endpoints with no connection to the PR diff, only when ALL options above have been exhausted or would only produce UPDATEs (not new files).
+  5. **Unrelated endpoint coverage (last resort)**: Tests for endpoints with no connection to the PR diff, only when ALL options above have been exhausted.
   **Avoid backfilling with a test for a completely unrelated resource (e.g. \`POST /reviews\` when the PR only changes \`/orders\`) if any PR-endpoint edge-case integration test is feasible.**
+- **Contract tests (\`[skyramp]\`)**: If an existing \`[skyramp]\` contract test already covers that resource path → UPDATE the existing test file instead of creating a new one. A new test case is a new test even if the file already exists — count it toward \`newTestsCreated\`.
 - **Integration/scenario tests**: Always generate as a new file via the scenario pipeline, even if an existing integration test covers the same resource. A new multi-step scenario is a distinct test. Count it toward \`newTestsCreated\`.
 - **UI tests**: Always generate as a new file. Count toward \`newTestsCreated\`.
@@ -589,6 +682,8 @@ ${supplementNote}
 - **Only drop** a previous recommendation if its target endpoint was removed, its business logic changed, or it is now covered by a generated test.
 - **Only add** new recommendations for code paths introduced since the last run.`;
 }
+// Exported for testing — these are the core dedup primitives.
+export { buildExternalCoverageSet, externalDedupKey };
 export function buildRecommendationPrompt(analysis, analysisScope = AnalysisScope.FullRepo, topN = MAX_RECOMMENDATIONS, prContext, workspaceAuthHeader, workspaceAuthType, workspaceAuthScheme, maxGenerateOverride, sessionId) {
     const isDiffScope = isDiff(analysisScope);
     const diffContext = analysis.branchDiffContext;
@@ -653,7 +748,7 @@ Output should be concise and immediately actionable.`
 ## Source Priority
 When information conflicts, prefer: **Traces** (actual behavior) > **Code** (implemented behavior) > **Spec/Docs** (documented behavior).
 `;
-    // Compact fingerprint of what Skyramp has already tested in this repo.
+    // Compact fingerprint of tests already covering endpoints in this repo (Skyramp + external).
     // Re-derived fresh each run from test files on disk — no separate persistence needed.
     const testLocations = analysis.existingTests?.testLocations ?? {};
     const testFingerprint = (() => {
@@ -663,13 +758,17 @@ When information conflicts, prefer: **Traces** (actual behavior) > **Code** (imp
         // Each value is a comma-joined list of "file (covers: ep1, ep2)" entries — one per file.
         // Count files by splitting on "), " boundaries (each entry ends with ")").
         let totalFiles = 0;
+        let externalCount = 0;
         const byType = new Map();
         for (const [type, fileList] of entries) {
             // Use matchAll to extract covers from ALL files of this type, not just the first.
             const allEndpoints = [...fileList.matchAll(/covers:\s*([^)]+)/g)].map(m => m[1].trim());
-            // Count files: strip "(covers: ...)" clauses then split on ", " to count entries,
+            // Count files: strip "[external]", "[skyramp]", and "(covers: ...)" clauses then split on ", " to count entries,
             // correctly handling both files-with-covers and files-without-covers (e.g. UI tests).
-            totalFiles += fileList.replace(/\s*\(covers:[^)]*\)/g, "").split(", ").filter(s => s.trim().length > 0).length;
+            const strippedList = fileList.replace(/\s*\[(?:external|skyramp)\]/g, "").replace(/\s*\(covers:[^)]*\)/g, "");
+            totalFiles += strippedList.split(", ").filter(s => s.trim().length > 0).length;
+            // Count external files from [external] annotations
+            externalCount += (fileList.match(/\[external\]/g) || []).length;
             if (!byType.has(type))
                 byType.set(type, []);
             byType.get(type).push(...allEndpoints);
@@ -677,7 +776,11 @@ When information conflicts, prefer: **Traces** (actual behavior) > **Code** (imp
         const lines = [...byType.entries()]
             .filter(([, eps]) => eps.length > 0)
             .map(([type, eps]) => `  ${type}: ${[...new Set(eps)].join(", ")}`);
-        return `\nSkyramp tests already in this repo (${totalFiles} files):\n${lines.join("\n")}\n(Use this to focus on coverage gaps, not re-testing what already exists.)`;
+        const skyrampCount = totalFiles - externalCount;
+        const breakdown = externalCount > 0
+            ? `${skyrampCount} Skyramp + ${externalCount} external`
+            : `${totalFiles} files`;
+        return `\nTests already covering endpoints in this repo (${breakdown}):\n${lines.join("\n")}\n(Use this to focus on coverage gaps. External tests block new recommendations but cannot be updated.)`;
     })();
     const repoContext = `
 Repository: ${analysis.metadata.repositoryName}
@@ -789,10 +892,21 @@ ${detailBlocks}
         const projectType = analysis.projectClassification.projectType;
         const isFrontendProject = projectType === "full-stack" || projectType === "frontend";
         const isFrontendOnlyProject = projectType === "frontend";
-        mainSection = buildFullRepoRecommendations(scored, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject, isFrontendOnlyProject);
+        const externalCoverageFullRepo = buildExternalCoverageSet(testLocations);
+        if (externalCoverageFullRepo.size > 0) {
+            logger.info(`External test coverage keys (full-repo): ${[...externalCoverageFullRepo].join(", ")}`);
+        }
+        mainSection = buildFullRepoRecommendations(scored, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject, isFrontendOnlyProject, externalCoverageFullRepo);
     }
     else if (isDiffScope && (isUIOnlyPR || scored.length > 0)) {
-        mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces);
+        // Build external coverage set for programmatic dedup — prevents recommending
+        // tests that duplicate existing non-Skyramp tests at the METHOD::resource::testType
+        // level, so different methods on the same resource (e.g. GET vs PUT) remain distinct.
+        const externalCoverage = buildExternalCoverageSet(testLocations);
+        if (externalCoverage.size > 0) {
+            logger.info(`External test coverage keys: ${[...externalCoverage].join(", ")}`);
+        }
+        mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces, externalCoverage, analysis.existingTests.relevantExternalTestPaths ?? []);
     }
     else {
         mainSection = `
@@ -923,6 +1037,11 @@ ${interactionSection}
 <existing_tests>
 ## Existing Tests
+**Two categories of test files (identified by tag):**
+- \`[skyramp]\` — generated by Skyramp tools. You may UPDATE these when the covered endpoint changes.
+- \`[external]\` — user-written tests (pytest, jest, junit, etc.) maintained outside Skyramp. Treat as read-only: use them to determine existing coverage but NEVER update, regenerate, or delete them.
 - Frameworks: ${analysis.existingTests.frameworks.join(", ") || "none"}
 ${formatTestLocations(analysis.existingTests.testLocations)}
 </existing_tests>

package/build/prompts/test-recommendation/test-recommendation-prompt.test.js CHANGED

@@ -2,7 +2,7 @@ jest.mock("@skyramp/skyramp", () => ({
     WorkspaceConfigManager: { create: jest.fn() },
 }));
 import { TestType } from "../../types/TestTypes.js";
-import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
+import { buildRecommendationPrompt, buildExternalCoverageSet, externalDedupKey } from "./test-recommendation-prompt.js";
 import { PATH_PARAM_UUID_GUIDANCE, MAX_TESTS_TO_GENERATE, buildTestQualityCriteria, buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildFewShotExamples, buildVerificationChecklist, } from "./recommendationSections.js";
 import { AnalysisScope } from "../../types/RepositoryAnalysis.js";
 // ---------------------------------------------------------------------------
@@ -1123,7 +1123,7 @@ describe("buildRecommendationPrompt — reduced over-prompting", () => {
 describe("buildRecommendationPrompt — testFingerprint", () => {
     it("omits fingerprint when no existing test locations", () => {
         const prompt = buildRecommendationPrompt(minimalAnalysis());
-        expect(prompt).not.toContain("Skyramp tests already in this repo");
+        expect(prompt).not.toContain("Tests already covering endpoints in this repo");
     });
     it("includes fingerprint with file count and endpoints when testLocations present", () => {
         const analysis = minimalAnalysis({
@@ -1138,7 +1138,7 @@ describe("buildRecommendationPrompt — testFingerprint", () => {
             },
         });
         const prompt = buildRecommendationPrompt(analysis);
-        expect(prompt).toContain("Skyramp tests already in this repo (2 files)");
+        expect(prompt).toContain("Tests already covering endpoints in this repo (2 files)");
         expect(prompt).toContain("contract: GET /api/items, POST /api/items");
         expect(prompt).toContain("integration: POST /api/orders");
     });
@@ -1155,7 +1155,7 @@ describe("buildRecommendationPrompt — testFingerprint", () => {
         });
         const prompt = buildRecommendationPrompt(analysis);
         // File count should be 2, not 1
-        expect(prompt).toContain("Skyramp tests already in this repo (2 files)");
+        expect(prompt).toContain("Tests already covering endpoints in this repo (2 files)");
     });
     it("omits types with no endpoint coverage from fingerprint lines (no trailing 'ui: ' line)", () => {
         const analysis = minimalAnalysis({
@@ -1170,9 +1170,206 @@ describe("buildRecommendationPrompt — testFingerprint", () => {
             },
         });
         const prompt = buildRecommendationPrompt(analysis);
-        expect(prompt).toContain("Skyramp tests already in this repo (2 files)");
+        expect(prompt).toContain("Tests already covering endpoints in this repo (2 files)");
         expect(prompt).toContain("integration: POST /api/orders");
         // UI type has no endpoints — must not emit a blank "ui: " line
         expect(prompt).not.toMatch(/^\s*ui:\s*$/m);
     });
+    it("distinguishes external tests from Skyramp tests in fingerprint", () => {
+        const analysis = minimalAnalysis({
+            existingTests: {
+                frameworks: ["pytest"],
+                coverage: { unit: 0, integration: 1, e2e: 0, ui: 0, load: 0, contract: 1, smoke: 0 },
+                testLocations: {
+                    contract: "test_items_contract.py (covers: GET /api/items)",
+                    integration: "tests/test_api.py [external] (covers: POST /api/orders)",
+                },
+                hasCoverageReports: false,
+            },
+        });
+        const prompt = buildRecommendationPrompt(analysis);
+        expect(prompt).toContain("1 Skyramp + 1 external");
+        expect(prompt).toContain("cannot be updated");
+    });
+    it("uses inclusive header for test coverage table", () => {
+        const analysis = minimalAnalysis({
+            existingTests: {
+                frameworks: ["pytest"],
+                coverage: { unit: 0, integration: 0, e2e: 0, ui: 0, load: 0, contract: 1, smoke: 0 },
+                testLocations: {
+                    contract: "test_items_contract.py (covers: GET /api/items)",
+                },
+                hasCoverageReports: false,
+            },
+        });
+        const prompt = buildRecommendationPrompt(analysis);
+        expect(prompt).toContain("Existing test coverage (Skyramp + external)");
+        expect(prompt).not.toContain("Existing Skyramp test coverage");
+    });
+    it("includes external test dedup rule that blocks CREATE", () => {
+        const analysis = minimalAnalysis({
+            existingTests: {
+                frameworks: ["pytest"],
+                coverage: { unit: 0, integration: 1, e2e: 0, ui: 0, load: 0, contract: 0, smoke: 0 },
+                testLocations: {
+                    integration: "tests/test_api.py [external] (covers: POST /api/orders)",
+                },
+                hasCoverageReports: false,
+            },
+        });
+        const prompt = buildRecommendationPrompt(analysis);
+        expect(prompt).toContain("[external]");
+        expect(prompt).toContain("do NOT create a new test");
+        expect(prompt).toContain("Do NOT attempt to UPDATE, REGENERATE, or DELETE external tests");
+    });
+});
+// ---------------------------------------------------------------------------
+// Tests — External test dedup primitives
+// ---------------------------------------------------------------------------
+describe("buildExternalCoverageSet", () => {
+    it("parses single external test with one endpoint", () => {
+        const set = buildExternalCoverageSet({
+            integration: 'tests/test_api.py [external] (covers: GET /api/v1/orders)',
+        });
+        expect(set.has("GET::orders::integration")).toBe(true);
+        expect(set.size).toBe(1);
+    });
+    it("parses multiple endpoints in one covers clause", () => {
+        const set = buildExternalCoverageSet({
+            integration: 'tests/test_api.py [external] (covers: GET /api/v1/orders, POST /api/v1/orders, DELETE /api/v1/orders/{id})',
+        });
+        expect(set.has("GET::orders::integration")).toBe(true);
+        expect(set.has("POST::orders::integration")).toBe(true);
+        expect(set.has("DELETE::orders::integration")).toBe(true);
+        expect(set.size).toBe(3);
+    });
+    it("parses multiple external files in one test type", () => {
+        const set = buildExternalCoverageSet({
+            integration: 'tests/test_orders.py [external] (covers: GET /api/orders), tests/test_products.py [external] (covers: POST /api/products)',
+        });
+        expect(set.has("GET::orders::integration")).toBe(true);
+        expect(set.has("POST::products::integration")).toBe(true);
+        expect(set.size).toBe(2);
+    });
+    it("handles multiple test types", () => {
+        const set = buildExternalCoverageSet({
+            integration: 'tests/test_api.py [external] (covers: GET /api/orders)',
+            contract: 'tests/test_contract.py [external] (covers: GET /api/orders)',
+        });
+        expect(set.has("GET::orders::integration")).toBe(true);
+        expect(set.has("GET::orders::contract")).toBe(true);
+        expect(set.size).toBe(2);
+    });
+    it("emits both integration and contract keys for unknown test type", () => {
+        const set = buildExternalCoverageSet({
+            unknown: 'tests/test_misc.py [external] (covers: GET /api/items)',
+        });
+        expect(set.has("GET::items::integration")).toBe(true);
+        expect(set.has("GET::items::contract")).toBe(true);
+        expect(set.size).toBe(2);
+    });
+    it("ignores Skyramp tests (no [external] tag)", () => {
+        const set = buildExternalCoverageSet({
+            contract: 'test_items_contract.py (covers: GET /api/items)',
+        });
+        expect(set.size).toBe(0);
+    });
+    it("ignores external tests without covers clause", () => {
+        const set = buildExternalCoverageSet({
+            integration: 'tests/test_api.py [external]',
+        });
+        expect(set.size).toBe(0);
+    });
+    it("returns empty set for empty testLocations", () => {
+        const set = buildExternalCoverageSet({});
+        expect(set.size).toBe(0);
+    });
+    it("skips endpoints with unparseable paths", () => {
+        const set = buildExternalCoverageSet({
+            integration: 'tests/test_api.py [external] (covers: GET )',
+        });
+        // "GET " → method="GET", path="" → resource="unknown" → skipped
+        expect(set.size).toBe(0);
+    });
+    it("strips path parameters from resource extraction", () => {
+        const set = buildExternalCoverageSet({
+            integration: 'tests/test_api.py [external] (covers: PUT /api/v1/orders/{order_id})',
+        });
+        // {order_id} is a path param → skipped, resource is "orders"
+        expect(set.has("PUT::orders::integration")).toBe(true);
+        expect(set.size).toBe(1);
+    });
+    it("normalizes method to uppercase", () => {
+        const set = buildExternalCoverageSet({
+            integration: 'tests/test_api.py [external] (covers: get /api/orders)',
+        });
+        expect(set.has("GET::orders::integration")).toBe(true);
+    });
+});
+describe("externalDedupKey", () => {
+    it("builds key from single-step contract scenario", () => {
+        const scenario = {
+            scenarioName: "get_orders",
+            description: "Get orders",
+            category: "crud",
+            priority: "high",
+            steps: [{ order: 1, method: "GET", path: "/api/v1/orders", description: "list orders", interactionType: "success", expectedStatusCode: 200 }],
+            chainingKeys: [],
+            requiresAuth: false,
+            estimatedComplexity: "simple",
+        };
+        expect(externalDedupKey(scenario)).toBe("GET::orders::contract");
+    });
+    it("builds key from multi-step integration scenario using last mutating step", () => {
+        const scenario = {
+            scenarioName: "create_and_update_order",
+            description: "Create then update order",
+            category: "workflow",
+            priority: "high",
+            steps: [
+                { order: 1, method: "POST", path: "/api/v1/orders", description: "create order", interactionType: "success", expectedStatusCode: 201 },
+                { order: 2, method: "PUT", path: "/api/v1/orders/{order_id}", description: "update order", interactionType: "success", expectedStatusCode: 200 },
+                { order: 3, method: "GET", path: "/api/v1/orders/{order_id}", description: "verify", interactionType: "success", expectedStatusCode: 200 },
+            ],
+            chainingKeys: [],
+            requiresAuth: false,
+            estimatedComplexity: "moderate",
+        };
+        // Last mutating step is PUT /orders/{order_id} → resource "orders"
+        expect(externalDedupKey(scenario)).toBe("PUT::orders::integration");
+    });
+    it("falls back to last step when no mutating methods present", () => {
+        const scenario = {
+            scenarioName: "get_items",
+            description: "List and get items",
+            category: "crud",
+            priority: "medium",
+            steps: [
+                { order: 1, method: "GET", path: "/api/v1/items", description: "list items", interactionType: "success", expectedStatusCode: 200 },
+                { order: 2, method: "GET", path: "/api/v1/items/{id}", description: "get item", interactionType: "success", expectedStatusCode: 200 },
+            ],
+            chainingKeys: [],
+            requiresAuth: false,
+            estimatedComplexity: "simple",
+        };
+        // No mutating steps → falls back to last step → GET /items/{id} → resource "items"
+        expect(externalDedupKey(scenario)).toBe("GET::items::integration");
+    });
+    it("uses explicit testType when provided", () => {
+        const scenario = {
+            scenarioName: "get_orders_contract",
+            description: "Contract test for orders",
+            category: "crud",
+            priority: "high",
+            steps: [
+                { order: 1, method: "GET", path: "/api/v1/orders", description: "list orders", interactionType: "success", expectedStatusCode: 200 },
+                { order: 1, method: "POST", path: "/api/v1/orders", description: "create order", interactionType: "success", expectedStatusCode: 201 },
+            ],
+            chainingKeys: [],
+            requiresAuth: false,
+            estimatedComplexity: "simple",
+            testType: "contract",
+        };
+        expect(externalDedupKey(scenario)).toBe("POST::orders::contract");
+    });
 });

package/build/prompts/testbot/testbot-prompts.js CHANGED Viewed

@@ -87,17 +87,19 @@ ${task1Section}
 ## Task 2: Generate New Tests
-${userPrompt ? "" : "Drift-based maintenance (Task 1) is complete. This step only processes the GENERATE list. Exception: if a GENERATE item targets a resource with an existing contract test, UPDATE that file instead (see covered-resource handling below) — this is a generation-driven edit, not a maintenance re-run."}
+${userPrompt ? "Generate only the tests that the user requested from the Additional Recommendations. The rules below still apply." : "Drift-based maintenance (Task 1) is complete. This step only processes the GENERATE list. Exception: if a GENERATE item targets a resource with an existing `[skyramp]` contract test, UPDATE that test file (see covered-resource handling below) — a new test case added to an existing file counts toward the budget and is reported in `newTestsCreated`."}
 - **MANDATORY — use the pre-ranked GENERATE list as-is**: The Execution Plan's GENERATE section governs ADD actions. You MUST generate exactly those scenarios in the exact order listed. Do NOT substitute, rename, or replace a GENERATE item. If parameter grounding uncovers a distinct bug-catching scenario not already in the GENERATE or ADDITIONAL list, generate it after all planned GENERATE items are complete and report it in \`newTestsCreated\` — this is an additional test driven by source-code analysis and does not count against the GENERATE budget.
 - Scenario JSON files are always new files — always generate them for new methods. Every generated scenario JSON must have a corresponding new integration test generated from it via \`skyramp_integration_test_generation\`.
-- **Covered-resource handling (aligns with Execution Plan Step 0):** When a GENERATE item targets a resource that already has an existing test file of the **same test type** (e.g. existing contract test → GENERATE contract test for same resource):
-  - **Contract tests**: UPDATE the existing file (add the new method's test cases). Report in \`testMaintenance\`, NOT \`newTestsCreated\`. This does NOT count toward the budget — advance to the next candidate.
-  - **Integration/scenario tests**: Always generate as a new file via the scenario pipeline (\`skyramp_batch_scenario_test_generation\` → \`skyramp_integration_test_generation\`), even if an existing integration test covers the same resource. A new multi-step scenario (e.g. create → PATCH → verify recalculation) is a distinct test file. Report in \`newTestsCreated\` and count toward the budget.
-  - **UI tests**: Always generate as a new file. Report in \`newTestsCreated\`.
+- Covered-resource handling (aligns with Execution Plan Step 0): When a GENERATE item targets a resource that already has an existing test file covering the same endpoint:
+  - If the existing test source is \`[external]\`, skip the resource entirely — the external test already provides coverage. Do NOT UPDATE, REGENERATE, or DELETE external tests.
+  - If the existing test is tagged \`[skyramp]\`, apply type-specific rules:
+    - Contract tests: UPDATE the existing Skyramp test file (add the new method's test cases). A new test case is a new test even if the file already exists — report in \`newTestsCreated\` and count toward the budget.
+    - Integration/scenario tests: Always generate as a new file via the scenario pipeline (\`skyramp_batch_scenario_test_generation\` → \`skyramp_integration_test_generation\`), even if an existing integration test covers the same resource. A new multi-step scenario (e.g. create → PATCH → verify recalculation) is a distinct test file. Report in \`newTestsCreated\` and count toward the budget.
+    - UI tests: Always generate as a new file. Report in \`newTestsCreated\`.
   Keep advancing until you have created exactly ${maxGenerate} new test files OR exhausted all candidates.
-- **Example**: If enrichment reveals that sending \`discount_value\` without \`discount_type\` silently orphans the value (a concrete bug), complete all planned GENERATE items first, then generate this discovered scenario as an extra test and report it in \`newTestsCreated\`.
-- **Total generated**: Follow the **"Budget: N generate"** line in the Execution Plan. Process every GENERATE-tagged item in order. Items that become UPDATEs (covered resource) do not count — backfill from ADDITIONAL candidates (highest-ranked first) until \`newTestsCreated\` reaches ${maxGenerate} or all candidates are exhausted.
+- Example: If enrichment reveals that sending \`discount_value\` without \`discount_type\` silently orphans the value (a concrete bug), complete all planned GENERATE items first, then generate this discovered scenario as an extra test and report it in \`newTestsCreated\`.
+- Total generated: Follow the "Budget: N generate" line in the Execution Plan. Process every GENERATE-tagged item in order. Backfill from ADDITIONAL candidates (highest-ranked first) until \`newTestsCreated\` reaches ${maxGenerate} or all candidates are exhausted.
 - **UI test priority**: If the diff contains frontend/UI changes (e.g. \`.tsx\`, \`.jsx\`, \`.vue\`, \`.svelte\` files), you MUST attempt to generate at least one UI test. Use \`browser_navigate\` to the app's base URL — if the app responds, record a trace and generate the test.
   **Skip only if one of these conditions is met:**
   - **(a) App is unreachable** — \`browser_navigate\` fails or connection is refused.
@@ -252,10 +254,9 @@ This applies when the diff contains ONLY changes with no observable API or UI be
 In these cases:
 - \`newTestsCreated\` must be \`[]\`
-- Add exactly one entry to \`issuesFound\`: \`"No testable behavioral surface detected: <brief reason, e.g. 'JSDoc-only changes with no endpoint modifications', 'CSS reformat with no logic changes', 'dependency version bump with no API surface change', 'utility function added but not integrated into any endpoint or component'>. Zero new tests generated by design."\`
+- \`issuesFound\` must be \`[]\` — do NOT add a "No testable behavioral surface" entry; the business case already explains the abstention
 - \`businessCaseAnalysis\` must be a one-sentence summary of what the PR actually does (do NOT leave it blank)
 - \`additionalRecommendations\` must be \`[]\` — do NOT recommend tests for a no-surface PR
-- A blank \`issuesFound\` when tests were intentionally skipped will lose report quality points
 Otherwise: in \`newTestsCreated\`, you must have exactly ${maxGenerate} budget-counting new tests for the planned GENERATE items. Both new files (ADD) and new test cases added to an existing \`[skyramp]\` contract test file (UPDATE) for those planned GENERATE items count toward this ${maxGenerate} target. You may also include at most one additional discovered-scenario file in \`newTestsCreated\` (the bug-catching test generated after all planned items); that extra test does **not** count against the ${maxGenerate} budget. If you have fewer than ${maxGenerate} budget-counting new tests, backfill from the remaining ADDITIONAL candidates before proceeding. Only proceed with fewer than ${maxGenerate} budget-counting new tests if all candidates failed after retry AND the fallback single-contract test also failed.