npm - @skyramp/mcp - Versions diffs - 0.1.0-rc.6 → 0.1.1 - Mend

@skyramp/mcp 0.1.0-rc.6 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

package/build/playwright/registerPlaywrightTools.js +2 -0
package/build/prompts/test-recommendation/analysisOutputPrompt.js +12 -2
package/build/prompts/test-recommendation/test-recommendation-prompt.js +146 -27
package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +202 -5
package/build/prompts/testbot/testbot-prompts.js +30 -14
package/build/services/TestDiscoveryService.js +417 -58
package/build/services/TestDiscoveryService.test.js +361 -0
package/build/tools/test-management/actionsTool.js +4 -1
package/build/tools/test-management/analyzeChangesTool.js +76 -9
package/build/tools/test-management/analyzeTestHealthTool.js +6 -2
package/build/types/RepositoryAnalysis.js +1 -0
package/build/types/TestAnalysis.js +6 -1
package/build/utils/docker.test.js +1 -1
package/build/utils/routeParsers.js +7 -0
package/build/utils/routeParsers.test.js +29 -1
package/build/utils/versions.js +1 -1
package/node_modules/playwright/lib/common/expectBundleImpl.js +221 -221
package/node_modules/playwright/lib/mcp/browser/tools/extensionFrames.js +180 -0
package/node_modules/playwright/lib/mcp/browser/tools/keyboard.js +2 -2
package/node_modules/playwright/lib/utilsBundleImpl.js +49 -49
package/package.json +2 -2

package/build/playwright/registerPlaywrightTools.js CHANGED

@@ -30,8 +30,10 @@ export async function registerPlaywrightTools(server, options) {
         'browser_snapshot',
         'browser_click',
         'browser_type',
+        'browser_press_key',
         'browser_select_option',
         'browser_hover',
+        'browser_drag',
         'browser_tabs',
         'browser_navigate_back',
         'browser_wait_for',

package/build/prompts/test-recommendation/analysisOutputPrompt.js CHANGED

@@ -93,8 +93,18 @@ ${nextStep}`;
         ? `\n<diff>\n${p.diffContent}\n</diff>`
         : "";
     const step2 = isUIOnly
-        ? `### Step 2: Identify consumed API endpoints
-UI-only PR — read changed components to find API calls (fetch, axios, hooks).`
+        ? `### Step 2: Identify consumed API endpoints and integration status
+UI-only PR — perform two checks:
+1. Read changed frontend files to find API calls (fetch, axios, hooks).
+2. For each changed component file (skip CSS/HTML/style-only files — they have no exported component name to search for): check whether any production source file imports, re-exports, or renders it.
+   - Search for both the component's exported name AND its module path/filename to catch aliased and default imports (e.g. \`import Foo from './CartLine'\`).
+   - Derive the exported name from the file itself: use the default export name, a named exported PascalCase component, or the PascalCase file basename when no clearer name exists.
+   - Exclude test/story files from the search: ignore matches in \`*.test.*\`, \`*.spec.*\`, \`*.stories.*\`, and \`__tests__/\` directories — only production code imports count as integration.
+If no production file imports, re-exports, or renders a changed component, mark it as **unintegrated** in the Execution Plan output.
+Exception: if the same PR also adds a route/page file (e.g. under Next.js \`pages/\` or \`app/\`) that imports the component, the route IS the integration point — do NOT mark it as unintegrated.
+Do NOT apply the unintegrated heuristic to route/entrypoint files themselves — those are always reachable by convention.
+An unintegrated non-route component has no DOM node in the running app and cannot be browser-tested — it qualifies as a dead-code / unintegrated-component no-surface PR regardless of how complex the component logic is.`
         : p.diffContent
             ? `### Step 2: Extract new and modified API endpoints from the diff
 Read the \`<diff>\` above and identify every new or modified API endpoint — route registrations, handler methods, controller annotations. Then use the **Router Mounting / Nesting** section above to reconstruct the full URL path for each endpoint by chaining all parent router prefixes down to the handler (e.g. a handler in a file with \`prefix="/reviews"\` that is mounted at \`/{product_id}\` under a router mounted at \`/api/v1/products\` → full path \`/api/v1/products/{product_id}/reviews\`).

package/build/prompts/test-recommendation/test-recommendation-prompt.js CHANGED

@@ -2,6 +2,7 @@ import * as crypto from "crypto";
 import { AnalysisScope, isDiff, } from "../../types/RepositoryAnalysis.js";
 import { WorkspaceAuthType, getDefaultAuthHeader, AUTH_MIDDLEWARE_PATTERNS_STR } from "../../utils/workspaceAuth.js";
 import { logger } from "../../utils/logger.js";
+import { extractResourceFromPath } from "../../utils/routeParsers.js";
 import { buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildToolWorkflows, buildTestPatternGuidelines, buildTestQualityCriteria, buildFewShotExamples, buildVerificationChecklist, buildGenerationRules, getAuthSnippets, MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, } from "./recommendationSections.js";
 import { CATEGORY_PRIORITY, TEST_CATEGORIES } from "../../types/TestRecommendation.js";
 import { buildScopeAssessmentSection, isFrontendFile } from "./scopeAssessment.js";
@@ -10,15 +11,16 @@ function formatTestLocations(locs) {
     if (entries.length === 0)
         return "";
     const rows = entries.map(([type, files]) => `| ${type.padEnd(12)} | ${files} |`).join("\n");
-    return ("\n**Existing Skyramp test coverage:**\n" +
+    return ("\n**Existing test coverage (Skyramp + external):**\n" +
         "| Test type    | File (covers: endpoints)                                |\n" +
         "|--------------|---------------------------------------------------------|\n" +
         rows + "\n\n" +
         "**Deduplication rule (apply this table before generating anything):**\n" +
-        "- Contract test: if the HTTP method + path already appears in a `covers:` entry of type `contract` → UPDATE that file, do NOT create a new one.\n" +
-        "- Integration test: if the primary (last mutating) step's method + path already appears in a `covers:` entry of type `integration` → UPDATE, do NOT create a new one.\n" +
+        "- `[external]` tests: if a resource is covered by an `[external]` test, do NOT create a new test for the same HTTP method + resource + test type (e.g. an external integration test covering `POST /orders` blocks any new `POST` integration test on the `orders` resource). Do NOT attempt to UPDATE, REGENERATE, or DELETE external tests — they are user-maintained.\n" +
+        "- `[skyramp]` contract test: if the HTTP method + path already appears in a `[skyramp]` `covers:` entry of type `contract` → UPDATE that file, do NOT create a new one.\n" +
+        "- `[skyramp]` integration test: if the primary (last mutating) step's method + path already appears in a `[skyramp]` `covers:` entry of type `integration` → UPDATE, do NOT create a new one.\n" +
         "- UI/E2E test: always create a new file — traces are distinct recordings.\n" +
-        "For contract and integration tests: if in doubt, prefer UPDATE over creating a duplicate.");
+        "For `[skyramp]` contract and integration tests: if in doubt, prefer UPDATE over creating a duplicate.");
 }
 // ── Priority-tier ordering (replaces numeric CATEGORY_WEIGHTS) ──
 // Categories map to HIGH / MEDIUM / LOW tiers.
@@ -49,24 +51,87 @@ function computeTiebreakerSeed(endpoints, diffFiles) {
     return crypto.createHash("sha256").update(canonical).digest("hex").slice(0, 8);
 }
 // ── Helpers ──
-const SKIP_SEGMENTS_SET = new Set(["api", "v1", "v2", "v3", "public"]);
-function extractResourceFromPath(path) {
-    const segments = path.split("/").filter(Boolean);
-    const nonParam = segments.filter(s => !s.startsWith("{") && !SKIP_SEGMENTS_SET.has(s));
-    return nonParam[nonParam.length - 1] || "unknown";
-}
-function scenarioCoverageKey(scenario) {
+/** Resolve the primary step and inferred test type for a scenario. */
+function resolvePrimaryStep(scenario) {
     const testType = scenario.testType ?? (scenario.steps.length === 1 ? "contract" : "integration");
     const mutatingSteps = scenario.steps.filter(st => ["POST", "PUT", "PATCH", "DELETE"].includes(st.method));
     // Use the last mutating step — earlier steps are typically prerequisite setup
     // (e.g. POST /products before PATCH /orders), while the final mutation is the
     // primary action under test.
     const primaryStep = mutatingSteps[mutatingSteps.length - 1] ?? scenario.steps[scenario.steps.length - 1];
+    return { primaryStep, testType };
+}
+function scenarioCoverageKey(scenario) {
+    const { primaryStep, testType } = resolvePrimaryStep(scenario);
     const resource = extractResourceFromPath(primaryStep?.path ?? "");
     return `${resource}::${testType}`;
 }
+/**
+ * Method-aware coverage key for external test dedup.
+ * Unlike scenarioCoverageKey (resource::testType), this includes the HTTP method
+ * so that e.g. an external test covering "GET /orders" doesn't block generating
+ * a test for "PUT /orders" — a different operation on the same resource.
+ */
+function externalDedupKey(scenario) {
+    const { primaryStep, testType } = resolvePrimaryStep(scenario);
+    const method = primaryStep?.method ?? "GET";
+    const resource = extractResourceFromPath(primaryStep?.path ?? "");
+    return `${method}::${resource}::${testType}`;
+}
+/**
+ * Build a set of coverage keys from external (non-Skyramp) tests.
+ * Parses `testLocations` entries tagged with `[external]` to extract the
+ * method-aware `METHOD::resource::testType` keys they cover. This allows
+ * programmatic filtering of scenarios that duplicate external test coverage
+ * while preserving distinct operations on the same resource (for example,
+ * `GET::orders::integration` vs `PUT::orders::integration`) — complementing
+ * the prompt-level Step 0 dedup instructions with an algorithmic guarantee.
+ *
+ * Format of testLocations: Record<testType, "file1 [external] (covers: GET /api/v1/orders, POST /api/v1/orders), file2 (covers: ...)">
+ */
+function buildExternalCoverageSet(testLocations) {
+    const coverage = new Set();
+    let externalWithoutCoverage = 0;
+    for (const [testType, fileList] of Object.entries(testLocations)) {
+        // Count external files with no covers clause — these fall back to prompt-level dedup only
+        const externalCount = (fileList.match(/\[external\]/g) || []).length;
+        const coveredCount = (fileList.match(/\[external\]\s*\(covers:/g) || []).length;
+        externalWithoutCoverage += externalCount - coveredCount;
+        // Match all "[external] (covers: ...)" segments in the file list string.
+        // Each match captures the covers clause for one external test file.
+        for (const m of fileList.matchAll(/\[external\]\s*\(covers:\s*([^)]+)\)/g)) {
+            const endpoints = m[1].split(",").map(e => e.trim());
+            for (const ep of endpoints) {
+                // ep is "METHOD /path" e.g. "GET /api/v1/orders/{order_id}"
+                const spaceIdx = ep.indexOf(" ");
+                if (spaceIdx < 0)
+                    continue;
+                const method = ep.slice(0, spaceIdx).toUpperCase();
+                const epPath = ep.slice(spaceIdx + 1);
+                const resource = extractResourceFromPath(epPath);
+                if (resource !== "unknown") {
+                    // Method-aware key: "GET::orders::integration" — matches externalDedupKey() format.
+                    // When testType is "unknown" (heuristic failed), emit keys for both integration and
+                    // contract to avoid silent misses — conservative over-blocking is preferable.
+                    if (testType === "unknown") {
+                        coverage.add(`${method}::${resource}::integration`);
+                        coverage.add(`${method}::${resource}::contract`);
+                    }
+                    else {
+                        coverage.add(`${method}::${resource}::${testType}`);
+                    }
+                }
+            }
+        }
+    }
+    if (externalWithoutCoverage > 0) {
+        logger.info(`${externalWithoutCoverage} external test file(s) have no extractable endpoint coverage — ` +
+            `programmatic dedup skipped for these; Step 0 semantic check is the fallback.`);
+    }
+    return coverage;
+}
 // ── Execution Plan (replaces pre-ranked + scenarios + heuristic sections) ──
-function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject = false, isFrontendOnlyProject = false) {
+function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject = false, isFrontendOnlyProject = false, externalCoverage = new Set()) {
     // Full-repo mode only — percentage-based UI/E2E slot targets (15% each, floor 1).
     const rawE2E = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
     const rawUI = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
@@ -90,11 +155,22 @@ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, au
     const TYPE_LABEL = {
         e2e: "E2E", ui: "UI", integration: "Integration", contract: "Contract",
     };
+    // Filter out scenarios already covered by external tests before slicing.
+    const scoredFiltered = externalCoverage.size > 0
+        ? scored.filter(item => {
+            const key = externalDedupKey(item.scenario);
+            if (externalCoverage.has(key)) {
+                logger.info(`External dedup (full-repo): skipping "${item.scenario.scenarioName}" (${key})`);
+                return false;
+            }
+            return true;
+        })
+        : scored;
     // For full-stack repos, carve out E2E and UI slots before filling with backend tests.
     const backendSlotCount = isFrontendProject
         ? Math.max(0, topN - minE2ESlots - minUISlots)
         : topN;
-    const allItems = scored.slice(0, backendSlotCount);
+    const allItems = scoredFiltered.slice(0, backendSlotCount);
     const byType = new Map();
     for (const t of TYPE_ORDER)
         byType.set(t, []);
@@ -304,7 +380,7 @@ ${repoSupplementNote}
 Cross-check every endpoint path against the Router Mounting / Nesting section in the analysis above. Sub-routers may be mounted at nested prefixes — e.g. a reviews router with \`@router.get("/")\` may actually be \`GET /api/v1/products/{product_id}/reviews\` if mounted under that prefix. Always use the fully-qualified nested path in tool calls, not the path as it appears in the route file alone.
 **Existing test files (check before assigning output filenames):**
-See the Existing Tests section above. If a recommendation's primary resource already has a test file listed there, prefer passing an explicit \`output\` filename (e.g. \`output: "orders_integration_test.py"\`) to update the existing file rather than creating a duplicate.
+See the Existing Tests section above. If a recommendation's primary resource already has a \`[skyramp]\` test file listed there, prefer passing an explicit \`output\` filename (e.g. \`output: "orders_integration_test.py"\`) to update the existing file rather than creating a duplicate. Do NOT update \`[external]\` test files — they are user-maintained.
 Before filling in tool call parameters for each item, use the analysis data already provided above (endpoint interactions, source context) first. Only read the route handler source code directly when the analysis data does not contain the specific value you need:
 - Required request body fields (POST/PUT/PATCH) — use field names from the analysis interactions; read source only if they show \`{}\` or are missing
@@ -324,7 +400,7 @@ ${buildTestQualityCriteria()}
 | Code Insight | Derived from actual implementation (spotted middleware pattern, N+1 risk, unique constraint)? → bump up one tier |
 </enrichment_notes>`;
 }
-function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false) {
+function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false, externalCoverage = new Set(), relevantExternalTestPaths = []) {
     const frontendUrl = "<frontend_url>";
     // Slot allocation:
     // - UI-only PR: all GENERATE slots are UI placeholders (no pre-ranked backend scenarios)
@@ -335,8 +411,21 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
         : hasFrontendChanges
             ? Math.max(0, maxGen - 1)
             : maxGen;
-    const generateItems = scored.slice(0, Math.min(backendGenerateCount, scored.length));
-    const rawAdditionalItems = scored.slice(backendGenerateCount, topN);
+    // Filter out scenarios whose primary method + resource + test type is already covered by external tests.
+    // Method-aware: an external test covering GET /orders won't block PUT /orders scenarios.
+    // This is the programmatic complement to the prompt-level Step 0 dedup instructions.
+    const scoredAfterExternalDedup = externalCoverage.size > 0
+        ? scored.filter(item => {
+            const key = externalDedupKey(item.scenario);
+            if (externalCoverage.has(key)) {
+                logger.info(`External dedup: skipping "${item.scenario.scenarioName}" (${key}) — covered by external test`);
+                return false;
+            }
+            return true;
+        })
+        : scored;
+    const generateItems = scoredAfterExternalDedup.slice(0, Math.min(backendGenerateCount, scoredAfterExternalDedup.length));
+    const rawAdditionalItems = scoredAfterExternalDedup.slice(backendGenerateCount, topN);
     // Filter additional items whose primary resource + test type already appear in GENERATE
     const generatedCoverage = new Set(generateItems.map(item => scenarioCoverageKey(item.scenario)));
     const additionalItems = rawAdditionalItems.filter(item => !generatedCoverage.has(scenarioCoverageKey(item.scenario)));
@@ -485,20 +574,24 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
 Derive scenario names and steps from the actual changed frontend files. If your Budget Plan calls for 0% UI/E2E, omit this entirely.` : "";
     const supplementNote = `\n**If your Budget Plan total exceeds the pre-ranked items listed above:** draft additional tests from source-code enrichment (Step 1). For each new or changed endpoint, identify boundary or variation scenarios — formula parameters, search/filter constraints, required field validation. Only after exhausting PR-specific scenarios, add generic patterns (auth boundary → 401, non-existent ID → 404). Do NOT supplement with tests whose endpoint + test type match a GENERATE item.`;
     // ── PR / branch-diff mode: execution plan ────────────────────────────────
+    const externalTestFilesList = relevantExternalTestPaths.length > 0
+        ? `**Read these external test files first** (paths are relative to the \`repositoryPath\` you passed to \`skyramp_analyze_changes\` — prepend it to get the absolute path). Determine exactly which HTTP methods + paths each one covers. This is the definitive source of truth for external coverage:\n${relevantExternalTestPaths.map(p => `- \`${p}\``).join("\n")}\n\n`
+        : "";
     return `## Execution Plan
 Seed: ${seed} | Endpoints: ${endpointCount} | Max: ${maxGen} generate + up to ${Math.max(topN - maxGen, 0)} additional (your Budget Plan determines the exact count)
 ${buildScopeAssessmentSection(topN, maxGen, isUIOnlyPR)}
-**Step 0 — Existing-test cross-check (before executing anything)**
-For every GENERATE item below, check its endpoint path and test type against the Existing Tests list (further down in the prompt).
-- **Contract tests**: If an existing contract test already covers that resource path → UPDATE the existing file instead of creating a new one. This does NOT count toward \`newTestsCreated\` — backfill from ADDITIONAL candidates to fill the open ADD slot using this priority order:
+**Step 0 — External test coverage verification (before executing anything)**
+${externalTestFilesList}For every GENERATE item below, check its endpoint path and test type against the Existing Tests list (further down in the prompt).
+- **\`[external]\` tests**: If the endpoint is already covered by an \`[external]\` test of the same type → skip the resource entirely (do NOT create or update). Backfill from ADDITIONAL using the priority order below:
   1. **BUG-CATCHING TESTS FIRST (CRITICAL)**: If source code analysis revealed a bug, logic error, or incorrect formula (e.g. discount math adding instead of subtracting, off-by-one errors, missing validation), CREATE A TEST THAT EXPOSES IT. The test SHOULD FAIL — that's the point. Document the bug. Example: if discount formula is wrong, test with discount=20% and assert correct math. If no bug found, skip to #2.
   2. **PR-endpoint edge cases**: Look for integration test candidates covering error paths, boundary values, or alternative scenarios for the SAME endpoints changed in the PR diff. If no suitable candidate exists in ADDITIONAL, derive one from your source-code enrichment findings.
   3. **Same-resource other scenarios**: Other HTTP methods or flows on the same resource group touched by the PR.
   4. **Cross-resource workflows involving the PR endpoint**: Integration scenarios that include the PR's changed endpoint as one of the steps.
-  5. **Unrelated endpoint coverage (last resort)**: Tests for endpoints with no connection to the PR diff, only when ALL options above have been exhausted or would only produce UPDATEs (not new files).
+  5. **Unrelated endpoint coverage (last resort)**: Tests for endpoints with no connection to the PR diff, only when ALL options above have been exhausted.
   **Avoid backfilling with a test for a completely unrelated resource (e.g. \`POST /reviews\` when the PR only changes \`/orders\`) if any PR-endpoint edge-case integration test is feasible.**
+- **Contract tests (\`[skyramp]\`)**: If an existing \`[skyramp]\` contract test already covers that resource path → UPDATE the existing test file instead of creating a new one. A new test case is a new test even if the file already exists — count it toward \`newTestsCreated\`.
 - **Integration/scenario tests**: Always generate as a new file via the scenario pipeline, even if an existing integration test covers the same resource. A new multi-step scenario is a distinct test. Count it toward \`newTestsCreated\`.
 - **UI tests**: Always generate as a new file. Count toward \`newTestsCreated\`.
@@ -589,6 +682,8 @@ ${supplementNote}
 - **Only drop** a previous recommendation if its target endpoint was removed, its business logic changed, or it is now covered by a generated test.
 - **Only add** new recommendations for code paths introduced since the last run.`;
 }
+// Exported for testing — these are the core dedup primitives.
+export { buildExternalCoverageSet, externalDedupKey };
 export function buildRecommendationPrompt(analysis, analysisScope = AnalysisScope.FullRepo, topN = MAX_RECOMMENDATIONS, prContext, workspaceAuthHeader, workspaceAuthType, workspaceAuthScheme, maxGenerateOverride, sessionId) {
     const isDiffScope = isDiff(analysisScope);
     const diffContext = analysis.branchDiffContext;
@@ -653,7 +748,7 @@ Output should be concise and immediately actionable.`
 ## Source Priority
 When information conflicts, prefer: **Traces** (actual behavior) > **Code** (implemented behavior) > **Spec/Docs** (documented behavior).
 `;
-    // Compact fingerprint of what Skyramp has already tested in this repo.
+    // Compact fingerprint of tests already covering endpoints in this repo (Skyramp + external).
     // Re-derived fresh each run from test files on disk — no separate persistence needed.
     const testLocations = analysis.existingTests?.testLocations ?? {};
     const testFingerprint = (() => {
@@ -663,13 +758,17 @@ When information conflicts, prefer: **Traces** (actual behavior) > **Code** (imp
         // Each value is a comma-joined list of "file (covers: ep1, ep2)" entries — one per file.
         // Count files by splitting on "), " boundaries (each entry ends with ")").
         let totalFiles = 0;
+        let externalCount = 0;
         const byType = new Map();
         for (const [type, fileList] of entries) {
             // Use matchAll to extract covers from ALL files of this type, not just the first.
             const allEndpoints = [...fileList.matchAll(/covers:\s*([^)]+)/g)].map(m => m[1].trim());
-            // Count files: strip "(covers: ...)" clauses then split on ", " to count entries,
+            // Count files: strip "[external]", "[skyramp]", and "(covers: ...)" clauses then split on ", " to count entries,
             // correctly handling both files-with-covers and files-without-covers (e.g. UI tests).
-            totalFiles += fileList.replace(/\s*\(covers:[^)]*\)/g, "").split(", ").filter(s => s.trim().length > 0).length;
+            const strippedList = fileList.replace(/\s*\[(?:external|skyramp)\]/g, "").replace(/\s*\(covers:[^)]*\)/g, "");
+            totalFiles += strippedList.split(", ").filter(s => s.trim().length > 0).length;
+            // Count external files from [external] annotations
+            externalCount += (fileList.match(/\[external\]/g) || []).length;
             if (!byType.has(type))
                 byType.set(type, []);
             byType.get(type).push(...allEndpoints);
@@ -677,7 +776,11 @@ When information conflicts, prefer: **Traces** (actual behavior) > **Code** (imp
         const lines = [...byType.entries()]
             .filter(([, eps]) => eps.length > 0)
             .map(([type, eps]) => `  ${type}: ${[...new Set(eps)].join(", ")}`);
-        return `\nSkyramp tests already in this repo (${totalFiles} files):\n${lines.join("\n")}\n(Use this to focus on coverage gaps, not re-testing what already exists.)`;
+        const skyrampCount = totalFiles - externalCount;
+        const breakdown = externalCount > 0
+            ? `${skyrampCount} Skyramp + ${externalCount} external`
+            : `${totalFiles} files`;
+        return `\nTests already covering endpoints in this repo (${breakdown}):\n${lines.join("\n")}\n(Use this to focus on coverage gaps. External tests block new recommendations but cannot be updated.)`;
     })();
     const repoContext = `
 Repository: ${analysis.metadata.repositoryName}
@@ -789,10 +892,21 @@ ${detailBlocks}
         const projectType = analysis.projectClassification.projectType;
         const isFrontendProject = projectType === "full-stack" || projectType === "frontend";
         const isFrontendOnlyProject = projectType === "frontend";
-        mainSection = buildFullRepoRecommendations(scored, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject, isFrontendOnlyProject);
+        const externalCoverageFullRepo = buildExternalCoverageSet(testLocations);
+        if (externalCoverageFullRepo.size > 0) {
+            logger.info(`External test coverage keys (full-repo): ${[...externalCoverageFullRepo].join(", ")}`);
+        }
+        mainSection = buildFullRepoRecommendations(scored, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject, isFrontendOnlyProject, externalCoverageFullRepo);
     }
     else if (isDiffScope && (isUIOnlyPR || scored.length > 0)) {
-        mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces);
+        // Build external coverage set for programmatic dedup — prevents recommending
+        // tests that duplicate existing non-Skyramp tests at the METHOD::resource::testType
+        // level, so different methods on the same resource (e.g. GET vs PUT) remain distinct.
+        const externalCoverage = buildExternalCoverageSet(testLocations);
+        if (externalCoverage.size > 0) {
+            logger.info(`External test coverage keys: ${[...externalCoverage].join(", ")}`);
+        }
+        mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces, externalCoverage, analysis.existingTests.relevantExternalTestPaths ?? []);
     }
     else {
         mainSection = `
@@ -923,6 +1037,11 @@ ${interactionSection}
 <existing_tests>
 ## Existing Tests
+**Two categories of test files (identified by tag):**
+- \`[skyramp]\` — generated by Skyramp tools. You may UPDATE these when the covered endpoint changes.
+- \`[external]\` — user-written tests (pytest, jest, junit, etc.) maintained outside Skyramp. Treat as read-only: use them to determine existing coverage but NEVER update, regenerate, or delete them.
 - Frameworks: ${analysis.existingTests.frameworks.join(", ") || "none"}
 ${formatTestLocations(analysis.existingTests.testLocations)}
 </existing_tests>

package/build/prompts/test-recommendation/test-recommendation-prompt.test.js CHANGED

@@ -2,7 +2,7 @@ jest.mock("@skyramp/skyramp", () => ({
     WorkspaceConfigManager: { create: jest.fn() },
 }));
 import { TestType } from "../../types/TestTypes.js";
-import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
+import { buildRecommendationPrompt, buildExternalCoverageSet, externalDedupKey } from "./test-recommendation-prompt.js";
 import { PATH_PARAM_UUID_GUIDANCE, MAX_TESTS_TO_GENERATE, buildTestQualityCriteria, buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildFewShotExamples, buildVerificationChecklist, } from "./recommendationSections.js";
 import { AnalysisScope } from "../../types/RepositoryAnalysis.js";
 // ---------------------------------------------------------------------------
@@ -1123,7 +1123,7 @@ describe("buildRecommendationPrompt — reduced over-prompting", () => {
 describe("buildRecommendationPrompt — testFingerprint", () => {
     it("omits fingerprint when no existing test locations", () => {
         const prompt = buildRecommendationPrompt(minimalAnalysis());
-        expect(prompt).not.toContain("Skyramp tests already in this repo");
+        expect(prompt).not.toContain("Tests already covering endpoints in this repo");
     });
     it("includes fingerprint with file count and endpoints when testLocations present", () => {
         const analysis = minimalAnalysis({
@@ -1138,7 +1138,7 @@ describe("buildRecommendationPrompt — testFingerprint", () => {
             },
         });
         const prompt = buildRecommendationPrompt(analysis);
-        expect(prompt).toContain("Skyramp tests already in this repo (2 files)");
+        expect(prompt).toContain("Tests already covering endpoints in this repo (2 files)");
         expect(prompt).toContain("contract: GET /api/items, POST /api/items");
         expect(prompt).toContain("integration: POST /api/orders");
     });
@@ -1155,7 +1155,7 @@ describe("buildRecommendationPrompt — testFingerprint", () => {
         });
         const prompt = buildRecommendationPrompt(analysis);
         // File count should be 2, not 1
-        expect(prompt).toContain("Skyramp tests already in this repo (2 files)");
+        expect(prompt).toContain("Tests already covering endpoints in this repo (2 files)");
     });
     it("omits types with no endpoint coverage from fingerprint lines (no trailing 'ui: ' line)", () => {
         const analysis = minimalAnalysis({
@@ -1170,9 +1170,206 @@ describe("buildRecommendationPrompt — testFingerprint", () => {
             },
         });
         const prompt = buildRecommendationPrompt(analysis);
-        expect(prompt).toContain("Skyramp tests already in this repo (2 files)");
+        expect(prompt).toContain("Tests already covering endpoints in this repo (2 files)");
         expect(prompt).toContain("integration: POST /api/orders");
         // UI type has no endpoints — must not emit a blank "ui: " line
         expect(prompt).not.toMatch(/^\s*ui:\s*$/m);
     });
+    it("distinguishes external tests from Skyramp tests in fingerprint", () => {
+        const analysis = minimalAnalysis({
+            existingTests: {
+                frameworks: ["pytest"],
+                coverage: { unit: 0, integration: 1, e2e: 0, ui: 0, load: 0, contract: 1, smoke: 0 },
+                testLocations: {
+                    contract: "test_items_contract.py (covers: GET /api/items)",
+                    integration: "tests/test_api.py [external] (covers: POST /api/orders)",
+                },
+                hasCoverageReports: false,
+            },
+        });
+        const prompt = buildRecommendationPrompt(analysis);
+        expect(prompt).toContain("1 Skyramp + 1 external");
+        expect(prompt).toContain("cannot be updated");
+    });
+    it("uses inclusive header for test coverage table", () => {
+        const analysis = minimalAnalysis({
+            existingTests: {
+                frameworks: ["pytest"],
+                coverage: { unit: 0, integration: 0, e2e: 0, ui: 0, load: 0, contract: 1, smoke: 0 },
+                testLocations: {
+                    contract: "test_items_contract.py (covers: GET /api/items)",
+                },
+                hasCoverageReports: false,
+            },
+        });
+        const prompt = buildRecommendationPrompt(analysis);
+        expect(prompt).toContain("Existing test coverage (Skyramp + external)");
+        expect(prompt).not.toContain("Existing Skyramp test coverage");
+    });
+    it("includes external test dedup rule that blocks CREATE", () => {
+        const analysis = minimalAnalysis({
+            existingTests: {
+                frameworks: ["pytest"],
+                coverage: { unit: 0, integration: 1, e2e: 0, ui: 0, load: 0, contract: 0, smoke: 0 },
+                testLocations: {
+                    integration: "tests/test_api.py [external] (covers: POST /api/orders)",
+                },
+                hasCoverageReports: false,
+            },
+        });
+        const prompt = buildRecommendationPrompt(analysis);
+        expect(prompt).toContain("[external]");
+        expect(prompt).toContain("do NOT create a new test");
+        expect(prompt).toContain("Do NOT attempt to UPDATE, REGENERATE, or DELETE external tests");
+    });
+});
+// ---------------------------------------------------------------------------
+// Tests — External test dedup primitives
+// ---------------------------------------------------------------------------
+describe("buildExternalCoverageSet", () => {
+    it("parses single external test with one endpoint", () => {
+        const set = buildExternalCoverageSet({
+            integration: 'tests/test_api.py [external] (covers: GET /api/v1/orders)',
+        });
+        expect(set.has("GET::orders::integration")).toBe(true);
+        expect(set.size).toBe(1);
+    });
+    it("parses multiple endpoints in one covers clause", () => {
+        const set = buildExternalCoverageSet({
+            integration: 'tests/test_api.py [external] (covers: GET /api/v1/orders, POST /api/v1/orders, DELETE /api/v1/orders/{id})',
+        });
+        expect(set.has("GET::orders::integration")).toBe(true);
+        expect(set.has("POST::orders::integration")).toBe(true);
+        expect(set.has("DELETE::orders::integration")).toBe(true);
+        expect(set.size).toBe(3);
+    });
+    it("parses multiple external files in one test type", () => {
+        const set = buildExternalCoverageSet({
+            integration: 'tests/test_orders.py [external] (covers: GET /api/orders), tests/test_products.py [external] (covers: POST /api/products)',
+        });
+        expect(set.has("GET::orders::integration")).toBe(true);
+        expect(set.has("POST::products::integration")).toBe(true);
+        expect(set.size).toBe(2);
+    });
+    it("handles multiple test types", () => {
+        const set = buildExternalCoverageSet({
+            integration: 'tests/test_api.py [external] (covers: GET /api/orders)',
+            contract: 'tests/test_contract.py [external] (covers: GET /api/orders)',
+        });
+        expect(set.has("GET::orders::integration")).toBe(true);
+        expect(set.has("GET::orders::contract")).toBe(true);
+        expect(set.size).toBe(2);
+    });
+    it("emits both integration and contract keys for unknown test type", () => {
+        const set = buildExternalCoverageSet({
+            unknown: 'tests/test_misc.py [external] (covers: GET /api/items)',
+        });
+        expect(set.has("GET::items::integration")).toBe(true);
+        expect(set.has("GET::items::contract")).toBe(true);
+        expect(set.size).toBe(2);
+    });
+    it("ignores Skyramp tests (no [external] tag)", () => {
+        const set = buildExternalCoverageSet({
+            contract: 'test_items_contract.py (covers: GET /api/items)',
+        });
+        expect(set.size).toBe(0);
+    });
+    it("ignores external tests without covers clause", () => {
+        const set = buildExternalCoverageSet({
+            integration: 'tests/test_api.py [external]',
+        });
+        expect(set.size).toBe(0);
+    });
+    it("returns empty set for empty testLocations", () => {
+        const set = buildExternalCoverageSet({});
+        expect(set.size).toBe(0);
+    });
+    it("skips endpoints with unparseable paths", () => {
+        const set = buildExternalCoverageSet({
+            integration: 'tests/test_api.py [external] (covers: GET )',
+        });
+        // "GET " → method="GET", path="" → resource="unknown" → skipped
+        expect(set.size).toBe(0);
+    });
+    it("strips path parameters from resource extraction", () => {
+        const set = buildExternalCoverageSet({
+            integration: 'tests/test_api.py [external] (covers: PUT /api/v1/orders/{order_id})',
+        });
+        // {order_id} is a path param → skipped, resource is "orders"
+        expect(set.has("PUT::orders::integration")).toBe(true);
+        expect(set.size).toBe(1);
+    });
+    it("normalizes method to uppercase", () => {
+        const set = buildExternalCoverageSet({
+            integration: 'tests/test_api.py [external] (covers: get /api/orders)',
+        });
+        expect(set.has("GET::orders::integration")).toBe(true);
+    });
+});
+describe("externalDedupKey", () => {
+    it("builds key from single-step contract scenario", () => {
+        const scenario = {
+            scenarioName: "get_orders",
+            description: "Get orders",
+            category: "crud",
+            priority: "high",
+            steps: [{ order: 1, method: "GET", path: "/api/v1/orders", description: "list orders", interactionType: "success", expectedStatusCode: 200 }],
+            chainingKeys: [],
+            requiresAuth: false,
+            estimatedComplexity: "simple",
+        };
+        expect(externalDedupKey(scenario)).toBe("GET::orders::contract");
+    });
+    it("builds key from multi-step integration scenario using last mutating step", () => {
+        const scenario = {
+            scenarioName: "create_and_update_order",
+            description: "Create then update order",
+            category: "workflow",
+            priority: "high",
+            steps: [
+                { order: 1, method: "POST", path: "/api/v1/orders", description: "create order", interactionType: "success", expectedStatusCode: 201 },
+                { order: 2, method: "PUT", path: "/api/v1/orders/{order_id}", description: "update order", interactionType: "success", expectedStatusCode: 200 },
+                { order: 3, method: "GET", path: "/api/v1/orders/{order_id}", description: "verify", interactionType: "success", expectedStatusCode: 200 },
+            ],
+            chainingKeys: [],
+            requiresAuth: false,
+            estimatedComplexity: "moderate",
+        };
+        // Last mutating step is PUT /orders/{order_id} → resource "orders"
+        expect(externalDedupKey(scenario)).toBe("PUT::orders::integration");
+    });
+    it("falls back to last step when no mutating methods present", () => {
+        const scenario = {
+            scenarioName: "get_items",
+            description: "List and get items",
+            category: "crud",
+            priority: "medium",
+            steps: [
+                { order: 1, method: "GET", path: "/api/v1/items", description: "list items", interactionType: "success", expectedStatusCode: 200 },
+                { order: 2, method: "GET", path: "/api/v1/items/{id}", description: "get item", interactionType: "success", expectedStatusCode: 200 },
+            ],
+            chainingKeys: [],
+            requiresAuth: false,
+            estimatedComplexity: "simple",
+        };
+        // No mutating steps → falls back to last step → GET /items/{id} → resource "items"
+        expect(externalDedupKey(scenario)).toBe("GET::items::integration");
+    });
+    it("uses explicit testType when provided", () => {
+        const scenario = {
+            scenarioName: "get_orders_contract",
+            description: "Contract test for orders",
+            category: "crud",
+            priority: "high",
+            steps: [
+                { order: 1, method: "GET", path: "/api/v1/orders", description: "list orders", interactionType: "success", expectedStatusCode: 200 },
+                { order: 1, method: "POST", path: "/api/v1/orders", description: "create order", interactionType: "success", expectedStatusCode: 201 },
+            ],
+            chainingKeys: [],
+            requiresAuth: false,
+            estimatedComplexity: "simple",
+            testType: "contract",
+        };
+        expect(externalDedupKey(scenario)).toBe("POST::orders::contract");
+    });
 });