@skyramp/mcp 0.1.0-rc.6 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,8 +30,10 @@ export async function registerPlaywrightTools(server, options) {
30
30
  'browser_snapshot',
31
31
  'browser_click',
32
32
  'browser_type',
33
+ 'browser_press_key',
33
34
  'browser_select_option',
34
35
  'browser_hover',
36
+ 'browser_drag',
35
37
  'browser_tabs',
36
38
  'browser_navigate_back',
37
39
  'browser_wait_for',
@@ -93,8 +93,18 @@ ${nextStep}`;
93
93
  ? `\n<diff>\n${p.diffContent}\n</diff>`
94
94
  : "";
95
95
  const step2 = isUIOnly
96
- ? `### Step 2: Identify consumed API endpoints
97
- UI-only PR — read changed components to find API calls (fetch, axios, hooks).`
96
+ ? `### Step 2: Identify consumed API endpoints and integration status
97
+ UI-only PR — perform two checks:
98
+ 1. Read changed frontend files to find API calls (fetch, axios, hooks).
99
+ 2. For each changed component file (skip CSS/HTML/style-only files — they have no exported component name to search for): check whether any production source file imports, re-exports, or renders it.
100
+ - Search for both the component's exported name AND its module path/filename to catch aliased and default imports (e.g. \`import Foo from './CartLine'\`).
101
+ - Derive the exported name from the file itself: use the default export name, a named exported PascalCase component, or the PascalCase file basename when no clearer name exists.
102
+ - Exclude test/story files from the search: ignore matches in \`*.test.*\`, \`*.spec.*\`, \`*.stories.*\`, and \`__tests__/\` directories — only production code imports count as integration.
103
+
104
+ If no production file imports, re-exports, or renders a changed component, mark it as **unintegrated** in the Execution Plan output.
105
+ Exception: if the same PR also adds a route/page file (e.g. under Next.js \`pages/\` or \`app/\`) that imports the component, the route IS the integration point — do NOT mark it as unintegrated.
106
+ Do NOT apply the unintegrated heuristic to route/entrypoint files themselves — those are always reachable by convention.
107
+ An unintegrated non-route component has no DOM node in the running app and cannot be browser-tested — it qualifies as a dead-code / unintegrated-component no-surface PR regardless of how complex the component logic is.`
98
108
  : p.diffContent
99
109
  ? `### Step 2: Extract new and modified API endpoints from the diff
100
110
  Read the \`<diff>\` above and identify every new or modified API endpoint — route registrations, handler methods, controller annotations. Then use the **Router Mounting / Nesting** section above to reconstruct the full URL path for each endpoint by chaining all parent router prefixes down to the handler (e.g. a handler in a file with \`prefix="/reviews"\` that is mounted at \`/{product_id}\` under a router mounted at \`/api/v1/products\` → full path \`/api/v1/products/{product_id}/reviews\`).
@@ -2,6 +2,7 @@ import * as crypto from "crypto";
2
2
  import { AnalysisScope, isDiff, } from "../../types/RepositoryAnalysis.js";
3
3
  import { WorkspaceAuthType, getDefaultAuthHeader, AUTH_MIDDLEWARE_PATTERNS_STR } from "../../utils/workspaceAuth.js";
4
4
  import { logger } from "../../utils/logger.js";
5
+ import { extractResourceFromPath } from "../../utils/routeParsers.js";
5
6
  import { buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildToolWorkflows, buildTestPatternGuidelines, buildTestQualityCriteria, buildFewShotExamples, buildVerificationChecklist, buildGenerationRules, getAuthSnippets, MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, } from "./recommendationSections.js";
6
7
  import { CATEGORY_PRIORITY, TEST_CATEGORIES } from "../../types/TestRecommendation.js";
7
8
  import { buildScopeAssessmentSection, isFrontendFile } from "./scopeAssessment.js";
@@ -10,15 +11,16 @@ function formatTestLocations(locs) {
10
11
  if (entries.length === 0)
11
12
  return "";
12
13
  const rows = entries.map(([type, files]) => `| ${type.padEnd(12)} | ${files} |`).join("\n");
13
- return ("\n**Existing Skyramp test coverage:**\n" +
14
+ return ("\n**Existing test coverage (Skyramp + external):**\n" +
14
15
  "| Test type | File (covers: endpoints) |\n" +
15
16
  "|--------------|---------------------------------------------------------|\n" +
16
17
  rows + "\n\n" +
17
18
  "**Deduplication rule (apply this table before generating anything):**\n" +
18
- "- Contract test: if the HTTP method + path already appears in a `covers:` entry of type `contract` UPDATE that file, do NOT create a new one.\n" +
19
- "- Integration test: if the primary (last mutating) step's method + path already appears in a `covers:` entry of type `integration` → UPDATE, do NOT create a new one.\n" +
19
+ "- `[external]` tests: if a resource is covered by an `[external]` test, do NOT create a new test for the same HTTP method + resource + test type (e.g. an external integration test covering `POST /orders` blocks any new `POST` integration test on the `orders` resource). Do NOT attempt to UPDATE, REGENERATE, or DELETE external tests — they are user-maintained.\n" +
20
+ "- `[skyramp]` contract test: if the HTTP method + path already appears in a `[skyramp]` `covers:` entry of type `contract` → UPDATE that file, do NOT create a new one.\n" +
21
+ "- `[skyramp]` integration test: if the primary (last mutating) step's method + path already appears in a `[skyramp]` `covers:` entry of type `integration` → UPDATE, do NOT create a new one.\n" +
20
22
  "- UI/E2E test: always create a new file — traces are distinct recordings.\n" +
21
- "For contract and integration tests: if in doubt, prefer UPDATE over creating a duplicate.");
23
+ "For `[skyramp]` contract and integration tests: if in doubt, prefer UPDATE over creating a duplicate.");
22
24
  }
23
25
  // ── Priority-tier ordering (replaces numeric CATEGORY_WEIGHTS) ──
24
26
  // Categories map to HIGH / MEDIUM / LOW tiers.
@@ -49,24 +51,87 @@ function computeTiebreakerSeed(endpoints, diffFiles) {
49
51
  return crypto.createHash("sha256").update(canonical).digest("hex").slice(0, 8);
50
52
  }
51
53
  // ── Helpers ──
52
- const SKIP_SEGMENTS_SET = new Set(["api", "v1", "v2", "v3", "public"]);
53
- function extractResourceFromPath(path) {
54
- const segments = path.split("/").filter(Boolean);
55
- const nonParam = segments.filter(s => !s.startsWith("{") && !SKIP_SEGMENTS_SET.has(s));
56
- return nonParam[nonParam.length - 1] || "unknown";
57
- }
58
- function scenarioCoverageKey(scenario) {
54
/**
 * Resolve the primary step and inferred test type for a scenario.
 *
 * The test type is `scenario.testType` when set; otherwise a single-step
 * scenario is treated as "contract" and anything else as "integration".
 *
 * @param {{ testType?: string, steps?: Array<{ method?: string, path?: string }> }} scenario
 * @returns {{ primaryStep: (object|undefined), testType: string }} `primaryStep`
 *   is `undefined` when the scenario has no steps.
 */
function resolvePrimaryStep(scenario) {
    // Tolerate a missing steps array instead of throwing on `.length`.
    const steps = scenario.steps ?? [];
    const testType = scenario.testType ?? (steps.length === 1 ? "contract" : "integration");
    const mutatingMethods = ["POST", "PUT", "PATCH", "DELETE"];
    // Compare case-insensitively so a step declared as "patch" is still
    // recognised as a mutation.
    const mutatingSteps = steps.filter((step) => mutatingMethods.includes(String(step?.method ?? "").toUpperCase()));
    // Use the last mutating step — earlier steps are typically prerequisite setup
    // (e.g. POST /products before PATCH /orders), while the final mutation is the
    // primary action under test. Fall back to the last step for read-only scenarios.
    const primaryStep = mutatingSteps[mutatingSteps.length - 1] ?? steps[steps.length - 1];
    return { primaryStep, testType };
}
64
/**
 * Coverage key of the form `resource::testType` for a scenario, derived from
 * the path of its primary step (an empty path is used when there is no step).
 */
function scenarioCoverageKey(scenario) {
    const resolved = resolvePrimaryStep(scenario);
    const primaryPath = resolved.primaryStep?.path ?? "";
    const resourceName = extractResourceFromPath(primaryPath);
    return `${resourceName}::${resolved.testType}`;
}
69
/**
 * Method-aware coverage key for external test dedup.
 *
 * Unlike scenarioCoverageKey (resource::testType), this includes the HTTP method
 * so that e.g. an external test covering "GET /orders" doesn't block generating
 * a test for "PUT /orders" — a different operation on the same resource.
 *
 * @param {object} scenario Scenario whose primary step supplies the method and path.
 * @returns {string} Key in the form `METHOD::resource::testType`. The method is
 *   upper-cased (defaulting to "GET" when the primary step has none) so the key
 *   compares equal to the upper-cased keys built by buildExternalCoverageSet().
 */
function externalDedupKey(scenario) {
    const { primaryStep, testType } = resolvePrimaryStep(scenario);
    // Normalise case: coverage keys are stored upper-case, so a scenario that
    // declares "post" must still match "POST::…".
    const method = String(primaryStep?.method ?? "GET").toUpperCase();
    const resource = extractResourceFromPath(primaryStep?.path ?? "");
    return `${method}::${resource}::${testType}`;
}
81
/**
 * Build a set of coverage keys from external (non-Skyramp) tests.
 *
 * Parses `testLocations` entries tagged with `[external]` to extract the
 * method-aware `METHOD::resource::testType` keys they cover — the same format
 * externalDedupKey() produces — so scenarios duplicating external coverage can
 * be filtered programmatically while distinct operations on the same resource
 * (e.g. `GET::orders::integration` vs `PUT::orders::integration`) stay separate.
 *
 * Format of testLocations:
 *   Record<testType, "file1 [external] (covers: GET /api/v1/orders, POST /api/v1/orders), file2 (covers: ...)">
 *
 * @param {Record<string, string>|null|undefined} testLocations Map of test type
 *   to a comma-joined file list; nullish input and non-string values are ignored.
 * @returns {Set<string>} Coverage keys contributed by `[external]` entries only.
 */
function buildExternalCoverageSet(testLocations) {
    const coverage = new Set();
    let externalWithoutCoverage = 0;
    for (const [testType, fileList] of Object.entries(testLocations ?? {})) {
        if (typeof fileList !== "string")
            continue;
        // When testType is "unknown" (heuristic failed), emit keys for both integration and
        // contract to avoid silent misses — conservative over-blocking is preferable.
        const keyTypes = testType === "unknown" ? ["integration", "contract"] : [testType];
        // One pass over every "[external]" tag; the covers clause is optional so that
        // tags without one (or with an unterminated one) are still visited and counted.
        for (const tag of fileList.matchAll(/\[external\](?:\s*\(covers:\s*([^)]*)\))?/g)) {
            let extractedEndpoints = 0;
            for (const entry of (tag[1] ?? "").split(",")) {
                // entry is "METHOD /path" e.g. "GET /api/v1/orders/{order_id}"
                const parsed = /^(\S+)\s+(\S.*)$/.exec(entry.trim());
                if (!parsed)
                    continue;
                const method = parsed[1].toUpperCase();
                const resource = extractResourceFromPath(parsed[2]);
                if (resource === "unknown")
                    continue;
                for (const keyType of keyTypes) {
                    coverage.add(`${method}::${resource}::${keyType}`);
                }
                extractedEndpoints += 1;
            }
            // Count the file when nothing usable came out of it — these fall back to
            // prompt-level dedup only, whether the clause was absent, empty, or malformed.
            if (extractedEndpoints === 0)
                externalWithoutCoverage += 1;
        }
    }
    if (externalWithoutCoverage > 0) {
        logger.info(`${externalWithoutCoverage} external test file(s) have no extractable endpoint coverage — ` +
            `programmatic dedup skipped for these; Step 0 semantic check is the fallback.`);
    }
    return coverage;
}
68
133
  // ── Execution Plan (replaces pre-ranked + scenarios + heuristic sections) ──
69
- function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject = false, isFrontendOnlyProject = false) {
134
+ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject = false, isFrontendOnlyProject = false, externalCoverage = new Set()) {
70
135
  // Full-repo mode only — percentage-based UI/E2E slot targets (15% each, floor 1).
71
136
  const rawE2E = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
72
137
  const rawUI = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
@@ -90,11 +155,22 @@ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, au
90
155
  const TYPE_LABEL = {
91
156
  e2e: "E2E", ui: "UI", integration: "Integration", contract: "Contract",
92
157
  };
158
+ // Filter out scenarios already covered by external tests before slicing.
159
+ const scoredFiltered = externalCoverage.size > 0
160
+ ? scored.filter(item => {
161
+ const key = externalDedupKey(item.scenario);
162
+ if (externalCoverage.has(key)) {
163
+ logger.info(`External dedup (full-repo): skipping "${item.scenario.scenarioName}" (${key})`);
164
+ return false;
165
+ }
166
+ return true;
167
+ })
168
+ : scored;
93
169
  // For full-stack repos, carve out E2E and UI slots before filling with backend tests.
94
170
  const backendSlotCount = isFrontendProject
95
171
  ? Math.max(0, topN - minE2ESlots - minUISlots)
96
172
  : topN;
97
- const allItems = scored.slice(0, backendSlotCount);
173
+ const allItems = scoredFiltered.slice(0, backendSlotCount);
98
174
  const byType = new Map();
99
175
  for (const t of TYPE_ORDER)
100
176
  byType.set(t, []);
@@ -304,7 +380,7 @@ ${repoSupplementNote}
304
380
  Cross-check every endpoint path against the Router Mounting / Nesting section in the analysis above. Sub-routers may be mounted at nested prefixes — e.g. a reviews router with \`@router.get("/")\` may actually be \`GET /api/v1/products/{product_id}/reviews\` if mounted under that prefix. Always use the fully-qualified nested path in tool calls, not the path as it appears in the route file alone.
305
381
 
306
382
  **Existing test files (check before assigning output filenames):**
307
- See the Existing Tests section above. If a recommendation's primary resource already has a test file listed there, prefer passing an explicit \`output\` filename (e.g. \`output: "orders_integration_test.py"\`) to update the existing file rather than creating a duplicate.
383
+ See the Existing Tests section above. If a recommendation's primary resource already has a \`[skyramp]\` test file listed there, prefer passing an explicit \`output\` filename (e.g. \`output: "orders_integration_test.py"\`) to update the existing file rather than creating a duplicate. Do NOT update \`[external]\` test files — they are user-maintained.
308
384
 
309
385
  Before filling in tool call parameters for each item, use the analysis data already provided above (endpoint interactions, source context) first. Only read the route handler source code directly when the analysis data does not contain the specific value you need:
310
386
  - Required request body fields (POST/PUT/PATCH) — use field names from the analysis interactions; read source only if they show \`{}\` or are missing
@@ -324,7 +400,7 @@ ${buildTestQualityCriteria()}
324
400
  | Code Insight | Derived from actual implementation (spotted middleware pattern, N+1 risk, unique constraint)? → bump up one tier |
325
401
  </enrichment_notes>`;
326
402
  }
327
- function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false) {
403
+ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false, externalCoverage = new Set(), relevantExternalTestPaths = []) {
328
404
  const frontendUrl = "<frontend_url>";
329
405
  // Slot allocation:
330
406
  // - UI-only PR: all GENERATE slots are UI placeholders (no pre-ranked backend scenarios)
@@ -335,8 +411,21 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
335
411
  : hasFrontendChanges
336
412
  ? Math.max(0, maxGen - 1)
337
413
  : maxGen;
338
- const generateItems = scored.slice(0, Math.min(backendGenerateCount, scored.length));
339
- const rawAdditionalItems = scored.slice(backendGenerateCount, topN);
414
+ // Filter out scenarios whose primary method + resource + test type is already covered by external tests.
415
+ // Method-aware: an external test covering GET /orders won't block PUT /orders scenarios.
416
+ // This is the programmatic complement to the prompt-level Step 0 dedup instructions.
417
+ const scoredAfterExternalDedup = externalCoverage.size > 0
418
+ ? scored.filter(item => {
419
+ const key = externalDedupKey(item.scenario);
420
+ if (externalCoverage.has(key)) {
421
+ logger.info(`External dedup: skipping "${item.scenario.scenarioName}" (${key}) — covered by external test`);
422
+ return false;
423
+ }
424
+ return true;
425
+ })
426
+ : scored;
427
+ const generateItems = scoredAfterExternalDedup.slice(0, Math.min(backendGenerateCount, scoredAfterExternalDedup.length));
428
+ const rawAdditionalItems = scoredAfterExternalDedup.slice(backendGenerateCount, topN);
340
429
  // Filter additional items whose primary resource + test type already appear in GENERATE
341
430
  const generatedCoverage = new Set(generateItems.map(item => scenarioCoverageKey(item.scenario)));
342
431
  const additionalItems = rawAdditionalItems.filter(item => !generatedCoverage.has(scenarioCoverageKey(item.scenario)));
@@ -485,20 +574,24 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
485
574
  Derive scenario names and steps from the actual changed frontend files. If your Budget Plan calls for 0% UI/E2E, omit this entirely.` : "";
486
575
  const supplementNote = `\n**If your Budget Plan total exceeds the pre-ranked items listed above:** draft additional tests from source-code enrichment (Step 1). For each new or changed endpoint, identify boundary or variation scenarios — formula parameters, search/filter constraints, required field validation. Only after exhausting PR-specific scenarios, add generic patterns (auth boundary → 401, non-existent ID → 404). Do NOT supplement with tests whose endpoint + test type match a GENERATE item.`;
487
576
  // ── PR / branch-diff mode: execution plan ────────────────────────────────
577
+ const externalTestFilesList = relevantExternalTestPaths.length > 0
578
+ ? `**Read these external test files first** (paths are relative to the \`repositoryPath\` you passed to \`skyramp_analyze_changes\` — prepend it to get the absolute path). Determine exactly which HTTP methods + paths each one covers. This is the definitive source of truth for external coverage:\n${relevantExternalTestPaths.map(p => `- \`${p}\``).join("\n")}\n\n`
579
+ : "";
488
580
  return `## Execution Plan
489
581
  Seed: ${seed} | Endpoints: ${endpointCount} | Max: ${maxGen} generate + up to ${Math.max(topN - maxGen, 0)} additional (your Budget Plan determines the exact count)
490
582
 
491
583
  ${buildScopeAssessmentSection(topN, maxGen, isUIOnlyPR)}
492
584
 
493
- **Step 0 — Existing-test cross-check (before executing anything)**
494
- For every GENERATE item below, check its endpoint path and test type against the Existing Tests list (further down in the prompt).
495
- - **Contract tests**: If an existing contract test already covers that resource path UPDATE the existing file instead of creating a new one. This does NOT count toward \`newTestsCreated\` backfill from ADDITIONAL candidates to fill the open ADD slot using this priority order:
585
+ **Step 0 — External test coverage verification (before executing anything)**
586
+ ${externalTestFilesList}For every GENERATE item below, check its endpoint path and test type against the Existing Tests list (further down in the prompt).
587
+ - **\`[external]\` tests**: If the endpoint is already covered by an \`[external]\` test of the same type skip the resource entirely (do NOT create or update). Backfill from ADDITIONAL using the priority order below:
496
588
  1. **BUG-CATCHING TESTS FIRST (CRITICAL)**: If source code analysis revealed a bug, logic error, or incorrect formula (e.g. discount math adding instead of subtracting, off-by-one errors, missing validation), CREATE A TEST THAT EXPOSES IT. The test SHOULD FAIL — that's the point. Document the bug. Example: if discount formula is wrong, test with discount=20% and assert correct math. If no bug found, skip to #2.
497
589
  2. **PR-endpoint edge cases**: Look for integration test candidates covering error paths, boundary values, or alternative scenarios for the SAME endpoints changed in the PR diff. If no suitable candidate exists in ADDITIONAL, derive one from your source-code enrichment findings.
498
590
  3. **Same-resource other scenarios**: Other HTTP methods or flows on the same resource group touched by the PR.
499
591
  4. **Cross-resource workflows involving the PR endpoint**: Integration scenarios that include the PR's changed endpoint as one of the steps.
500
- 5. **Unrelated endpoint coverage (last resort)**: Tests for endpoints with no connection to the PR diff, only when ALL options above have been exhausted or would only produce UPDATEs (not new files).
592
+ 5. **Unrelated endpoint coverage (last resort)**: Tests for endpoints with no connection to the PR diff, only when ALL options above have been exhausted.
501
593
  **Avoid backfilling with a test for a completely unrelated resource (e.g. \`POST /reviews\` when the PR only changes \`/orders\`) if any PR-endpoint edge-case integration test is feasible.**
594
+ - **Contract tests (\`[skyramp]\`)**: If an existing \`[skyramp]\` contract test already covers that resource path → UPDATE the existing test file instead of creating a new one. A new test case is a new test even if the file already exists — count it toward \`newTestsCreated\`.
502
595
  - **Integration/scenario tests**: Always generate as a new file via the scenario pipeline, even if an existing integration test covers the same resource. A new multi-step scenario is a distinct test. Count it toward \`newTestsCreated\`.
503
596
  - **UI tests**: Always generate as a new file. Count toward \`newTestsCreated\`.
504
597
 
@@ -589,6 +682,8 @@ ${supplementNote}
589
682
  - **Only drop** a previous recommendation if its target endpoint was removed, its business logic changed, or it is now covered by a generated test.
590
683
  - **Only add** new recommendations for code paths introduced since the last run.`;
591
684
  }
685
+ // Exported for testing — these are the core dedup primitives.
686
+ export { buildExternalCoverageSet, externalDedupKey };
592
687
  export function buildRecommendationPrompt(analysis, analysisScope = AnalysisScope.FullRepo, topN = MAX_RECOMMENDATIONS, prContext, workspaceAuthHeader, workspaceAuthType, workspaceAuthScheme, maxGenerateOverride, sessionId) {
593
688
  const isDiffScope = isDiff(analysisScope);
594
689
  const diffContext = analysis.branchDiffContext;
@@ -653,7 +748,7 @@ Output should be concise and immediately actionable.`
653
748
  ## Source Priority
654
749
  When information conflicts, prefer: **Traces** (actual behavior) > **Code** (implemented behavior) > **Spec/Docs** (documented behavior).
655
750
  `;
656
- // Compact fingerprint of what Skyramp has already tested in this repo.
751
+ // Compact fingerprint of tests already covering endpoints in this repo (Skyramp + external).
657
752
  // Re-derived fresh each run from test files on disk — no separate persistence needed.
658
753
  const testLocations = analysis.existingTests?.testLocations ?? {};
659
754
  const testFingerprint = (() => {
@@ -663,13 +758,17 @@ When information conflicts, prefer: **Traces** (actual behavior) > **Code** (imp
663
758
  // Each value is a comma-joined list of "file (covers: ep1, ep2)" entries — one per file.
664
759
  // Count files by splitting on "), " boundaries (each entry ends with ")").
665
760
  let totalFiles = 0;
761
+ let externalCount = 0;
666
762
  const byType = new Map();
667
763
  for (const [type, fileList] of entries) {
668
764
  // Use matchAll to extract covers from ALL files of this type, not just the first.
669
765
  const allEndpoints = [...fileList.matchAll(/covers:\s*([^)]+)/g)].map(m => m[1].trim());
670
- // Count files: strip "(covers: ...)" clauses then split on ", " to count entries,
766
+ // Count files: strip "[external]", "[skyramp]", and "(covers: ...)" clauses then split on ", " to count entries,
671
767
  // correctly handling both files-with-covers and files-without-covers (e.g. UI tests).
672
- totalFiles += fileList.replace(/\s*\(covers:[^)]*\)/g, "").split(", ").filter(s => s.trim().length > 0).length;
768
+ const strippedList = fileList.replace(/\s*\[(?:external|skyramp)\]/g, "").replace(/\s*\(covers:[^)]*\)/g, "");
769
+ totalFiles += strippedList.split(", ").filter(s => s.trim().length > 0).length;
770
+ // Count external files from [external] annotations
771
+ externalCount += (fileList.match(/\[external\]/g) || []).length;
673
772
  if (!byType.has(type))
674
773
  byType.set(type, []);
675
774
  byType.get(type).push(...allEndpoints);
@@ -677,7 +776,11 @@ When information conflicts, prefer: **Traces** (actual behavior) > **Code** (imp
677
776
  const lines = [...byType.entries()]
678
777
  .filter(([, eps]) => eps.length > 0)
679
778
  .map(([type, eps]) => ` ${type}: ${[...new Set(eps)].join(", ")}`);
680
- return `\nSkyramp tests already in this repo (${totalFiles} files):\n${lines.join("\n")}\n(Use this to focus on coverage gaps, not re-testing what already exists.)`;
779
+ const skyrampCount = totalFiles - externalCount;
780
+ const breakdown = externalCount > 0
781
+ ? `${skyrampCount} Skyramp + ${externalCount} external`
782
+ : `${totalFiles} files`;
783
+ return `\nTests already covering endpoints in this repo (${breakdown}):\n${lines.join("\n")}\n(Use this to focus on coverage gaps. External tests block new recommendations but cannot be updated.)`;
681
784
  })();
682
785
  const repoContext = `
683
786
  Repository: ${analysis.metadata.repositoryName}
@@ -789,10 +892,21 @@ ${detailBlocks}
789
892
  const projectType = analysis.projectClassification.projectType;
790
893
  const isFrontendProject = projectType === "full-stack" || projectType === "frontend";
791
894
  const isFrontendOnlyProject = projectType === "frontend";
792
- mainSection = buildFullRepoRecommendations(scored, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject, isFrontendOnlyProject);
895
+ const externalCoverageFullRepo = buildExternalCoverageSet(testLocations);
896
+ if (externalCoverageFullRepo.size > 0) {
897
+ logger.info(`External test coverage keys (full-repo): ${[...externalCoverageFullRepo].join(", ")}`);
898
+ }
899
+ mainSection = buildFullRepoRecommendations(scored, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject, isFrontendOnlyProject, externalCoverageFullRepo);
793
900
  }
794
901
  else if (isDiffScope && (isUIOnlyPR || scored.length > 0)) {
795
- mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces);
902
+ // Build external coverage set for programmatic dedup — this prevents recommending
903
+ // tests that duplicate existing non-Skyramp tests at the METHOD::resource::testType
904
+ // level, so different methods on the same resource (e.g. GET vs PUT) remain distinct.
905
+ const externalCoverage = buildExternalCoverageSet(testLocations);
906
+ if (externalCoverage.size > 0) {
907
+ logger.info(`External test coverage keys: ${[...externalCoverage].join(", ")}`);
908
+ }
909
+ mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces, externalCoverage, analysis.existingTests.relevantExternalTestPaths ?? []);
796
910
  }
797
911
  else {
798
912
  mainSection = `
@@ -923,6 +1037,11 @@ ${interactionSection}
923
1037
 
924
1038
  <existing_tests>
925
1039
  ## Existing Tests
1040
+
1041
+ **Two categories of test files (identified by tag):**
1042
+ - \`[skyramp]\` — generated by Skyramp tools. You may UPDATE these when the covered endpoint changes.
1043
+ - \`[external]\` — user-written tests (pytest, jest, junit, etc.) maintained outside Skyramp. Treat as read-only: use them to determine existing coverage but NEVER update, regenerate, or delete them.
1044
+
926
1045
  - Frameworks: ${analysis.existingTests.frameworks.join(", ") || "none"}
927
1046
  ${formatTestLocations(analysis.existingTests.testLocations)}
928
1047
  </existing_tests>
@@ -2,7 +2,7 @@ jest.mock("@skyramp/skyramp", () => ({
2
2
  WorkspaceConfigManager: { create: jest.fn() },
3
3
  }));
4
4
  import { TestType } from "../../types/TestTypes.js";
5
- import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
5
+ import { buildRecommendationPrompt, buildExternalCoverageSet, externalDedupKey } from "./test-recommendation-prompt.js";
6
6
  import { PATH_PARAM_UUID_GUIDANCE, MAX_TESTS_TO_GENERATE, buildTestQualityCriteria, buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildFewShotExamples, buildVerificationChecklist, } from "./recommendationSections.js";
7
7
  import { AnalysisScope } from "../../types/RepositoryAnalysis.js";
8
8
  // ---------------------------------------------------------------------------
@@ -1123,7 +1123,7 @@ describe("buildRecommendationPrompt — reduced over-prompting", () => {
1123
1123
  describe("buildRecommendationPrompt — testFingerprint", () => {
1124
1124
  it("omits fingerprint when no existing test locations", () => {
1125
1125
  const prompt = buildRecommendationPrompt(minimalAnalysis());
1126
- expect(prompt).not.toContain("Skyramp tests already in this repo");
1126
+ expect(prompt).not.toContain("Tests already covering endpoints in this repo");
1127
1127
  });
1128
1128
  it("includes fingerprint with file count and endpoints when testLocations present", () => {
1129
1129
  const analysis = minimalAnalysis({
@@ -1138,7 +1138,7 @@ describe("buildRecommendationPrompt — testFingerprint", () => {
1138
1138
  },
1139
1139
  });
1140
1140
  const prompt = buildRecommendationPrompt(analysis);
1141
- expect(prompt).toContain("Skyramp tests already in this repo (2 files)");
1141
+ expect(prompt).toContain("Tests already covering endpoints in this repo (2 files)");
1142
1142
  expect(prompt).toContain("contract: GET /api/items, POST /api/items");
1143
1143
  expect(prompt).toContain("integration: POST /api/orders");
1144
1144
  });
@@ -1155,7 +1155,7 @@ describe("buildRecommendationPrompt — testFingerprint", () => {
1155
1155
  });
1156
1156
  const prompt = buildRecommendationPrompt(analysis);
1157
1157
  // File count should be 2, not 1
1158
- expect(prompt).toContain("Skyramp tests already in this repo (2 files)");
1158
+ expect(prompt).toContain("Tests already covering endpoints in this repo (2 files)");
1159
1159
  });
1160
1160
  it("omits types with no endpoint coverage from fingerprint lines (no trailing 'ui: ' line)", () => {
1161
1161
  const analysis = minimalAnalysis({
@@ -1170,9 +1170,206 @@ describe("buildRecommendationPrompt — testFingerprint", () => {
1170
1170
  },
1171
1171
  });
1172
1172
  const prompt = buildRecommendationPrompt(analysis);
1173
- expect(prompt).toContain("Skyramp tests already in this repo (2 files)");
1173
+ expect(prompt).toContain("Tests already covering endpoints in this repo (2 files)");
1174
1174
  expect(prompt).toContain("integration: POST /api/orders");
1175
1175
  // UI type has no endpoints — must not emit a blank "ui: " line
1176
1176
  expect(prompt).not.toMatch(/^\s*ui:\s*$/m);
1177
1177
  });
1178
// Mixed fixture: one untagged contract test plus one integration test whose
// location string carries the "[external]" tag. The prompt is expected to
// report the two groups separately and to state that external tests cannot
// be updated.
+ it("distinguishes external tests from Skyramp tests in fingerprint", () => {
1179
+ const analysis = minimalAnalysis({
1180
+ existingTests: {
1181
+ frameworks: ["pytest"],
1182
+ coverage: { unit: 0, integration: 1, e2e: 0, ui: 0, load: 0, contract: 1, smoke: 0 },
1183
+ testLocations: {
1184
+ contract: "test_items_contract.py (covers: GET /api/items)",
1185
+ integration: "tests/test_api.py [external] (covers: POST /api/orders)",
1186
+ },
1187
+ hasCoverageReports: false,
1188
+ },
1189
+ });
1190
+ const prompt = buildRecommendationPrompt(analysis);
  // One untagged file and one "[external]" file → "1 Skyramp + 1 external".
1191
+ expect(prompt).toContain("1 Skyramp + 1 external");
1192
+ expect(prompt).toContain("cannot be updated");
1193
+ });
1194
// Header-wording check for the coverage table: even when the fixture holds
// only an untagged contract test, the prompt must use the inclusive
// "(Skyramp + external)" header and must not contain the Skyramp-only wording.
+ it("uses inclusive header for test coverage table", () => {
1195
+ const analysis = minimalAnalysis({
1196
+ existingTests: {
1197
+ frameworks: ["pytest"],
1198
+ coverage: { unit: 0, integration: 0, e2e: 0, ui: 0, load: 0, contract: 1, smoke: 0 },
1199
+ testLocations: {
1200
+ contract: "test_items_contract.py (covers: GET /api/items)",
1201
+ },
1202
+ hasCoverageReports: false,
1203
+ },
1204
+ });
1205
+ const prompt = buildRecommendationPrompt(analysis);
1206
+ expect(prompt).toContain("Existing test coverage (Skyramp + external)");
  // Guards against the Skyramp-only header wording appearing in the prompt.
1207
+ expect(prompt).not.toContain("Existing Skyramp test coverage");
1208
+ });
1209
// With a single "[external]" integration test in the fixture, the prompt must
// carry the dedup guidance: the "[external]" marker itself, the instruction not
// to create a new test, and the instruction not to UPDATE/REGENERATE/DELETE
// external tests.
+ it("includes external test dedup rule that blocks CREATE", () => {
1210
+ const analysis = minimalAnalysis({
1211
+ existingTests: {
1212
+ frameworks: ["pytest"],
1213
+ coverage: { unit: 0, integration: 1, e2e: 0, ui: 0, load: 0, contract: 0, smoke: 0 },
1214
+ testLocations: {
1215
+ integration: "tests/test_api.py [external] (covers: POST /api/orders)",
1216
+ },
1217
+ hasCoverageReports: false,
1218
+ },
1219
+ });
1220
+ const prompt = buildRecommendationPrompt(analysis);
1221
+ expect(prompt).toContain("[external]");
1222
+ expect(prompt).toContain("do NOT create a new test");
1223
+ expect(prompt).toContain("Do NOT attempt to UPDATE, REGENERATE, or DELETE external tests");
1224
+ });
1225
+ });
1226
+ // ---------------------------------------------------------------------------
1227
+ // Tests — External test dedup primitives
1228
+ // ---------------------------------------------------------------------------
1229
// Suite for buildExternalCoverageSet. Input is a testLocations-style object
// (test type → location string); the result is queried with .has()/.size, and
// the asserted keys have the shape "METHOD::resource::testType".
// Only entries tagged "[external]" with a "(covers: ...)" clause are expected
// to contribute keys.
+ describe("buildExternalCoverageSet", () => {
1230
  // Baseline: one file, one endpoint. "/api/v1/orders" is expected to reduce
  // to the resource "orders".
+ it("parses single external test with one endpoint", () => {
1231
+ const set = buildExternalCoverageSet({
1232
+ integration: 'tests/test_api.py [external] (covers: GET /api/v1/orders)',
1233
+ });
1234
+ expect(set.has("GET::orders::integration")).toBe(true);
1235
+ expect(set.size).toBe(1);
1236
+ });
1237
  // A comma-separated covers clause yields one key per endpoint; the three
  // methods on the same resource stay distinct.
+ it("parses multiple endpoints in one covers clause", () => {
1238
+ const set = buildExternalCoverageSet({
1239
+ integration: 'tests/test_api.py [external] (covers: GET /api/v1/orders, POST /api/v1/orders, DELETE /api/v1/orders/{id})',
1240
+ });
1241
+ expect(set.has("GET::orders::integration")).toBe(true);
1242
+ expect(set.has("POST::orders::integration")).toBe(true);
1243
+ expect(set.has("DELETE::orders::integration")).toBe(true);
1244
+ expect(set.size).toBe(3);
1245
+ });
1246
  // Several "[external]" files can share one location string, separated by
  // commas outside the parentheses; each file's covers clause is parsed.
+ it("parses multiple external files in one test type", () => {
1247
+ const set = buildExternalCoverageSet({
1248
+ integration: 'tests/test_orders.py [external] (covers: GET /api/orders), tests/test_products.py [external] (covers: POST /api/products)',
1249
+ });
1250
+ expect(set.has("GET::orders::integration")).toBe(true);
1251
+ expect(set.has("POST::products::integration")).toBe(true);
1252
+ expect(set.size).toBe(2);
1253
+ });
1254
  // The same endpoint under two test types produces two keys, because the
  // test type is the last key segment.
+ it("handles multiple test types", () => {
1255
+ const set = buildExternalCoverageSet({
1256
+ integration: 'tests/test_api.py [external] (covers: GET /api/orders)',
1257
+ contract: 'tests/test_contract.py [external] (covers: GET /api/orders)',
1258
+ });
1259
+ expect(set.has("GET::orders::integration")).toBe(true);
1260
+ expect(set.has("GET::orders::contract")).toBe(true);
1261
+ expect(set.size).toBe(2);
1262
+ });
1263
  // Under the "unknown" test type, the endpoint is expected to be recorded
  // under both "integration" and "contract".
+ it("emits both integration and contract keys for unknown test type", () => {
1264
+ const set = buildExternalCoverageSet({
1265
+ unknown: 'tests/test_misc.py [external] (covers: GET /api/items)',
1266
+ });
1267
+ expect(set.has("GET::items::integration")).toBe(true);
1268
+ expect(set.has("GET::items::contract")).toBe(true);
1269
+ expect(set.size).toBe(2);
1270
+ });
1271
  // Entries without the "[external]" tag contribute nothing to the set.
+ it("ignores Skyramp tests (no [external] tag)", () => {
1272
+ const set = buildExternalCoverageSet({
1273
+ contract: 'test_items_contract.py (covers: GET /api/items)',
1274
+ });
1275
+ expect(set.size).toBe(0);
1276
+ });
1277
  // An "[external]" tag with no "(covers: ...)" clause yields no keys.
+ it("ignores external tests without covers clause", () => {
1278
+ const set = buildExternalCoverageSet({
1279
+ integration: 'tests/test_api.py [external]',
1280
+ });
1281
+ expect(set.size).toBe(0);
1282
+ });
1283
  // Empty input object → empty set.
+ it("returns empty set for empty testLocations", () => {
1284
+ const set = buildExternalCoverageSet({});
1285
+ expect(set.size).toBe(0);
1286
+ });
1287
  // A covers clause with a method but no path must not produce a key.
+ it("skips endpoints with unparseable paths", () => {
1288
+ const set = buildExternalCoverageSet({
1289
+ integration: 'tests/test_api.py [external] (covers: GET )',
1290
+ });
1291
+ // "GET " → method="GET", path="" → resource="unknown" → skipped
1292
+ expect(set.size).toBe(0);
1293
+ });
1294
  // A trailing "{param}" segment is not used as the resource name.
+ it("strips path parameters from resource extraction", () => {
1295
+ const set = buildExternalCoverageSet({
1296
+ integration: 'tests/test_api.py [external] (covers: PUT /api/v1/orders/{order_id})',
1297
+ });
1298
+ // {order_id} is a path param → skipped, resource is "orders"
1299
+ expect(set.has("PUT::orders::integration")).toBe(true);
1300
+ expect(set.size).toBe(1);
1301
+ });
1302
  // A lowercase "get" in the covers clause must match the uppercase key.
+ it("normalizes method to uppercase", () => {
1303
+ const set = buildExternalCoverageSet({
1304
+ integration: 'tests/test_api.py [external] (covers: get /api/orders)',
1305
+ });
1306
+ expect(set.has("GET::orders::integration")).toBe(true);
1307
+ });
1308
+ });
1309
+ describe("externalDedupKey", () => {
1310
+ it("builds key from single-step contract scenario", () => {
1311
+ const scenario = {
1312
+ scenarioName: "get_orders",
1313
+ description: "Get orders",
1314
+ category: "crud",
1315
+ priority: "high",
1316
+ steps: [{ order: 1, method: "GET", path: "/api/v1/orders", description: "list orders", interactionType: "success", expectedStatusCode: 200 }],
1317
+ chainingKeys: [],
1318
+ requiresAuth: false,
1319
+ estimatedComplexity: "simple",
1320
+ };
1321
+ expect(externalDedupKey(scenario)).toBe("GET::orders::contract");
1322
+ });
1323
+ it("builds key from multi-step integration scenario using last mutating step", () => {
1324
+ const scenario = {
1325
+ scenarioName: "create_and_update_order",
1326
+ description: "Create then update order",
1327
+ category: "workflow",
1328
+ priority: "high",
1329
+ steps: [
1330
+ { order: 1, method: "POST", path: "/api/v1/orders", description: "create order", interactionType: "success", expectedStatusCode: 201 },
1331
+ { order: 2, method: "PUT", path: "/api/v1/orders/{order_id}", description: "update order", interactionType: "success", expectedStatusCode: 200 },
1332
+ { order: 3, method: "GET", path: "/api/v1/orders/{order_id}", description: "verify", interactionType: "success", expectedStatusCode: 200 },
1333
+ ],
1334
+ chainingKeys: [],
1335
+ requiresAuth: false,
1336
+ estimatedComplexity: "moderate",
1337
+ };
1338
+ // Last mutating step is PUT /orders/{order_id} → resource "orders"
1339
+ expect(externalDedupKey(scenario)).toBe("PUT::orders::integration");
1340
+ });
1341
+ it("falls back to last step when no mutating methods present", () => {
1342
+ const scenario = {
1343
+ scenarioName: "get_items",
1344
+ description: "List and get items",
1345
+ category: "crud",
1346
+ priority: "medium",
1347
+ steps: [
1348
+ { order: 1, method: "GET", path: "/api/v1/items", description: "list items", interactionType: "success", expectedStatusCode: 200 },
1349
+ { order: 2, method: "GET", path: "/api/v1/items/{id}", description: "get item", interactionType: "success", expectedStatusCode: 200 },
1350
+ ],
1351
+ chainingKeys: [],
1352
+ requiresAuth: false,
1353
+ estimatedComplexity: "simple",
1354
+ };
1355
+ // No mutating steps → falls back to last step → GET /items/{id} → resource "items"
1356
+ expect(externalDedupKey(scenario)).toBe("GET::items::integration");
1357
+ });
1358
+ it("uses explicit testType when provided", () => {
1359
+ const scenario = {
1360
+ scenarioName: "get_orders_contract",
1361
+ description: "Contract test for orders",
1362
+ category: "crud",
1363
+ priority: "high",
1364
+ steps: [
1365
+ { order: 1, method: "GET", path: "/api/v1/orders", description: "list orders", interactionType: "success", expectedStatusCode: 200 },
1366
+ { order: 1, method: "POST", path: "/api/v1/orders", description: "create order", interactionType: "success", expectedStatusCode: 201 },
1367
+ ],
1368
+ chainingKeys: [],
1369
+ requiresAuth: false,
1370
+ estimatedComplexity: "simple",
1371
+ testType: "contract",
1372
+ };
1373
+ expect(externalDedupKey(scenario)).toBe("POST::orders::contract");
1374
+ });
1178
1375
  });