@skyramp/mcp 0.0.59 → 0.0.60-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/build/index.js +32 -5
  2. package/build/prompts/test-recommendation/analysisOutputPrompt.js +98 -0
  3. package/build/prompts/test-recommendation/recommendationSections.js +226 -0
  4. package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +71 -0
  5. package/build/prompts/test-recommendation/test-recommendation-prompt.js +166 -104
  6. package/build/prompts/testGenerationPrompt.js +2 -3
  7. package/build/prompts/testbot/testbot-prompts.js +96 -93
  8. package/build/resources/analysisResources.js +254 -0
  9. package/build/services/ScenarioGenerationService.js +70 -26
  10. package/build/tools/generate-tests/generateIntegrationRestTool.js +54 -1
  11. package/build/tools/generate-tests/generateScenarioRestTool.js +8 -5
  12. package/build/tools/submitReportTool.js +28 -0
  13. package/build/tools/test-maintenance/stateCleanupTool.js +8 -0
  14. package/build/tools/test-recommendation/analyzeRepositoryTool.js +349 -217
  15. package/build/tools/test-recommendation/recommendTestsTool.js +163 -159
  16. package/build/tools/workspace/initializeWorkspaceTool.js +1 -1
  17. package/build/types/RepositoryAnalysis.js +99 -12
  18. package/build/utils/AnalysisStateManager.js +40 -23
  19. package/build/utils/branchDiff.js +47 -0
  20. package/build/utils/pr-comment-parser.js +124 -0
  21. package/build/utils/projectMetadata.js +188 -0
  22. package/build/utils/projectMetadata.test.js +81 -0
  23. package/build/utils/repoScanner.js +378 -0
  24. package/build/utils/routeParsers.js +213 -0
  25. package/build/utils/routeParsers.test.js +87 -0
  26. package/build/utils/scenarioDrafting.js +119 -0
  27. package/build/utils/scenarioDrafting.test.js +66 -0
  28. package/build/utils/trace-parser.js +166 -0
  29. package/build/utils/workspaceAuth.js +16 -0
  30. package/package.json +1 -1
  31. package/build/prompts/test-recommendation/repository-analysis-prompt.js +0 -326
  32. package/build/prompts/test-recommendation/test-mapping-prompt.js +0 -266
  33. package/build/tools/test-recommendation/mapTestsTool.js +0 -243
  34. package/build/types/TestMapping.js +0 -173
  35. package/build/utils/scoring-engine.js +0 -380
package/build/index.js CHANGED
@@ -19,8 +19,8 @@ import { registerLoginTool } from "./tools/auth/loginTool.js";
19
19
  import { registerLogoutTool } from "./tools/auth/logoutTool.js";
20
20
  import { registerFixErrorTool } from "./tools/fixErrorTool.js";
21
21
  import { registerAnalyzeRepositoryTool } from "./tools/test-recommendation/analyzeRepositoryTool.js";
22
- import { registerMapTestsTool } from "./tools/test-recommendation/mapTestsTool.js";
23
22
  import { registerRecommendTestsTool } from "./tools/test-recommendation/recommendTestsTool.js";
23
+ import { registerRecommendTestsPrompt } from "./prompts/test-recommendation/registerRecommendTestsPrompt.js";
24
24
  import { registerModularizationTool } from "./tools/code-refactor/modularizationTool.js";
25
25
  import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
26
26
  import { registerScenarioTestTool } from "./tools/generate-tests/generateScenarioRestTool.js";
@@ -34,6 +34,7 @@ import { registerTestbotPrompt, registerTestbotResource, } from "./prompts/testb
34
34
  import { registerInitTestbotTool } from "./tools/initTestbotTool.js";
35
35
  import { registerSubmitReportTool } from "./tools/submitReportTool.js";
36
36
  import { registerInitializeWorkspaceTool } from "./tools/workspace/initializeWorkspaceTool.js";
37
+ import { registerAnalysisResources } from "./resources/analysisResources.js";
37
38
  import { AnalyticsService } from "./services/AnalyticsService.js";
38
39
  import { initCheck } from "./utils/initAgent.js";
39
40
  const server = new McpServer({
@@ -47,13 +48,32 @@ const server = new McpServer({
47
48
  prompts: {
48
49
  listChanged: true,
49
50
  },
51
+ resources: {
52
+ listChanged: true,
53
+ },
50
54
  },
51
- instructions: `Skyramp MCP Server — generates and executes API tests (smoke, fuzz, contract, load, integration, E2E, UI).
55
+ instructions: `Skyramp MCP Server — generates and executes API tests (fuzz, contract, integration, E2E, UI).
52
56
 
53
57
  ## Rules
54
58
  - NEVER show CLI commands. ALWAYS use the MCP tools provided.
55
59
  - For UI and E2E tests, use the trace collection start/stop tools.
56
60
 
61
+ ## Test Recommendation Flow (2-step)
62
+ 1. Call \`skyramp_analyze_repository\` → returns a \`sessionId\`.
63
+ The analysis scans source code (code-first) to build enriched endpoints
64
+ (Path → Method → Interaction with request/response bodies, headers, cookies)
65
+ and draft user-flow scenarios for integration/E2E tests.
66
+ 2. Call \`skyramp_recommend_tests\` with \`sessionId\` → the LLM reasons over the
67
+ enriched data to recommend tests, referencing specific interactions and scenarios.
68
+
69
+ After analysis, you can also inspect data via MCP Resources:
70
+ - \`skyramp://analysis/{sessionId}/summary\` — high-level overview
71
+ - \`skyramp://analysis/{sessionId}/endpoints\` — compact endpoint listing
72
+ - \`skyramp://analysis/{sessionId}/endpoints/{path}\` — full path detail
73
+ - \`skyramp://analysis/{sessionId}/endpoints/{path}/{method}\` — single method detail
74
+ - \`skyramp://analysis/{sessionId}/scenarios\` — drafted scenarios
75
+ - \`skyramp://analysis/{sessionId}/diff\` — branch diff context
76
+
57
77
  ## Workspace Initialization (before ANY other Skyramp tool)
58
78
  Follow this flow EVERY time before calling any Skyramp tool:
59
79
 
@@ -75,8 +95,13 @@ Before calling ANY test generation tool, you MUST follow this flow:
75
95
  2. **Extract** the \`language\`, \`framework\`, \`outputDir\`, and \`api.baseUrl\` from the services section.
76
96
  3. **Use those values** as defaults for the test generation tool call. Do NOT ask the user for these values if they are already configured in the workspace file.
77
97
  4. **CRITICAL — endpointURL**: The \`endpointURL\` parameter MUST be the full URL to the specific endpoint being tested, NOT just the base URL. Construct it by combining \`api.baseUrl\` with the endpoint path. Example: if \`api.baseUrl\` is \`http://localhost:8000\` and the endpoint is \`/api/v1/products\`, pass \`endpointURL: "http://localhost:8000/api/v1/products"\`. NEVER pass just the base URL (e.g. \`http://localhost:8000\`) as \`endpointURL\`.
78
- 5. **If the workspace file does not exist**, or the needed values (language, framework, outputDir) are missing from the workspace config, ASK the user which language and framework they want before calling the tool.
79
- 6. The user can always override workspace defaults by explicitly specifying values in their request.
98
+ 5. **CRITICAL scenario generation**: When calling \`skyramp_scenario_test_generation\`, ALWAYS pass:
99
+ - \`baseURL\`: The full base URL from \`api.baseUrl\` (e.g., \`http://localhost:3000\`). This determines the scheme, host, and port in the generated trace. Without it, the trace defaults to https:443 which is almost always wrong for local development.
100
+ - \`authHeader\`: The auth header name from \`api.authHeader\` in the workspace config. Use \`Cookie\` for cookie/session-based auth (NextAuth, etc.), \`Authorization\` for Bearer tokens, \`X-API-Key\` for API keys. Without it, the trace defaults to \`Authorization: Bearer\` which breaks cookie-based apps.
101
+ - \`apiSchema\` is OPTIONAL — omit it for code-first apps without OpenAPI specs.
102
+ 6. **CRITICAL — integration test from scenario**: When calling \`skyramp_integration_test_generation\` with a \`scenarioFile\`, ALSO pass \`authHeader\` (same value as used in scenario generation). This tells the CLI which header to parameterize with the auth token. Without it, the generated test defaults to \`Authorization: Bearer\` regardless of what's in the trace.
103
+ 7. **If the workspace file does not exist**, or the needed values (language, framework, outputDir) are missing from the workspace config, ASK the user which language and framework they want before calling the tool.
104
+ 8. The user can always override workspace defaults by explicitly specifying values in their request.
80
105
  `,
81
106
  });
82
107
  // Check for first-time invocation after version update (runs in background, doesn't block)
@@ -116,6 +141,7 @@ const prompts = [
116
141
  registerTestGenerationPrompt,
117
142
  registerStartTraceCollectionPrompt,
118
143
  registerTestHealthPrompt,
144
+ registerRecommendTestsPrompt,
119
145
  ];
120
146
  if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
121
147
  prompts.push(registerTestbotPrompt);
@@ -145,8 +171,9 @@ const codeQualityTools = [
145
171
  codeQualityTools.forEach((registerTool) => registerTool(server));
146
172
  // Register test recommendation tools
147
173
  registerAnalyzeRepositoryTool(server);
148
- registerMapTestsTool(server);
149
174
  registerRecommendTestsTool(server);
175
+ // Register analysis resources (MCP Resources for enriched data access)
176
+ registerAnalysisResources(server);
150
177
  // Register test maintenance tools
151
178
  registerDiscoverTestsTool(server);
152
179
  registerAnalyzeTestDriftTool(server);
@@ -0,0 +1,98 @@
1
+ function buildEnrichmentInstructions(p) {
2
+ const isDiffScope = p.analysisScope === "current_branch_diff";
3
+ if (!isDiffScope) {
4
+ return `## Your Task — Enrich & Recommend (full repo)
5
+
6
+ ### Step 1: Read key files
7
+ Read \`package.json\` / \`requirements.txt\`, \`docker-compose.yml\`, and route/controller files
8
+ to understand the tech stack, endpoint shapes, and auth mechanisms.
9
+
10
+ ### Step 2: Identify resource relationships
11
+ Map how endpoints relate to each other — which POST creates resources consumed by other endpoints?
12
+ **Resolve nested/sub-router paths** from the Router Mounting section above.
13
+
14
+ ### Step 3: Call recommend tests
15
+ Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
16
+ }
17
+ const changedFiles = p.parsedDiff?.changedFiles.join(", ") ?? "";
18
+ const hasApiEndpoints = p.parsedDiff && (p.parsedDiff.newEndpoints.length > 0 || p.parsedDiff.modifiedEndpoints.length > 0);
19
+ const isUIOnly = !hasApiEndpoints && (p.parsedDiff?.changedFiles.every(f => !f.match(/\/(api|routes?|controllers?|routers?|handlers?|endpoints?)\//)) ?? false);
20
+ const step2 = hasApiEndpoints
21
+ ? `### Step 2: Discover related endpoints
22
+ Read handler code for changed API endpoints. Find related endpoints via imports, shared
23
+ models, adjacent route files. Resolve nested/sub-router paths from Router Mounting context.`
24
+ : isUIOnly
25
+ ? `### Step 2: Identify consumed API endpoints
26
+ UI-only PR — read changed components to find API calls (fetch, axios, hooks).`
27
+ : `### Step 2: Identify affected endpoints
28
+ No API route changes detected — read changed files to identify affected endpoints.`;
29
+ return `## Your Task — Enrich & Recommend (PR-scoped)
30
+
31
+ ### Step 1: Read the changed files
32
+ ${changedFiles}
33
+
34
+ ${step2}
35
+
36
+ ### Step 3: Draft integration scenarios
37
+ Draft multi-step scenarios simulating realistic user workflows:
38
+ - **Cross-resource data flow**: Foreign key relationships, parent→child creation, verification
39
+ - **Search/filter verification**: Create data, search, verify results
40
+ - **Negative/error paths**: Invalid references → appropriate errors
41
+ - **UI user journeys**: Concrete browser steps for frontend changes
42
+
43
+ **Quality:** Realistic request bodies, response data verification, actual field names for chaining.
44
+
45
+ ### Step 4: Call recommend tests
46
+ Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
47
+ }
48
+ export function buildAnalysisOutputText(p) {
49
+ const isDiffScope = p.analysisScope === "current_branch_diff";
50
+ const diffSection = p.parsedDiff
51
+ ? `
52
+ ## Branch Diff Context
53
+ **Branch**: \`${p.parsedDiff.currentBranch}\` → base: \`${p.parsedDiff.baseBranch}\`
54
+ **Changed Files** (${p.parsedDiff.changedFiles.length}): ${p.parsedDiff.changedFiles.join(", ")}
55
+ **New Endpoints** (${p.parsedDiff.newEndpoints.length}): ${p.parsedDiff.newEndpoints.map((e) => `${e.method} ${e.path} (${e.sourceFile})`).join(", ") || "none"}
56
+ **Modified Endpoints** (${p.parsedDiff.modifiedEndpoints.length}): ${p.parsedDiff.modifiedEndpoints.map((e) => `${e.method} ${e.path} (${e.sourceFile})`).join(", ") || "none"}
57
+ **Affected Services**: ${p.parsedDiff.affectedServices.join(", ") || "none"}
58
+ `
59
+ : "";
60
+ const endpointCatalog = p.scannedEndpoints.length > 0
61
+ ? `
62
+ ## Pre-Scanned Endpoint Catalog (${p.scannedEndpoints.length} routes)
63
+ ${p.scannedEndpoints.map((ep) => ` ${ep.methods.join("|")} ${ep.path} (${ep.sourceFile})`).join("\n")}
64
+ `
65
+ : "";
66
+ const wsLine = p.wsBaseUrl
67
+ ? `**Base URL**: \`${p.wsBaseUrl}\` | **Auth header**: \`${p.wsAuthHeader || "Authorization"}\``
68
+ : "";
69
+ const specSection = p.wsSchemaPath
70
+ ? `
71
+ ## OpenAPI Spec Available
72
+ Spec at \`${p.wsSchemaPath}\`. **Read it** for authoritative paths and schemas.
73
+ Pass \`apiSchema: "${p.wsSchemaPath}"\` to ALL test generation tool calls.`
74
+ : p.routerMountContext
75
+ ? `
76
+ ## Router Mounting / Nesting
77
+ \`\`\`
78
+ ${p.routerMountContext}
79
+ \`\`\`
80
+ Use this to resolve full URL paths for nested endpoints.`
81
+ : "";
82
+ const enrichment = buildEnrichmentInstructions(p);
83
+ return `# Repository Analysis
84
+
85
+ **Session ID**: \`${p.sessionId}\`
86
+ **Repository**: \`${p.repositoryPath}\`
87
+ **Analysis Scope**: \`${p.analysisScope}\`
88
+ ${isDiffScope ? `**Diff endpoints**: ${(p.parsedDiff?.newEndpoints.length ?? 0) + (p.parsedDiff?.modifiedEndpoints.length ?? 0)}` : `**Pre-scanned endpoints**: ${p.scannedEndpoints.length}`}
89
+ ${wsLine}
90
+ ${p.wsSchemaPath ? `**OpenAPI Spec**: \`${p.wsSchemaPath}\` (spec-based flow)` : "**Flow**: Code-scanning (may miss nesting)"}
91
+
92
+ ${diffSection}
93
+ ${endpointCatalog}
94
+ ${specSection}
95
+ ${enrichment}
96
+
97
+ **CRITICAL**: No .json/.md file creation. Prioritize cross-resource workflows.`;
98
+ }
@@ -0,0 +1,226 @@
1
+ export function buildPrioritizationDimensions() {
2
+ return `## Prioritization Dimensions (evaluate each candidate test)
3
+
4
+ For each candidate test, assess these dimensions using your judgment:
5
+
6
+ | Dimension | What to assess |
7
+ |-----------|---------------|
8
+ | **Sophistication** | Does it test a multi-step workflow or non-obvious scenario? Or is it a simple request→response check? |
9
+ | **Bug-Finding Potential** | Does it target known failure modes (race conditions, data consistency, state transitions, cascade effects)? |
10
+ | **User Journey Relevance** | Does it reflect how real users interact with the system (from traces, business flows, or critical paths)? |
11
+ | **Coverage Gap** | Does it address an area with zero existing test coverage? Or does it duplicate what\'s already tested? |
12
+ | **Code Insight** | Is it derived from actual implementation analysis (e.g., spotted a middleware pattern, found an N+1 risk) rather than just API shape? |
13
+
14
+ Candidates scoring well across MULTIPLE dimensions should be recommended first.
15
+ Candidates satisfying only ONE dimension (e.g., covers a gap but is trivially simple) should be deprioritized.
16
+
17
+ **Quality Gate:** For each candidate, ask: "Would a senior engineer be impressed by this test?"
18
+ If the answer is no — deprioritize it. Impressive tests catch real bugs, exercise real workflows,
19
+ and demonstrate understanding of the system\'s behavior. Trivial tests do not.`;
20
+ }
21
+ export function buildTestExamples() {
22
+ return `## Test Examples (calibrate your judgment)
23
+
24
+ **Impressive tests (recommend these):**
25
+ 1. "Register user → login → create order → verify order appears in user\'s order list"
26
+ Cross-resource workflow with auth chaining and data verification across users + orders.
27
+ 2. "Create product with inventory=10 → place order for qty=10 → verify inventory=0 →
28
+ place another order → verify 409 out-of-stock error"
29
+ Cross-resource state machine + business rule validation (products + orders + inventory).
30
+ 3. "POST /users with duplicate email → verify 409 Conflict → verify original user unchanged"
31
+ Error handling with side-effect verification — not just status code check.
32
+
33
+ **Non-impressive tests (deprioritize or skip):**
34
+ 1. "GET /products → 200" — trivial health check, no assertions beyond status code.
35
+ 2. "POST /products → GET /products/{id} → PUT /products/{id} → DELETE /products/{id}"
36
+ Single-resource CRUD — baseline, not impressive by itself.
37
+ 3. "POST /products with missing name → 422" — obvious validation, already covered by contract/fuzz.`;
38
+ }
39
+ export function buildTestPatternGuidelines() {
40
+ return `## Test Pattern Guidelines (reference, not rigid rules)
41
+
42
+ ### Tier 1 — Base Patterns
43
+ - CRUD lifecycle per resource group (Create → Read → Update → Delete)
44
+ - Auth flow (Register → Login → Access protected → Token refresh → Logout)
45
+ - Pagination & filtering (boundary values, empty results, large page sizes)
46
+ - Error responses (400, 401, 403, 404, 409, 422 — each with specific trigger)
47
+
48
+ ### Tier 2 — Code-Informed Patterns (higher value — look for these in the codebase)
49
+ - **Middleware chains**: If auth/rate-limit/logging middleware exists, test the chain
50
+ (e.g., rate limit hit → auth still checked → correct error returned)
51
+ - **N+1 query risk**: If list endpoints join related data (e.g., orders with products),
52
+ test with large datasets under load
53
+ - **State machines**: If resources have status transitions (draft→published→archived),
54
+ test invalid transitions (e.g., archived→draft should fail)
55
+ - **Cascade deletes**: If deleting a parent removes children, verify cascade AND verify
56
+ orphan prevention (delete product → orders referencing it get error or cascade)
57
+ - **Race conditions**: If concurrent writes are possible (inventory deduction, counter
58
+ increment), test concurrent requests under load
59
+ - **Computed fields**: If response contains derived values (total, average, count),
60
+ verify computation with known inputs
61
+ - **Webhook/event side effects**: If endpoints trigger async operations, test that side
62
+ effects occur (e.g., POST /orders triggers email notification)`;
63
+ }
64
+ export function buildTestQualityCriteria() {
65
+ return `## What Makes a Good Test
66
+
67
+ **Integration tests** should demonstrate cross-resource data flow — step A creates data
68
+ that step B depends on (e.g., create product \u2192 create order referencing that product's ID \u2192
69
+ verify order contains correct product). Single-resource CRUD alone is not an integration test.
70
+ Use realistic request bodies from source code schemas and verify response data, not just
71
+ status codes.
72
+
73
+ **E2E tests** should follow realistic user journeys end-to-end: browse products \u2192 search \u2192
74
+ add to cart \u2192 checkout. Verify that frontend actions trigger the correct API calls and
75
+ that the UI reflects backend state.
76
+
77
+ **UI tests** should exercise component behavior and interaction flows: fill form \u2192 validate
78
+ inputs \u2192 submit \u2192 see confirmation. Include visual state changes (loading, error, empty)
79
+ and accessibility checks.`;
80
+ }
81
+ export function buildGenerationRules(isUIOnlyPR) {
82
+ return `## Generation Guidelines
83
+
84
+ **Scenario fidelity:** Every workflow scenario should reflect the actual resource
85
+ relationships in the code. If the pre-drafted scenarios don't match the real data model,
86
+ replace them with accurate ones.
87
+
88
+ **Priority ordering by PR type:**
89
+ ${isUIOnlyPR ? `This is a **UI-only PR**. The most valuable tests are UI and E2E tests.
90
+
91
+ If Playwright traces exist for the changed pages, prioritize UI/E2E tests in the top 4.
92
+ If no traces exist, UI/E2E tests are still the highest-value recommendations — rank them
93
+ in the top 7 with scenario steps and trace recording instructions. The testbot will not
94
+ generate tests without traces, so all 7 become additionalRecommendations.
95
+
96
+ 1. **UI tests** — per changed component/page
97
+ 2. **E2E tests** — per user flow spanning frontend to backend
98
+ 3. **Integration tests** — only when the changed UI calls backend APIs
99
+ ` : `1. **Multi-resource integration tests** — one per cross-resource workflow (2-3 max).
100
+ 2. **Fuzz tests** — per POST/PUT endpoint with complex request bodies. Tests boundary values,
101
+ type coercion, missing/extra fields, and edge cases the schema allows.
102
+ 3. **Contract tests** — per endpoint with new/changed response schemas. Validates the response
103
+ structure matches expectations (field types, required fields, nested objects).
104
+ 4. **E2E tests** — per distinct user flow if the API serves a frontend or client
105
+ 5. **CRUD lifecycle integration tests** — only for resources with new/changed endpoints
106
+ where multi-resource tests don't already cover them.
107
+ `}When no Playwright trace exists, still recommend the test with instructions for recording
108
+ a trace using \`skyramp_start_trace_collection\` with \`playwright: true\`.
109
+
110
+ **Mixed PRs with frontend changes:** Include at least 1 E2E or UI test in the top 7,
111
+ ranked by value regardless of trace availability. If traces exist, place it in the top 4.
112
+ If no traces, it can still rank highly — the testbot will handle trace-dependent generation.
113
+
114
+ **Before finalizing:** Check that the top 4 aren't filled with CRUD tests for unchanged
115
+ resources when PR-relevant tests exist lower in the ranking. Swap if needed.
116
+
117
+ **No duplicate coverage.** If an existing test already covers an endpoint + test type,
118
+ recommend a multi-resource workflow that includes that endpoint alongside others instead.`;
119
+ }
120
+ export function buildToolWorkflows(authHeaderValue) {
121
+ return `## How to Generate Tests — Tool Workflows
122
+
123
+ **Auth Header:** \`authHeader: "${authHeaderValue}"\` — pass to EVERY tool call below.
124
+
125
+ **For multi-endpoint workflows (integration tests) — Scenario → Integration pipeline:**
126
+ 1. Call \`skyramp_scenario_test_generation\` once per step: \`scenarioName\`, \`destination\`,
127
+ \`baseURL\`, \`method\`, \`path\`, \`requestBody\`, \`authHeader: "${authHeaderValue}"\`.
128
+ \`statusCode\` is optional — defaults: POST→201, DELETE→204, GET/PUT/PATCH→200. Only override for non-standard codes.
129
+ **OpenAPI spec is NOT required.** \`apiSchema\` is OPTIONAL — omit it if no spec exists.
130
+ \`requestBody\` should use realistic field values from source code schemas (Zod, Pydantic, DTOs).
131
+ Inspect the source code to determine the correct request body shape — avoid sending \`{}\`.
132
+ Use unique names with timestamp suffix to avoid conflicts on re-runs.
133
+ For GET/PUT/DELETE with path IDs, use a placeholder — chaining resolves the real ID.
134
+ 2. Produces a \`scenario_<name>.json\` in the same \`outputDir\` as the test files (not \`.skyramp/\`).
135
+ 3. Call \`skyramp_integration_test_generation\` with \`scenarioFile\` AND \`authHeader: "${authHeaderValue}"\`.
136
+ Do NOT pass \`chainingKey\` — defaults to \`response.id\`. After generation, the testbot
137
+ will verify and fix path param chaining in the generated test.
138
+
139
+ **For single-endpoint tests (contract/fuzz):**
140
+ \`skyramp_{type}_test_generation\` with \`endpointURL\` (full URL incl. base + path), \`method\`,
141
+ \`authHeader: "${authHeaderValue}"\`, and \`requestData\` from source code schemas.
142
+ If an OpenAPI spec exists, ALSO pass \`apiSchema\` — it enables schema-aware validation
143
+ (contract tests verify response structure, fuzz tests generate smarter boundary values).
144
+ Without a spec, \`endpointURL\` alone is sufficient.
145
+
146
+ **For UI tests (no Playwright recording):**
147
+ 1. \`skyramp_start_trace_collection\` (playwright: true)
148
+ 2. Perform browser steps
149
+ 3. \`skyramp_stop_trace_collection\`
150
+ 4. \`skyramp_ui_test_generation\` with playwright zip
151
+
152
+ **For E2E tests:**
153
+ Same trace flow, pass both trace file and playwright zip to \`skyramp_e2e_test_generation\`.`;
154
+ }
155
+ export function buildCoverageChecklist(openApiSpec, isUIOnlyPR, hasFrontendChanges, authHeaderValue, topN) {
156
+ const specNote = openApiSpec
157
+ ? `\n**OpenAPI Spec available**: \`${openApiSpec.path}\`
158
+ Use it actively:
159
+ - **Contract tests**: pass \`apiSchema: "${openApiSpec.path}"\` — the CLI validates response schemas against the spec.
160
+ - **Fuzz tests**: pass \`apiSchema: "${openApiSpec.path}"\` — the CLI generates boundary values from schema constraints.
161
+ - **Integration tests**: pass \`apiSchema\` to \`skyramp_scenario_test_generation\` — it extracts destination and request/response shapes.
162
+ - **Single-endpoint tests**: pass both \`endpointURL\` AND \`apiSchema\` for schema-aware generation.
163
+ \n`
164
+ : "";
165
+ const distribution = isUIOnlyPR
166
+ ? `- Prioritize UI tests (≥3), then E2E tests (≥2), then integration only if UI calls APIs. 0% smoke.`
167
+ : hasFrontendChanges
168
+ ? `- Mix: integration (2-3), E2E (1-2), UI (1-2), fuzz or contract (1). 0% smoke.`
169
+ : `- Mix: integration (2-3, multi-resource first), fuzz (1-2), contract (1-2), E2E (1 if user-facing flows exist). 0% smoke.`;
170
+ return `## Coverage Checklist
171
+ ${specNote}
172
+ ${isUIOnlyPR ? `**UI-only PR** — This PR has no backend changes. Focus on UI and E2E tests.
173
+
174
+ With Playwright traces: prioritize UI tests (one per changed component) and E2E tests
175
+ (one per page-level user flow). Integration tests are relevant only if the UI calls APIs.
176
+
177
+ Without traces: recommend UI/E2E tests with scenario steps and trace recording instructions
178
+ (\`skyramp_start_trace_collection\` with \`playwright: true\`). The testbot will skip generation
179
+ entirely for frontend-only PRs without traces — all recommendations become additional
180
+ recommendations in the report. Skip fuzz, contract, and smoke tests.
181
+ ` : `For each endpoint, recommend the most valuable test types — aim for variety:
182
+ 1. **Integration** — multi-resource workflows (not just single-resource CRUD)
183
+ 2. **Fuzz** — POST/PUT endpoints with request bodies (validates edge cases, type safety)
184
+ 3. **Contract** — endpoints with new/changed response schemas (validates structure)
185
+ 4. **E2E** — user flows spanning frontend to backend${hasFrontendChanges ? " (include at least 1 for this PR)" : ""}
186
+ 5. **UI** — changed frontend components${hasFrontendChanges ? " (include at least 1)" : ""}
187
+ 6. No smoke tests.
188
+ Do NOT recommend 7 integration tests — diversify across test types.
189
+ `}
190
+
191
+ ## For Each Recommendation Include:
192
+ 1. Test type 2. Priority (high/medium/low) 3. Target endpoint/scenario
193
+ 4. What it validates (business logic, not just "tests the endpoint")
194
+ 5. Skyramp tool call details — exact tool + key params for zero-editing execution
195
+ 6. For integration/E2E: reference draftedScenario by scenarioName
196
+
197
+ ## When Artifacts Are Missing
198
+ Recommend the test anyway — never mark it "blocked":
199
+ - **No OpenAPI spec** \u2192 use \`endpointURL\` and \`requestBody\` from source code
200
+ - **No Playwright recording** \u2192 provide trace recording instructions
201
+ - **No backend trace** \u2192 use the scenario generation pipeline
202
+
203
+ ## Select the Top ${topN}
204
+ Consider all possible tests (endpoints \u00d7 interaction types + scenarios), then select the
205
+ top ${topN} most valuable. Include \`totalConsidered\` count in your output. The top 4 will
206
+ be generated; recommendations #5-${topN} will appear in the report but won't be generated,
207
+ so ensure the top 4 are the highest-impact tests.
208
+
209
+ **Before outputting, verify:**
210
+ ${isUIOnlyPR ? `- If traces exist, at least 2 of the top 4 should be UI/E2E tests.
211
+ - Without traces, all 7 become additionalRecommendations (no generation). Rank UI/E2E highest.
212
+ - Avoid CRUD tests for unchanged resources the UI doesn't call.` : `- If the PR includes frontend changes, include at least 1 E2E/UI test in the top 4.
213
+ - CRUD tests for unchanged resources should not displace PR-relevant tests in the top 4.`}
214
+ - Each integration scenario's step sequence should be logically valid — preconditions
215
+ met by prior steps.
216
+
217
+ Preferred ordering: ${isUIOnlyPR ? "UI \u2192 E2E \u2192 integration (if UI calls APIs)." : "integration \u2192 fuzz \u2192 contract \u2192 E2E \u2192 UI."}
218
+ ${distribution}
219
+
220
+ Each recommendation should include enough detail for direct tool invocation.
221
+ Reference draftedScenarios by name and interactions by description.
222
+ Use "high"/"medium"/"low" for priority — no numeric scores.
223
+ Total candidates should be \u2265 ${topN}.
224
+
225
+ Generate recommendations now.`;
226
+ }
@@ -0,0 +1,71 @@
1
+ import { z } from "zod";
2
+ import { StateManager, getSessionFilePath, hasSessionData, getSessionData, } from "../../utils/AnalysisStateManager.js";
3
+ import { logger } from "../../utils/logger.js";
4
+ import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
5
+ import { getWorkspaceAuthHeader } from "../../utils/workspaceAuth.js";
6
+ export function registerRecommendTestsPrompt(server) {
7
+ server.registerPrompt("skyramp_recommend_tests", {
8
+ description: "Generate test recommendations from enriched repository analysis. " +
9
+ "Provide a sessionId from skyramp_analyze_repository.",
10
+ argsSchema: {
11
+ sessionId: z
12
+ .string()
13
+ .describe("Session ID from skyramp_analyze_repository"),
14
+ scope: z
15
+ .enum(["full_repo", "current_branch_diff"])
16
+ .default("full_repo")
17
+ .optional()
18
+ .describe("Analysis scope (defaults to the scope used during analysis)"),
19
+ focus: z
20
+ .enum(["all", "interactions", "scenarios"])
21
+ .default("all")
22
+ .optional()
23
+ .describe("Focus area: all tests, interaction-based (contract/fuzz), or scenario-based (integration/e2e)"),
24
+ },
25
+ }, async (args) => {
26
+ const sessionId = args.sessionId;
27
+ if (!sessionId) {
28
+ throw new Error("sessionId is required");
29
+ }
30
+ // Try process memory first, then fall back to state file
31
+ let data = null;
32
+ if (hasSessionData(sessionId)) {
33
+ data = getSessionData(sessionId);
34
+ }
35
+ else {
36
+ const registeredPath = getSessionFilePath(sessionId);
37
+ const mgr = registeredPath
38
+ ? StateManager.fromStatePath(registeredPath)
39
+ : StateManager.fromSessionId(sessionId);
40
+ if (!mgr.exists()) {
41
+ throw new Error(`Analysis session "${sessionId}" not found. Run skyramp_analyze_repository first.`);
42
+ }
43
+ data = await mgr.readData();
44
+ }
45
+ if (!data?.analysis) {
46
+ throw new Error(`Session "${sessionId}" has no analysis data.`);
47
+ }
48
+ const scope = args.scope || data.analysisScope || "full_repo";
49
+ const focus = args.focus || "all";
50
+ const effectiveTopN = scope === "current_branch_diff" ? 7 : 10;
51
+ const workspaceAuthHeader = data.repositoryPath
52
+ ? await getWorkspaceAuthHeader(data.repositoryPath)
53
+ : undefined;
54
+ const prompt = buildRecommendationPrompt(data.analysis, scope, focus, effectiveTopN, data.prContext, workspaceAuthHeader);
55
+ logger.info("Serving recommendation prompt via MCP Prompt", {
56
+ sessionId,
57
+ scope,
58
+ });
59
+ return {
60
+ messages: [
61
+ {
62
+ role: "user",
63
+ content: {
64
+ type: "text",
65
+ text: `Session: ${sessionId}\nRepository: ${data.repositoryPath}\nScope: ${scope}\n\nAvailable MCP Resources:\n- skyramp://analysis/${sessionId}/summary\n- skyramp://analysis/${sessionId}/endpoints\n- skyramp://analysis/${sessionId}/scenarios\n- skyramp://analysis/${sessionId}/diff\n\n${prompt}`,
66
+ },
67
+ },
68
+ ],
69
+ };
70
+ });
71
+ }