@skyramp/mcp 0.0.58 → 0.0.60-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/build/index.js +32 -5
  2. package/build/prompts/test-recommendation/analysisOutputPrompt.js +98 -0
  3. package/build/prompts/test-recommendation/recommendationSections.js +209 -0
  4. package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +71 -0
  5. package/build/prompts/test-recommendation/test-recommendation-prompt.js +166 -104
  6. package/build/prompts/testGenerationPrompt.js +2 -3
  7. package/build/prompts/testbot/testbot-prompts.js +93 -19
  8. package/build/resources/analysisResources.js +254 -0
  9. package/build/services/ScenarioGenerationService.js +70 -24
  10. package/build/tools/generate-tests/generateIntegrationRestTool.js +50 -0
  11. package/build/tools/generate-tests/generateScenarioRestTool.js +7 -1
  12. package/build/tools/submitReportTool.js +34 -2
  13. package/build/tools/test-maintenance/stateCleanupTool.js +8 -0
  14. package/build/tools/test-recommendation/analyzeRepositoryTool.js +349 -217
  15. package/build/tools/test-recommendation/recommendTestsTool.js +163 -133
  16. package/build/tools/workspace/initializeWorkspaceTool.js +1 -1
  17. package/build/types/RepositoryAnalysis.js +99 -12
  18. package/build/utils/AnalysisStateManager.js +40 -23
  19. package/build/utils/branchDiff.js +47 -0
  20. package/build/utils/pr-comment-parser.js +124 -0
  21. package/build/utils/projectMetadata.js +188 -0
  22. package/build/utils/projectMetadata.test.js +81 -0
  23. package/build/utils/repoScanner.js +378 -0
  24. package/build/utils/routeParsers.js +213 -0
  25. package/build/utils/routeParsers.test.js +87 -0
  26. package/build/utils/scenarioDrafting.js +119 -0
  27. package/build/utils/scenarioDrafting.test.js +66 -0
  28. package/build/utils/trace-parser.js +166 -0
  29. package/build/utils/workspaceAuth.js +16 -0
  30. package/package.json +1 -1
  31. package/build/prompts/test-recommendation/repository-analysis-prompt.js +0 -326
  32. package/build/prompts/test-recommendation/test-mapping-prompt.js +0 -266
  33. package/build/tools/test-recommendation/mapTestsTool.js +0 -243
  34. package/build/types/TestMapping.js +0 -173
  35. package/build/utils/scoring-engine.js +0 -380
package/build/index.js CHANGED
@@ -19,8 +19,8 @@ import { registerLoginTool } from "./tools/auth/loginTool.js";
19
19
  import { registerLogoutTool } from "./tools/auth/logoutTool.js";
20
20
  import { registerFixErrorTool } from "./tools/fixErrorTool.js";
21
21
  import { registerAnalyzeRepositoryTool } from "./tools/test-recommendation/analyzeRepositoryTool.js";
22
- import { registerMapTestsTool } from "./tools/test-recommendation/mapTestsTool.js";
23
22
  import { registerRecommendTestsTool } from "./tools/test-recommendation/recommendTestsTool.js";
23
+ import { registerRecommendTestsPrompt } from "./prompts/test-recommendation/registerRecommendTestsPrompt.js";
24
24
  import { registerModularizationTool } from "./tools/code-refactor/modularizationTool.js";
25
25
  import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
26
26
  import { registerScenarioTestTool } from "./tools/generate-tests/generateScenarioRestTool.js";
@@ -34,6 +34,7 @@ import { registerTestbotPrompt, registerTestbotResource, } from "./prompts/testb
34
34
  import { registerInitTestbotTool } from "./tools/initTestbotTool.js";
35
35
  import { registerSubmitReportTool } from "./tools/submitReportTool.js";
36
36
  import { registerInitializeWorkspaceTool } from "./tools/workspace/initializeWorkspaceTool.js";
37
+ import { registerAnalysisResources } from "./resources/analysisResources.js";
37
38
  import { AnalyticsService } from "./services/AnalyticsService.js";
38
39
  import { initCheck } from "./utils/initAgent.js";
39
40
  const server = new McpServer({
@@ -47,13 +48,32 @@ const server = new McpServer({
47
48
  prompts: {
48
49
  listChanged: true,
49
50
  },
51
+ resources: {
52
+ listChanged: true,
53
+ },
50
54
  },
51
- instructions: `Skyramp MCP Server — generates and executes API tests (smoke, fuzz, contract, load, integration, E2E, UI).
55
+ instructions: `Skyramp MCP Server — generates and executes API tests (fuzz, contract, integration, E2E, UI).
52
56
 
53
57
  ## Rules
54
58
  - NEVER show CLI commands. ALWAYS use the MCP tools provided.
55
59
  - For UI and E2E tests, use the trace collection start/stop tools.
56
60
 
61
+ ## Test Recommendation Flow (2-step)
62
+ 1. Call \`skyramp_analyze_repository\` → returns a \`sessionId\`.
63
+ The analysis scans source code (code-first) to build enriched endpoints
64
+ (Path → Method → Interaction with request/response bodies, headers, cookies)
65
+ and draft user-flow scenarios for integration/E2E tests.
66
+ 2. Call \`skyramp_recommend_tests\` with \`sessionId\` → the LLM reasons over the
67
+ enriched data to recommend tests, referencing specific interactions and scenarios.
68
+
69
+ After analysis, you can also inspect data via MCP Resources:
70
+ - \`skyramp://analysis/{sessionId}/summary\` — high-level overview
71
+ - \`skyramp://analysis/{sessionId}/endpoints\` — compact endpoint listing
72
+ - \`skyramp://analysis/{sessionId}/endpoints/{path}\` — full path detail
73
+ - \`skyramp://analysis/{sessionId}/endpoints/{path}/{method}\` — single method detail
74
+ - \`skyramp://analysis/{sessionId}/scenarios\` — drafted scenarios
75
+ - \`skyramp://analysis/{sessionId}/diff\` — branch diff context
76
+
57
77
  ## Workspace Initialization (before ANY other Skyramp tool)
58
78
  Follow this flow EVERY time before calling any Skyramp tool:
59
79
 
@@ -75,8 +95,13 @@ Before calling ANY test generation tool, you MUST follow this flow:
75
95
  2. **Extract** the \`language\`, \`framework\`, \`outputDir\`, and \`api.baseUrl\` from the services section.
76
96
  3. **Use those values** as defaults for the test generation tool call. Do NOT ask the user for these values if they are already configured in the workspace file.
77
97
  4. **CRITICAL — endpointURL**: The \`endpointURL\` parameter MUST be the full URL to the specific endpoint being tested, NOT just the base URL. Construct it by combining \`api.baseUrl\` with the endpoint path. Example: if \`api.baseUrl\` is \`http://localhost:8000\` and the endpoint is \`/api/v1/products\`, pass \`endpointURL: "http://localhost:8000/api/v1/products"\`. NEVER pass just the base URL (e.g. \`http://localhost:8000\`) as \`endpointURL\`.
78
- 5. **If the workspace file does not exist**, or the needed values (language, framework, outputDir) are missing from the workspace config, ASK the user which language and framework they want before calling the tool.
79
- 6. The user can always override workspace defaults by explicitly specifying values in their request.
98
+ 5. **CRITICAL scenario generation**: When calling \`skyramp_scenario_test_generation\`, ALWAYS pass:
99
+ - \`baseURL\`: The full base URL from \`api.baseUrl\` (e.g., \`http://localhost:3000\`). This determines the scheme, host, and port in the generated trace. Without it, the trace defaults to https:443 which is almost always wrong for local development.
100
+ - \`authHeader\`: The auth header name from \`api.authHeader\` in the workspace config. Use \`Cookie\` for cookie/session-based auth (NextAuth, etc.), \`Authorization\` for Bearer tokens, \`X-API-Key\` for API keys. Without it, the trace defaults to \`Authorization: Bearer\` which breaks cookie-based apps.
101
+ - \`apiSchema\` is OPTIONAL — omit it for code-first apps without OpenAPI specs.
102
+ 6. **CRITICAL — integration test from scenario**: When calling \`skyramp_integration_test_generation\` with a \`scenarioFile\`, ALSO pass \`authHeader\` (same value as used in scenario generation). This tells the CLI which header to parameterize with the auth token. Without it, the generated test defaults to \`Authorization: Bearer\` regardless of what's in the trace.
103
+ 7. **If the workspace file does not exist**, or the needed values (language, framework, outputDir) are missing from the workspace config, ASK the user which language and framework they want before calling the tool.
104
+ 8. The user can always override workspace defaults by explicitly specifying values in their request.
80
105
  `,
81
106
  });
82
107
  // Check for first-time invocation after version update (runs in background, doesn't block)
@@ -116,6 +141,7 @@ const prompts = [
116
141
  registerTestGenerationPrompt,
117
142
  registerStartTraceCollectionPrompt,
118
143
  registerTestHealthPrompt,
144
+ registerRecommendTestsPrompt,
119
145
  ];
120
146
  if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
121
147
  prompts.push(registerTestbotPrompt);
@@ -145,8 +171,9 @@ const codeQualityTools = [
145
171
  codeQualityTools.forEach((registerTool) => registerTool(server));
146
172
  // Register test recommendation tools
147
173
  registerAnalyzeRepositoryTool(server);
148
- registerMapTestsTool(server);
149
174
  registerRecommendTestsTool(server);
175
+ // Register analysis resources (MCP Resources for enriched data access)
176
+ registerAnalysisResources(server);
150
177
  // Register test maintenance tools
151
178
  registerDiscoverTestsTool(server);
152
179
  registerAnalyzeTestDriftTool(server);
@@ -0,0 +1,98 @@
1
/**
 * Builds the "Your Task" section of the analysis output: the step-by-step
 * instructions that end with a call to `skyramp_recommend_tests`.
 *
 * For any scope other than `"current_branch_diff"` a fixed full-repo task list
 * is returned. For the diff scope, Step 2 is chosen from three variants:
 *   - the diff contains new or modified API endpoints,
 *   - the diff touches only non-API files (UI-only), or
 *   - neither of the above (including an empty or missing diff).
 *
 * @param {object} p
 * @param {string} p.analysisScope - Scope of the analysis run.
 * @param {string} p.sessionId - Session id interpolated into the final step.
 * @param {object} [p.parsedDiff] - Optional diff summary; `changedFiles`,
 *   `newEndpoints` and `modifiedEndpoints` are read when present.
 * @returns {string} Markdown instructions.
 */
function buildEnrichmentInstructions(p) {
    if (p.analysisScope !== "current_branch_diff") {
        return `## Your Task — Enrich & Recommend (full repo)

### Step 1: Read key files
Read \`package.json\` / \`requirements.txt\`, \`docker-compose.yml\`, and route/controller files
to understand the tech stack, endpoint shapes, and auth mechanisms.

### Step 2: Identify resource relationships
Map how endpoints relate to each other — which POST creates resources consumed by other endpoints?
**Resolve nested/sub-router paths** from the Router Mounting section above.

### Step 3: Call recommend tests
Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
    }
    // Treat every diff array as optional so a partial parsedDiff cannot throw.
    const changedFileList = p.parsedDiff?.changedFiles ?? [];
    const newEndpoints = p.parsedDiff?.newEndpoints ?? [];
    const modifiedEndpoints = p.parsedDiff?.modifiedEndpoints ?? [];
    const hasApiEndpoints = newEndpoints.length > 0 || modifiedEndpoints.length > 0;
    // Anchor on a path-segment boundary (start of string OR "/") so that
    // repo-relative paths such as "routes/users.js" count as API files too.
    const apiDirPattern = /(?:^|\/)(?:api|routes?|controllers?|routers?|handlers?|endpoints?)\//;
    // An empty diff is not "UI-only": require at least one changed file, because
    // every()/some() on an empty list would otherwise be vacuously satisfied.
    const isUIOnly = !hasApiEndpoints &&
        changedFileList.length > 0 &&
        !changedFileList.some((file) => apiDirPattern.test(file));
    let step2;
    if (hasApiEndpoints) {
        step2 = `### Step 2: Discover related endpoints
Read handler code for changed API endpoints. Find related endpoints via imports, shared
models, adjacent route files. Resolve nested/sub-router paths from Router Mounting context.`;
    }
    else if (isUIOnly) {
        step2 = `### Step 2: Identify consumed API endpoints
UI-only PR — read changed components to find API calls (fetch, axios, hooks).`;
    }
    else {
        step2 = `### Step 2: Identify affected endpoints
No API route changes detected — read changed files to identify affected endpoints.`;
    }
    const changedFiles = changedFileList.join(", ");
    return `## Your Task — Enrich & Recommend (PR-scoped)

### Step 1: Read the changed files
${changedFiles}

${step2}

### Step 3: Draft integration scenarios
Draft multi-step scenarios simulating realistic user workflows:
- **Cross-resource data flow**: Foreign key relationships, parent→child creation, verification
- **Search/filter verification**: Create data, search, verify results
- **Negative/error paths**: Invalid references → appropriate errors
- **UI user journeys**: Concrete browser steps for frontend changes

**Quality:** Realistic request bodies, response data verification, actual field names for chaining.

### Step 4: Call recommend tests
Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
}
48
/**
 * Renders the Markdown text returned after repository analysis: a header with
 * session/repository/scope, optional branch-diff context, the pre-scanned
 * endpoint catalog, either the OpenAPI-spec note or the router-mounting
 * context, and finally the task instructions from
 * `buildEnrichmentInstructions`.
 *
 * @param {object} p
 * @param {string} p.sessionId
 * @param {string} p.repositoryPath
 * @param {string} p.analysisScope - `"current_branch_diff"` switches the header
 *   counter from pre-scanned endpoints to diff endpoints.
 * @param {object} [p.parsedDiff] - Optional diff summary (branches, changed
 *   files, new/modified endpoints, affected services).
 * @param {Array<object>} [p.scannedEndpoints] - Entries with `methods`, `path`
 *   and `sourceFile`; a missing list is treated as empty.
 * @param {string} [p.wsBaseUrl] - When set, a base URL / auth header line is shown.
 * @param {string} [p.wsAuthHeader] - Falls back to "Authorization" in that line.
 * @param {string} [p.wsSchemaPath] - When set, the spec section is shown and
 *   takes precedence over the router-mounting section.
 * @param {string} [p.routerMountContext] - Shown only when no spec path is set.
 * @returns {string} Markdown text.
 */
export function buildAnalysisOutputText(p) {
    const isDiffScope = p.analysisScope === "current_branch_diff";
    const diff = p.parsedDiff;
    // Missing arrays are treated as empty so a partial payload cannot throw.
    const scannedEndpoints = p.scannedEndpoints ?? [];
    const changedFiles = diff?.changedFiles ?? [];
    const newEndpoints = diff?.newEndpoints ?? [];
    const modifiedEndpoints = diff?.modifiedEndpoints ?? [];
    const affectedServices = diff?.affectedServices ?? [];
    const describeEndpoint = (e) => `${e.method} ${e.path} (${e.sourceFile})`;
    const diffSection = diff
        ? `
## Branch Diff Context
**Branch**: \`${diff.currentBranch}\` → base: \`${diff.baseBranch}\`
**Changed Files** (${changedFiles.length}): ${changedFiles.join(", ")}
**New Endpoints** (${newEndpoints.length}): ${newEndpoints.map(describeEndpoint).join(", ") || "none"}
**Modified Endpoints** (${modifiedEndpoints.length}): ${modifiedEndpoints.map(describeEndpoint).join(", ") || "none"}
**Affected Services**: ${affectedServices.join(", ") || "none"}
`
        : "";
    const endpointCatalog = scannedEndpoints.length > 0
        ? `
## Pre-Scanned Endpoint Catalog (${scannedEndpoints.length} routes)
${scannedEndpoints.map((ep) => ` ${ep.methods.join("|")} ${ep.path} (${ep.sourceFile})`).join("\n")}
`
        : "";
    const wsLine = p.wsBaseUrl
        ? `**Base URL**: \`${p.wsBaseUrl}\` | **Auth header**: \`${p.wsAuthHeader || "Authorization"}\``
        : "";
    // The spec note wins over router-mounting context when both are available.
    let specSection = "";
    if (p.wsSchemaPath) {
        specSection = `
## OpenAPI Spec Available
Spec at \`${p.wsSchemaPath}\`. **Read it** for authoritative paths and schemas.
Pass \`apiSchema: "${p.wsSchemaPath}"\` to ALL test generation tool calls.`;
    }
    else if (p.routerMountContext) {
        specSection = `
## Router Mounting / Nesting
\`\`\`
${p.routerMountContext}
\`\`\`
Use this to resolve full URL paths for nested endpoints.`;
    }
    const endpointCountLine = isDiffScope
        ? `**Diff endpoints**: ${newEndpoints.length + modifiedEndpoints.length}`
        : `**Pre-scanned endpoints**: ${scannedEndpoints.length}`;
    const flowLine = p.wsSchemaPath
        ? `**OpenAPI Spec**: \`${p.wsSchemaPath}\` (spec-based flow)`
        : "**Flow**: Code-scanning (may miss nesting)";
    const enrichment = buildEnrichmentInstructions(p);
    return `# Repository Analysis

**Session ID**: \`${p.sessionId}\`
**Repository**: \`${p.repositoryPath}\`
**Analysis Scope**: \`${p.analysisScope}\`
${endpointCountLine}
${wsLine}
${flowLine}

${diffSection}
${endpointCatalog}
${specSection}
${enrichment}

**CRITICAL**: No .json/.md file creation. Prioritize cross-resource workflows.`;
}
@@ -0,0 +1,209 @@
1
/**
 * Returns the static "Prioritization Dimensions" prompt section: a Markdown
 * table of the five dimensions used to rank candidate tests, followed by the
 * ordering guidance and the "senior engineer" quality gate.
 *
 * @returns {string} Markdown text (no trailing newline).
 */
export function buildPrioritizationDimensions() {
    // [dimension name, question to ask] — rendered as one table row each.
    const dimensions = [
        ["Sophistication", "Does it test a multi-step workflow or non-obvious scenario? Or is it a simple request→response check?"],
        ["Bug-Finding Potential", "Does it target known failure modes (race conditions, data consistency, state transitions, cascade effects)?"],
        ["User Journey Relevance", "Does it reflect how real users interact with the system (from traces, business flows, or critical paths)?"],
        ["Coverage Gap", "Does it address an area with zero existing test coverage? Or does it duplicate what's already tested?"],
        ["Code Insight", "Is it derived from actual implementation analysis (e.g., spotted a middleware pattern, found an N+1 risk) rather than just API shape?"],
    ];
    const tableRows = dimensions.map(([name, question]) => `| **${name}** | ${question} |`);
    const lines = [
        "## Prioritization Dimensions (evaluate each candidate test)",
        "",
        "For each candidate test, assess these dimensions using your judgment:",
        "",
        "| Dimension | What to assess |",
        "|-----------|---------------|",
        ...tableRows,
        "",
        "Candidates scoring well across MULTIPLE dimensions should be recommended first.",
        "Candidates satisfying only ONE dimension (e.g., covers a gap but is trivially simple) should be deprioritized.",
        "",
        '**Quality Gate:** For each candidate, ask: "Would a senior engineer be impressed by this test?"',
        "If the answer is no — deprioritize it. Impressive tests catch real bugs, exercise real workflows,",
        "and demonstrate understanding of the system's behavior. Trivial tests do not.",
    ];
    return lines.join("\n");
}
21
/**
 * Returns the static "Test Examples" prompt section, listing three examples
 * of tests worth recommending and three examples to deprioritize or skip.
 *
 * @returns {string} Markdown text (no trailing newline).
 */
export function buildTestExamples() {
    const impressive = [
        "**Impressive tests (recommend these):**",
        `1. "Register user → login → create order → verify order appears in user's order list"`,
        "Cross-resource workflow with auth chaining and data verification across users + orders.",
        '2. "Create product with inventory=10 → place order for qty=10 → verify inventory=0 →',
        'place another order → verify 409 out-of-stock error"',
        "Cross-resource state machine + business rule validation (products + orders + inventory).",
        '3. "POST /users with duplicate email → verify 409 Conflict → verify original user unchanged"',
        "Error handling with side-effect verification — not just status code check.",
    ];
    const nonImpressive = [
        "**Non-impressive tests (deprioritize or skip):**",
        '1. "GET /products → 200" — trivial health check, no assertions beyond status code.',
        '2. "POST /products → GET /products/{id} → PUT /products/{id} → DELETE /products/{id}"',
        "Single-resource CRUD — baseline, not impressive by itself.",
        '3. "POST /products with missing name → 422" — obvious validation, already covered by contract/fuzz.',
    ];
    // Sections are separated by exactly one blank line.
    return [
        "## Test Examples (calibrate your judgment)",
        impressive.join("\n"),
        nonImpressive.join("\n"),
    ].join("\n\n");
}
39
/**
 * Returns the static "Test Pattern Guidelines" prompt section: Tier 1 base
 * patterns followed by Tier 2 code-informed patterns.
 *
 * @returns {string} Markdown text (no trailing newline).
 */
export function buildTestPatternGuidelines() {
    const tierOne = [
        "### Tier 1 — Base Patterns",
        "- CRUD lifecycle per resource group (Create → Read → Update → Delete)",
        "- Auth flow (Register → Login → Access protected → Token refresh → Logout)",
        "- Pagination & filtering (boundary values, empty results, large page sizes)",
        "- Error responses (400, 401, 403, 404, 409, 422 — each with specific trigger)",
    ];
    const tierTwo = [
        "### Tier 2 — Code-Informed Patterns (higher value — look for these in the codebase)",
        "- **Middleware chains**: If auth/rate-limit/logging middleware exists, test the chain",
        "(e.g., rate limit hit → auth still checked → correct error returned)",
        "- **N+1 query risk**: If list endpoints join related data (e.g., orders with products),",
        "test with large datasets under load",
        "- **State machines**: If resources have status transitions (draft→published→archived),",
        "test invalid transitions (e.g., archived→draft should fail)",
        "- **Cascade deletes**: If deleting a parent removes children, verify cascade AND verify",
        "orphan prevention (delete product → orders referencing it get error or cascade)",
        "- **Race conditions**: If concurrent writes are possible (inventory deduction, counter",
        "increment), test concurrent requests under load",
        "- **Computed fields**: If response contains derived values (total, average, count),",
        "verify computation with known inputs",
        "- **Webhook/event side effects**: If endpoints trigger async operations, test that side",
        "effects occur (e.g., POST /orders triggers email notification)",
    ];
    const allLines = [
        "## Test Pattern Guidelines (reference, not rigid rules)",
        "",
        ...tierOne,
        "",
        ...tierTwo,
    ];
    return allLines.join("\n");
}
64
/**
 * Returns the static prompt section that defines what counts as a meaningful
 * integration test, E2E test, and UI test.
 *
 * @returns {string} Markdown text (no trailing newline).
 */
export function buildTestQualityCriteria() {
    const integrationCriteria = [
        "**What makes a MEANINGFUL integration test (all 3 required):**",
        "1. **Cross-resource data flow** — Step A creates data that Step B depends on (e.g., create",
        "product → create order referencing that product's ID → verify order contains correct product).",
        "A test that just does CRUD on a single resource is NOT an integration test.",
        "2. **Realistic request bodies** — Use domain-appropriate data from the source code schemas.",
        "Product tests should use real field names (name, price, category), not placeholders.",
        'Order tests should reference actual product IDs from prior steps, not hardcoded "1".',
        "3. **Verification assertions** — Each test MUST verify the response DATA, not just status",
        "codes. After creating an order, GET it and verify it contains the correct product_id,",
        "quantity, and computed total. After searching products, verify the results match the query.",
    ];
    const e2eCriteria = [
        "**What makes a MEANINGFUL E2E test:**",
        "1. **Realistic user journey** — A real user flow from start to finish: browse products →",
        'search/filter → add to cart → checkout. NOT just "navigate to /products and check 200".',
        "2. **Frontend-to-backend validation** — Verify that frontend actions trigger the correct API",
        "calls and that the UI reflects the backend state correctly.",
        "3. **Domain-specific scenarios** — If the PR adds search, test: enter search term → verify",
        "results appear → click result → verify detail page loads with correct data.",
    ];
    const uiCriteria = [
        "**What makes a MEANINGFUL UI test:**",
        "1. **Component behavior** — Test that UI components render correctly and respond to user",
        "interactions (clicks, typing, form submissions).",
        "2. **Visual state changes** — Test loading states, error states, empty states.",
        "3. **User interaction flows** — Test complete interaction sequences: fill form → validate",
        'inputs → submit → see confirmation. NOT just "page renders".',
        "4. **Accessibility** — Verify keyboard navigation, ARIA labels, and focus management.",
    ];
    // Title plus three sections, each separated by a single blank line.
    const sections = [
        "## ⭐ MANDATORY — Meaningful Integration & E2E Tests",
        integrationCriteria.join("\n"),
        e2eCriteria.join("\n"),
        uiCriteria.join("\n"),
    ];
    return sections.join("\n\n");
}
94
/**
 * Returns the static generation-rules prompt section (RULE 1, RULE 2,
 * RULE 2a and RULE 3) governing which tests are recommended and in what
 * priority order.
 *
 * @param {boolean} isUIOnlyPR - Accepted for interface compatibility; the
 *   returned text does not currently vary with it.
 * @returns {string} Markdown text (no trailing newline).
 */
export function buildGenerationRules(isUIOnlyPR) {
    const ruleOne = [
        '**RULE 1 (non-negotiable):** Every "workflow" scenario below MUST become an integration',
        "test recommendation. But ALSO: if the pre-drafted scenarios don't reflect actual resource",
        "relationships in the code, **replace them** with scenarios that reflect the REAL data model.",
    ];
    const ruleTwo = [
        "**RULE 2: Priority ordering — integration tests DOMINATE, but UI/E2E are MANDATORY for frontend changes:**",
        "1. **Multi-resource integration tests** (HIGHEST PRIORITY) — one for EACH cross-resource",
        "workflow scenario. Generate AT LEAST 2 when 2+ resources exist.",
        "2. **CRUD lifecycle integration tests** — one for EACH resource with new/changed endpoints.",
        "3. **E2E tests** — one for EACH distinct user flow spanning frontend → backend.",
        "4. **UI tests** — one for EACH changed frontend component/page with meaningful interactions.",
        "**If no Playwright trace exists, still recommend the test and provide step-by-step instructions",
        "for the user to record a trace using `skyramp_start_trace_collection` with `playwright: true`.**",
        "5. **Fuzz tests** — one for EACH POST/PUT endpoint with request body validation.",
        "6. **Contract tests** — one for EACH endpoint with a new/changed response schema.",
        "7. **NEVER generate smoke tests. Zero smoke tests. Not even one.**",
    ];
    const ruleTwoA = [
        "**RULE 2a (MANDATORY): When changed files include frontend components/pages/views:**",
        "- At least 1 of the top 7 MUST be a UI test for a changed component.",
        "- At least 1 of the top 7 MUST be an E2E test for a user flow involving the changed UI.",
        "- If Playwright trace is missing, set `frontendTrace` to a message like:",
        '"Requires Playwright recording — run `skyramp_start_trace_collection` with playwright:true"',
    ];
    const ruleThree = [
        "**RULE 3: No duplicate coverage.**",
        "Do NOT generate a test if an existing test already covers that endpoint × test type.",
        "If `products_integration_test.py` already exists covering CRUD products, recommend a",
        "multi-resource workflow that INCLUDES products alongside other resources instead.",
    ];
    // Each rule is its own paragraph, separated by one blank line.
    return [ruleOne, ruleTwo, ruleTwoA, ruleThree]
        .map((ruleLines) => ruleLines.join("\n"))
        .join("\n\n");
}
122
/**
 * Returns the "How to Generate Tests — Tool Workflows" prompt section, which
 * tells the model which Skyramp tools to call for integration, contract/fuzz,
 * UI and E2E tests, and which auth header name to pass to each call.
 *
 * @param {string} [authHeaderValue] - Auth header name to interpolate into the
 *   instructions. When missing or blank, "Authorization" is used so the prompt
 *   never contains the literal text `authHeader: "undefined"`.
 * @returns {string} Markdown text (no trailing newline).
 */
export function buildToolWorkflows(authHeaderValue) {
    const authHeader = typeof authHeaderValue === "string" && authHeaderValue.trim() !== ""
        ? authHeaderValue
        : "Authorization";
    return `## How to Generate Tests — Tool Workflows

**Auth Header:** \`authHeader: "${authHeader}"\` — pass to EVERY tool call below.

**For multi-endpoint workflows (integration tests) — Scenario → Integration pipeline:**
1. Call \`skyramp_scenario_test_generation\` once per step: \`scenarioName\`, \`destination\`,
\`baseURL\`, \`method\`, \`path\`, \`requestBody\`, \`statusCode\`, \`authHeader: "${authHeader}"\`.
**OpenAPI spec is NOT required.** \`apiSchema\` is OPTIONAL — omit it if no spec exists.
\`requestBody\` MUST use realistic field values from source code schemas (Zod, Pydantic, DTOs).
Never send \`{}\` — inspect the source code to determine the correct request body shape.
Use unique names with timestamp suffix to avoid conflicts on re-runs.
For GET/PUT/DELETE with path IDs, use a placeholder — chaining resolves the real ID.
2. Produces a \`scenario_<name>.json\` capturing the multi-step flow.
3. Call \`skyramp_integration_test_generation\` with \`scenarioFile\` AND \`authHeader: "${authHeader}"\`.
Do NOT pass \`chainingKey\` — auto-set to \`response.id\`.

**For single-endpoint tests (contract/fuzz):**
\`skyramp_{type}_test_generation\` with \`endpointURL\` (full URL incl. base + path), \`method\`,
\`authHeader: "${authHeader}"\`, and \`requestData\` from source code schemas.
**OpenAPI is NOT required** — \`endpointURL\` is sufficient. Only pass \`apiSchema\` if one exists.

**For UI tests (no Playwright recording):**
1. \`skyramp_start_trace_collection\` (playwright: true)
2. Perform browser steps
3. \`skyramp_stop_trace_collection\`
4. \`skyramp_ui_test_generation\` with playwright zip

**For E2E tests:**
Same trace flow, pass both trace file and playwright zip to \`skyramp_e2e_test_generation\`.`;
}
153
/**
 * Returns the "Coverage Checklist" prompt section: which test types to
 * recommend per endpoint, what each recommendation must contain, how to handle
 * missing artifacts, how many recommendations to return, the expected
 * test-type distribution, and the final mandatory rules.
 *
 * @param {{path?: string}|null|undefined} openApiSpec - When it carries a
 *   `path`, a note telling the model to pass `apiSchema` is included.
 * @param {boolean} isUIOnlyPR - Selects the UI-heavy distribution and marks
 *   fuzz/contract as skipped.
 * @param {boolean} hasFrontendChanges - Selects the mixed distribution when
 *   the PR is not UI-only.
 * @param {string} [authHeaderValue] - Accepted for interface compatibility;
 *   not used in the returned text.
 * @param {number} [topN=10] - Number of recommendations to request. Values
 *   that are missing, non-numeric or below 1 fall back to 10.
 * @returns {string} Markdown text (no trailing newline).
 */
export function buildCoverageChecklist(openApiSpec, isUIOnlyPR, hasFrontendChanges, authHeaderValue, topN) {
    const DEFAULT_TOP_N = 10;
    const MAX_GENERATED = 4;
    const parsedTopN = Math.floor(Number(topN));
    const requested = Number.isFinite(parsedTopN) && parsedTopN >= 1 ? parsedTopN : DEFAULT_TOP_N;
    // Never claim more tests will be generated than were requested.
    const generatedCount = Math.min(MAX_GENERATED, requested);
    const generationNote = requested > generatedCount
        ? `- The top ${generatedCount} will be generated and executed; recommendations #${generatedCount + 1}-#${requested} will be reported but not generated.`
        : `- All ${requested} recommendations will be generated and executed.`;
    // Only mention the spec when there is an actual path to point at.
    const specNote = openApiSpec?.path
        ? `\n**OpenAPI Spec**: \`${openApiSpec.path}\` — pass \`apiSchema: "${openApiSpec.path}"\` to ALL tool calls.\n`
        : "";
    let distribution;
    if (isUIOnlyPR) {
        distribution = `- ≥50% UI tests, ≥30% E2E tests, remaining: integration. 0% fuzz/contract/smoke.`;
    }
    else if (hasFrontendChanges) {
        distribution = `- ≥30% integration, ≥20% E2E, ≥15% UI (at least 1 UI + 1 E2E MANDATORY). Remaining: fuzz + contract. 0% smoke.`;
    }
    else {
        distribution = `- ≥60% integration (multi-resource FIRST, then CRUD). Remaining: fuzz + contract. 0% smoke.`;
    }
    const skipUI = isUIOnlyPR ? " (Skip for UI-only PRs)" : "";
    return `## Coverage Checklist
${specNote}
For EACH endpoint, recommend ALL applicable types **that don't already exist**:
1. **Integration** (HIGHEST) — per workflow scenario + per resource CRUD.
2. **E2E** — per user flow spanning frontend → backend.
3. **UI** — per changed component/page.${isUIOnlyPR ? " **PRIMARY for this PR.**" : ""}
4. **Fuzz** — per POST/PUT with request body.${skipUI}
5. **Contract** — per new/changed response schema.${skipUI}
6. **NO smoke tests.**

## For Each Recommendation:
1. Test type 2. Priority (high/medium/low) 3. Target endpoint/scenario
4. **What it validates** (business logic, not just "tests the endpoint")
5. **Skyramp tool call details** — exact tool + key params for zero-editing execution
6. For integration/E2E: reference draftedScenario by scenarioName

## When Artifacts Are Missing — NEVER mark "blocked", NEVER skip the recommendation:
- **No OpenAPI spec** → use \`endpointURL\` (full URL) and \`requestBody\` from source code.
Scenario generation, contract, fuzz, and integration tests ALL work without OpenAPI.
- **No Playwright recording** → recommend the UI/E2E test anyway. Provide step-by-step
instructions for the user to record a trace with \`skyramp_start_trace_collection\`.
- **No backend trace** → recommend the test anyway. Use scenario generation pipeline instead.

## Generate Many, Show Few
Internally consider ALL possible tests from the endpoint catalog (endpoints × interaction types
+ scenarios). Then select the top ${requested} most valuable, ranked by priority. In your output, include:
- \`totalConsidered: <number>\` — how many candidate tests you evaluated
- The curated top ${requested} recommendations, ranked #1 (highest value) to #${requested}
${generationNote}
Therefore, ensure the top ${generatedCount} are the most impactful tests. For mixed PRs with frontend changes,
the top ${generatedCount} MUST include at least 1 E2E or UI test alongside integration tests.

Prioritize integration/E2E, fill with fuzz/contract.
${distribution}

## MANDATORY RULES:
1. "high"/"medium"/"low" only — no numeric scores.
2. Never mark "blocked". 3. No file creation. 4. Order: integration → E2E → UI → fuzz → contract.
5. Reference draftedScenarios by name. 6. Reference interactions by description.
7. Every recommendation = enough detail for direct tool invocation.

**FINAL CHECK:** Count: workflow scenarios → integration tests, resources → CRUD tests,
user flows → E2E, components → UI, POST/PUT → fuzz${skipUI}, schemas → contract${skipUI}.
Total must be ≥ ${requested}.

Generate recommendations now.`;
}
@@ -0,0 +1,71 @@
1
+ import { z } from "zod";
2
+ import { StateManager, getSessionFilePath, hasSessionData, getSessionData, } from "../../utils/AnalysisStateManager.js";
3
+ import { logger } from "../../utils/logger.js";
4
+ import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
5
+ import { getWorkspaceAuthHeader } from "../../utils/workspaceAuth.js";
6
/**
 * Registers the `skyramp_recommend_tests` MCP prompt on `server`.
 *
 * The prompt handler loads the analysis stored for `sessionId` (in-process
 * session data first, then the state file), builds the recommendation prompt
 * via `buildRecommendationPrompt`, and returns it as a single user message
 * prefixed with session details and the list of related MCP resources.
 *
 * The handler throws an `Error` when `sessionId` is missing, when no stored
 * session can be found, or when the session has no `analysis` payload.
 *
 * @param {object} server - MCP server exposing `registerPrompt`.
 * @returns {void}
 */
export function registerRecommendTestsPrompt(server) {
    server.registerPrompt("skyramp_recommend_tests", {
        description: "Generate test recommendations from enriched repository analysis. " +
            "Provide a sessionId from skyramp_analyze_repository.",
        argsSchema: {
            sessionId: z
                .string()
                .describe("Session ID from skyramp_analyze_repository"),
            // Deliberately no schema-level default: when the caller omits `scope`
            // the handler must fall back to the scope stored with the analysis.
            // A `.default("full_repo")` here could pre-fill the argument (Zod 4
            // applies defaults inside optional fields) and mask that fallback.
            scope: z
                .enum(["full_repo", "current_branch_diff"])
                .optional()
                .describe("Analysis scope (defaults to the scope used during analysis)"),
            focus: z
                .enum(["all", "interactions", "scenarios"])
                .default("all")
                .optional()
                .describe("Focus area: all tests, interaction-based (contract/fuzz), or scenario-based (integration/e2e)"),
        },
    }, async (args) => {
        const sessionId = args.sessionId;
        if (!sessionId) {
            throw new Error("sessionId is required");
        }
        // Try process memory first, then fall back to state file
        let data = null;
        if (hasSessionData(sessionId)) {
            data = getSessionData(sessionId);
        }
        else {
            // Prefer the state path registered for this session, if any;
            // otherwise derive the state manager from the session id.
            const registeredPath = getSessionFilePath(sessionId);
            const mgr = registeredPath
                ? StateManager.fromStatePath(registeredPath)
                : StateManager.fromSessionId(sessionId);
            if (!mgr.exists()) {
                throw new Error(`Analysis session "${sessionId}" not found. Run skyramp_analyze_repository first.`);
            }
            data = await mgr.readData();
        }
        if (!data?.analysis) {
            throw new Error(`Session "${sessionId}" has no analysis data.`);
        }
        // Explicit argument > scope recorded during analysis > full repo.
        const scope = args.scope || data.analysisScope || "full_repo";
        const focus = args.focus || "all";
        // Diff-scoped runs ask for fewer recommendations than full-repo runs.
        const effectiveTopN = scope === "current_branch_diff" ? 7 : 10;
        const workspaceAuthHeader = data.repositoryPath
            ? await getWorkspaceAuthHeader(data.repositoryPath)
            : undefined;
        const prompt = buildRecommendationPrompt(data.analysis, scope, focus, effectiveTopN, data.prContext, workspaceAuthHeader);
        logger.info("Serving recommendation prompt via MCP Prompt", {
            sessionId,
            scope,
        });
        return {
            messages: [
                {
                    role: "user",
                    content: {
                        type: "text",
                        text: `Session: ${sessionId}\nRepository: ${data.repositoryPath}\nScope: ${scope}\n\nAvailable MCP Resources:\n- skyramp://analysis/${sessionId}/summary\n- skyramp://analysis/${sessionId}/endpoints\n- skyramp://analysis/${sessionId}/scenarios\n- skyramp://analysis/${sessionId}/diff\n\n${prompt}`,
                    },
                },
            ],
        };
    });
}