@skyramp/mcp 0.0.61 → 0.0.63-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/build/index.js +55 -17
  2. package/build/prompts/test-maintenance/drift-analysis-prompt.js +59 -0
  3. package/build/prompts/test-maintenance/driftAnalysisSections.js +153 -0
  4. package/build/prompts/test-recommendation/analysisOutputPrompt.js +113 -0
  5. package/build/prompts/test-recommendation/recommendationSections.js +193 -0
  6. package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +65 -0
  7. package/build/prompts/test-recommendation/test-recommendation-prompt.js +173 -100
  8. package/build/prompts/testGenerationPrompt.js +2 -3
  9. package/build/prompts/testbot/testbot-prompts.js +100 -74
  10. package/build/resources/analysisResources.js +248 -0
  11. package/build/services/ScenarioGenerationService.js +42 -40
  12. package/build/services/TestExecutionService.js +3 -16
  13. package/build/services/TestExecutionService.test.js +167 -0
  14. package/build/services/containerEnv.js +36 -0
  15. package/build/tools/generate-tests/generateScenarioRestTool.js +24 -6
  16. package/build/tools/submitReportTool.js +28 -0
  17. package/build/tools/test-maintenance/stateCleanupTool.js +8 -0
  18. package/build/tools/test-management/actionsTool.js +389 -0
  19. package/build/tools/test-management/analyzeChangesTool.js +653 -0
  20. package/build/tools/test-management/analyzeTestHealthTool.js +134 -0
  21. package/build/tools/test-management/executeTestsTool.js +198 -0
  22. package/build/tools/test-management/index.js +5 -0
  23. package/build/tools/test-management/stateCleanupTool.js +163 -0
  24. package/build/tools/test-recommendation/analyzeRepositoryTool.js +386 -217
  25. package/build/tools/test-recommendation/recommendTestsTool.js +162 -163
  26. package/build/tools/workspace/initializeWorkspaceTool.js +1 -1
  27. package/build/types/RepositoryAnalysis.js +100 -12
  28. package/build/utils/AnalysisStateManager.js +56 -23
  29. package/build/utils/branchDiff.js +47 -0
  30. package/build/utils/initAgent.js +62 -26
  31. package/build/utils/pr-comment-parser.js +244 -0
  32. package/build/utils/projectMetadata.js +188 -0
  33. package/build/utils/projectMetadata.test.js +81 -0
  34. package/build/utils/repoScanner.js +425 -0
  35. package/build/utils/routeParsers.js +213 -0
  36. package/build/utils/routeParsers.test.js +87 -0
  37. package/build/utils/scenarioDrafting.js +119 -0
  38. package/build/utils/scenarioDrafting.test.js +66 -0
  39. package/build/utils/skyrampMdContent.js +100 -0
  40. package/build/utils/trace-parser.js +166 -0
  41. package/build/utils/workspaceAuth.js +16 -0
  42. package/package.json +2 -2
  43. package/build/prompts/test-recommendation/repository-analysis-prompt.js +0 -326
  44. package/build/prompts/test-recommendation/test-mapping-prompt.js +0 -266
  45. package/build/tools/test-recommendation/mapTestsTool.js +0 -243
  46. package/build/types/TestMapping.js +0 -173
  47. package/build/utils/scoring-engine.js +0 -380
package/build/index.js CHANGED
@@ -19,21 +19,17 @@ import { registerLoginTool } from "./tools/auth/loginTool.js";
19
19
  import { registerLogoutTool } from "./tools/auth/logoutTool.js";
20
20
  import { registerFixErrorTool } from "./tools/fixErrorTool.js";
21
21
  import { registerAnalyzeRepositoryTool } from "./tools/test-recommendation/analyzeRepositoryTool.js";
22
- import { registerMapTestsTool } from "./tools/test-recommendation/mapTestsTool.js";
23
22
  import { registerRecommendTestsTool } from "./tools/test-recommendation/recommendTestsTool.js";
23
+ import { registerRecommendTestsPrompt } from "./prompts/test-recommendation/registerRecommendTestsPrompt.js";
24
24
  import { registerModularizationTool } from "./tools/code-refactor/modularizationTool.js";
25
25
  import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
26
26
  import { registerScenarioTestTool } from "./tools/generate-tests/generateScenarioRestTool.js";
27
- import { registerDiscoverTestsTool } from "./tools/test-maintenance/discoverTestsTool.js";
28
- import { registerAnalyzeTestDriftTool } from "./tools/test-maintenance/analyzeTestDriftTool.js";
29
- import { registerExecuteBatchTestsTool } from "./tools/test-maintenance/executeBatchTestsTool.js";
30
- import { registerCalculateHealthScoresTool } from "./tools/test-maintenance/calculateHealthScoresTool.js";
31
- import { registerActionsTool } from "./tools/test-maintenance/actionsTool.js";
32
- import { registerStateCleanupTool } from "./tools/test-maintenance/stateCleanupTool.js";
27
+ import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerExecuteTestsTool, registerActionsTool, registerStateCleanupTool, } from "./tools/test-management/index.js";
33
28
  import { registerTestbotPrompt, registerTestbotResource, } from "./prompts/testbot/testbot-prompts.js";
34
29
  import { registerInitTestbotTool } from "./tools/initTestbotTool.js";
35
30
  import { registerSubmitReportTool } from "./tools/submitReportTool.js";
36
31
  import { registerInitializeWorkspaceTool } from "./tools/workspace/initializeWorkspaceTool.js";
32
+ import { registerAnalysisResources } from "./resources/analysisResources.js";
37
33
  import { AnalyticsService } from "./services/AnalyticsService.js";
38
34
  import { initCheck } from "./utils/initAgent.js";
39
35
  const server = new McpServer({
@@ -47,13 +43,38 @@ const server = new McpServer({
47
43
  prompts: {
48
44
  listChanged: true,
49
45
  },
46
+ resources: {
47
+ listChanged: true,
48
+ },
50
49
  },
51
- instructions: `Skyramp MCP Server — generates and executes API tests (smoke, fuzz, contract, load, integration, E2E, UI).
50
+ instructions: `Skyramp MCP Server — generates and executes API tests (fuzz, contract, integration, E2E, UI).
52
51
 
53
52
  ## Rules
54
53
  - NEVER show CLI commands. ALWAYS use the MCP tools provided.
55
54
  - For UI and E2E tests, use the trace collection start/stop tools.
56
55
 
56
+ ## Test Recommendation Flow (2-step)
57
+ 1. Call \`skyramp_analyze_repository\` → returns a \`sessionId\`.
58
+ The analysis scans source code (code-first) to build enriched endpoints
59
+ (Path → Method → Interaction with request/response bodies, headers, cookies)
60
+ and draft user-flow scenarios for integration/E2E tests.
61
+ 2. Call \`skyramp_recommend_tests\` with \`sessionId\` → the LLM reasons over the
62
+ enriched data to recommend tests, referencing specific interactions and scenarios.
63
+
64
+ ## Test Health Analysis Flow (4-step)
65
+ 1. Call \`skyramp_analyze_changes\` with \`repositoryPath\` and \`scope\` → discovers existing tests, scans endpoints, computes branch diff → returns a \`stateFile\`.
66
+ 2. Call \`skyramp_analyze_test_health\` with \`stateFile\` → runs drift analysis + health scoring + LLM semantic assessment → returns enriched \`stateFile\`.
67
+ 3. (Optional) Call \`skyramp_execute_tests\` with \`stateFile\` → runs tests live to verify status.
68
+ 4. Call \`skyramp_actions\` with \`stateFile\` → executes UPDATE/REGENERATE/ADD recommendations.
69
+
70
+ After analysis, you can also inspect data via MCP Resources:
71
+ - \`skyramp://analysis/{sessionId}/summary\` — high-level overview
72
+ - \`skyramp://analysis/{sessionId}/endpoints\` — compact endpoint listing
73
+ - \`skyramp://analysis/{sessionId}/endpoints/{path}\` — full path detail
74
+ - \`skyramp://analysis/{sessionId}/endpoints/{path}/{method}\` — single method detail
75
+ - \`skyramp://analysis/{sessionId}/scenarios\` — drafted scenarios
76
+ - \`skyramp://analysis/{sessionId}/diff\` — branch diff context
77
+
57
78
  ## Workspace Initialization (before ANY other Skyramp tool)
58
79
  Follow this flow EVERY time before calling any Skyramp tool:
59
80
 
@@ -78,16 +99,23 @@ Before calling ANY test generation tool, you MUST follow this flow:
78
99
  2. **Extract** the \`language\`, \`framework\`, \`outputDir\`, and \`api.baseUrl\` from the services section.
79
100
  3. **Use those values** as defaults for the test generation tool call. Do NOT ask the user for these values if they are already configured in the workspace file.
80
101
  4. **CRITICAL — endpointURL**: The \`endpointURL\` parameter MUST be the full URL to the specific endpoint being tested, NOT just the base URL. Construct it by combining \`api.baseUrl\` with the endpoint path. Example: if \`api.baseUrl\` is \`http://localhost:8000\` and the endpoint is \`/api/v1/products\`, pass \`endpointURL: "http://localhost:8000/api/v1/products"\`. NEVER pass just the base URL (e.g. \`http://localhost:8000\`) as \`endpointURL\`.
81
- 5. **If the workspace file does not exist**, or the needed values (language, framework, outputDir) are missing from the workspace config, ASK the user which language and framework they want before calling the tool.
82
- 6. The user can always override workspace defaults by explicitly specifying values in their request.
102
+ 5. **CRITICAL scenario generation**: When calling \`skyramp_scenario_test_generation\`, ALWAYS pass:
103
+ - \`baseURL\`: The full base URL from \`api.baseUrl\` (e.g., \`http://localhost:3000\`). This determines the scheme, host, and port in the generated trace. Without it, the trace defaults to https:443 which is almost always wrong for local development.
104
+ - \`authHeader\`: The auth header name from \`api.authHeader\` in the workspace config. Use \`Cookie\` for cookie/session-based auth (NextAuth, etc.), \`Authorization\` for Bearer tokens, \`X-API-Key\` for API keys. Without it, the trace defaults to \`Authorization: Bearer\` which breaks cookie-based apps.
105
+ - \`apiSchema\` is OPTIONAL — omit it for code-first apps without OpenAPI specs.
106
+ 6. **CRITICAL — integration test from scenario**: When calling \`skyramp_integration_test_generation\` with a \`scenarioFile\`, ALSO pass \`authHeader\` (same value as used in scenario generation). This tells the CLI which header to parameterize with the auth token. Without it, the generated test defaults to \`Authorization: Bearer\` regardless of what's in the trace.
107
+ 7. **If the workspace file does not exist**, or the needed values (language, framework, outputDir) are missing from the workspace config, ASK the user which language and framework they want before calling the tool.
108
+ 8. The user can always override workspace defaults by explicitly specifying values in their request.
83
109
  `,
84
110
  });
85
111
  // Check for first-time invocation after version update (runs in background, doesn't block)
86
112
  let initCheckInFlight = false;
87
113
  let initCheckDone = false;
114
+ const INIT_MESSAGE = "Skyramp init: Triggering pull of Skyramp worker and executor images if not present locally.";
88
115
  const originalRegisterTool = server.registerTool.bind(server);
89
116
  server.registerTool = function (name, definition, handler) {
90
117
  const wrappedHandler = async (...args) => {
118
+ let triggeredInitThisCall = false;
91
119
  if (!initCheckDone && !initCheckInFlight) {
92
120
  // Guard with inFlight so concurrent tool calls don't each spawn a new initCheck(),
93
121
  // but allow retry on failure (initCheckInFlight is reset to false on error).
@@ -96,6 +124,7 @@ server.registerTool = function (name, definition, handler) {
96
124
  // unreachable. Deferring via setImmediate ensures the tool response is written to
97
125
  // stdout (and acknowledged by the MCP client) before any blocking FFI call runs.
98
126
  initCheckInFlight = true;
127
+ triggeredInitThisCall = true;
99
128
  setImmediate(() => {
100
129
  initCheck()
101
130
  .then(() => {
@@ -109,7 +138,15 @@ server.registerTool = function (name, definition, handler) {
109
138
  });
110
139
  });
111
140
  }
112
- return handler(...args);
141
+ const result = await handler(...args);
142
+ if (triggeredInitThisCall && result) {
143
+ const content = result.content ?? [];
144
+ result.content = [
145
+ { type: "text", text: INIT_MESSAGE },
146
+ ...content,
147
+ ];
148
+ }
149
+ return result;
113
150
  };
114
151
  return originalRegisterTool(name, definition, wrappedHandler);
115
152
  };
@@ -119,6 +156,7 @@ const prompts = [
119
156
  registerTestGenerationPrompt,
120
157
  registerStartTraceCollectionPrompt,
121
158
  registerTestHealthPrompt,
159
+ registerRecommendTestsPrompt,
122
160
  ];
123
161
  if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
124
162
  prompts.push(registerTestbotPrompt);
@@ -148,13 +186,13 @@ const codeQualityTools = [
148
186
  codeQualityTools.forEach((registerTool) => registerTool(server));
149
187
  // Register test recommendation tools
150
188
  registerAnalyzeRepositoryTool(server);
151
- registerMapTestsTool(server);
152
189
  registerRecommendTestsTool(server);
153
- // Register test maintenance tools
154
- registerDiscoverTestsTool(server);
155
- registerAnalyzeTestDriftTool(server);
156
- registerExecuteBatchTestsTool(server);
157
- registerCalculateHealthScoresTool(server);
190
+ // Register analysis resources (MCP Resources for enriched data access)
191
+ registerAnalysisResources(server);
192
+ // Register unified test-management tools (replaces separate test-maintenance tools)
193
+ registerAnalyzeChangesTool(server);
194
+ registerAnalyzeTestHealthTool(server);
195
+ registerExecuteTestsTool(server);
158
196
  registerActionsTool(server);
159
197
  registerStateCleanupTool(server);
160
198
  // Register workspace management tools
@@ -0,0 +1,59 @@
1
+ import { buildDriftScoringGuide, buildActionDecisionMatrix, buildBreakingChangePatterns, buildTestAssessmentGuidelines, buildAddRecommendationGuidelines, buildDriftOutputChecklist, } from "./driftAnalysisSections.js";
2
/**
 * Assembles the "Test Health Analysis" prompt: repository header, optional
 * branch diff, existing-test listing, optional scanned-endpoint listing, and
 * the guidance sections produced by the driftAnalysisSections builders.
 *
 * @param {object} params
 * @param {Array<object>} [params.existingTests=[]] Entries read for `testFile`,
 *   `testType` and the optional `drift.driftScore`.
 * @param {string} [params.parsedDiff] Diff text; split on newlines and scanned
 *   for `**METHOD path**` markers to count new endpoints.
 * @param {Array<object>} [params.scannedEndpoints=[]] Entries read for `path`
 *   and either a `methods` array or a single `method`.
 * @param {string} params.repositoryPath Shown in the prompt header.
 * @param {string} params.stateFile Echoed into the closing `skyramp_actions` instruction.
 * @returns {string} The full markdown prompt.
 */
export function buildDriftAnalysisPrompt(params) {
  // Defaults keep the builder from throwing on `.length` when a caller omits a list.
  const {
    existingTests = [],
    parsedDiff,
    scannedEndpoints = [],
    repositoryPath,
    stateFile,
  } = params;
  // Only this many diff lines are embedded in the prompt.
  const MAX_DIFF_LINES = 200;
  // Detect new endpoints count from parsedDiff
  let newEndpointCount = 0;
  let diffSection = "";
  if (parsedDiff) {
    const lines = parsedDiff.split("\n");
    const epMatches = parsedDiff.match(/(?:^|\n)\*\*(GET|POST|PUT|PATCH|DELETE)\s+[^\*]+\*\*/gm);
    if (epMatches) {
      newEndpointCount = epMatches.length;
    }
    // Tell the reader when the embedded diff is incomplete instead of cutting it silently.
    const truncationNote = lines.length > MAX_DIFF_LINES
      ? `_Diff truncated: showing the first ${MAX_DIFF_LINES} of ${lines.length} lines._\n`
      : "";
    diffSection = `## Branch Diff
\`\`\`
${lines.slice(0, MAX_DIFF_LINES).join("\n")}
\`\`\`
${truncationNote}`;
  }
  const testListSection = existingTests.length > 0
    ? `## Existing Test Files (${existingTests.length})
${existingTests
      .map((t) => {
        const score = t.drift?.driftScore !== undefined ? ` [drift: ${t.drift.driftScore}]` : "";
        return `- ${t.testFile} (${t.testType})${score}`;
      })
      .join("\n")}
`
    : `## Existing Test Files
No existing Skyramp tests found in repository.
`;
  const scannedSection = scannedEndpoints.length > 0
    ? `## Scanned Endpoints (${scannedEndpoints.length})
${scannedEndpoints.map((ep) => `- ${Array.isArray(ep.methods) ? ep.methods.join("|") : ep.method} ${ep.path}`).join("\n")}
`
    : "";
  return `# Test Health Analysis

**Repository**: \`${repositoryPath}\`
**Existing tests**: ${existingTests.length}
**New endpoints in diff**: ${newEndpointCount}

${diffSection}
${testListSection}
${scannedSection}
${buildDriftScoringGuide()}

${buildActionDecisionMatrix()}

${buildBreakingChangePatterns()}

${buildTestAssessmentGuidelines()}

${buildAddRecommendationGuidelines()}

${buildDriftOutputChecklist(existingTests.length, newEndpointCount)}

After completing the assessment above, call \`skyramp_actions\` with \`stateFile: "${stateFile}"\`

**CRITICAL**: Do NOT create any .json or .md files. Only call skyramp_actions when done.`;
}
@@ -0,0 +1,153 @@
1
+ /**
2
+ * Modular section builders for the Drift Analysis prompt,
3
+ * mirroring the recommendationSections.ts pattern.
4
+ */
5
/**
 * Renders the markdown section describing the 0–100 drift score bands.
 *
 * @returns {string} Heading, a Score/Label/Meaning table, and a closing instruction.
 */
export function buildDriftScoringGuide() {
  // One entry per table row: [score range, label, meaning].
  const bands = [
    ["0–20", "IGNORE", "No meaningful drift — test is still valid as-is"],
    ["21–40", "VERIFY", "Minor changes detected — review but likely fine"],
    ["41–70", "UPDATE", "Breaking changes detected — test needs edits"],
    ["71–100", "REGENERATE", "Major structural changes — regenerate from scratch"],
  ];
  const tableRows = bands.map(([range, label, meaning]) => `| ${range} | ${label} | ${meaning} |`);
  return [
    "## Drift Score Guide (0–100)",
    "",
    "| Score | Label | Meaning |",
    "|-------|-------|---------|",
    ...tableRows,
    "",
    "Assign each existing test a score based on how much the codebase has changed relative to what the test expects.",
  ].join("\n");
}
17
/**
 * Renders the markdown section that maps drift findings to an action
 * (IGNORE / VERIFY / UPDATE / REGENERATE / ADD) and lists the tie-break rules.
 *
 * @returns {string} Heading, action table, and a bulleted rule list.
 */
export function buildActionDecisionMatrix() {
  // [action name, when to use it] — rendered as one table row each.
  const actions = [
    ["IGNORE", "Drift score 0–20; no breaking changes AND no additive field gaps detected"],
    ["VERIFY", "Drift score 21–40; minor changes, manual review recommended"],
    ["UPDATE", "Drift score 25–70; breaking changes OR additive fields added to a covered endpoint (new response field the test doesn't assert)"],
    ["REGENERATE", "Drift score 71–100; endpoint removed, major restructuring, or test is fundamentally broken"],
    ["ADD", "New endpoint detected in diff that has no corresponding test yet"],
  ];
  const rules = [
    "Prefer UPDATE over REGENERATE when changes are localized (e.g., only the URL path changed).",
    "Prefer IGNORE over VERIFY when all changed files are unrelated to the test's endpoint.",
    "Always use ADD for new endpoints when the action is scoped to new test creation.",
    "**Additive changes (new response fields) on a covered endpoint always trigger UPDATE** — even if existing assertions still pass. The test needs a new assertion for the added field.",
  ];
  const tableRows = actions.map(([action, when]) => `| **${action}** | ${when} |`);
  const ruleBullets = rules.map((rule) => `- ${rule}`);
  return [
    "## Action Decision Matrix",
    "",
    "For each test, choose one of:",
    "",
    "| Action | When to use |",
    "|--------|------------|",
    ...tableRows,
    "",
    "Rules:",
    ...ruleBullets,
  ].join("\n");
}
36
/**
 * Renders the markdown catalogue of diff patterns to look for, grouped into
 * endpoint, request/response shape, auth, status code, and additive-field sections.
 *
 * @returns {string} Heading, intro sentence, and one `###` subsection per group.
 */
export function buildBreakingChangePatterns() {
  // Each section becomes "### title" followed by its lines; sections are blank-line separated.
  const sections = [
    {
      title: "Endpoint-level breaking changes",
      lines: [
        `- \`- @app.route("/old-path")\` / \`+ @app.route("/new-path")\` — renamed endpoint`,
        `- \`- router.get("/old")\` / \`+ router.get("/new")\` — renamed route`,
        `- \`- @GetMapping("/old")\` / \`+ @GetMapping("/new")\` — Spring rename`,
        `- Lines removing a route decorator entirely (endpoint removed)`,
      ],
    },
    {
      title: "Request/response shape changes",
      lines: [
        `- Field type changes: \`- field: int\` → \`+ field: string\``,
        `- Required field added: \`+ required: [..., "newField"]\``,
        `- Response field removed: \`- "responseField":\``,
        `- Enum value changes: \`- status: "active"\` → \`+ status: "enabled"\``,
      ],
    },
    {
      title: "Auth changes",
      lines: [
        `- \`+ @require_auth\`, \`+ @login_required\`, \`+ middleware(authMiddleware)\``,
        `- \`- @require_auth\` (auth removed)`,
        `- Token type changed: Bearer → Cookie`,
      ],
    },
    {
      title: "Status code changes",
      lines: [
        `- \`- return 200\` → \`+ return 201\``,
        `- \`- status_code=200\` → \`+ status_code=204\``,
        `- \`- res.status(201)\` → \`+ res.status(200)\``,
      ],
    },
    {
      title: "Additive response field changes (non-breaking but coverage gap)",
      lines: [
        `These do NOT break existing assertions but leave the new field untested. Always flag as UPDATE for covered endpoints.`,
        `- \`+ "newField": queryset.filter(...).count()\` added inside a \`Response({...})\` or \`res.json({...})\``,
        `- \`+ newField = serializers.XXXField()\` added to a serializer used by a tested endpoint`,
        `- \`+ "newField":\` added to a response body dict returned by the endpoint`,
        `- New key added inside an existing dict/object returned by the endpoint`,
      ],
    },
  ];
  const renderedSections = sections.map(({ title, lines }) => [`### ${title}`, ...lines].join("\n"));
  return [
    "## Breaking Change Patterns to Detect",
    "Scan the diff lines for these high-signal patterns:",
    ...renderedSections,
  ].join("\n\n");
}
70
/**
 * Renders the markdown walkthrough for assessing one existing test: endpoint
 * existence, breaking shape changes, additive field changes, auth changes,
 * and the final score/action assignment.
 *
 * @returns {string} Heading, intro sentence, and one `### Step …` subsection per step.
 */
export function buildTestAssessmentGuidelines() {
  // Each step is rendered as "### heading" followed by its body lines.
  const steps = [
    {
      heading: "Step 1: Check endpoint existence",
      body: [
        `Does the endpoint the test targets still exist in the codebase?`,
        `- If the endpoint path/method is no longer present → score 80+, action: REGENERATE`,
        `- If the endpoint was renamed → score 50–70, action: UPDATE (path substitution)`,
      ],
    },
    {
      heading: "Step 2: Check request/response shape (breaking changes)",
      body: [
        `Has the request body or response structure changed in a way that breaks the test?`,
        `- Compare test's expected fields against current schema/model definitions`,
        `- Type changes (string→int, int→string) → score 60+, action: UPDATE or REGENERATE`,
        `- New required fields the test doesn't send → score 50+, action: UPDATE`,
        `- Response fields the test asserts on have been removed → score 50+, action: UPDATE`,
      ],
    },
    {
      heading: "Step 2b: Check additive response field changes (coverage gaps)",
      body: [
        `**Even if existing assertions still pass**, does the diff add a new field to the response of an endpoint this test already covers?`,
        `- Look at the diff for lines like \`+ "newField":\` or \`+ newField =\` inside a view/serializer this test hits`,
        `- If YES → score 30, action: UPDATE — add an assertion for the new field (e.g. \`assert "newField" in response_body\` or \`assert response_body["newField"] >= 0\`)`,
        `- This applies even when the test only checks status codes — the test should be extended to cover the new field`,
        `- **Do NOT score IGNORE if a new response field was added to a covered endpoint**`,
      ],
    },
    {
      heading: "Step 3: Check auth changes",
      body: [
        `Has the authentication mechanism for this endpoint changed?`,
        `- Auth added where none existed → score 40+, action: UPDATE`,
        `- Auth method changed (bearer→cookie) → score 50+, action: UPDATE`,
        `- Auth removed → score 30+, action: VERIFY or UPDATE`,
      ],
    },
    {
      heading: "Step 4: Assign score and action",
      body: [
        `Based on the above, assign a final drift score 0–100 and choose the action (IGNORE / VERIFY / UPDATE / REGENERATE).`,
        `Provide a 1-2 sentence rationale.`,
        `- If Step 2b flagged an additive field → score must be ≥ 30 and action must be UPDATE, even if Steps 2/3 found no breaking changes.`,
      ],
    },
  ];
  const renderedSteps = steps.map(({ heading, body }) => [`### ${heading}`, ...body].join("\n"));
  return [
    "## Per-Test Assessment (4 Steps)",
    "For each existing test file, follow these steps:",
    ...renderedSteps,
  ].join("\n\n");
}
105
/**
 * Renders the markdown section explaining how to write ADD recommendations
 * for endpoints that appear in the diff without an existing test.
 *
 * @returns {string} Heading, a method → test-type table, and a numbered field list.
 */
export function buildAddRecommendationGuidelines() {
  // [HTTP method(s), recommended test types] — one table row each.
  const typesByMethod = [
    ["POST / PUT / PATCH", "integration, contract"],
    ["GET", "contract, smoke"],
    ["DELETE", "integration, smoke"],
  ];
  // Rendered as a 1-based numbered list in this order.
  const requiredFields = [
    "The endpoint path and method",
    "The recommended test types (from the table above)",
    "The Skyramp tool to call (e.g., `skyramp_contract_test_generation`, `skyramp_integration_test_generation`)",
    "The `endpointURL` to use (combine base URL + path)",
    "The language/framework to use (from workspace config or project metadata)",
  ];
  const tableRows = typesByMethod.map(([method, types]) => `| ${method} | ${types} |`);
  const numberedFields = requiredFields.map((text, index) => `${index + 1}. ${text}`);
  return [
    "## ADD Recommendations for New Endpoints",
    "",
    "For each new endpoint detected in the diff (not yet covered by any existing test):",
    "",
    "### Test type priority by HTTP method",
    "| Method | Recommended test types |",
    "|--------|----------------------|",
    ...tableRows,
    "",
    "### ADD recommendation format",
    "For each new endpoint, include:",
    ...numberedFields,
  ].join("\n");
}
125
/**
 * Renders the markdown "Output Checklist" the reader must complete before
 * calling `skyramp_actions`.
 *
 * @param {number} existingTestCount Number shown in the "Existing tests" heading.
 * @param {number} newEndpointCount When greater than 0, the per-endpoint ADD
 *   template is included; otherwise a "no new endpoints" note is shown.
 * @returns {string} The checklist section.
 */
export function buildDriftOutputChecklist(existingTestCount, newEndpointCount) {
  const fence = "```";
  const existingTestsBlock = [
    `### Existing tests (${existingTestCount} total)`,
    "For EACH existing test, output:",
    fence,
    "Test: <testFile>",
    "Drift Score: <0-100>",
    "Action: <IGNORE | VERIFY | UPDATE | REGENERATE>",
    "Rationale: <1-2 sentence explanation>",
    fence,
  ];
  let newEndpointsBlock;
  if (newEndpointCount > 0) {
    newEndpointsBlock = [
      `### New endpoints (${newEndpointCount} detected)`,
      "For EACH new endpoint, output:",
      fence,
      "Endpoint: <METHOD> <path>",
      "Action: ADD",
      "Test types: <contract | integration | smoke | ...>",
      "Rationale: <1 sentence>",
      fence,
    ];
  } else {
    newEndpointsBlock = [
      "### New endpoints",
      "No new endpoints detected in this diff.",
    ];
  }
  return [
    "## Output Checklist",
    "",
    "Complete ALL of the following before calling skyramp_actions:",
    "",
    ...existingTestsBlock,
    "",
    ...newEndpointsBlock,
    "",
    "### Final step",
    "After completing all assessments above, call `skyramp_actions` with the stateFile to execute the recommended changes.",
  ].join("\n");
}
@@ -0,0 +1,113 @@
1
/**
 * Builds the "Your Task" instructions appended to the repository analysis output.
 *
 * Full-repo scope produces a fixed three-step task. Branch-diff scope produces
 * a task whose second step depends on whether the diff touches API endpoints,
 * only non-API (UI) files, or neither.
 *
 * @param {object} p
 * @param {string} p.analysisScope `"current_branch_diff"` selects the PR-scoped task.
 * @param {string} [p.nextTool] `"skyramp_analyze_test_health"` selects the health
 *   flow; anything else selects the recommend-tests flow.
 * @param {string} p.sessionId Echoed into the final tool-call instruction.
 * @param {object} [p.parsedDiff] Read for `changedFiles`, `newEndpoints`, `modifiedEndpoints`.
 * @returns {string} Markdown task instructions.
 */
function buildEnrichmentInstructions(p) {
  const isDiffScope = p.analysisScope === "current_branch_diff";
  const useHealthFlow = p.nextTool === "skyramp_analyze_test_health";
  if (!isDiffScope) {
    const nextStep = useHealthFlow
      ? `### Step 3: Identify tests at risk of drift
Call \`skyramp_analyze_test_health\` with \`stateFile: "${p.sessionId}"\``
      : `### Step 3: Call recommend tests
Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
    return `## Your Task — Enrich & Recommend (full repo)

### Step 1: Read key files
Read \`package.json\` / \`requirements.txt\`, \`docker-compose.yml\`, route/controller files,
and model/schema files (Zod schemas, Pydantic models, TypeScript interfaces, DTOs)
to understand the tech stack, endpoint shapes, auth mechanisms, and request/response schemas.

### Step 2: Identify resource relationships
Map how endpoints relate to each other — which POST creates resources consumed by other endpoints?
**Resolve nested/sub-router paths** from the Router Mounting section above.

${nextStep}`;
  }
  const changedFileList = p.parsedDiff?.changedFiles ?? [];
  const changedFiles = changedFileList.join(", ");
  const hasApiEndpoints = Boolean(p.parsedDiff
    && (p.parsedDiff.newEndpoints.length > 0 || p.parsedDiff.modifiedEndpoints.length > 0));
  // Match an API-ish directory segment at the start of a repo-relative path
  // ("routes/users.js") as well as nested ("src/routes/users.js").
  const apiPathPattern = /(?:^|\/)(?:api|routes?|controllers?|routers?|handlers?|endpoints?)\//;
  // An empty file list must not count as UI-only: every() is vacuously true on [].
  const isUIOnly = !hasApiEndpoints
    && changedFileList.length > 0
    && changedFileList.every((file) => !apiPathPattern.test(file));
  const step2 = hasApiEndpoints
    ? `### Step 2: Discover related endpoints
Read handler code for the changed endpoints and their model/schema files (Zod schemas,
Pydantic models, DTOs) to understand request/response shapes. Find related endpoints via
imports, shared models, adjacent route files. Resolve nested/sub-router paths from Router
Mounting context.`
    : isUIOnly
      ? `### Step 2: Identify consumed API endpoints
UI-only PR — read changed components to find API calls (fetch, axios, hooks).`
      : `### Step 2: Identify affected endpoints
No API route changes detected — read changed files to identify affected endpoints.`;
  const step3Content = useHealthFlow
    ? `### Step 3: Identify tests at risk of drift
Assess which existing tests may be broken by the changes in this diff.

### Step 4: Call analyze test health
Call \`skyramp_analyze_test_health\` with \`stateFile: "${p.sessionId}"\``
    : `### Step 3: Draft integration scenarios
Draft multi-step scenarios simulating realistic user workflows:
- **Cross-resource data flow**: Foreign key relationships, parent→child creation, verification
- **Search/filter verification**: Create data, search, verify results
- **Negative/error paths**: Invalid references → appropriate errors
- **UI user journeys**: Concrete browser steps for frontend changes

**Quality:** Realistic request bodies, response data verification, actual field names for chaining.

### Step 4: Call recommend tests
Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
  return `## Your Task — Enrich & Recommend (PR-scoped)

### Step 1: Read the changed files
${changedFiles}

${step2}

${step3Content}`;
}
63
/**
 * Builds the markdown text returned after repository analysis: a header with
 * session/repository/scope, optional branch-diff context, the pre-scanned
 * endpoint catalog, an OpenAPI or router-mounting section, and the task
 * instructions from `buildEnrichmentInstructions`.
 *
 * @param {object} p
 * @param {string} p.sessionId
 * @param {string} p.repositoryPath
 * @param {string} p.analysisScope `"current_branch_diff"` switches the header
 *   count from pre-scanned endpoints to diff endpoints.
 * @param {object} [p.parsedDiff] Read for `currentBranch`, `baseBranch`,
 *   `changedFiles`, `newEndpoints`, `modifiedEndpoints`, `affectedServices`.
 * @param {Array<object>} [p.scannedEndpoints] Entries read for `path`,
 *   `sourceFile` and either a `methods` array or a single `method`.
 * @param {string} [p.wsBaseUrl]
 * @param {string} [p.wsAuthHeader] Falls back to `"Authorization"` when empty.
 * @param {string} [p.wsSchemaPath] When set, the OpenAPI section replaces the
 *   router-mounting section.
 * @param {string} [p.routerMountContext]
 * @returns {string} The analysis output text.
 */
export function buildAnalysisOutputText(p) {
  const isDiffScope = p.analysisScope === "current_branch_diff";
  // Tolerate a missing catalog instead of throwing on `.length`.
  const scannedEndpoints = p.scannedEndpoints ?? [];
  // Shared renderer for the new/modified endpoint lists.
  const describeEndpoints = (endpoints) => endpoints.map((e) => `${e.method} ${e.path} (${e.sourceFile})`).join(", ") || "none";
  // Accept both endpoint shapes, as buildDriftAnalysisPrompt does.
  const describeMethods = (ep) => (Array.isArray(ep.methods) ? ep.methods.join("|") : ep.method);
  const diffSection = p.parsedDiff
    ? `
## Branch Diff Context
**Branch**: \`${p.parsedDiff.currentBranch}\` → base: \`${p.parsedDiff.baseBranch}\`
**Changed Files** (${p.parsedDiff.changedFiles.length}): ${p.parsedDiff.changedFiles.join(", ")}
**New Endpoints** (${p.parsedDiff.newEndpoints.length}): ${describeEndpoints(p.parsedDiff.newEndpoints)}
**Modified Endpoints** (${p.parsedDiff.modifiedEndpoints.length}): ${describeEndpoints(p.parsedDiff.modifiedEndpoints)}
**Affected Services**: ${p.parsedDiff.affectedServices.join(", ") || "none"}
`
    : "";
  const endpointCatalog = scannedEndpoints.length > 0
    ? `
## Pre-Scanned Endpoint Catalog (${scannedEndpoints.length} routes)
${scannedEndpoints.map((ep) => ` ${describeMethods(ep)} ${ep.path} (${ep.sourceFile})`).join("\n")}
`
    : "";
  const wsLine = p.wsBaseUrl
    ? `**Base URL**: \`${p.wsBaseUrl}\` | **Auth header**: \`${p.wsAuthHeader || "Authorization"}\``
    : "";
  const specSection = p.wsSchemaPath
    ? `
## OpenAPI Spec Available
Spec at \`${p.wsSchemaPath}\`. **Read it** for authoritative paths and schemas.
Pass \`apiSchema: "${p.wsSchemaPath}"\` to ALL test generation tool calls.`
    : p.routerMountContext
      ? `
## Router Mounting / Nesting
\`\`\`
${p.routerMountContext}
\`\`\`
Use this to resolve full URL paths for nested endpoints.`
      : "";
  const enrichment = buildEnrichmentInstructions(p);
  return `# Repository Analysis

**Session ID**: \`${p.sessionId}\`
**Repository**: \`${p.repositoryPath}\`
**Analysis Scope**: \`${p.analysisScope}\`
${isDiffScope ? `**Diff endpoints**: ${(p.parsedDiff?.newEndpoints.length ?? 0) + (p.parsedDiff?.modifiedEndpoints.length ?? 0)}` : `**Pre-scanned endpoints**: ${scannedEndpoints.length}`}
${wsLine}
${p.wsSchemaPath ? `**OpenAPI Spec**: \`${p.wsSchemaPath}\` (spec-based flow)` : "**Flow**: Code-scanning (may miss nesting)"}

${diffSection}
${endpointCatalog}
${specSection}
${enrichment}

**CRITICAL**: No .json/.md file creation. Prioritize cross-resource workflows.`;
}
@@ -0,0 +1,193 @@
1
/** Default for the `maxGenerate` parameter of buildCoverageChecklist (how many top-ranked tests are generated). */
export const MAX_TESTS_TO_GENERATE = 4;
/** NOTE(review): presumably the cap on how many recommendations are requested (topN); not referenced in the visible part of this file, so confirm against callers. */
export const MAX_RECOMMENDATIONS = 10;
3
/**
 * Builds the static "Prioritization Dimensions" prompt section: a markdown
 * table of assessment dimensions followed by ranking guidance and the
 * quality-gate question.
 *
 * @returns {string} Markdown text for the section.
 */
export function buildPrioritizationDimensions() {
    // Each entry becomes one table row: [dimension label, question to assess].
    const dimensions = [
        ["Sophistication", "Does it test a multi-step workflow or non-obvious scenario? Or is it a simple request→response check?"],
        ["Bug-Finding Potential", "Does it target known failure modes (race conditions, data consistency, state transitions, cascade effects)?"],
        ["User Journey Relevance", "Does it reflect how real users interact with the system (from traces, business flows, or critical paths)?"],
        ["Coverage Gap", "Does it address an area with zero existing test coverage? Or does it duplicate what's already tested?"],
        ["Code Insight", "Is it derived from actual implementation analysis (e.g., spotted a middleware pattern, found an N+1 risk) rather than just API shape?"],
    ];
    const tableRows = dimensions.map(([label, question]) => `| **${label}** | ${question} |`);
    const sectionLines = [
        "## Prioritization Dimensions (evaluate each candidate test)",
        "",
        "For each candidate test, assess these dimensions using your judgment:",
        "",
        "| Dimension | What to assess |",
        "|-----------|---------------|",
        ...tableRows,
        "",
        "Candidates scoring well across MULTIPLE dimensions should be recommended first.",
        "Candidates satisfying only ONE dimension (e.g., covers a gap but is trivially simple) should be deprioritized.",
        "",
        '**Quality Gate:** For each candidate, ask: "Would a senior engineer be impressed by this test?"',
        "If the answer is no — deprioritize it. Impressive tests catch real bugs, exercise real workflows,",
        "and demonstrate understanding of the system's behavior. Trivial tests do not.",
    ];
    return sectionLines.join("\n");
}
23
/**
 * Builds the static "Test Examples" prompt section, contrasting examples of
 * tests worth recommending with examples to deprioritize or skip.
 *
 * @returns {string} Markdown text for the section.
 */
export function buildTestExamples() {
    const impressive = [
        "**Impressive tests (recommend these):**",
        `1. "Register user → login → create order → verify order appears in user's order list"`,
        "Cross-resource workflow with auth chaining and data verification across users + orders.",
        '2. "Create product with inventory=10 → place order for qty=10 → verify inventory=0 →',
        'place another order → verify 409 out-of-stock error"',
        "Cross-resource state machine + business rule validation (products + orders + inventory).",
        '3. "POST /users with duplicate email → verify 409 Conflict → verify original user unchanged"',
        "Error handling with side-effect verification — not just status code check.",
    ];
    const nonImpressive = [
        "**Non-impressive tests (deprioritize or skip):**",
        '1. "GET /products → 200" — trivial health check, no assertions beyond status code.',
        '2. "POST /products → GET /products/{id} → PUT /products/{id} → DELETE /products/{id}"',
        "Single-resource CRUD — baseline, not impressive by itself.",
        '3. "POST /products with missing name → 422" — obvious validation, already covered by contract/fuzz.',
    ];
    // Heading, then the two example groups, each separated by a blank line.
    return [
        "## Test Examples (calibrate your judgment)",
        "",
        ...impressive,
        "",
        ...nonImpressive,
    ].join("\n");
}
41
/**
 * Builds the static "Test Pattern Guidelines" prompt section, listing the
 * Tier 1 base patterns and the Tier 2 code-informed patterns.
 *
 * @returns {string} Markdown text for the section.
 */
export function buildTestPatternGuidelines() {
    const tierOne = [
        "### Tier 1 — Base Patterns",
        "- CRUD lifecycle per resource group (Create → Read → Update → Delete)",
        "- Auth flow (Register → Login → Access protected → Token refresh → Logout)",
        "- Pagination & filtering (boundary values, empty results, large page sizes)",
        "- Error responses (400, 401, 403, 404, 409, 422 — each with specific trigger)",
    ];
    const tierTwo = [
        "### Tier 2 — Code-Informed Patterns (higher value — look for these in the codebase)",
        "- **Middleware chains**: If auth/rate-limit/logging middleware exists, test the chain",
        "(e.g., rate limit hit → auth still checked → correct error returned)",
        "- **N+1 query risk**: If list endpoints join related data (e.g., orders with products),",
        "test with large datasets under load",
        "- **State machines**: If resources have status transitions (draft→published→archived),",
        "test invalid transitions (e.g., archived→draft should fail)",
        "- **Cascade deletes**: If deleting a parent removes children, verify cascade AND verify",
        "orphan prevention (delete product → orders referencing it get error or cascade)",
        "- **Race conditions**: If concurrent writes are possible (inventory deduction, counter",
        "increment), test concurrent requests under load",
        "- **Computed fields**: If response contains derived values (total, average, count),",
        "verify computation with known inputs",
        "- **Webhook/event side effects**: If endpoints trigger async operations, test that side",
        "effects occur (e.g., POST /orders triggers email notification)",
    ];
    const heading = "## Test Pattern Guidelines (reference, not rigid rules)";
    // Sections are separated from the heading and from each other by one blank line.
    return [heading, "", ...tierOne, "", ...tierTwo].join("\n");
}
66
/**
 * Builds the static "What Makes a Good Test" prompt section, with one
 * paragraph each for integration, E2E and UI tests.
 *
 * @returns {string} Markdown text for the section.
 */
export function buildTestQualityCriteria() {
    const integrationParagraph = [
        "**Integration tests** should demonstrate cross-resource data flow — step A creates data",
        "that step B depends on (e.g., create product \u2192 create order referencing that product's ID \u2192",
        "verify order contains correct product). Single-resource CRUD alone is not an integration test.",
        "Use realistic request bodies from source code schemas and verify response data, not just",
        "status codes.",
    ];
    const e2eParagraph = [
        "**E2E tests** should follow realistic user journeys end-to-end: browse products \u2192 search \u2192",
        "add to cart \u2192 checkout. Verify that frontend actions trigger the correct API calls and",
        "that the UI reflects backend state.",
    ];
    const uiParagraph = [
        "**UI tests** should exercise component behavior and interaction flows: fill form \u2192 validate",
        "inputs \u2192 submit \u2192 see confirmation. Include visual state changes (loading, error, empty)",
        "and accessibility checks.",
    ];
    // Lines within a paragraph are newline-separated; paragraphs are blank-line-separated.
    const paragraphs = [integrationParagraph, e2eParagraph, uiParagraph].map((lines) => lines.join("\n"));
    return ["## What Makes a Good Test", ...paragraphs].join("\n\n");
}
83
/**
 * Builds the "Generation Guidelines" prompt section.
 *
 * @param {boolean} isUIOnlyPR - When truthy, the Playwright-trace guidance is
 *   the UI-only-PR variant; otherwise the general "still recommend E2E/UI
 *   tests" variant is used.
 * @returns {string} Markdown text for the section.
 */
export function buildGenerationRules(isUIOnlyPR) {
    const intro = [
        "## Generation Guidelines",
        "",
        "**Scenario fidelity:** Every workflow scenario should reflect the actual resource",
        "relationships in the code. If the pre-drafted scenarios don't match the real data model,",
        "replace them with accurate ones.",
        "",
        "**Available test types:**",
        "- **Integration** — multi-endpoint workflows that chain data across resources",
        "- **Fuzz** — boundary/invalid input testing for POST/PUT endpoints",
        "- **Contract** — response schema validation for new/changed endpoints",
        "- **E2E** — user journeys spanning frontend to backend (needs Playwright traces)",
        "- **UI** — frontend component interaction flows (needs Playwright traces)",
    ];
    let traceGuidance;
    if (isUIOnlyPR) {
        traceGuidance = [
            "This is a **UI-only PR** — no backend changes. UI and E2E tests are most relevant.",
            "Without Playwright traces, recommend them with trace recording instructions",
            "(`skyramp_start_trace_collection` with `playwright: true`).",
        ];
    }
    else {
        traceGuidance = [
            "When no Playwright trace exists, still recommend E2E/UI tests with instructions for",
            "recording a trace using `skyramp_start_trace_collection` with `playwright: true`.",
        ];
    }
    const closing = [
        "**No duplicate coverage.** If an existing test already covers an endpoint + test type,",
        "recommend a different test that adds new coverage.",
        "",
        "Choose the test types and distribution that maximize coverage for this specific PR.",
        "No smoke tests.",
    ];
    // The trace guidance is padded with one blank line before and after it.
    return [...intro, "", ...traceGuidance, "", ...closing].join("\n");
}
110
/**
 * Builds the "How to Generate Tests — Tool Workflows" prompt section, which
 * describes the tool-call sequences for integration, contract/fuzz, UI and
 * E2E test generation.
 *
 * @param {string} [authHeaderValue] - Value interpolated into every
 *   `authHeader: "..."` instruction. When null or undefined it falls back to
 *   "Authorization" so the prompt never contains the literal text
 *   `authHeader: "undefined"`. An empty string is passed through unchanged.
 * @returns {string} Markdown text for the section.
 */
export function buildToolWorkflows(authHeaderValue) {
    // Same fallback the repository-analysis prompt in this file uses for its
    // "Auth header" line. `??` (not `||`) keeps an explicit "" intact.
    // NOTE(review): assumes authHeaderValue carries the same header as that prompt's wsAuthHeader — confirm.
    const authHeader = authHeaderValue ?? "Authorization";
    return `## How to Generate Tests — Tool Workflows

**Auth Header:** \`authHeader: "${authHeader}"\` — pass to EVERY tool call below.

**For multi-endpoint workflows (integration tests) — Scenario → Integration pipeline:**
1. Call \`skyramp_scenario_test_generation\` once per step: \`scenarioName\`, \`destination\`,
\`baseURL\`, \`method\`, \`path\`, \`requestBody\`, \`responseBody\`, \`authHeader: "${authHeader}"\`.
\`statusCode\` is optional — defaults: POST→201, DELETE→204, GET/PUT/PATCH→200. Only override for non-standard codes.
**OpenAPI spec is NOT required.** \`apiSchema\` is OPTIONAL — omit it if no spec exists.
\`requestBody\` should use realistic field values from source code schemas (Zod, Pydantic, DTOs).
\`responseBody\` should match the actual API response shape from source code (including all fields
returned by the controller — e.g., \`id\`, \`ownerId\`, \`createdAt\`, included relations like \`collection\`, \`tags\`).
Wrap in \`{"response": ...}\` if the API uses an envelope pattern. If omitted, a synthetic response is generated.
Inspect the source code to determine the correct request AND response body shapes — avoid sending \`{}\`.
Use unique names with timestamp suffix to avoid conflicts on re-runs.
For GET/PUT/DELETE with path IDs, use a placeholder — chaining resolves the real ID.
2. Produces a \`scenario_<name>.json\` in the same \`outputDir\` as the test files (not \`.skyramp/\`).
3. Call \`skyramp_integration_test_generation\` with \`scenarioFile\` AND \`authHeader: "${authHeader}"\`.
Do NOT pass \`chainingKey\` — defaults to \`response.id\`. After generation, the testbot
will verify and fix path param chaining in the generated test.

**For single-endpoint tests (contract/fuzz):**
\`skyramp_{type}_test_generation\` with \`endpointURL\` (full URL incl. base + path), \`method\`,
\`authHeader: "${authHeader}"\`, and \`requestData\` from source code schemas.
If an OpenAPI spec exists, ALSO pass \`apiSchema\` — it enables schema-aware validation
(contract tests verify response structure, fuzz tests generate smarter boundary values).
Without a spec, \`endpointURL\` alone is sufficient.

**For UI tests (no Playwright recording):**
1. \`skyramp_start_trace_collection\` (playwright: true)
2. Perform browser steps
3. \`skyramp_stop_trace_collection\`
4. \`skyramp_ui_test_generation\` with playwright zip

**For E2E tests:**
Same trace flow, pass both trace file and playwright zip to \`skyramp_e2e_test_generation\`.`;
}
148
/**
 * Builds the "Coverage Checklist" prompt section: optional OpenAPI-spec usage
 * notes, the applicable test types, what each recommendation must include,
 * and how many recommendations to select and generate.
 *
 * @param {{path: string}|null|undefined} openApiSpec - When truthy, its
 *   `path` is interpolated into the spec usage notes; otherwise the notes are
 *   omitted.
 * @param {boolean} isUIOnlyPR - Selects the UI-only-PR wording instead of the
 *   general list of available test types.
 * @param {number} [topN=MAX_RECOMMENDATIONS] - How many recommendations to
 *   select. Defaults to MAX_RECOMMENDATIONS so an omitted value no longer
 *   renders as "Top undefined".
 * @param {number} [maxGenerate=MAX_TESTS_TO_GENERATE] - How many of the top
 *   recommendations are generated; the rest are described as going to
 *   additionalRecommendations.
 * @returns {string} Markdown text for the section.
 */
export function buildCoverageChecklist(openApiSpec, isUIOnlyPR, topN = MAX_RECOMMENDATIONS, maxGenerate = MAX_TESTS_TO_GENERATE) {
    const specNote = openApiSpec
        ? `\n**OpenAPI Spec available**: \`${openApiSpec.path}\`
Use it actively:
- **Contract tests**: pass \`apiSchema: "${openApiSpec.path}"\` — the CLI validates response schemas against the spec.
- **Fuzz tests**: pass \`apiSchema: "${openApiSpec.path}"\` — the CLI generates boundary values from schema constraints.
- **Integration tests**: pass \`apiSchema\` to \`skyramp_scenario_test_generation\` — it extracts destination and request/response shapes.
- **Single-endpoint tests**: pass both \`endpointURL\` AND \`apiSchema\` for schema-aware generation.
\n`
        : "";
    // Number of recommendations that will actually be generated.
    const generateCount = Math.min(maxGenerate, topN);
    let overflowNote;
    if (topN > maxGenerate) {
        const firstOverflow = generateCount + 1;
        // With exactly one overflow item a range would read "#5–5"; use singular wording instead.
        const overflowSubject = firstOverflow === topN
            ? `recommendation #${topN} goes`
            : `recommendations #${firstOverflow}\u2013${topN} go`;
        overflowNote = `${overflowSubject} to additionalRecommendations in the report,\nso ensure the top ${generateCount} are the highest-impact tests.`;
    }
    else {
        overflowNote = "all will be generated.";
    }
    return `## Coverage Checklist
${specNote}
${isUIOnlyPR ? `**UI-only PR** — no backend changes.
Without Playwright traces, the testbot skips generation entirely — all recommendations
become additionalRecommendations in the report.
` : `**Available test types:** integration, fuzz, contract, E2E, UI. No smoke tests.
Choose based on what adds the most value for this PR's changes.
`}
## For Each Recommendation Include:
1. Test type 2. Priority (high/medium/low) 3. Target endpoint/scenario
4. What it validates (business logic, not just "tests the endpoint")
5. Skyramp tool call details — exact tool + key params for zero-editing execution
6. For integration/E2E: reference draftedScenario by scenarioName

## When Artifacts Are Missing
Recommend the test anyway — never mark it "blocked":
- **No OpenAPI spec** \u2192 use \`endpointURL\` and \`requestBody\` from source code
- **No Playwright recording** \u2192 provide trace recording instructions
- **No backend trace** \u2192 use the scenario generation pipeline

## Select the Top ${topN}
Consider all possible tests (endpoints \u00d7 interaction types + scenarios), then select the
top ${topN} most valuable. Include \`totalConsidered\` count in your output. The top ${generateCount} will
be generated; ${overflowNote}

- Each integration scenario's step sequence should be logically valid — preconditions
met by prior steps.

Each recommendation should include enough detail for direct tool invocation.
Reference draftedScenarios by name and interactions by description.
Use "high"/"medium"/"low" for priority — no numeric scores.
Total candidates should be \u2265 ${topN}.

Generate recommendations now.`;
}