@skyramp/mcp 0.0.62 → 0.0.63-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +18 -26
- package/build/prompts/test-maintenance/drift-analysis-prompt.js +59 -0
- package/build/prompts/test-maintenance/driftAnalysisSections.js +153 -0
- package/build/prompts/test-recommendation/analysisOutputPrompt.js +21 -9
- package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +34 -38
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +56 -9
- package/build/prompts/testbot/testbot-prompts.js +113 -100
- package/build/services/DriftAnalysisService.js +1 -1
- package/build/services/ScenarioGenerationService.js +5 -1
- package/build/services/TestExecutionService.js +2 -24
- package/build/services/TestExecutionService.test.js +167 -0
- package/build/services/containerEnv.js +35 -0
- package/build/tools/generate-tests/generateScenarioRestTool.js +7 -1
- package/build/tools/submitReportTool.js +6 -6
- package/build/tools/test-management/actionsTool.js +396 -0
- package/build/tools/test-management/analyzeChangesTool.js +750 -0
- package/build/tools/test-management/analyzeTestHealthTool.js +132 -0
- package/build/tools/test-management/executeTestsTool.js +198 -0
- package/build/tools/test-management/index.js +5 -0
- package/build/tools/test-management/stateCleanupTool.js +163 -0
- package/build/tools/test-recommendation/recommendTestsTool.js +1 -1
- package/build/utils/analyze-openapi.js +2 -2
- package/build/utils/pr-comment-parser.js +157 -36
- package/build/utils/pr-comment-parser.test.js +427 -0
- package/package.json +1 -1
- package/build/tools/initTestbotTool.js +0 -187
- package/build/tools/initTestbotTool.test.js +0 -194
- package/build/tools/test-recommendation/analyzeRepositoryTool.js +0 -505
package/build/index.js
CHANGED
|
@@ -18,20 +18,12 @@ import { registerE2ETestTool } from "./tools/generate-tests/generateE2ERestTool.
|
|
|
18
18
|
import { registerLoginTool } from "./tools/auth/loginTool.js";
|
|
19
19
|
import { registerLogoutTool } from "./tools/auth/logoutTool.js";
|
|
20
20
|
import { registerFixErrorTool } from "./tools/fixErrorTool.js";
|
|
21
|
-
import { registerAnalyzeRepositoryTool } from "./tools/test-recommendation/analyzeRepositoryTool.js";
|
|
22
|
-
import { registerRecommendTestsTool } from "./tools/test-recommendation/recommendTestsTool.js";
|
|
23
21
|
import { registerRecommendTestsPrompt } from "./prompts/test-recommendation/registerRecommendTestsPrompt.js";
|
|
24
22
|
import { registerModularizationTool } from "./tools/code-refactor/modularizationTool.js";
|
|
25
23
|
import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
|
|
26
24
|
import { registerScenarioTestTool } from "./tools/generate-tests/generateScenarioRestTool.js";
|
|
27
|
-
import {
|
|
28
|
-
import { registerAnalyzeTestDriftTool } from "./tools/test-maintenance/analyzeTestDriftTool.js";
|
|
29
|
-
import { registerExecuteBatchTestsTool } from "./tools/test-maintenance/executeBatchTestsTool.js";
|
|
30
|
-
import { registerCalculateHealthScoresTool } from "./tools/test-maintenance/calculateHealthScoresTool.js";
|
|
31
|
-
import { registerActionsTool } from "./tools/test-maintenance/actionsTool.js";
|
|
32
|
-
import { registerStateCleanupTool } from "./tools/test-maintenance/stateCleanupTool.js";
|
|
25
|
+
import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerExecuteTestsTool, registerActionsTool, registerStateCleanupTool, } from "./tools/test-management/index.js";
|
|
33
26
|
import { registerTestbotPrompt, registerTestbotResource, } from "./prompts/testbot/testbot-prompts.js";
|
|
34
|
-
import { registerInitTestbotTool } from "./tools/initTestbotTool.js";
|
|
35
27
|
import { registerSubmitReportTool } from "./tools/submitReportTool.js";
|
|
36
28
|
import { registerInitializeWorkspaceTool } from "./tools/workspace/initializeWorkspaceTool.js";
|
|
37
29
|
import { registerAnalysisResources } from "./resources/analysisResources.js";
|
|
@@ -58,15 +50,20 @@ const server = new McpServer({
|
|
|
58
50
|
- NEVER show CLI commands. ALWAYS use the MCP tools provided.
|
|
59
51
|
- For UI and E2E tests, use the trace collection start/stop tools.
|
|
60
52
|
|
|
61
|
-
## Test
|
|
62
|
-
|
|
63
|
-
The analysis scans source code (code-first) to build enriched endpoints
|
|
64
|
-
(Path → Method → Interaction with request/response bodies, headers, cookies)
|
|
65
|
-
and draft user-flow scenarios for integration/E2E tests.
|
|
66
|
-
2. Call \`skyramp_recommend_tests\` with \`sessionId\` → the LLM reasons over the
|
|
67
|
-
enriched data to recommend tests, referencing specific interactions and scenarios.
|
|
53
|
+
## Test Management Flow
|
|
54
|
+
Use \`skyramp_analyze_changes\` as the single entry point for both test recommendations and test health analysis.
|
|
68
55
|
|
|
69
|
-
|
|
56
|
+
### Recommendations (2-step)
|
|
57
|
+
1. Call \`skyramp_analyze_changes\` with \`repositoryPath\` and \`scope\` → scans endpoints, computes branch diff, discovers existing tests → returns a \`stateFile\` **and** inline ranked test recommendations.
|
|
58
|
+
2. (Optional) Use the \`skyramp_recommend_tests\` prompt with \`stateFile\` in Cursor/IDE chat for a focused recommendation view.
|
|
59
|
+
|
|
60
|
+
### Health Analysis (4-step)
|
|
61
|
+
1. Call \`skyramp_analyze_changes\` with \`repositoryPath\` and \`scope\` → returns a \`stateFile\`.
|
|
62
|
+
2. Call \`skyramp_analyze_test_health\` with \`stateFile\` → runs drift analysis + health scoring + LLM semantic assessment.
|
|
63
|
+
3. (Optional) Call \`skyramp_execute_tests\` with \`stateFile\` → runs tests live to verify status.
|
|
64
|
+
4. Call \`skyramp_actions\` with \`stateFile\` → executes UPDATE/REGENERATE/ADD recommendations.
|
|
65
|
+
|
|
66
|
+
After \`skyramp_analyze_changes\`, inspect enriched data via MCP Resources (use the \`sessionId\` returned in the output):
|
|
70
67
|
- \`skyramp://analysis/{sessionId}/summary\` — high-level overview
|
|
71
68
|
- \`skyramp://analysis/{sessionId}/endpoints\` — compact endpoint listing
|
|
72
69
|
- \`skyramp://analysis/{sessionId}/endpoints/{path}\` — full path detail
|
|
@@ -183,16 +180,12 @@ const codeQualityTools = [
|
|
|
183
180
|
registerCodeReuseTool,
|
|
184
181
|
];
|
|
185
182
|
codeQualityTools.forEach((registerTool) => registerTool(server));
|
|
186
|
-
// Register test recommendation tools
|
|
187
|
-
registerAnalyzeRepositoryTool(server);
|
|
188
|
-
registerRecommendTestsTool(server);
|
|
189
183
|
// Register analysis resources (MCP Resources for enriched data access)
|
|
190
184
|
registerAnalysisResources(server);
|
|
191
|
-
// Register test maintenance tools
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
registerCalculateHealthScoresTool(server);
|
|
185
|
+
// Register unified test-management tools (replaces separate test-maintenance tools)
|
|
186
|
+
registerAnalyzeChangesTool(server);
|
|
187
|
+
registerAnalyzeTestHealthTool(server);
|
|
188
|
+
registerExecuteTestsTool(server);
|
|
196
189
|
registerActionsTool(server);
|
|
197
190
|
registerStateCleanupTool(server);
|
|
198
191
|
// Register workspace management tools
|
|
@@ -206,7 +199,6 @@ const infrastructureTools = [
|
|
|
206
199
|
registerTraceStopTool,
|
|
207
200
|
];
|
|
208
201
|
if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
|
|
209
|
-
infrastructureTools.push(registerInitTestbotTool);
|
|
210
202
|
infrastructureTools.push(registerSubmitReportTool);
|
|
211
203
|
logger.info("TestBot tools enabled via SKYRAMP_FEATURE_TESTBOT");
|
|
212
204
|
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { buildDriftScoringGuide, buildActionDecisionMatrix, buildBreakingChangePatterns, buildTestAssessmentGuidelines, buildAddRecommendationGuidelines, buildDriftOutputChecklist, } from "./driftAnalysisSections.js";
|
|
2
|
+
/**
 * Build the Markdown prompt for the test-health (drift) analysis step.
 *
 * The prompt is assembled from: a header with summary counts, the branch diff
 * (capped), the list of existing tests, the scanned endpoints, the shared
 * guidance sections from driftAnalysisSections.js, and a closing instruction
 * to call `skyramp_actions` with the given state file.
 *
 * @param {object} params
 * @param {Array<object>} [params.existingTests] - Tests already in the repo;
 *   each entry is read for `testFile`, `testType` and optional
 *   `drift.driftScore`. A missing list is treated as empty.
 * @param {string} [params.parsedDiff] - Diff text; it is split on "\n", so it
 *   is presumably a pre-rendered Markdown string (TODO confirm with caller).
 * @param {Array<object>} [params.scannedEndpoints] - Endpoints read for
 *   `methods` (array) or `method`, plus `path`. A missing list is treated as
 *   empty.
 * @param {string} params.repositoryPath - Shown verbatim in the header.
 * @param {string} params.stateFile - Interpolated into the final instruction.
 * @returns {string} The complete prompt text.
 */
export function buildDriftAnalysisPrompt(params) {
    // Upper bound on diff lines embedded in the prompt.
    const MAX_DIFF_LINES = 200;
    const { parsedDiff, repositoryPath, stateFile } = params;
    // Tolerate callers that omit either list instead of throwing on `.length`.
    const existingTests = params.existingTests ?? [];
    const scannedEndpoints = params.scannedEndpoints ?? [];
    // Detect new endpoints count from parsedDiff
    let newEndpointCount = 0;
    let diffSection = "";
    if (parsedDiff) {
        const lines = parsedDiff.split("\n");
        // A bolded "**METHOD path**" entry must open and close on one line;
        // excluding newlines keeps an unclosed marker from pairing with bold
        // text on a later line and being miscounted.
        const epMatches = parsedDiff.match(/^\*\*(?:GET|POST|PUT|PATCH|DELETE)[ \t]+[^*\n]+\*\*/gm);
        newEndpointCount = epMatches?.length ?? 0;
        // The count above covers the whole diff, so say so when only a prefix
        // of the diff is actually shown.
        const truncationNote = lines.length > MAX_DIFF_LINES
            ? `_Diff truncated: showing the first ${MAX_DIFF_LINES} of ${lines.length} lines._\n`
            : "";
        diffSection = `## Branch Diff
\`\`\`
${lines.slice(0, MAX_DIFF_LINES).join("\n")}
\`\`\`
${truncationNote}`;
    }
    const testListSection = existingTests.length > 0
        ? `## Existing Test Files (${existingTests.length})
${existingTests
            .map((t) => {
            // The drift score is optional; 0 is a valid score and is still shown.
            const score = t.drift?.driftScore !== undefined ? ` [drift: ${t.drift.driftScore}]` : "";
            return `- ${t.testFile} (${t.testType})${score}`;
        })
            .join("\n")}
`
        : `## Existing Test Files
No existing Skyramp tests found in repository.
`;
    const scannedSection = scannedEndpoints.length > 0
        ? `## Scanned Endpoints (${scannedEndpoints.length})
${scannedEndpoints.map((ep) => `- ${Array.isArray(ep.methods) ? ep.methods.join("|") : ep.method} ${ep.path}`).join("\n")}
`
        : "";
    return `# Test Health Analysis

**Repository**: \`${repositoryPath}\`
**Existing tests**: ${existingTests.length}
**New endpoints in diff**: ${newEndpointCount}

${diffSection}
${testListSection}
${scannedSection}
${buildDriftScoringGuide()}

${buildActionDecisionMatrix()}

${buildBreakingChangePatterns()}

${buildTestAssessmentGuidelines()}

${buildAddRecommendationGuidelines()}

${buildDriftOutputChecklist(existingTests.length, newEndpointCount)}

After completing the assessment above, call \`skyramp_actions\` with \`stateFile: "${stateFile}"\`

**CRITICAL**: Do NOT create any .json or .md files. Only call skyramp_actions when done.`;
}
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Modular section builders for the Drift Analysis prompt,
|
|
3
|
+
* mirroring the recommendationSections.ts pattern.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Render the "Drift Score Guide" prompt section: a Markdown table mapping
 * score bands to labels, followed by a one-line scoring instruction.
 *
 * @returns {string} Markdown text for the section.
 */
export function buildDriftScoringGuide() {
    // [score band, label, meaning] — one entry per table row, lowest band first.
    const bands = [
        ["0–20", "IGNORE", "No meaningful drift — test is still valid as-is"],
        ["21–40", "VERIFY", "Minor changes detected — review but likely fine"],
        ["41–70", "UPDATE", "Breaking changes detected — test needs edits"],
        ["71–100", "REGENERATE", "Major structural changes — regenerate from scratch"],
    ];
    const tableRows = bands.map(([range, label, meaning]) => `| ${range} | ${label} | ${meaning} |`);
    return [
        "## Drift Score Guide (0–100)",
        "",
        "| Score | Label | Meaning |",
        "|-------|-------|---------|",
        ...tableRows,
        "",
        "Assign each existing test a score based on how much the codebase has changed relative to what the test expects.",
    ].join("\n");
}
|
|
17
|
+
/**
 * Render the "Action Decision Matrix" prompt section: a table describing when
 * each action (IGNORE / VERIFY / UPDATE / REGENERATE / ADD) applies, followed
 * by tie-breaking rules.
 *
 * @returns {string} Markdown text for the section.
 */
export function buildActionDecisionMatrix() {
    // [action, when to use] — rendered as table rows with the action in bold.
    const actions = [
        ["IGNORE", "Drift score 0–20; no breaking changes AND no additive field gaps detected"],
        ["VERIFY", "Drift score 21–40; minor changes, manual review recommended"],
        ["UPDATE", "Drift score 25–70; breaking changes OR additive fields added to a covered endpoint (new response field the test doesn't assert)"],
        ["REGENERATE", "Drift score 71–100; endpoint removed, major restructuring, or test is fundamentally broken"],
        ["ADD", "New endpoint detected in diff that has no corresponding test yet"],
    ];
    const rules = [
        "Prefer UPDATE over REGENERATE when changes are localized (e.g., only the URL path changed).",
        "Prefer IGNORE over VERIFY when all changed files are unrelated to the test's endpoint.",
        "Always use ADD for new endpoints when the action is scoped to new test creation.",
        "**Additive changes (new response fields) on a covered endpoint always trigger UPDATE** — even if existing assertions still pass. The test needs a new assertion for the added field.",
    ];
    const output = [
        "## Action Decision Matrix",
        "",
        "For each test, choose one of:",
        "",
        "| Action | When to use |",
        "|--------|------------|",
    ];
    for (const [action, when] of actions) {
        output.push(`| **${action}** | ${when} |`);
    }
    output.push("", "Rules:");
    for (const rule of rules) {
        output.push(`- ${rule}`);
    }
    return output.join("\n");
}
|
|
36
|
+
/**
 * Render the "Breaking Change Patterns to Detect" prompt section: grouped
 * bullet lists of diff patterns (endpoint, request/response shape, auth,
 * status code, and additive response-field changes).
 *
 * @returns {string} Markdown text for the section.
 */
export function buildBreakingChangePatterns() {
    // Each group becomes a "### heading", optional lead lines, then "- " bullets.
    const groups = [
        {
            heading: "Endpoint-level breaking changes",
            lead: [],
            bullets: [
                '`- @app.route("/old-path")` / `+ @app.route("/new-path")` — renamed endpoint',
                '`- router.get("/old")` / `+ router.get("/new")` — renamed route',
                '`- @GetMapping("/old")` / `+ @GetMapping("/new")` — Spring rename',
                "Lines removing a route decorator entirely (endpoint removed)",
            ],
        },
        {
            heading: "Request/response shape changes",
            lead: [],
            bullets: [
                "Field type changes: `- field: int` → `+ field: string`",
                'Required field added: `+ required: [..., "newField"]`',
                'Response field removed: `- "responseField":`',
                'Enum value changes: `- status: "active"` → `+ status: "enabled"`',
            ],
        },
        {
            heading: "Auth changes",
            lead: [],
            bullets: [
                "`+ @require_auth`, `+ @login_required`, `+ middleware(authMiddleware)`",
                "`- @require_auth` (auth removed)",
                "Token type changed: Bearer → Cookie",
            ],
        },
        {
            heading: "Status code changes",
            lead: [],
            bullets: [
                "`- return 200` → `+ return 201`",
                "`- status_code=200` → `+ status_code=204`",
                "`- res.status(201)` → `+ res.status(200)`",
            ],
        },
        {
            heading: "Additive response field changes (non-breaking but coverage gap)",
            lead: [
                "These do NOT break existing assertions but leave the new field untested. Always flag as UPDATE for covered endpoints.",
            ],
            bullets: [
                '`+ "newField": queryset.filter(...).count()` added inside a `Response({...})` or `res.json({...})`',
                "`+ newField = serializers.XXXField()` added to a serializer used by a tested endpoint",
                '`+ "newField":` added to a response body dict returned by the endpoint',
                "New key added inside an existing dict/object returned by the endpoint",
            ],
        },
    ];
    const renderGroup = ({ heading, lead, bullets }) => [
        `### ${heading}`,
        ...lead,
        ...bullets.map((bullet) => `- ${bullet}`),
    ].join("\n");
    // Title, intro and every group are separated by exactly one blank line.
    return [
        "## Breaking Change Patterns to Detect",
        "Scan the diff lines for these high-signal patterns:",
        ...groups.map(renderGroup),
    ].join("\n\n");
}
|
|
70
|
+
/**
 * Render the "Per-Test Assessment" prompt section: the step-by-step procedure
 * (endpoint existence, shape changes, additive fields, auth changes, final
 * score/action) to follow for each existing test file.
 *
 * @returns {string} Markdown text for the section.
 */
export function buildTestAssessmentGuidelines() {
    // Each step renders as "### Step <id>: <title>", its lead lines, then bullets.
    const steps = [
        {
            id: "1",
            title: "Check endpoint existence",
            lead: ["Does the endpoint the test targets still exist in the codebase?"],
            bullets: [
                "If the endpoint path/method is no longer present → score 80+, action: REGENERATE",
                "If the endpoint was renamed → score 50–70, action: UPDATE (path substitution)",
            ],
        },
        {
            id: "2",
            title: "Check request/response shape (breaking changes)",
            lead: ["Has the request body or response structure changed in a way that breaks the test?"],
            bullets: [
                "Compare test's expected fields against current schema/model definitions",
                "Type changes (string→int, int→string) → score 60+, action: UPDATE or REGENERATE",
                "New required fields the test doesn't send → score 50+, action: UPDATE",
                "Response fields the test asserts on have been removed → score 50+, action: UPDATE",
            ],
        },
        {
            id: "2b",
            title: "Check additive response field changes (coverage gaps)",
            lead: ["**Even if existing assertions still pass**, does the diff add a new field to the response of an endpoint this test already covers?"],
            bullets: [
                'Look at the diff for lines like `+ "newField":` or `+ newField =` inside a view/serializer this test hits',
                'If YES → score 30, action: UPDATE — add an assertion for the new field (e.g. `assert "newField" in response_body` or `assert response_body["newField"] >= 0`)',
                "This applies even when the test only checks status codes — the test should be extended to cover the new field",
                "**Do NOT score IGNORE if a new response field was added to a covered endpoint**",
            ],
        },
        {
            id: "3",
            title: "Check auth changes",
            lead: ["Has the authentication mechanism for this endpoint changed?"],
            bullets: [
                "Auth added where none existed → score 40+, action: UPDATE",
                "Auth method changed (bearer→cookie) → score 50+, action: UPDATE",
                "Auth removed → score 30+, action: VERIFY or UPDATE",
            ],
        },
        {
            id: "4",
            title: "Assign score and action",
            lead: [
                "Based on the above, assign a final drift score 0–100 and choose the action (IGNORE / VERIFY / UPDATE / REGENERATE).",
                "Provide a 1-2 sentence rationale.",
            ],
            bullets: [
                "If Step 2b flagged an additive field → score must be ≥ 30 and action must be UPDATE, even if Steps 2/3 found no breaking changes.",
            ],
        },
    ];
    const renderStep = (step) => {
        const stepLines = [`### Step ${step.id}: ${step.title}`, ...step.lead];
        for (const bullet of step.bullets) {
            stepLines.push(`- ${bullet}`);
        }
        return stepLines.join("\n");
    };
    // Title, intro and every step are separated by exactly one blank line.
    return [
        "## Per-Test Assessment (4 Steps)",
        "For each existing test file, follow these steps:",
        ...steps.map(renderStep),
    ].join("\n\n");
}
|
|
105
|
+
/**
 * Render the "ADD Recommendations for New Endpoints" prompt section: a table
 * of recommended test types per HTTP method and the numbered list of fields
 * each ADD recommendation must contain.
 *
 * @returns {string} Markdown text for the section.
 */
export function buildAddRecommendationGuidelines() {
    // [HTTP method(s), recommended test types]
    const priorities = [
        ["POST / PUT / PATCH", "integration, contract"],
        ["GET", "contract, smoke"],
        ["DELETE", "integration, smoke"],
    ];
    // Rendered as a 1-based numbered list, in this order.
    const requiredFields = [
        "The endpoint path and method",
        "The recommended test types (from the table above)",
        "The Skyramp tool to call (e.g., `skyramp_contract_test_generation`, `skyramp_integration_test_generation`)",
        "The `endpointURL` to use (combine base URL + path)",
        "The language/framework to use (from workspace config or project metadata)",
    ];
    const priorityTable = [
        "### Test type priority by HTTP method",
        "| Method | Recommended test types |",
        "|--------|----------------------|",
        ...priorities.map(([method, types]) => `| ${method} | ${types} |`),
    ].join("\n");
    const formatList = [
        "### ADD recommendation format",
        "For each new endpoint, include:",
        ...requiredFields.map((field, index) => `${index + 1}. ${field}`),
    ].join("\n");
    return [
        "## ADD Recommendations for New Endpoints",
        "For each new endpoint detected in the diff (not yet covered by any existing test):",
        priorityTable,
        formatList,
    ].join("\n\n");
}
|
|
125
|
+
/**
 * Render the "Output Checklist" prompt section: the per-test output format,
 * the per-new-endpoint output format (or a note that none were detected), and
 * the final instruction to call `skyramp_actions`.
 *
 * @param {number} existingTestCount - Shown in the "Existing tests" heading.
 * @param {number} newEndpointCount - When greater than 0, the ADD output
 *   format is included; otherwise a "no new endpoints" note is emitted.
 * @returns {string} Markdown text for the section.
 */
export function buildDriftOutputChecklist(existingTestCount, newEndpointCount) {
    const fence = "```";
    let newEndpointLines;
    if (newEndpointCount > 0) {
        newEndpointLines = [
            `### New endpoints (${newEndpointCount} detected)`,
            "For EACH new endpoint, output:",
            fence,
            "Endpoint: <METHOD> <path>",
            "Action: ADD",
            "Test types: <contract | integration | smoke | ...>",
            "Rationale: <1 sentence>",
            fence,
        ];
    }
    else {
        newEndpointLines = [
            "### New endpoints",
            "No new endpoints detected in this diff.",
        ];
    }
    return [
        "## Output Checklist",
        "",
        "Complete ALL of the following before calling skyramp_actions:",
        "",
        `### Existing tests (${existingTestCount} total)`,
        "For EACH existing test, output:",
        fence,
        "Test: <testFile>",
        "Drift Score: <0-100>",
        "Action: <IGNORE | VERIFY | UPDATE | REGENERATE>",
        "Rationale: <1-2 sentence explanation>",
        fence,
        "",
        ...newEndpointLines,
        "",
        "### Final step",
        "After completing all assessments above, call `skyramp_actions` with the stateFile to execute the recommended changes.",
    ].join("\n");
}
|
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
function buildEnrichmentInstructions(p) {
|
|
2
2
|
const isDiffScope = p.analysisScope === "current_branch_diff";
|
|
3
|
+
const useHealthFlow = p.nextTool === "skyramp_analyze_test_health";
|
|
3
4
|
if (!isDiffScope) {
|
|
5
|
+
const nextStep = useHealthFlow
|
|
6
|
+
? `### Step 3: Identify tests at risk of drift
|
|
7
|
+
Call \`skyramp_analyze_test_health\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\``
|
|
8
|
+
: `### Step 3: Call recommend tests
|
|
9
|
+
Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
|
|
4
10
|
return `## Your Task — Enrich & Recommend (full repo)
|
|
5
11
|
|
|
6
12
|
### Step 1: Read key files
|
|
@@ -12,8 +18,7 @@ to understand the tech stack, endpoint shapes, auth mechanisms, and request/resp
|
|
|
12
18
|
Map how endpoints relate to each other — which POST creates resources consumed by other endpoints?
|
|
13
19
|
**Resolve nested/sub-router paths** from the Router Mounting section above.
|
|
14
20
|
|
|
15
|
-
|
|
16
|
-
Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
|
|
21
|
+
${nextStep}`;
|
|
17
22
|
}
|
|
18
23
|
const changedFiles = p.parsedDiff?.changedFiles.join(", ") ?? "";
|
|
19
24
|
const hasApiEndpoints = p.parsedDiff && (p.parsedDiff.newEndpoints.length > 0 || p.parsedDiff.modifiedEndpoints.length > 0);
|
|
@@ -29,14 +34,13 @@ Mounting context.`
|
|
|
29
34
|
UI-only PR — read changed components to find API calls (fetch, axios, hooks).`
|
|
30
35
|
: `### Step 2: Identify affected endpoints
|
|
31
36
|
No API route changes detected — read changed files to identify affected endpoints.`;
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
${changedFiles}
|
|
36
|
-
|
|
37
|
-
${step2}
|
|
37
|
+
const step3Content = useHealthFlow
|
|
38
|
+
? `### Step 3: Identify tests at risk of drift
|
|
39
|
+
Assess which existing tests may be broken by the changes in this diff.
|
|
38
40
|
|
|
39
|
-
### Step
|
|
41
|
+
### Step 4: Call analyze test health
|
|
42
|
+
Call \`skyramp_analyze_test_health\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\``
|
|
43
|
+
: `### Step 3: Draft integration scenarios
|
|
40
44
|
Draft multi-step scenarios simulating realistic user workflows:
|
|
41
45
|
- **Cross-resource data flow**: Foreign key relationships, parent→child creation, verification
|
|
42
46
|
- **Search/filter verification**: Create data, search, verify results
|
|
@@ -47,6 +51,14 @@ Draft multi-step scenarios simulating realistic user workflows:
|
|
|
47
51
|
|
|
48
52
|
### Step 4: Call recommend tests
|
|
49
53
|
Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
|
|
54
|
+
return `## Your Task — Enrich & Recommend (PR-scoped)
|
|
55
|
+
|
|
56
|
+
### Step 1: Read the changed files
|
|
57
|
+
${changedFiles}
|
|
58
|
+
|
|
59
|
+
${step2}
|
|
60
|
+
|
|
61
|
+
${step3Content}`;
|
|
50
62
|
}
|
|
51
63
|
export function buildAnalysisOutputText(p) {
|
|
52
64
|
const isDiffScope = p.analysisScope === "current_branch_diff";
|
|
@@ -1,62 +1,58 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
-
import { StateManager,
|
|
2
|
+
import { StateManager, } from "../../utils/AnalysisStateManager.js";
|
|
3
3
|
import { logger } from "../../utils/logger.js";
|
|
4
4
|
import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
|
|
5
|
-
import { getWorkspaceAuthHeader } from "../../utils/workspaceAuth.js";
|
|
6
5
|
export function registerRecommendTestsPrompt(server) {
|
|
7
6
|
server.registerPrompt("skyramp_recommend_tests", {
|
|
8
|
-
description: "Generate test recommendations from
|
|
9
|
-
"Provide a
|
|
7
|
+
description: "Generate ranked test recommendations from a test-management analysis. " +
|
|
8
|
+
"Provide a stateFile path from skyramp_analyze_changes.",
|
|
10
9
|
argsSchema: {
|
|
11
|
-
|
|
10
|
+
stateFile: z
|
|
12
11
|
.string()
|
|
13
|
-
.describe("
|
|
14
|
-
|
|
15
|
-
.
|
|
16
|
-
.
|
|
12
|
+
.describe("State file path returned by skyramp_analyze_changes"),
|
|
13
|
+
topN: z
|
|
14
|
+
.number()
|
|
15
|
+
.int()
|
|
16
|
+
.positive()
|
|
17
|
+
.default(10)
|
|
17
18
|
.optional()
|
|
18
|
-
.describe("
|
|
19
|
+
.describe("Maximum number of ranked recommendations to return (default: 10)"),
|
|
19
20
|
},
|
|
20
21
|
}, async (args) => {
|
|
21
|
-
const
|
|
22
|
-
if (!
|
|
23
|
-
throw new Error("
|
|
22
|
+
const stateFile = args.stateFile;
|
|
23
|
+
if (!stateFile) {
|
|
24
|
+
throw new Error("stateFile is required");
|
|
24
25
|
}
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
data = getSessionData(sessionId);
|
|
26
|
+
const mgr = StateManager.fromStatePath(stateFile);
|
|
27
|
+
if (!mgr.exists()) {
|
|
28
|
+
throw new Error(`State file "${stateFile}" not found. Run skyramp_analyze_changes first.`);
|
|
29
29
|
}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
: StateManager.fromSessionId(sessionId);
|
|
35
|
-
if (!mgr.exists()) {
|
|
36
|
-
throw new Error(`Analysis session "${sessionId}" not found. Run skyramp_analyze_repository first.`);
|
|
37
|
-
}
|
|
38
|
-
data = await mgr.readData();
|
|
30
|
+
const fullState = await mgr.readFullState();
|
|
31
|
+
const state = fullState ?? null;
|
|
32
|
+
if (!state?.repositoryAnalysis?.fullAnalysis) {
|
|
33
|
+
throw new Error(`State file "${stateFile}" has no analysis data. Re-run skyramp_analyze_changes.`);
|
|
39
34
|
}
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
const
|
|
46
|
-
|
|
47
|
-
: undefined;
|
|
48
|
-
const prompt = buildRecommendationPrompt(data.analysis, scope, effectiveTopN, data.prContext, workspaceAuthHeader);
|
|
35
|
+
const { fullAnalysis, sessionId, wsAuthHeader } = state.repositoryAnalysis;
|
|
36
|
+
const repositoryPath = fullState?.metadata?.repositoryPath ?? "";
|
|
37
|
+
const analysisScope = state.analysisScope === "branch_diff"
|
|
38
|
+
? "current_branch_diff"
|
|
39
|
+
: "full_repo";
|
|
40
|
+
const effectiveTopN = args.topN;
|
|
41
|
+
const prompt = buildRecommendationPrompt(fullAnalysis, analysisScope, effectiveTopN, undefined, wsAuthHeader);
|
|
49
42
|
logger.info("Serving recommendation prompt via MCP Prompt", {
|
|
50
|
-
|
|
51
|
-
|
|
43
|
+
stateFile,
|
|
44
|
+
analysisScope,
|
|
52
45
|
});
|
|
46
|
+
const resourceLinks = sessionId
|
|
47
|
+
? `\nAvailable MCP Resources:\n- skyramp://analysis/${sessionId}/summary\n- skyramp://analysis/${sessionId}/endpoints\n- skyramp://analysis/${sessionId}/scenarios\n- skyramp://analysis/${sessionId}/diff\n`
|
|
48
|
+
: "";
|
|
53
49
|
return {
|
|
54
50
|
messages: [
|
|
55
51
|
{
|
|
56
52
|
role: "user",
|
|
57
53
|
content: {
|
|
58
54
|
type: "text",
|
|
59
|
-
text: `
|
|
55
|
+
text: `State file: ${stateFile}\nRepository: ${repositoryPath}\nScope: ${analysisScope}\n${resourceLinks}\n${prompt}`,
|
|
60
56
|
},
|
|
61
57
|
},
|
|
62
58
|
],
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { buildPrioritizationDimensions, buildTestExamples, buildTestPatternGuidelines, buildTestQualityCriteria, buildGenerationRules, buildToolWorkflows, buildCoverageChecklist, } from "./recommendationSections.js";
|
|
1
|
+
import { buildPrioritizationDimensions, buildTestExamples, buildTestPatternGuidelines, buildTestQualityCriteria, buildGenerationRules, buildToolWorkflows, buildCoverageChecklist, MAX_TESTS_TO_GENERATE, } from "./recommendationSections.js";
|
|
2
2
|
function formatTestLocations(locs) {
|
|
3
3
|
const entries = Object.entries(locs || {});
|
|
4
4
|
if (entries.length === 0)
|
|
@@ -10,11 +10,18 @@ export function buildRecommendationPrompt(analysis, analysisScope = "full_repo",
|
|
|
10
10
|
const isDiffScope = analysisScope === "current_branch_diff";
|
|
11
11
|
const diffContext = analysis.branchDiffContext;
|
|
12
12
|
const openApiSpec = analysis.artifacts?.openApiSpecs?.[0];
|
|
13
|
+
// ── Filter out bot-generated test files from changedFiles ──
|
|
14
|
+
// Prevents bot-committed test files from being treated as application changes
|
|
15
|
+
// on subsequent testbot runs on the same PR.
|
|
16
|
+
const SKYRAMP_TEST_FILE_PATTERN = /(?:_test|_smoke|_contract|_fuzz|_integration|_load|_e2e|_ui)\.[^/]+$|scenario_[^/]+\.json$/;
|
|
17
|
+
const filteredChangedFiles = diffContext
|
|
18
|
+
? diffContext.changedFiles.filter(f => !SKYRAMP_TEST_FILE_PATTERN.test(f))
|
|
19
|
+
: [];
|
|
13
20
|
// ── Frontend / UI change detection ──
|
|
14
21
|
const FRONTEND_FILE_PATTERN = /\.(tsx?|jsx?|vue|svelte|css|scss|less|html)$/;
|
|
15
22
|
const API_DIR_PATTERN = /\/(api|routes?|controllers?|routers?|handlers?|endpoints?|server)\//;
|
|
16
23
|
const hasFrontendChanges = isDiffScope && diffContext
|
|
17
|
-
?
|
|
24
|
+
? filteredChangedFiles.some(f => FRONTEND_FILE_PATTERN.test(f) &&
|
|
18
25
|
!API_DIR_PATTERN.test(f) &&
|
|
19
26
|
/\/(components?|pages?|views?|layouts?|app|src\/app|frontend|client|public|styles?)\//i.test(f))
|
|
20
27
|
: false;
|
|
@@ -62,7 +69,7 @@ ${endpointLines}
|
|
|
62
69
|
diffSection = `
|
|
63
70
|
## Branch Diff Context
|
|
64
71
|
Branch: \`${diffContext.currentBranch}\` → base: \`${diffContext.baseBranch}\`
|
|
65
|
-
Changed files: ${
|
|
72
|
+
Changed files: ${filteredChangedFiles.join(", ")}
|
|
66
73
|
New endpoints:
|
|
67
74
|
${fmtEps(diffContext.newEndpoints, (m) => `${m.sourceFile}, ${m.interactionCount} interactions`)}
|
|
68
75
|
Modified endpoints:
|
|
@@ -154,14 +161,54 @@ Use base URL: \`${analysis.apiEndpoints.baseUrl}\` and auth: \`${authHeaderValue
|
|
|
154
161
|
// ── PR History ──
|
|
155
162
|
let prHistorySection = "";
|
|
156
163
|
if (prContext && prContext.previousRecommendations.length > 0) {
|
|
157
|
-
const
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
164
|
+
const implemented = prContext.previousRecommendations.filter(r => r.status === "implemented");
|
|
165
|
+
const recommended = prContext.previousRecommendations.filter(r => r.status === "recommended");
|
|
166
|
+
let historyBody = "";
|
|
167
|
+
if (implemented.length > 0) {
|
|
168
|
+
const implLines = implemented
|
|
169
|
+
.map((r) => ` - ${r.testType} — ${r.endpoint}`)
|
|
170
|
+
.join("\n");
|
|
171
|
+
const fileLines = prContext.implementedTestFiles.length > 0
|
|
172
|
+
? `\nExisting test files (in working tree):\n${prContext.implementedTestFiles.map(f => " - \`" + f + "\`").join("\n")}\n`
|
|
173
|
+
: "";
|
|
174
|
+
historyBody += `### Previously Generated Tests
|
|
175
|
+
${implLines}${fileLines}`;
|
|
176
|
+
}
|
|
177
|
+
if (prContext.executionResults.length > 0) {
|
|
178
|
+
const resultLines = prContext.executionResults
|
|
179
|
+
.map((r) => ` - \`${r.testFile}\` — ${r.status}`)
|
|
180
|
+
.join("\n");
|
|
181
|
+
historyBody += `### Execution Results from Prior Run
|
|
182
|
+
${resultLines}
|
|
183
|
+
If a test failed previously, check whether the failure was environmental or a real bug,
|
|
184
|
+
and adjust the test approach if needed.
|
|
185
|
+
`;
|
|
186
|
+
}
|
|
187
|
+
if (recommended.length > 0) {
|
|
188
|
+
const recLines = recommended
|
|
189
|
+
.map((r) => ` - ${r.testType} — ${r.endpoint}${r.scenarioName ? ` (scenario: ${r.scenarioName})` : ""}`)
|
|
190
|
+
.join("\n");
|
|
191
|
+
historyBody += `
|
|
192
|
+
### Previously Recommended (not generated)
|
|
162
193
|
${recLines}
|
|
163
|
-
|
|
194
|
+
These were recommended but not generated in the prior run. Consider promoting them
|
|
195
|
+
to generation if they still apply to the current code changes.
|
|
164
196
|
`;
|
|
197
|
+
}
|
|
198
|
+
prHistorySection = `
|
|
199
|
+
## PR History (PR #${prContext.prNumber})
|
|
200
|
+
Tests from prior bot runs are still in the working tree — the maintenance pipeline
|
|
201
|
+
(Task 2) keeps them up to date. Use the history below to **avoid duplicating** existing
|
|
202
|
+
coverage and to fill gaps:
|
|
203
|
+
- **Do NOT re-recommend** tests listed under "Previously Generated Tests" — they already
|
|
204
|
+
exist and are maintained automatically.
|
|
205
|
+
- **Promote** previously recommended-but-not-generated tests into the top
|
|
206
|
+
${MAX_TESTS_TO_GENERATE} generation slots if they still apply to the current code.
|
|
207
|
+
- **Add new** recommendations only for endpoints or code paths introduced in the latest
|
|
208
|
+
commit that aren't covered by existing tests.
|
|
209
|
+
- If prior execution results show failures, note the issue but do not re-recommend
|
|
210
|
+
the test — Task 2 handles fixes for existing tests.
|
|
211
|
+
${historyBody}`;
|
|
165
212
|
}
|
|
166
213
|
// ── Compose all sections ──
|
|
167
214
|
const scopeNote = isDiffScope
|