@skyramp/mcp 0.0.62 → 0.0.63-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +11 -11
- package/build/prompts/test-maintenance/drift-analysis-prompt.js +59 -0
- package/build/prompts/test-maintenance/driftAnalysisSections.js +153 -0
- package/build/prompts/test-recommendation/analysisOutputPrompt.js +21 -9
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +9 -2
- package/build/prompts/testbot/testbot-prompts.js +106 -100
- package/build/services/ScenarioGenerationService.js +5 -1
- package/build/services/TestExecutionService.js +2 -24
- package/build/services/TestExecutionService.test.js +167 -0
- package/build/services/containerEnv.js +36 -0
- package/build/tools/generate-tests/generateScenarioRestTool.js +7 -1
- package/build/tools/submitReportTool.js +6 -6
- package/build/tools/test-management/actionsTool.js +389 -0
- package/build/tools/test-management/analyzeChangesTool.js +653 -0
- package/build/tools/test-management/analyzeTestHealthTool.js +134 -0
- package/build/tools/test-management/executeTestsTool.js +198 -0
- package/build/tools/test-management/index.js +5 -0
- package/build/tools/test-management/stateCleanupTool.js +163 -0
- package/build/utils/pr-comment-parser.js +156 -36
- package/package.json +1 -1
package/build/index.js
CHANGED
|
@@ -24,12 +24,7 @@ import { registerRecommendTestsPrompt } from "./prompts/test-recommendation/regi
|
|
|
24
24
|
import { registerModularizationTool } from "./tools/code-refactor/modularizationTool.js";
|
|
25
25
|
import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
|
|
26
26
|
import { registerScenarioTestTool } from "./tools/generate-tests/generateScenarioRestTool.js";
|
|
27
|
-
import {
|
|
28
|
-
import { registerAnalyzeTestDriftTool } from "./tools/test-maintenance/analyzeTestDriftTool.js";
|
|
29
|
-
import { registerExecuteBatchTestsTool } from "./tools/test-maintenance/executeBatchTestsTool.js";
|
|
30
|
-
import { registerCalculateHealthScoresTool } from "./tools/test-maintenance/calculateHealthScoresTool.js";
|
|
31
|
-
import { registerActionsTool } from "./tools/test-maintenance/actionsTool.js";
|
|
32
|
-
import { registerStateCleanupTool } from "./tools/test-maintenance/stateCleanupTool.js";
|
|
27
|
+
import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerExecuteTestsTool, registerActionsTool, registerStateCleanupTool, } from "./tools/test-management/index.js";
|
|
33
28
|
import { registerTestbotPrompt, registerTestbotResource, } from "./prompts/testbot/testbot-prompts.js";
|
|
34
29
|
import { registerInitTestbotTool } from "./tools/initTestbotTool.js";
|
|
35
30
|
import { registerSubmitReportTool } from "./tools/submitReportTool.js";
|
|
@@ -66,6 +61,12 @@ const server = new McpServer({
|
|
|
66
61
|
2. Call \`skyramp_recommend_tests\` with \`sessionId\` → the LLM reasons over the
|
|
67
62
|
enriched data to recommend tests, referencing specific interactions and scenarios.
|
|
68
63
|
|
|
64
|
+
## Test Health Analysis Flow (4-step)
|
|
65
|
+
1. Call \`skyramp_analyze_changes\` with \`repositoryPath\` and \`scope\` → discovers existing tests, scans endpoints, computes branch diff → returns a \`stateFile\`.
|
|
66
|
+
2. Call \`skyramp_analyze_test_health\` with \`stateFile\` → runs drift analysis + health scoring + LLM semantic assessment → returns enriched \`stateFile\`.
|
|
67
|
+
3. (Optional) Call \`skyramp_execute_tests\` with \`stateFile\` → runs tests live to verify status.
|
|
68
|
+
4. Call \`skyramp_actions\` with \`stateFile\` → executes UPDATE/REGENERATE/ADD recommendations.
|
|
69
|
+
|
|
69
70
|
After analysis, you can also inspect data via MCP Resources:
|
|
70
71
|
- \`skyramp://analysis/{sessionId}/summary\` — high-level overview
|
|
71
72
|
- \`skyramp://analysis/{sessionId}/endpoints\` — compact endpoint listing
|
|
@@ -188,11 +189,10 @@ registerAnalyzeRepositoryTool(server);
|
|
|
188
189
|
registerRecommendTestsTool(server);
|
|
189
190
|
// Register analysis resources (MCP Resources for enriched data access)
|
|
190
191
|
registerAnalysisResources(server);
|
|
191
|
-
// Register test maintenance tools
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
registerCalculateHealthScoresTool(server);
|
|
192
|
+
// Register unified test-management tools (replaces separate test-maintenance tools)
|
|
193
|
+
registerAnalyzeChangesTool(server);
|
|
194
|
+
registerAnalyzeTestHealthTool(server);
|
|
195
|
+
registerExecuteTestsTool(server);
|
|
196
196
|
registerActionsTool(server);
|
|
197
197
|
registerStateCleanupTool(server);
|
|
198
198
|
// Register workspace management tools
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { buildDriftScoringGuide, buildActionDecisionMatrix, buildBreakingChangePatterns, buildTestAssessmentGuidelines, buildAddRecommendationGuidelines, buildDriftOutputChecklist, } from "./driftAnalysisSections.js";
|
|
2
|
+
export function buildDriftAnalysisPrompt(params) {
|
|
3
|
+
const { existingTests, parsedDiff, scannedEndpoints, repositoryPath, stateFile } = params;
|
|
4
|
+
// Detect new endpoints count from parsedDiff
|
|
5
|
+
let newEndpointCount = 0;
|
|
6
|
+
let diffSection = "";
|
|
7
|
+
if (parsedDiff) {
|
|
8
|
+
const lines = parsedDiff.split("\n");
|
|
9
|
+
const epMatches = parsedDiff.match(/(?:^|\n)\*\*(GET|POST|PUT|PATCH|DELETE)\s+[^\*]+\*\*/gm);
|
|
10
|
+
if (epMatches)
|
|
11
|
+
newEndpointCount = epMatches.length;
|
|
12
|
+
diffSection = `## Branch Diff
|
|
13
|
+
\`\`\`
|
|
14
|
+
${lines.slice(0, 200).join("\n")}
|
|
15
|
+
\`\`\`
|
|
16
|
+
`;
|
|
17
|
+
}
|
|
18
|
+
const testListSection = existingTests.length > 0
|
|
19
|
+
? `## Existing Test Files (${existingTests.length})
|
|
20
|
+
${existingTests
|
|
21
|
+
.map((t) => {
|
|
22
|
+
const score = t.drift?.driftScore !== undefined ? ` [drift: ${t.drift.driftScore}]` : "";
|
|
23
|
+
return `- ${t.testFile} (${t.testType})${score}`;
|
|
24
|
+
})
|
|
25
|
+
.join("\n")}
|
|
26
|
+
`
|
|
27
|
+
: `## Existing Test Files
|
|
28
|
+
No existing Skyramp tests found in repository.
|
|
29
|
+
`;
|
|
30
|
+
const scannedSection = scannedEndpoints.length > 0
|
|
31
|
+
? `## Scanned Endpoints (${scannedEndpoints.length})
|
|
32
|
+
${scannedEndpoints.map((ep) => `- ${Array.isArray(ep.methods) ? ep.methods.join("|") : ep.method} ${ep.path}`).join("\n")}
|
|
33
|
+
`
|
|
34
|
+
: "";
|
|
35
|
+
return `# Test Health Analysis
|
|
36
|
+
|
|
37
|
+
**Repository**: \`${repositoryPath}\`
|
|
38
|
+
**Existing tests**: ${existingTests.length}
|
|
39
|
+
**New endpoints in diff**: ${newEndpointCount}
|
|
40
|
+
|
|
41
|
+
${diffSection}
|
|
42
|
+
${testListSection}
|
|
43
|
+
${scannedSection}
|
|
44
|
+
${buildDriftScoringGuide()}
|
|
45
|
+
|
|
46
|
+
${buildActionDecisionMatrix()}
|
|
47
|
+
|
|
48
|
+
${buildBreakingChangePatterns()}
|
|
49
|
+
|
|
50
|
+
${buildTestAssessmentGuidelines()}
|
|
51
|
+
|
|
52
|
+
${buildAddRecommendationGuidelines()}
|
|
53
|
+
|
|
54
|
+
${buildDriftOutputChecklist(existingTests.length, newEndpointCount)}
|
|
55
|
+
|
|
56
|
+
After completing the assessment above, call \`skyramp_actions\` with \`stateFile: "${stateFile}"\`
|
|
57
|
+
|
|
58
|
+
**CRITICAL**: Do NOT create any .json or .md files. Only call skyramp_actions when done.`;
|
|
59
|
+
}
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Modular section builders for the Drift Analysis prompt,
|
|
3
|
+
* mirroring the recommendationSections.ts pattern.
|
|
4
|
+
*/
|
|
5
|
+
export function buildDriftScoringGuide() {
|
|
6
|
+
return `## Drift Score Guide (0–100)
|
|
7
|
+
|
|
8
|
+
| Score | Label | Meaning |
|
|
9
|
+
|-------|-------|---------|
|
|
10
|
+
| 0–20 | IGNORE | No meaningful drift — test is still valid as-is |
|
|
11
|
+
| 21–40 | VERIFY | Minor changes detected — review but likely fine |
|
|
12
|
+
| 41–70 | UPDATE | Breaking changes detected — test needs edits |
|
|
13
|
+
| 71–100 | REGENERATE | Major structural changes — regenerate from scratch |
|
|
14
|
+
|
|
15
|
+
Assign each existing test a score based on how much the codebase has changed relative to what the test expects.`;
|
|
16
|
+
}
|
|
17
|
+
export function buildActionDecisionMatrix() {
|
|
18
|
+
return `## Action Decision Matrix
|
|
19
|
+
|
|
20
|
+
For each test, choose one of:
|
|
21
|
+
|
|
22
|
+
| Action | When to use |
|
|
23
|
+
|--------|------------|
|
|
24
|
+
| **IGNORE** | Drift score 0–20; no breaking changes AND no additive field gaps detected |
|
|
25
|
+
| **VERIFY** | Drift score 21–40; minor changes, manual review recommended |
|
|
26
|
+
| **UPDATE** | Drift score 25–70; breaking changes OR additive fields added to a covered endpoint (new response field the test doesn't assert) |
|
|
27
|
+
| **REGENERATE** | Drift score 71–100; endpoint removed, major restructuring, or test is fundamentally broken |
|
|
28
|
+
| **ADD** | New endpoint detected in diff that has no corresponding test yet |
|
|
29
|
+
|
|
30
|
+
Rules:
|
|
31
|
+
- Prefer UPDATE over REGENERATE when changes are localized (e.g., only the URL path changed).
|
|
32
|
+
- Prefer IGNORE over VERIFY when all changed files are unrelated to the test's endpoint.
|
|
33
|
+
- Always use ADD for new endpoints when the action is scoped to new test creation.
|
|
34
|
+
- **Additive changes (new response fields) on a covered endpoint always trigger UPDATE** — even if existing assertions still pass. The test needs a new assertion for the added field.`;
|
|
35
|
+
}
|
|
36
|
+
export function buildBreakingChangePatterns() {
|
|
37
|
+
return `## Breaking Change Patterns to Detect
|
|
38
|
+
|
|
39
|
+
Scan the diff lines for these high-signal patterns:
|
|
40
|
+
|
|
41
|
+
### Endpoint-level breaking changes
|
|
42
|
+
- \`- @app.route("/old-path")\` / \`+ @app.route("/new-path")\` — renamed endpoint
|
|
43
|
+
- \`- router.get("/old")\` / \`+ router.get("/new")\` — renamed route
|
|
44
|
+
- \`- @GetMapping("/old")\` / \`+ @GetMapping("/new")\` — Spring rename
|
|
45
|
+
- Lines removing a route decorator entirely (endpoint removed)
|
|
46
|
+
|
|
47
|
+
### Request/response shape changes
|
|
48
|
+
- Field type changes: \`- field: int\` → \`+ field: string\`
|
|
49
|
+
- Required field added: \`+ required: [..., "newField"]\`
|
|
50
|
+
- Response field removed: \`- "responseField":\`
|
|
51
|
+
- Enum value changes: \`- status: "active"\` → \`+ status: "enabled"\`
|
|
52
|
+
|
|
53
|
+
### Auth changes
|
|
54
|
+
- \`+ @require_auth\`, \`+ @login_required\`, \`+ middleware(authMiddleware)\`
|
|
55
|
+
- \`- @require_auth\` (auth removed)
|
|
56
|
+
- Token type changed: Bearer → Cookie
|
|
57
|
+
|
|
58
|
+
### Status code changes
|
|
59
|
+
- \`- return 200\` → \`+ return 201\`
|
|
60
|
+
- \`- status_code=200\` → \`+ status_code=204\`
|
|
61
|
+
- \`- res.status(201)\` → \`+ res.status(200)\`
|
|
62
|
+
|
|
63
|
+
### Additive response field changes (non-breaking but coverage gap)
|
|
64
|
+
These do NOT break existing assertions but leave the new field untested. Always flag as UPDATE for covered endpoints.
|
|
65
|
+
- \`+ "newField": queryset.filter(...).count()\` added inside a \`Response({...})\` or \`res.json({...})\`
|
|
66
|
+
- \`+ newField = serializers.XXXField()\` added to a serializer used by a tested endpoint
|
|
67
|
+
- \`+ "newField":\` added to a response body dict returned by the endpoint
|
|
68
|
+
- New key added inside an existing dict/object returned by the endpoint`;
|
|
69
|
+
}
|
|
70
|
+
export function buildTestAssessmentGuidelines() {
|
|
71
|
+
return `## Per-Test Assessment (4 Steps)
|
|
72
|
+
|
|
73
|
+
For each existing test file, follow these steps:
|
|
74
|
+
|
|
75
|
+
### Step 1: Check endpoint existence
|
|
76
|
+
Does the endpoint the test targets still exist in the codebase?
|
|
77
|
+
- If the endpoint path/method is no longer present → score 80+, action: REGENERATE
|
|
78
|
+
- If the endpoint was renamed → score 50–70, action: UPDATE (path substitution)
|
|
79
|
+
|
|
80
|
+
### Step 2: Check request/response shape (breaking changes)
|
|
81
|
+
Has the request body or response structure changed in a way that breaks the test?
|
|
82
|
+
- Compare test's expected fields against current schema/model definitions
|
|
83
|
+
- Type changes (string→int, int→string) → score 60+, action: UPDATE or REGENERATE
|
|
84
|
+
- New required fields the test doesn't send → score 50+, action: UPDATE
|
|
85
|
+
- Response fields the test asserts on have been removed → score 50+, action: UPDATE
|
|
86
|
+
|
|
87
|
+
### Step 2b: Check additive response field changes (coverage gaps)
|
|
88
|
+
**Even if existing assertions still pass**, does the diff add a new field to the response of an endpoint this test already covers?
|
|
89
|
+
- Look at the diff for lines like \`+ "newField":\` or \`+ newField =\` inside a view/serializer this test hits
|
|
90
|
+
- If YES → score 30, action: UPDATE — add an assertion for the new field (e.g. \`assert "newField" in response_body\` or \`assert response_body["newField"] >= 0\`)
|
|
91
|
+
- This applies even when the test only checks status codes — the test should be extended to cover the new field
|
|
92
|
+
- **Do NOT score IGNORE if a new response field was added to a covered endpoint**
|
|
93
|
+
|
|
94
|
+
### Step 3: Check auth changes
|
|
95
|
+
Has the authentication mechanism for this endpoint changed?
|
|
96
|
+
- Auth added where none existed → score 40+, action: UPDATE
|
|
97
|
+
- Auth method changed (bearer→cookie) → score 50+, action: UPDATE
|
|
98
|
+
- Auth removed → score 30+, action: VERIFY or UPDATE
|
|
99
|
+
|
|
100
|
+
### Step 4: Assign score and action
|
|
101
|
+
Based on the above, assign a final drift score 0–100 and choose the action (IGNORE / VERIFY / UPDATE / REGENERATE).
|
|
102
|
+
Provide a 1-2 sentence rationale.
|
|
103
|
+
- If Step 2b flagged an additive field → score must be ≥ 30 and action must be UPDATE, even if Steps 2/3 found no breaking changes.`;
|
|
104
|
+
}
|
|
105
|
+
export function buildAddRecommendationGuidelines() {
|
|
106
|
+
return `## ADD Recommendations for New Endpoints
|
|
107
|
+
|
|
108
|
+
For each new endpoint detected in the diff (not yet covered by any existing test):
|
|
109
|
+
|
|
110
|
+
### Test type priority by HTTP method
|
|
111
|
+
| Method | Recommended test types |
|
|
112
|
+
|--------|----------------------|
|
|
113
|
+
| POST / PUT / PATCH | integration, contract |
|
|
114
|
+
| GET | contract, smoke |
|
|
115
|
+
| DELETE | integration, smoke |
|
|
116
|
+
|
|
117
|
+
### ADD recommendation format
|
|
118
|
+
For each new endpoint, include:
|
|
119
|
+
1. The endpoint path and method
|
|
120
|
+
2. The recommended test types (from the table above)
|
|
121
|
+
3. The Skyramp tool to call (e.g., \`skyramp_contract_test_generation\`, \`skyramp_integration_test_generation\`)
|
|
122
|
+
4. The \`endpointURL\` to use (combine base URL + path)
|
|
123
|
+
5. The language/framework to use (from workspace config or project metadata)`;
|
|
124
|
+
}
|
|
125
|
+
export function buildDriftOutputChecklist(existingTestCount, newEndpointCount) {
|
|
126
|
+
return `## Output Checklist
|
|
127
|
+
|
|
128
|
+
Complete ALL of the following before calling skyramp_actions:
|
|
129
|
+
|
|
130
|
+
### Existing tests (${existingTestCount} total)
|
|
131
|
+
For EACH existing test, output:
|
|
132
|
+
\`\`\`
|
|
133
|
+
Test: <testFile>
|
|
134
|
+
Drift Score: <0-100>
|
|
135
|
+
Action: <IGNORE | VERIFY | UPDATE | REGENERATE>
|
|
136
|
+
Rationale: <1-2 sentence explanation>
|
|
137
|
+
\`\`\`
|
|
138
|
+
|
|
139
|
+
${newEndpointCount > 0
|
|
140
|
+
? `### New endpoints (${newEndpointCount} detected)
|
|
141
|
+
For EACH new endpoint, output:
|
|
142
|
+
\`\`\`
|
|
143
|
+
Endpoint: <METHOD> <path>
|
|
144
|
+
Action: ADD
|
|
145
|
+
Test types: <contract | integration | smoke | ...>
|
|
146
|
+
Rationale: <1 sentence>
|
|
147
|
+
\`\`\``
|
|
148
|
+
: `### New endpoints
|
|
149
|
+
No new endpoints detected in this diff.`}
|
|
150
|
+
|
|
151
|
+
### Final step
|
|
152
|
+
After completing all assessments above, call \`skyramp_actions\` with the stateFile to execute the recommended changes.`;
|
|
153
|
+
}
|
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
function buildEnrichmentInstructions(p) {
|
|
2
2
|
const isDiffScope = p.analysisScope === "current_branch_diff";
|
|
3
|
+
const useHealthFlow = p.nextTool === "skyramp_analyze_test_health";
|
|
3
4
|
if (!isDiffScope) {
|
|
5
|
+
const nextStep = useHealthFlow
|
|
6
|
+
? `### Step 3: Identify tests at risk of drift
|
|
7
|
+
Call \`skyramp_analyze_test_health\` with \`stateFile: "${p.sessionId}"\``
|
|
8
|
+
: `### Step 3: Call recommend tests
|
|
9
|
+
Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
|
|
4
10
|
return `## Your Task — Enrich & Recommend (full repo)
|
|
5
11
|
|
|
6
12
|
### Step 1: Read key files
|
|
@@ -12,8 +18,7 @@ to understand the tech stack, endpoint shapes, auth mechanisms, and request/resp
|
|
|
12
18
|
Map how endpoints relate to each other — which POST creates resources consumed by other endpoints?
|
|
13
19
|
**Resolve nested/sub-router paths** from the Router Mounting section above.
|
|
14
20
|
|
|
15
|
-
|
|
16
|
-
Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
|
|
21
|
+
${nextStep}`;
|
|
17
22
|
}
|
|
18
23
|
const changedFiles = p.parsedDiff?.changedFiles.join(", ") ?? "";
|
|
19
24
|
const hasApiEndpoints = p.parsedDiff && (p.parsedDiff.newEndpoints.length > 0 || p.parsedDiff.modifiedEndpoints.length > 0);
|
|
@@ -29,14 +34,13 @@ Mounting context.`
|
|
|
29
34
|
UI-only PR — read changed components to find API calls (fetch, axios, hooks).`
|
|
30
35
|
: `### Step 2: Identify affected endpoints
|
|
31
36
|
No API route changes detected — read changed files to identify affected endpoints.`;
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
${changedFiles}
|
|
36
|
-
|
|
37
|
-
${step2}
|
|
37
|
+
const step3Content = useHealthFlow
|
|
38
|
+
? `### Step 3: Identify tests at risk of drift
|
|
39
|
+
Assess which existing tests may be broken by the changes in this diff.
|
|
38
40
|
|
|
39
|
-
### Step
|
|
41
|
+
### Step 4: Call analyze test health
|
|
42
|
+
Call \`skyramp_analyze_test_health\` with \`stateFile: "${p.sessionId}"\``
|
|
43
|
+
: `### Step 3: Draft integration scenarios
|
|
40
44
|
Draft multi-step scenarios simulating realistic user workflows:
|
|
41
45
|
- **Cross-resource data flow**: Foreign key relationships, parent→child creation, verification
|
|
42
46
|
- **Search/filter verification**: Create data, search, verify results
|
|
@@ -47,6 +51,14 @@ Draft multi-step scenarios simulating realistic user workflows:
|
|
|
47
51
|
|
|
48
52
|
### Step 4: Call recommend tests
|
|
49
53
|
Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
|
|
54
|
+
return `## Your Task — Enrich & Recommend (PR-scoped)
|
|
55
|
+
|
|
56
|
+
### Step 1: Read the changed files
|
|
57
|
+
${changedFiles}
|
|
58
|
+
|
|
59
|
+
${step2}
|
|
60
|
+
|
|
61
|
+
${step3Content}`;
|
|
50
62
|
}
|
|
51
63
|
export function buildAnalysisOutputText(p) {
|
|
52
64
|
const isDiffScope = p.analysisScope === "current_branch_diff";
|
|
@@ -10,11 +10,18 @@ export function buildRecommendationPrompt(analysis, analysisScope = "full_repo",
|
|
|
10
10
|
const isDiffScope = analysisScope === "current_branch_diff";
|
|
11
11
|
const diffContext = analysis.branchDiffContext;
|
|
12
12
|
const openApiSpec = analysis.artifacts?.openApiSpecs?.[0];
|
|
13
|
+
// ── Filter out bot-generated test files from changedFiles ──
|
|
14
|
+
// Prevents bot-committed test files from being treated as application changes
|
|
15
|
+
// on subsequent testbot runs on the same PR.
|
|
16
|
+
const SKYRAMP_TEST_FILE_PATTERN = /(?:_test|_smoke|_contract|_fuzz|_integration|_load|_e2e|_ui)\.[^/]+$|scenario_[^/]+\.json$/;
|
|
17
|
+
const filteredChangedFiles = diffContext
|
|
18
|
+
? diffContext.changedFiles.filter(f => !SKYRAMP_TEST_FILE_PATTERN.test(f))
|
|
19
|
+
: [];
|
|
13
20
|
// ── Frontend / UI change detection ──
|
|
14
21
|
const FRONTEND_FILE_PATTERN = /\.(tsx?|jsx?|vue|svelte|css|scss|less|html)$/;
|
|
15
22
|
const API_DIR_PATTERN = /\/(api|routes?|controllers?|routers?|handlers?|endpoints?|server)\//;
|
|
16
23
|
const hasFrontendChanges = isDiffScope && diffContext
|
|
17
|
-
?
|
|
24
|
+
? filteredChangedFiles.some(f => FRONTEND_FILE_PATTERN.test(f) &&
|
|
18
25
|
!API_DIR_PATTERN.test(f) &&
|
|
19
26
|
/\/(components?|pages?|views?|layouts?|app|src\/app|frontend|client|public|styles?)\//i.test(f))
|
|
20
27
|
: false;
|
|
@@ -62,7 +69,7 @@ ${endpointLines}
|
|
|
62
69
|
diffSection = `
|
|
63
70
|
## Branch Diff Context
|
|
64
71
|
Branch: \`${diffContext.currentBranch}\` → base: \`${diffContext.baseBranch}\`
|
|
65
|
-
Changed files: ${
|
|
72
|
+
Changed files: ${filteredChangedFiles.join(", ")}
|
|
66
73
|
New endpoints:
|
|
67
74
|
${fmtEps(diffContext.newEndpoints, (m) => `${m.sourceFile}, ${m.interactionCount} interactions`)}
|
|
68
75
|
Modified endpoints:
|
|
@@ -3,115 +3,116 @@ import { z } from "zod";
|
|
|
3
3
|
import { logger } from "../../utils/logger.js";
|
|
4
4
|
import { AnalyticsService } from "../../services/AnalyticsService.js";
|
|
5
5
|
import { MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS } from "../test-recommendation/recommendationSections.js";
|
|
6
|
-
function getTestbotPrompt(prTitle, prDescription, diffFile, testDirectory, summaryOutputFile, repositoryPath, baseBranch, maxRecommendations = MAX_RECOMMENDATIONS, maxGenerate = MAX_TESTS_TO_GENERATE) {
|
|
6
|
+
function getTestbotPrompt(prTitle, prDescription, diffFile, testDirectory, summaryOutputFile, repositoryPath, baseBranch, maxRecommendations = MAX_RECOMMENDATIONS, maxGenerate = MAX_TESTS_TO_GENERATE, prNumber) {
|
|
7
7
|
return `<TITLE>${prTitle}</TITLE>
|
|
8
8
|
<DESCRIPTION>${prDescription}</DESCRIPTION>
|
|
9
9
|
<CODE CHANGES>${diffFile}</CODE CHANGES>
|
|
10
10
|
<TEST DIRECTORY>${testDirectory}</TEST DIRECTORY>
|
|
11
11
|
<REPOSITORY PATH>${repositoryPath}</REPOSITORY PATH>
|
|
12
12
|
|
|
13
|
-
Use the Skyramp MCP server tools
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
13
|
+
Use the Skyramp MCP server tools. Follow the steps below in order.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Step 1: Analyze
|
|
18
|
+
|
|
19
|
+
Read the diff at \`${diffFile}\`.
|
|
20
|
+
If all changed files are non-application (CI/CD, docs, lock files, config only) → skip to Step 4 (Submit Report) with empty arrays.
|
|
21
|
+
|
|
22
|
+
Otherwise:
|
|
23
|
+
1. Call \`skyramp_analyze_changes\` with \`repositoryPath\`: "${repositoryPath}", \`scope\`: "branch_diff", \`topN\`: ${maxRecommendations}${prNumber ? `, \`prNumber\`: ${prNumber}` : ""} — discovers existing Skyramp tests, scans endpoints changed in the diff, loads workspace config, and returns ${maxRecommendations} ranked ADD recommendations.${prNumber ? " Uses PR comment history to avoid re-recommending already-generated tests." : ""}
|
|
24
|
+
2. Call \`skyramp_analyze_test_health\` with the \`stateFile\` from step 1 (skip if zero existing tests found) — scores each existing test for drift against the diff and assigns UPDATE / REGENERATE / VERIFY / ADD actions.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Step 2: Decide — one action per affected test / endpoint
|
|
29
|
+
|
|
30
|
+
Using the diff, the recommendations, and the health assessment, assign exactly one action to each item:
|
|
31
|
+
|
|
32
|
+
### For each **existing Skyramp test**:
|
|
33
|
+
- **UPDATE** — the diff touches the endpoint this test covers AND adds/changes fields the test should assert (e.g. new response field, changed status code, renamed path). The test still runs but has a coverage gap or will break.
|
|
34
|
+
- **REGENERATE** — the endpoint was substantially restructured or the test is fundamentally broken by the diff.
|
|
35
|
+
- **VERIFY** — the diff touches related code but the test is unaffected; no action needed.
|
|
36
|
+
- **DELETE** — the endpoint the test covers was removed entirely.
|
|
37
|
+
- **ADD** — existing tests for this endpoint do not capture a new scenario introduced by the diff (e.g. a new flow, a new field combination). A net-new test is needed alongside the existing ones.
|
|
38
|
+
|
|
39
|
+
### For each **endpoint whose route definition is new in the diff** (no existing Skyramp test):
|
|
40
|
+
- **ADD** — the diff introduced this route; generate a new test.
|
|
41
|
+
- **VERIFY** — the endpoint existed before this diff (only a model/field change touched it); log as a coverage gap but do not generate a test.
|
|
42
|
+
|
|
43
|
+
### Decision rules (apply in order):
|
|
44
|
+
1. If the diff adds/removes/renames a field in a response this test asserts → **UPDATE** (not ADD).
|
|
45
|
+
2. If the diff adds a **brand-new route definition** (e.g. a new \`@router.get\`, \`@app.route\`, \`router.get()\` line) → **ADD**.
|
|
46
|
+
3. If an existing test covers the endpoint but doesn't test the specific new scenario (e.g. archived=true flow) → **ADD** (alongside the existing test).
|
|
47
|
+
4. If the test is unrelated to the diff → **VERIFY** (no action).
|
|
48
|
+
5. Only use **ADD** for endpoints whose route was introduced in this diff. An endpoint that existed before but now lacks a test is a pre-existing coverage gap — log it in \`additionalRecommendations\`, do NOT generate a test for it.
|
|
49
|
+
6. Do NOT add a new test when an UPDATE to an existing test is the right fix.
|
|
50
|
+
|
|
51
|
+
Output your decision table:
|
|
52
|
+
\`\`\`
|
|
53
|
+
Test/Endpoint | Action | Reason
|
|
54
|
+
<file or METHOD /path> | <ACTION> | <1 sentence>
|
|
55
|
+
\`\`\`
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Step 3: Act
|
|
60
|
+
|
|
61
|
+
Execute the actions from Step 2. Limit total generated/updated tests to ${maxGenerate}.
|
|
62
|
+
|
|
63
|
+
### UPDATE
|
|
64
|
+
Edit the existing test file directly:
|
|
65
|
+
- Add missing assertions for new response fields (e.g. \`assert "archived" in resp\` or \`assert resp["archived"] >= 0\`).
|
|
66
|
+
- Fix path/method changes in the test.
|
|
67
|
+
- Do not regenerate — only apply the minimal change needed.
|
|
68
|
+
|
|
69
|
+
### REGENERATE
|
|
70
|
+
Call the appropriate generation tool to replace the existing test from scratch.
|
|
71
|
+
Use the same filename so it overwrites the old file.
|
|
72
|
+
|
|
73
|
+
### ADD
|
|
74
|
+
Generate a net-new test. Use a unique descriptive filename to avoid overwriting existing files.
|
|
75
|
+
|
|
76
|
+
**How to generate each type (for ADD and REGENERATE):**
|
|
77
|
+
- **Integration**: call \`skyramp_scenario_test_generation\` per step (sequentially), then \`skyramp_integration_test_generation\` with the scenario file.
|
|
78
|
+
Scenario JSON goes in the same \`outputDir\` (e.g. \`tests/scenario_<name>.json\`), not \`.skyramp/\`.
|
|
79
|
+
- **Contract**: call \`skyramp_contract_test_generation\` with \`endpointURL\`, \`method\`, and \`requestData\` for POST/PUT/PATCH.
|
|
80
|
+
Pass \`apiSchema\` if an OpenAPI spec exists.
|
|
81
|
+
- **Fuzz**: call \`skyramp_fuzz_test_generation\` with \`endpointURL\`, \`method\`, \`requestData\`.
|
|
82
|
+
- **E2E/UI**: only if relevant Playwright traces exist in \`${testDirectory}\`, repo root, or \`.skyramp/\`.
|
|
83
|
+
Without traces, move to \`additionalRecommendations\` with scenario steps and trace recording instructions.
|
|
84
|
+
- Skip smoke tests entirely.
|
|
85
|
+
|
|
86
|
+
**Scenario quality:** Verify preconditions before each step (e.g. create before update).
|
|
87
|
+
|
|
88
|
+
**After generation, fix chaining only:**
|
|
89
|
+
- Path params like \`id = 'id'\` → \`skyramp.get_response_value(prev_response, "id")\`
|
|
90
|
+
- Hardcoded IDs in request bodies → dynamic values from prior response
|
|
91
|
+
- Change ONLY chaining values. Preserve everything else exactly as generated.
|
|
92
|
+
|
|
93
|
+
After all actions, execute the changed/generated test files and record pass/fail.
|
|
94
|
+
|
|
95
|
+
### VERIFY / DELETE
|
|
96
|
+
- VERIFY: no file changes. Note in \`testMaintenance\`.
|
|
97
|
+
- DELETE: remove the test file.
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Step 4: Submit Report
|
|
102
|
+
|
|
103
|
+
Call \`skyramp_submit_report\` with \`summaryOutputFile\`: "${summaryOutputFile}".
|
|
101
104
|
|
|
102
105
|
\`commitMessage\`: under 72 chars, e.g. "add integration tests for /products and /orders"
|
|
103
106
|
|
|
104
|
-
**newTestsCreated** —
|
|
105
|
-
\`testType\`, \`endpoint\`, \`fileName\`, \`description\`, \`scenarioFile
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
Every test file in the commit should appear here. If you over-generated, delete extras first.
|
|
109
|
-
If no tests were generated (e.g. frontend-only PR without traces), pass an empty array.
|
|
107
|
+
**newTestsCreated** — every file generated or updated (at most ${maxGenerate}):
|
|
108
|
+
\`testType\`, \`endpoint\`, \`fileName\`, \`description\`, \`scenarioFile\`
|
|
109
|
+
If action was UPDATE, set \`testType\` to \`"<type> (updated)"\`.
|
|
110
|
+
If no tests were generated or updated, pass an empty array.
|
|
110
111
|
|
|
111
|
-
**additionalRecommendations** —
|
|
112
|
-
\`testType\`, \`scenarioName\`, \`priority\`, \`description\`, \`steps
|
|
112
|
+
**additionalRecommendations** — items you could not act on (quota exceeded, no traces, etc.):
|
|
113
|
+
\`testType\`, \`scenarioName\`, \`priority\`, \`description\`, \`steps\`
|
|
113
114
|
|
|
114
|
-
**businessCaseAnalysis** — based only on
|
|
115
|
+
**businessCaseAnalysis** — 2-3 sentences based only on the diff and tool outputs.`;
|
|
115
116
|
}
|
|
116
117
|
export function registerTestbotPrompt(server) {
|
|
117
118
|
logger.info("Registering testbot prompt");
|
|
@@ -144,9 +145,13 @@ export function registerTestbotPrompt(server) {
|
|
|
144
145
|
.number()
|
|
145
146
|
.default(MAX_TESTS_TO_GENERATE)
|
|
146
147
|
.describe(`Maximum number of tests to generate.`),
|
|
148
|
+
prNumber: z
|
|
149
|
+
.number()
|
|
150
|
+
.optional()
|
|
151
|
+
.describe("GitHub PR number. Passed to skyramp_analyze_changes to fetch previous TestBot comments for recommendation consistency across commits."),
|
|
147
152
|
},
|
|
148
153
|
}, (args) => {
|
|
149
|
-
const prompt = getTestbotPrompt(args.prTitle, args.prDescription, args.diffFile, args.testDirectory, args.summaryOutputFile, args.repositoryPath, args.baseBranch, args.maxRecommendations, args.maxGenerate);
|
|
154
|
+
const prompt = getTestbotPrompt(args.prTitle, args.prDescription, args.diffFile, args.testDirectory, args.summaryOutputFile, args.repositoryPath, args.baseBranch, args.maxRecommendations, args.maxGenerate, args.prNumber);
|
|
150
155
|
AnalyticsService.pushMCPToolEvent("skyramp_testbot_prompt", undefined, {}).catch(() => { });
|
|
151
156
|
return {
|
|
152
157
|
messages: [
|
|
@@ -179,7 +184,8 @@ export function registerTestbotResource(server) {
|
|
|
179
184
|
const param = (name, fallback) => uri.searchParams.get(name) ?? fallback;
|
|
180
185
|
const maxRec = parseInt(uri.searchParams.get("maxRecommendations") || "", 10);
|
|
181
186
|
const maxGen = parseInt(uri.searchParams.get("maxGenerate") || "", 10);
|
|
182
|
-
const
|
|
187
|
+
const prNum = parseInt(uri.searchParams.get("prNumber") || "", 10);
|
|
188
|
+
const prompt = getTestbotPrompt(param("prTitle", ""), param("prDescription", ""), param("diffFile", ".skyramp_git_diff"), param("testDirectory", "tests"), param("summaryOutputFile", ""), param("repositoryPath", "."), uri.searchParams.get("baseBranch") || undefined, isNaN(maxRec) ? MAX_RECOMMENDATIONS : maxRec, isNaN(maxGen) ? MAX_TESTS_TO_GENERATE : maxGen, isNaN(prNum) ? undefined : prNum);
|
|
183
189
|
AnalyticsService.pushMCPToolEvent("skyramp_testbot_prompt", undefined, {}).catch(() => { });
|
|
184
190
|
return {
|
|
185
191
|
contents: [
|
|
@@ -129,9 +129,13 @@ ${JSON.stringify(traceRequest, null, 2)}
|
|
|
129
129
|
.some(v => v.includes("application/json"));
|
|
130
130
|
const responseBody = params.responseBody || (isJsonResponse ? "{}" : "");
|
|
131
131
|
const authHeaderName = params.authHeader || "Authorization";
|
|
132
|
+
let authValue = params.authToken ?? "";
|
|
133
|
+
if (!authValue && authHeaderName === "Authorization") {
|
|
134
|
+
authValue = "Bearer SKYRAMP_PLACEHOLDER_TOKEN";
|
|
135
|
+
}
|
|
132
136
|
const requestHeaders = {
|
|
133
137
|
"Content-Type": ["application/json"],
|
|
134
|
-
[authHeaderName]: [
|
|
138
|
+
[authHeaderName]: [authValue],
|
|
135
139
|
};
|
|
136
140
|
return {
|
|
137
141
|
Source: "192.168.65.1:39998",
|