@skyramp/mcp 0.0.62 → 0.0.63-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. package/build/index.js +18 -26
  2. package/build/prompts/test-maintenance/drift-analysis-prompt.js +59 -0
  3. package/build/prompts/test-maintenance/driftAnalysisSections.js +153 -0
  4. package/build/prompts/test-recommendation/analysisOutputPrompt.js +21 -9
  5. package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +34 -38
  6. package/build/prompts/test-recommendation/test-recommendation-prompt.js +56 -9
  7. package/build/prompts/testbot/testbot-prompts.js +113 -100
  8. package/build/services/DriftAnalysisService.js +1 -1
  9. package/build/services/ScenarioGenerationService.js +5 -1
  10. package/build/services/TestExecutionService.js +2 -24
  11. package/build/services/TestExecutionService.test.js +167 -0
  12. package/build/services/containerEnv.js +35 -0
  13. package/build/tools/generate-tests/generateScenarioRestTool.js +7 -1
  14. package/build/tools/submitReportTool.js +6 -6
  15. package/build/tools/test-management/actionsTool.js +396 -0
  16. package/build/tools/test-management/analyzeChangesTool.js +750 -0
  17. package/build/tools/test-management/analyzeTestHealthTool.js +132 -0
  18. package/build/tools/test-management/executeTestsTool.js +198 -0
  19. package/build/tools/test-management/index.js +5 -0
  20. package/build/tools/test-management/stateCleanupTool.js +163 -0
  21. package/build/tools/test-recommendation/recommendTestsTool.js +1 -1
  22. package/build/utils/analyze-openapi.js +2 -2
  23. package/build/utils/pr-comment-parser.js +157 -36
  24. package/build/utils/pr-comment-parser.test.js +427 -0
  25. package/package.json +1 -1
  26. package/build/tools/initTestbotTool.js +0 -187
  27. package/build/tools/initTestbotTool.test.js +0 -194
  28. package/build/tools/test-recommendation/analyzeRepositoryTool.js +0 -505
package/build/index.js CHANGED
@@ -18,20 +18,12 @@ import { registerE2ETestTool } from "./tools/generate-tests/generateE2ERestTool.
18
18
  import { registerLoginTool } from "./tools/auth/loginTool.js";
19
19
  import { registerLogoutTool } from "./tools/auth/logoutTool.js";
20
20
  import { registerFixErrorTool } from "./tools/fixErrorTool.js";
21
- import { registerAnalyzeRepositoryTool } from "./tools/test-recommendation/analyzeRepositoryTool.js";
22
- import { registerRecommendTestsTool } from "./tools/test-recommendation/recommendTestsTool.js";
23
21
  import { registerRecommendTestsPrompt } from "./prompts/test-recommendation/registerRecommendTestsPrompt.js";
24
22
  import { registerModularizationTool } from "./tools/code-refactor/modularizationTool.js";
25
23
  import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
26
24
  import { registerScenarioTestTool } from "./tools/generate-tests/generateScenarioRestTool.js";
27
- import { registerDiscoverTestsTool } from "./tools/test-maintenance/discoverTestsTool.js";
28
- import { registerAnalyzeTestDriftTool } from "./tools/test-maintenance/analyzeTestDriftTool.js";
29
- import { registerExecuteBatchTestsTool } from "./tools/test-maintenance/executeBatchTestsTool.js";
30
- import { registerCalculateHealthScoresTool } from "./tools/test-maintenance/calculateHealthScoresTool.js";
31
- import { registerActionsTool } from "./tools/test-maintenance/actionsTool.js";
32
- import { registerStateCleanupTool } from "./tools/test-maintenance/stateCleanupTool.js";
25
+ import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerExecuteTestsTool, registerActionsTool, registerStateCleanupTool, } from "./tools/test-management/index.js";
33
26
  import { registerTestbotPrompt, registerTestbotResource, } from "./prompts/testbot/testbot-prompts.js";
34
- import { registerInitTestbotTool } from "./tools/initTestbotTool.js";
35
27
  import { registerSubmitReportTool } from "./tools/submitReportTool.js";
36
28
  import { registerInitializeWorkspaceTool } from "./tools/workspace/initializeWorkspaceTool.js";
37
29
  import { registerAnalysisResources } from "./resources/analysisResources.js";
@@ -58,15 +50,20 @@ const server = new McpServer({
58
50
  - NEVER show CLI commands. ALWAYS use the MCP tools provided.
59
51
  - For UI and E2E tests, use the trace collection start/stop tools.
60
52
 
61
- ## Test Recommendation Flow (2-step)
62
- 1. Call \`skyramp_analyze_repository\` returns a \`sessionId\`.
63
- The analysis scans source code (code-first) to build enriched endpoints
64
- (Path → Method → Interaction with request/response bodies, headers, cookies)
65
- and draft user-flow scenarios for integration/E2E tests.
66
- 2. Call \`skyramp_recommend_tests\` with \`sessionId\` → the LLM reasons over the
67
- enriched data to recommend tests, referencing specific interactions and scenarios.
53
+ ## Test Management Flow
54
+ Use \`skyramp_analyze_changes\` as the single entry point for both test recommendations and test health analysis.
68
55
 
69
- After analysis, you can also inspect data via MCP Resources:
56
+ ### Recommendations (2-step)
57
+ 1. Call \`skyramp_analyze_changes\` with \`repositoryPath\` and \`scope\` → scans endpoints, computes branch diff, discovers existing tests → returns a \`stateFile\` **and** inline ranked test recommendations.
58
+ 2. (Optional) Use the \`skyramp_recommend_tests\` prompt with \`stateFile\` in Cursor/IDE chat for a focused recommendation view.
59
+
60
+ ### Health Analysis (4-step)
61
+ 1. Call \`skyramp_analyze_changes\` with \`repositoryPath\` and \`scope\` → returns a \`stateFile\`.
62
+ 2. Call \`skyramp_analyze_test_health\` with \`stateFile\` → runs drift analysis + health scoring + LLM semantic assessment.
63
+ 3. (Optional) Call \`skyramp_execute_tests\` with \`stateFile\` → runs tests live to verify status.
64
+ 4. Call \`skyramp_actions\` with \`stateFile\` → executes UPDATE/REGENERATE/ADD recommendations.
65
+
66
+ After \`skyramp_analyze_changes\`, inspect enriched data via MCP Resources (use the \`sessionId\` returned in the output):
70
67
  - \`skyramp://analysis/{sessionId}/summary\` — high-level overview
71
68
  - \`skyramp://analysis/{sessionId}/endpoints\` — compact endpoint listing
72
69
  - \`skyramp://analysis/{sessionId}/endpoints/{path}\` — full path detail
@@ -183,16 +180,12 @@ const codeQualityTools = [
183
180
  registerCodeReuseTool,
184
181
  ];
185
182
  codeQualityTools.forEach((registerTool) => registerTool(server));
186
- // Register test recommendation tools
187
- registerAnalyzeRepositoryTool(server);
188
- registerRecommendTestsTool(server);
189
183
  // Register analysis resources (MCP Resources for enriched data access)
190
184
  registerAnalysisResources(server);
191
- // Register test maintenance tools
192
- registerDiscoverTestsTool(server);
193
- registerAnalyzeTestDriftTool(server);
194
- registerExecuteBatchTestsTool(server);
195
- registerCalculateHealthScoresTool(server);
185
+ // Register unified test-management tools (replaces separate test-maintenance tools)
186
+ registerAnalyzeChangesTool(server);
187
+ registerAnalyzeTestHealthTool(server);
188
+ registerExecuteTestsTool(server);
196
189
  registerActionsTool(server);
197
190
  registerStateCleanupTool(server);
198
191
  // Register workspace management tools
@@ -206,7 +199,6 @@ const infrastructureTools = [
206
199
  registerTraceStopTool,
207
200
  ];
208
201
  if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
209
- infrastructureTools.push(registerInitTestbotTool);
210
202
  infrastructureTools.push(registerSubmitReportTool);
211
203
  logger.info("TestBot tools enabled via SKYRAMP_FEATURE_TESTBOT");
212
204
  }
package/build/prompts/test-maintenance/drift-analysis-prompt.js ADDED
@@ -0,0 +1,59 @@
1
+ import { buildDriftScoringGuide, buildActionDecisionMatrix, buildBreakingChangePatterns, buildTestAssessmentGuidelines, buildAddRecommendationGuidelines, buildDriftOutputChecklist, } from "./driftAnalysisSections.js";
2
+ export function buildDriftAnalysisPrompt(params) {
3
+ const { existingTests, parsedDiff, scannedEndpoints, repositoryPath, stateFile } = params;
4
+ // Detect new endpoints count from parsedDiff
5
+ let newEndpointCount = 0;
6
+ let diffSection = "";
7
+ if (parsedDiff) {
8
+ const lines = parsedDiff.split("\n");
9
+ const epMatches = parsedDiff.match(/(?:^|\n)\*\*(GET|POST|PUT|PATCH|DELETE)\s+[^\*]+\*\*/gm);
10
+ if (epMatches)
11
+ newEndpointCount = epMatches.length;
12
+ diffSection = `## Branch Diff
13
+ \`\`\`
14
+ ${lines.slice(0, 200).join("\n")}
15
+ \`\`\`
16
+ `;
17
+ }
18
+ const testListSection = existingTests.length > 0
19
+ ? `## Existing Test Files (${existingTests.length})
20
+ ${existingTests
21
+ .map((t) => {
22
+ const score = t.drift?.driftScore !== undefined ? ` [drift: ${t.drift.driftScore}]` : "";
23
+ return `- ${t.testFile} (${t.testType})${score}`;
24
+ })
25
+ .join("\n")}
26
+ `
27
+ : `## Existing Test Files
28
+ No existing Skyramp tests found in repository.
29
+ `;
30
+ const scannedSection = scannedEndpoints.length > 0
31
+ ? `## Scanned Endpoints (${scannedEndpoints.length})
32
+ ${scannedEndpoints.map((ep) => `- ${Array.isArray(ep.methods) ? ep.methods.join("|") : ep.method} ${ep.path}`).join("\n")}
33
+ `
34
+ : "";
35
+ return `# Test Health Analysis
36
+
37
+ **Repository**: \`${repositoryPath}\`
38
+ **Existing tests**: ${existingTests.length}
39
+ **New endpoints in diff**: ${newEndpointCount}
40
+
41
+ ${diffSection}
42
+ ${testListSection}
43
+ ${scannedSection}
44
+ ${buildDriftScoringGuide()}
45
+
46
+ ${buildActionDecisionMatrix()}
47
+
48
+ ${buildBreakingChangePatterns()}
49
+
50
+ ${buildTestAssessmentGuidelines()}
51
+
52
+ ${buildAddRecommendationGuidelines()}
53
+
54
+ ${buildDriftOutputChecklist(existingTests.length, newEndpointCount)}
55
+
56
+ After completing the assessment above, call \`skyramp_actions\` with \`stateFile: "${stateFile}"\`
57
+
58
+ **CRITICAL**: Do NOT create any .json or .md files. Only call skyramp_actions when done.`;
59
+ }
package/build/prompts/test-maintenance/driftAnalysisSections.js ADDED
@@ -0,0 +1,153 @@
1
+ /**
2
+ * Modular section builders for the Drift Analysis prompt,
3
+ * mirroring the recommendationSections.ts pattern.
4
+ */
5
+ export function buildDriftScoringGuide() {
6
+ return `## Drift Score Guide (0–100)
7
+
8
+ | Score | Label | Meaning |
9
+ |-------|-------|---------|
10
+ | 0–20 | IGNORE | No meaningful drift — test is still valid as-is |
11
+ | 21–40 | VERIFY | Minor changes detected — review but likely fine |
12
+ | 41–70 | UPDATE | Breaking changes detected — test needs edits |
13
+ | 71–100 | REGENERATE | Major structural changes — regenerate from scratch |
14
+
15
+ Assign each existing test a score based on how much the codebase has changed relative to what the test expects.`;
16
+ }
17
+ export function buildActionDecisionMatrix() {
18
+ return `## Action Decision Matrix
19
+
20
+ For each test, choose one of:
21
+
22
+ | Action | When to use |
23
+ |--------|------------|
24
+ | **IGNORE** | Drift score 0–20; no breaking changes AND no additive field gaps detected |
25
+ | **VERIFY** | Drift score 21–40; minor changes, manual review recommended |
26
+ | **UPDATE** | Drift score 25–70; breaking changes OR additive fields added to a covered endpoint (new response field the test doesn't assert) |
27
+ | **REGENERATE** | Drift score 71–100; endpoint removed, major restructuring, or test is fundamentally broken |
28
+ | **ADD** | New endpoint detected in diff that has no corresponding test yet |
29
+
30
+ Rules:
31
+ - Prefer UPDATE over REGENERATE when changes are localized (e.g., only the URL path changed).
32
+ - Prefer IGNORE over VERIFY when all changed files are unrelated to the test's endpoint.
33
+ - Always use ADD for new endpoints when the action is scoped to new test creation.
34
+ - **Additive changes (new response fields) on a covered endpoint always trigger UPDATE** — even if existing assertions still pass. The test needs a new assertion for the added field.`;
35
+ }
36
+ export function buildBreakingChangePatterns() {
37
+ return `## Breaking Change Patterns to Detect
38
+
39
+ Scan the diff lines for these high-signal patterns:
40
+
41
+ ### Endpoint-level breaking changes
42
+ - \`- @app.route("/old-path")\` / \`+ @app.route("/new-path")\` — renamed endpoint
43
+ - \`- router.get("/old")\` / \`+ router.get("/new")\` — renamed route
44
+ - \`- @GetMapping("/old")\` / \`+ @GetMapping("/new")\` — Spring rename
45
+ - Lines removing a route decorator entirely (endpoint removed)
46
+
47
+ ### Request/response shape changes
48
+ - Field type changes: \`- field: int\` → \`+ field: string\`
49
+ - Required field added: \`+ required: [..., "newField"]\`
50
+ - Response field removed: \`- "responseField":\`
51
+ - Enum value changes: \`- status: "active"\` → \`+ status: "enabled"\`
52
+
53
+ ### Auth changes
54
+ - \`+ @require_auth\`, \`+ @login_required\`, \`+ middleware(authMiddleware)\`
55
+ - \`- @require_auth\` (auth removed)
56
+ - Token type changed: Bearer → Cookie
57
+
58
+ ### Status code changes
59
+ - \`- return 200\` → \`+ return 201\`
60
+ - \`- status_code=200\` → \`+ status_code=204\`
61
+ - \`- res.status(201)\` → \`+ res.status(200)\`
62
+
63
+ ### Additive response field changes (non-breaking but coverage gap)
64
+ These do NOT break existing assertions but leave the new field untested. Always flag as UPDATE for covered endpoints.
65
+ - \`+ "newField": queryset.filter(...).count()\` added inside a \`Response({...})\` or \`res.json({...})\`
66
+ - \`+ newField = serializers.XXXField()\` added to a serializer used by a tested endpoint
67
+ - \`+ "newField":\` added to a response body dict returned by the endpoint
68
+ - New key added inside an existing dict/object returned by the endpoint`;
69
+ }
70
+ export function buildTestAssessmentGuidelines() {
71
+ return `## Per-Test Assessment (4 Steps)
72
+
73
+ For each existing test file, follow these steps:
74
+
75
+ ### Step 1: Check endpoint existence
76
+ Does the endpoint the test targets still exist in the codebase?
77
+ - If the endpoint path/method is no longer present → score 80+, action: REGENERATE
78
+ - If the endpoint was renamed → score 50–70, action: UPDATE (path substitution)
79
+
80
+ ### Step 2: Check request/response shape (breaking changes)
81
+ Has the request body or response structure changed in a way that breaks the test?
82
+ - Compare test's expected fields against current schema/model definitions
83
+ - Type changes (string→int, int→string) → score 60+, action: UPDATE or REGENERATE
84
+ - New required fields the test doesn't send → score 50+, action: UPDATE
85
+ - Response fields the test asserts on have been removed → score 50+, action: UPDATE
86
+
87
+ ### Step 2b: Check additive response field changes (coverage gaps)
88
+ **Even if existing assertions still pass**, does the diff add a new field to the response of an endpoint this test already covers?
89
+ - Look at the diff for lines like \`+ "newField":\` or \`+ newField =\` inside a view/serializer this test hits
90
+ - If YES → score 30, action: UPDATE — add an assertion for the new field (e.g. \`assert "newField" in response_body\` or \`assert response_body["newField"] >= 0\`)
91
+ - This applies even when the test only checks status codes — the test should be extended to cover the new field
92
+ - **Do NOT score IGNORE if a new response field was added to a covered endpoint**
93
+
94
+ ### Step 3: Check auth changes
95
+ Has the authentication mechanism for this endpoint changed?
96
+ - Auth added where none existed → score 40+, action: UPDATE
97
+ - Auth method changed (bearer→cookie) → score 50+, action: UPDATE
98
+ - Auth removed → score 30+, action: VERIFY or UPDATE
99
+
100
+ ### Step 4: Assign score and action
101
+ Based on the above, assign a final drift score 0–100 and choose the action (IGNORE / VERIFY / UPDATE / REGENERATE).
102
+ Provide a 1-2 sentence rationale.
103
+ - If Step 2b flagged an additive field → score must be ≥ 30 and action must be UPDATE, even if Steps 2/3 found no breaking changes.`;
104
+ }
105
+ export function buildAddRecommendationGuidelines() {
106
+ return `## ADD Recommendations for New Endpoints
107
+
108
+ For each new endpoint detected in the diff (not yet covered by any existing test):
109
+
110
+ ### Test type priority by HTTP method
111
+ | Method | Recommended test types |
112
+ |--------|----------------------|
113
+ | POST / PUT / PATCH | integration, contract |
114
+ | GET | contract, smoke |
115
+ | DELETE | integration, smoke |
116
+
117
+ ### ADD recommendation format
118
+ For each new endpoint, include:
119
+ 1. The endpoint path and method
120
+ 2. The recommended test types (from the table above)
121
+ 3. The Skyramp tool to call (e.g., \`skyramp_contract_test_generation\`, \`skyramp_integration_test_generation\`)
122
+ 4. The \`endpointURL\` to use (combine base URL + path)
123
+ 5. The language/framework to use (from workspace config or project metadata)`;
124
+ }
125
+ export function buildDriftOutputChecklist(existingTestCount, newEndpointCount) {
126
+ return `## Output Checklist
127
+
128
+ Complete ALL of the following before calling skyramp_actions:
129
+
130
+ ### Existing tests (${existingTestCount} total)
131
+ For EACH existing test, output:
132
+ \`\`\`
133
+ Test: <testFile>
134
+ Drift Score: <0-100>
135
+ Action: <IGNORE | VERIFY | UPDATE | REGENERATE>
136
+ Rationale: <1-2 sentence explanation>
137
+ \`\`\`
138
+
139
+ ${newEndpointCount > 0
140
+ ? `### New endpoints (${newEndpointCount} detected)
141
+ For EACH new endpoint, output:
142
+ \`\`\`
143
+ Endpoint: <METHOD> <path>
144
+ Action: ADD
145
+ Test types: <contract | integration | smoke | ...>
146
+ Rationale: <1 sentence>
147
+ \`\`\``
148
+ : `### New endpoints
149
+ No new endpoints detected in this diff.`}
150
+
151
+ ### Final step
152
+ After completing all assessments above, call \`skyramp_actions\` with the stateFile to execute the recommended changes.`;
153
+ }
package/build/prompts/test-recommendation/analysisOutputPrompt.js CHANGED
@@ -1,6 +1,12 @@
1
1
  function buildEnrichmentInstructions(p) {
2
2
  const isDiffScope = p.analysisScope === "current_branch_diff";
3
+ const useHealthFlow = p.nextTool === "skyramp_analyze_test_health";
3
4
  if (!isDiffScope) {
5
+ const nextStep = useHealthFlow
6
+ ? `### Step 3: Identify tests at risk of drift
7
+ Call \`skyramp_analyze_test_health\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\``
8
+ : `### Step 3: Call recommend tests
9
+ Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
4
10
  return `## Your Task — Enrich & Recommend (full repo)
5
11
 
6
12
  ### Step 1: Read key files
@@ -12,8 +18,7 @@ to understand the tech stack, endpoint shapes, auth mechanisms, and request/resp
12
18
  Map how endpoints relate to each other — which POST creates resources consumed by other endpoints?
13
19
  **Resolve nested/sub-router paths** from the Router Mounting section above.
14
20
 
15
- ### Step 3: Call recommend tests
16
- Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
21
+ ${nextStep}`;
17
22
  }
18
23
  const changedFiles = p.parsedDiff?.changedFiles.join(", ") ?? "";
19
24
  const hasApiEndpoints = p.parsedDiff && (p.parsedDiff.newEndpoints.length > 0 || p.parsedDiff.modifiedEndpoints.length > 0);
@@ -29,14 +34,13 @@ Mounting context.`
29
34
  UI-only PR — read changed components to find API calls (fetch, axios, hooks).`
30
35
  : `### Step 2: Identify affected endpoints
31
36
  No API route changes detected — read changed files to identify affected endpoints.`;
32
- return `## Your Task — Enrich & Recommend (PR-scoped)
33
-
34
- ### Step 1: Read the changed files
35
- ${changedFiles}
36
-
37
- ${step2}
37
+ const step3Content = useHealthFlow
38
+ ? `### Step 3: Identify tests at risk of drift
39
+ Assess which existing tests may be broken by the changes in this diff.
38
40
 
39
- ### Step 3: Draft integration scenarios
41
+ ### Step 4: Call analyze test health
42
+ Call \`skyramp_analyze_test_health\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\``
43
+ : `### Step 3: Draft integration scenarios
40
44
  Draft multi-step scenarios simulating realistic user workflows:
41
45
  - **Cross-resource data flow**: Foreign key relationships, parent→child creation, verification
42
46
  - **Search/filter verification**: Create data, search, verify results
@@ -47,6 +51,14 @@ Draft multi-step scenarios simulating realistic user workflows:
47
51
 
48
52
  ### Step 4: Call recommend tests
49
53
  Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
54
+ return `## Your Task — Enrich & Recommend (PR-scoped)
55
+
56
+ ### Step 1: Read the changed files
57
+ ${changedFiles}
58
+
59
+ ${step2}
60
+
61
+ ${step3Content}`;
50
62
  }
51
63
  export function buildAnalysisOutputText(p) {
52
64
  const isDiffScope = p.analysisScope === "current_branch_diff";
package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js CHANGED
@@ -1,62 +1,58 @@
1
1
  import { z } from "zod";
2
- import { StateManager, getSessionFilePath, hasSessionData, getSessionData, } from "../../utils/AnalysisStateManager.js";
2
+ import { StateManager, } from "../../utils/AnalysisStateManager.js";
3
3
  import { logger } from "../../utils/logger.js";
4
4
  import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
5
- import { getWorkspaceAuthHeader } from "../../utils/workspaceAuth.js";
6
5
  export function registerRecommendTestsPrompt(server) {
7
6
  server.registerPrompt("skyramp_recommend_tests", {
8
- description: "Generate test recommendations from enriched repository analysis. " +
9
- "Provide a sessionId from skyramp_analyze_repository.",
7
+ description: "Generate ranked test recommendations from a test-management analysis. " +
8
+ "Provide a stateFile path from skyramp_analyze_changes.",
10
9
  argsSchema: {
11
- sessionId: z
10
+ stateFile: z
12
11
  .string()
13
- .describe("Session ID from skyramp_analyze_repository"),
14
- scope: z
15
- .enum(["full_repo", "current_branch_diff"])
16
- .default("full_repo")
12
+ .describe("State file path returned by skyramp_analyze_changes"),
13
+ topN: z
14
+ .number()
15
+ .int()
16
+ .positive()
17
+ .default(10)
17
18
  .optional()
18
- .describe("Analysis scope (defaults to the scope used during analysis)"),
19
+ .describe("Maximum number of ranked recommendations to return (default: 10)"),
19
20
  },
20
21
  }, async (args) => {
21
- const sessionId = args.sessionId;
22
- if (!sessionId) {
23
- throw new Error("sessionId is required");
22
+ const stateFile = args.stateFile;
23
+ if (!stateFile) {
24
+ throw new Error("stateFile is required");
24
25
  }
25
- // Try process memory first, then fall back to state file
26
- let data = null;
27
- if (hasSessionData(sessionId)) {
28
- data = getSessionData(sessionId);
26
+ const mgr = StateManager.fromStatePath(stateFile);
27
+ if (!mgr.exists()) {
28
+ throw new Error(`State file "${stateFile}" not found. Run skyramp_analyze_changes first.`);
29
29
  }
30
- else {
31
- const registeredPath = getSessionFilePath(sessionId);
32
- const mgr = registeredPath
33
- ? StateManager.fromStatePath(registeredPath)
34
- : StateManager.fromSessionId(sessionId);
35
- if (!mgr.exists()) {
36
- throw new Error(`Analysis session "${sessionId}" not found. Run skyramp_analyze_repository first.`);
37
- }
38
- data = await mgr.readData();
30
+ const fullState = await mgr.readFullState();
31
+ const state = fullState ?? null;
32
+ if (!state?.repositoryAnalysis?.fullAnalysis) {
33
+ throw new Error(`State file "${stateFile}" has no analysis data. Re-run skyramp_analyze_changes.`);
39
34
  }
40
- if (!data?.analysis) {
41
- throw new Error(`Session "${sessionId}" has no analysis data.`);
42
- }
43
- const scope = args.scope || data.analysisScope || "full_repo";
44
- const effectiveTopN = scope === "current_branch_diff" ? 7 : 10;
45
- const workspaceAuthHeader = data.repositoryPath
46
- ? await getWorkspaceAuthHeader(data.repositoryPath)
47
- : undefined;
48
- const prompt = buildRecommendationPrompt(data.analysis, scope, effectiveTopN, data.prContext, workspaceAuthHeader);
35
+ const { fullAnalysis, sessionId, wsAuthHeader } = state.repositoryAnalysis;
36
+ const repositoryPath = fullState?.metadata?.repositoryPath ?? "";
37
+ const analysisScope = state.analysisScope === "branch_diff"
38
+ ? "current_branch_diff"
39
+ : "full_repo";
40
+ const effectiveTopN = args.topN;
41
+ const prompt = buildRecommendationPrompt(fullAnalysis, analysisScope, effectiveTopN, undefined, wsAuthHeader);
49
42
  logger.info("Serving recommendation prompt via MCP Prompt", {
50
- sessionId,
51
- scope,
43
+ stateFile,
44
+ analysisScope,
52
45
  });
46
+ const resourceLinks = sessionId
47
+ ? `\nAvailable MCP Resources:\n- skyramp://analysis/${sessionId}/summary\n- skyramp://analysis/${sessionId}/endpoints\n- skyramp://analysis/${sessionId}/scenarios\n- skyramp://analysis/${sessionId}/diff\n`
48
+ : "";
53
49
  return {
54
50
  messages: [
55
51
  {
56
52
  role: "user",
57
53
  content: {
58
54
  type: "text",
59
- text: `Session: ${sessionId}\nRepository: ${data.repositoryPath}\nScope: ${scope}\n\nAvailable MCP Resources:\n- skyramp://analysis/${sessionId}/summary\n- skyramp://analysis/${sessionId}/endpoints\n- skyramp://analysis/${sessionId}/scenarios\n- skyramp://analysis/${sessionId}/diff\n\n${prompt}`,
55
+ text: `State file: ${stateFile}\nRepository: ${repositoryPath}\nScope: ${analysisScope}\n${resourceLinks}\n${prompt}`,
60
56
  },
61
57
  },
62
58
  ],
package/build/prompts/test-recommendation/test-recommendation-prompt.js CHANGED
@@ -1,4 +1,4 @@
1
- import { buildPrioritizationDimensions, buildTestExamples, buildTestPatternGuidelines, buildTestQualityCriteria, buildGenerationRules, buildToolWorkflows, buildCoverageChecklist, } from "./recommendationSections.js";
1
+ import { buildPrioritizationDimensions, buildTestExamples, buildTestPatternGuidelines, buildTestQualityCriteria, buildGenerationRules, buildToolWorkflows, buildCoverageChecklist, MAX_TESTS_TO_GENERATE, } from "./recommendationSections.js";
2
2
  function formatTestLocations(locs) {
3
3
  const entries = Object.entries(locs || {});
4
4
  if (entries.length === 0)
@@ -10,11 +10,18 @@ export function buildRecommendationPrompt(analysis, analysisScope = "full_repo",
10
10
  const isDiffScope = analysisScope === "current_branch_diff";
11
11
  const diffContext = analysis.branchDiffContext;
12
12
  const openApiSpec = analysis.artifacts?.openApiSpecs?.[0];
13
+ // ── Filter out bot-generated test files from changedFiles ──
14
+ // Prevents bot-committed test files from being treated as application changes
15
+ // on subsequent testbot runs on the same PR.
16
+ const SKYRAMP_TEST_FILE_PATTERN = /(?:_test|_smoke|_contract|_fuzz|_integration|_load|_e2e|_ui)\.[^/]+$|scenario_[^/]+\.json$/;
17
+ const filteredChangedFiles = diffContext
18
+ ? diffContext.changedFiles.filter(f => !SKYRAMP_TEST_FILE_PATTERN.test(f))
19
+ : [];
13
20
  // ── Frontend / UI change detection ──
14
21
  const FRONTEND_FILE_PATTERN = /\.(tsx?|jsx?|vue|svelte|css|scss|less|html)$/;
15
22
  const API_DIR_PATTERN = /\/(api|routes?|controllers?|routers?|handlers?|endpoints?|server)\//;
16
23
  const hasFrontendChanges = isDiffScope && diffContext
17
- ? diffContext.changedFiles.some(f => FRONTEND_FILE_PATTERN.test(f) &&
24
+ ? filteredChangedFiles.some(f => FRONTEND_FILE_PATTERN.test(f) &&
18
25
  !API_DIR_PATTERN.test(f) &&
19
26
  /\/(components?|pages?|views?|layouts?|app|src\/app|frontend|client|public|styles?)\//i.test(f))
20
27
  : false;
@@ -62,7 +69,7 @@ ${endpointLines}
62
69
  diffSection = `
63
70
  ## Branch Diff Context
64
71
  Branch: \`${diffContext.currentBranch}\` → base: \`${diffContext.baseBranch}\`
65
- Changed files: ${diffContext.changedFiles.join(", ")}
72
+ Changed files: ${filteredChangedFiles.join(", ")}
66
73
  New endpoints:
67
74
  ${fmtEps(diffContext.newEndpoints, (m) => `${m.sourceFile}, ${m.interactionCount} interactions`)}
68
75
  Modified endpoints:
@@ -154,14 +161,54 @@ Use base URL: \`${analysis.apiEndpoints.baseUrl}\` and auth: \`${authHeaderValue
154
161
  // ── PR History ──
155
162
  let prHistorySection = "";
156
163
  if (prContext && prContext.previousRecommendations.length > 0) {
157
- const recLines = prContext.previousRecommendations
158
- .map((r) => ` - [${r.status.toUpperCase()}] ${r.testType} ${r.endpoint}${r.scenarioName ? ` (scenario: ${r.scenarioName})` : ""}`)
159
- .join("\n");
160
- prHistorySection = `
161
- ## PR History (PR #${prContext.prNumber})
164
+ const implemented = prContext.previousRecommendations.filter(r => r.status === "implemented");
165
+ const recommended = prContext.previousRecommendations.filter(r => r.status === "recommended");
166
+ let historyBody = "";
167
+ if (implemented.length > 0) {
168
+ const implLines = implemented
169
+ .map((r) => ` - ${r.testType} — ${r.endpoint}`)
170
+ .join("\n");
171
+ const fileLines = prContext.implementedTestFiles.length > 0
172
+ ? `\nExisting test files (in working tree):\n${prContext.implementedTestFiles.map(f => " - \`" + f + "\`").join("\n")}\n`
173
+ : "";
174
+ historyBody += `### Previously Generated Tests
175
+ ${implLines}${fileLines}`;
176
+ }
177
+ if (prContext.executionResults.length > 0) {
178
+ const resultLines = prContext.executionResults
179
+ .map((r) => ` - \`${r.testFile}\` — ${r.status}`)
180
+ .join("\n");
181
+ historyBody += `### Execution Results from Prior Run
182
+ ${resultLines}
183
+ If a test failed previously, check whether the failure was environmental or a real bug,
184
+ and adjust the test approach if needed.
185
+ `;
186
+ }
187
+ if (recommended.length > 0) {
188
+ const recLines = recommended
189
+ .map((r) => ` - ${r.testType} — ${r.endpoint}${r.scenarioName ? ` (scenario: ${r.scenarioName})` : ""}`)
190
+ .join("\n");
191
+ historyBody += `
192
+ ### Previously Recommended (not generated)
162
193
  ${recLines}
163
- **Do NOT re-recommend tests already listed above.**
194
+ These were recommended but not generated in the prior run. Consider promoting them
195
+ to generation if they still apply to the current code changes.
164
196
  `;
197
+ }
198
+ prHistorySection = `
199
+ ## PR History (PR #${prContext.prNumber})
200
+ Tests from prior bot runs are still in the working tree — the maintenance pipeline
201
+ (Task 2) keeps them up to date. Use the history below to **avoid duplicating** existing
202
+ coverage and to fill gaps:
203
+ - **Do NOT re-recommend** tests listed under "Previously Generated Tests" — they already
204
+ exist and are maintained automatically.
205
+ - **Promote** previously recommended-but-not-generated tests into the top
206
+ ${MAX_TESTS_TO_GENERATE} generation slots if they still apply to the current code.
207
+ - **Add new** recommendations only for endpoints or code paths introduced in the latest
208
+ commit that aren't covered by existing tests.
209
+ - If prior execution results show failures, note the issue but do not re-recommend
210
+ the test — Task 2 handles fixes for existing tests.
211
+ ${historyBody}`;
165
212
  }
166
213
  // ── Compose all sections ──
167
214
  const scopeNote = isDiffScope