@skyramp/mcp 0.1.0-rc.1 → 0.1.0-rc.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/build/index.js +17 -68
  2. package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js +134 -0
  3. package/build/prompts/test-maintenance/drift-analysis-prompt.js +10 -3
  4. package/build/prompts/test-maintenance/driftAnalysisSections.js +13 -13
  5. package/build/prompts/test-recommendation/analysisOutputPrompt.js +42 -50
  6. package/build/prompts/test-recommendation/mergeEnrichedScenarios.test.js +125 -0
  7. package/build/prompts/test-recommendation/recommendationSections.js +131 -25
  8. package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +149 -9
  9. package/build/prompts/test-recommendation/test-recommendation-prompt.js +432 -111
  10. package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +455 -63
  11. package/build/prompts/testbot/testbot-prompts.js +34 -13
  12. package/build/prompts/testbot/testbot-prompts.test.js +29 -0
  13. package/build/resources/analysisResources.js +13 -5
  14. package/build/services/TestExecutionService.js +2 -12
  15. package/build/tool-phases.js +2 -2
  16. package/build/tools/generate-tests/generateBatchScenarioRestTool.js +30 -23
  17. package/build/tools/generate-tests/generateBatchScenarioRestTool.test.js +88 -0
  18. package/build/tools/generate-tests/generateContractRestTool.js +5 -1
  19. package/build/tools/generate-tests/generateIntegrationRestTool.js +13 -4
  20. package/build/tools/submitReportTool.js +23 -5
  21. package/build/tools/submitReportTool.test.js +84 -6
  22. package/build/tools/test-management/analyzeChangesTool.js +24 -7
  23. package/build/tools/workspace/initScanWorkspaceTool.js +76 -0
  24. package/build/tools/workspace/initializeWorkspaceTool.js +39 -119
  25. package/build/types/RepositoryAnalysis.js +25 -3
  26. package/build/types/TestRecommendation.js +5 -4
  27. package/build/types/TestTypes.js +28 -2
  28. package/build/utils/AnalysisStateManager.js +30 -4
  29. package/build/utils/docker.js +118 -0
  30. package/build/utils/docker.test.js +113 -0
  31. package/build/utils/initAgent.js +75 -13
  32. package/build/utils/routeParsers.js +35 -0
  33. package/build/utils/routeParsers.test.js +66 -1
  34. package/build/utils/scenarioDrafting.js +207 -360
  35. package/build/utils/scenarioDrafting.test.js +191 -256
  36. package/build/utils/skyrampMdContent.js +0 -1
  37. package/build/utils/trace-parser.js +24 -6
  38. package/build/utils/trace-parser.test.js +140 -0
  39. package/build/utils/versions.js +3 -0
  40. package/package.json +1 -1
  41. package/build/prompts/testGenerationPrompt.js +0 -207
  42. package/build/prompts/testHealthPrompt.js +0 -85
  43. package/build/services/DriftAnalysisService.js +0 -1075
  44. package/build/services/DriftAnalysisService.test.js +0 -168
  45. package/build/tools/generate-tests/generateScenarioRestTool.js +0 -131
package/build/index.js CHANGED
@@ -2,11 +2,9 @@
2
2
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
3
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
4
  import { registerStartTraceCollectionPrompt } from "./prompts/startTraceCollectionPrompts.js";
5
- import { registerTestHealthPrompt } from "./prompts/testHealthPrompt.js";
6
5
  import { registerTraceTool } from "./tools/trace/startTraceCollectionTool.js";
7
6
  import { registerTraceStopTool } from "./tools/trace/stopTraceCollectionTool.js";
8
7
  import { registerExecuteSkyrampTestTool } from "./tools/executeSkyrampTestTool.js";
9
- import { registerTestGenerationPrompt } from "./prompts/testGenerationPrompt.js";
10
8
  import { AUTH_PLACEHOLDER_TOKEN } from "./types/TestTypes.js";
11
9
  import { logger } from "./utils/logger.js";
12
10
  import { registerUITestTool } from "./tools/generate-tests/generateUIRestTool.js";
@@ -22,18 +20,18 @@ import { registerFixErrorTool } from "./tools/fixErrorTool.js";
22
20
  import { registerRecommendTestsPrompt } from "./prompts/test-recommendation/registerRecommendTestsPrompt.js";
23
21
  import { registerModularizationTool } from "./tools/code-refactor/modularizationTool.js";
24
22
  import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
25
- import { registerScenarioTestTool } from "./tools/generate-tests/generateScenarioRestTool.js";
26
23
  import { registerBatchScenarioTestTool } from "./tools/generate-tests/generateBatchScenarioRestTool.js";
27
24
  import { registerMockTool } from "./tools/generate-tests/generateMockRestTool.js";
28
25
  import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerExecuteTestsTool, registerActionsTool, registerStateCleanupTool, } from "./tools/test-management/index.js";
29
26
  import { registerTestbotPrompt, registerTestbotResource, } from "./prompts/testbot/testbot-prompts.js";
30
27
  import { registerSubmitReportTool } from "./tools/submitReportTool.js";
31
28
  import { registerInitializeWorkspaceTool } from "./tools/workspace/initializeWorkspaceTool.js";
29
+ import { registerInitScanWorkspaceTool } from "./tools/workspace/initScanWorkspaceTool.js";
32
30
  import { registerOneClickTool } from "./tools/one-click/oneClickTool.js";
33
31
  import { registerAnalysisResources } from "./resources/analysisResources.js";
34
32
  import { registerProgressResource } from "./resources/progressResource.js";
35
33
  import { AnalyticsService } from "./services/AnalyticsService.js";
36
- import { initCheck } from "./utils/initAgent.js";
34
+ import { registerInitTriggerOnMCPInitialized } from "./utils/initAgent.js";
37
35
  import { registerPlaywrightTools, registerTraceRecordingPrompt, getPlaywrightTraceService, } from "./playwright/index.js";
38
36
  const server = new McpServer({
39
37
  name: "Skyramp MCP Server",
@@ -52,6 +50,13 @@ const server = new McpServer({
52
50
  },
53
51
  instructions: `Skyramp MCP Server — generates and executes API tests (fuzz, contract, integration, E2E, UI).
54
52
 
53
+ ## Workspace Initialization (REQUIRED before the first Skyramp tool call)
54
+ If the workspace root is a git repo AND \`.skyramp/workspace.yml\` does not exist:
55
+ 1. Call \`skyramp_init_scan\` with \`workspacePath\` → follow the returned instructions to discover all services.
56
+ 2. Call \`skyramp_init_workspace\` with \`workspacePath\`, \`services\`, and the \`scanToken\` from step 1.
57
+ 3. Proceed with the originally requested tool.
58
+ Skip only if: not a git repo, \`.skyramp/workspace.yml\` already exists, or user explicitly declines.
59
+
55
60
  ## Rules
56
61
  - NEVER show CLI commands. ALWAYS use the MCP tools provided.
57
62
  - For UI and E2E tests, there are TWO recording modes:
@@ -81,23 +86,6 @@ After \`skyramp_analyze_changes\`, inspect enriched data via MCP Resources (use
81
86
  - \`skyramp://analysis/{sessionId}/scenarios\` — drafted scenarios
82
87
  - \`skyramp://analysis/{sessionId}/diff\` — branch diff context
83
88
 
84
- ## Workspace Initialization (before ANY other Skyramp tool)
85
- Follow this flow EVERY time before calling any Skyramp tool:
86
-
87
- 1. **Check**: Is the workspace root a git repository? (i.e. does a \`.git\` directory exist at the root?)
88
- - **If NO** → it is a non-git repo. Do NOT call \`skyramp_initialize_workspace\`. Proceed directly with the requested tool. STOP — do not continue to step 2.
89
- - **If YES** → it is a git repo. Continue to step 2.
90
- 2. **Check**: Does .skyramp/workspace.yml exist at the workspace root?
91
- - **If YES** → workspace is already initialized. Proceed with the requested tool. STOP here.
92
- - **If NO** → you MUST call \`skyramp_initialize_workspace\` BEFORE doing anything else.
93
- - Do NOT skip this step. Do NOT proceed to the requested tool first.
94
- - Scan the repo for ALL services (see the tool description for detailed steps).
95
- - A fullstack or monorepo MUST produce multiple services — never just one.
96
- - After workspace init completes, THEN proceed with the originally requested tool.
97
- 3. **ONLY skip init in these two cases: non-git repo (step 1) or explicit user decline** (i.e. user EXPLICITLY says "no", "skip", "don't create workspace", or similar).
98
- - A request like "execute tests" or "generate tests" is NOT a signal to skip init.
99
- - If the user does decline, respect it — do NOT ask again, and proceed with the requested tool.
100
-
101
89
  ## Workspace Defaults for Test Generation (MANDATORY)
102
90
  Before calling ANY test generation tool, you MUST follow this flow:
103
91
 
@@ -105,7 +93,7 @@ Before calling ANY test generation tool, you MUST follow this flow:
105
93
  2. **Extract** the \`language\`, \`framework\`, \`outputDir\`, \`api.baseUrl\`, \`api.authHeader\`, and \`api.authType\` from the services section.
106
94
  3. **Use those values** as defaults for the test generation tool call. Do NOT ask the user for these values if they are already configured in the workspace file.
107
95
  4. **CRITICAL — endpointURL**: The \`endpointURL\` parameter MUST be the full URL to the specific endpoint being tested, NOT just the base URL. Construct it by combining \`api.baseUrl\` with the endpoint path. Example: if \`api.baseUrl\` is \`http://localhost:8000\` and the endpoint is \`/api/v1/products\`, pass \`endpointURL: "http://localhost:8000/api/v1/products"\`. NEVER pass just the base URL (e.g. \`http://localhost:8000\`) as \`endpointURL\`.
108
- 5. **CRITICAL — scenario generation**: When calling \`skyramp_scenario_test_generation\`, ALWAYS pass:
96
+ 5. **CRITICAL — scenario generation**: When calling \`skyramp_batch_scenario_test_generation\`, ALWAYS pass:
109
97
  - \`baseURL\`: The full base URL from \`api.baseUrl\` (e.g., \`http://localhost:3000\`). This determines the scheme, host, and port in the generated trace. Without it, the trace defaults to https:443 which is almost always wrong for local development.
110
98
  - \`authHeader\`: Which HTTP header carries the auth credential. Get it from \`api.authHeader\` in workspace config. Examples: \`Authorization\` (Bearer/Token auth), \`X-Api-Key\` (API key auth), \`Cookie\` (session/cookie auth like NextAuth). Pass \`""\` to skip auth entirely (unauthenticated endpoints or \`api.authType: "none"\`).
111
99
  - \`authScheme\`: Only when \`authHeader\` is \`Authorization\`. The prefix before the token (e.g., \`"Bearer"\` → \`Authorization: Bearer <token>\`). **Derive from**: (1) OpenAPI spec \`securitySchemes\`/\`securityDefinitions\`, (2) source code auth middleware, (3) workspace \`api.authType\`. **Do NOT guess.**
@@ -118,54 +106,10 @@ Before calling ANY test generation tool, you MUST follow this flow:
118
106
  8. The user can always override workspace defaults by explicitly specifying values in their request.
119
107
  `,
120
108
  });
121
- // Check for first-time invocation after version update (runs in background, doesn't block)
122
- let initCheckInFlight = false;
123
- let initCheckDone = false;
124
- const INIT_MESSAGE = "Skyramp init: Triggering pull of Skyramp worker and executor images if not present locally.";
125
- const originalRegisterTool = server.registerTool.bind(server);
126
- server.registerTool = function (name, definition, handler) {
127
- const wrappedHandler = async (...args) => {
128
- let triggeredInitThisCall = false;
129
- if (!initCheckDone && !initCheckInFlight) {
130
- // Guard with inFlight so concurrent tool calls don't each spawn a new initCheck(),
131
- // but allow retry on failure (initCheckInFlight is reset to false on error).
132
- // SkyrampClient constructor calls checkForUpdate("npm") via synchronous koffi FFI,
133
- // which can block the event loop for up to 60 s if the update-check server is
134
- // unreachable. Deferring via setImmediate ensures the tool response is written to
135
- // stdout (and acknowledged by the MCP client) before any blocking FFI call runs.
136
- initCheckInFlight = true;
137
- triggeredInitThisCall = true;
138
- setImmediate(() => {
139
- initCheck()
140
- .then(() => {
141
- initCheckDone = true;
142
- })
143
- .catch((err) => {
144
- logger.error("Background initialization check failed", { error: err });
145
- })
146
- .finally(() => {
147
- initCheckInFlight = false;
148
- });
149
- });
150
- }
151
- const result = await handler(...args);
152
- if (triggeredInitThisCall && result) {
153
- const content = result.content ?? [];
154
- result.content = [
155
- { type: "text", text: INIT_MESSAGE },
156
- ...content,
157
- ];
158
- }
159
- return result;
160
- };
161
- return originalRegisterTool(name, definition, wrappedHandler);
162
- };
163
109
  // Register prompts
164
110
  logger.info("Starting prompt registration process");
165
111
  const prompts = [
166
- registerTestGenerationPrompt,
167
112
  registerStartTraceCollectionPrompt,
168
- registerTestHealthPrompt,
169
113
  registerRecommendTestsPrompt,
170
114
  registerTraceRecordingPrompt,
171
115
  ];
@@ -185,7 +129,7 @@ const testGenerationTools = [
185
129
  registerIntegrationTestTool,
186
130
  registerE2ETestTool,
187
131
  registerUITestTool,
188
- registerScenarioTestTool,
132
+ registerBatchScenarioTestTool,
189
133
  registerMockTool,
190
134
  ];
191
135
  testGenerationTools.forEach((registerTool) => registerTool(server));
@@ -206,6 +150,7 @@ registerExecuteTestsTool(server);
206
150
  registerActionsTool(server);
207
151
  registerStateCleanupTool(server);
208
152
  // Register workspace management tools
153
+ registerInitScanWorkspaceTool(server);
209
154
  registerInitializeWorkspaceTool(server);
210
155
  // Register one-click orchestrated workflows
211
156
  registerOneClickTool(server);
@@ -219,7 +164,6 @@ const infrastructureTools = [
219
164
  ];
220
165
  if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
221
166
  infrastructureTools.push(registerSubmitReportTool);
222
- registerBatchScenarioTestTool(server);
223
167
  logger.info("TestBot tools enabled via SKYRAMP_FEATURE_TESTBOT");
224
168
  }
225
169
  infrastructureTools.forEach((registerTool) => registerTool(server));
@@ -249,6 +193,11 @@ process.on("uncaughtException", async (error) => {
249
193
  // Start MCP server
250
194
  async function main() {
251
195
  const transport = new StdioServerTransport();
196
+ server.server.oninitialized = () => {
197
+ registerInitTriggerOnMCPInitialized().catch((err) => {
198
+ logger.error("Failed to run MCP initialized trigger", { error: err });
199
+ });
200
+ };
252
201
  await server.connect(transport);
253
202
  logger.info("MCP Server started successfully");
254
203
  // Listen for stdin closure (parent process disconnected)
@@ -0,0 +1,134 @@
1
+ import { getPersonaPrefix } from "../architectPersona.js";
2
+ export const INIT_WORKSPACE_INSTRUCTIONS = `${getPersonaPrefix()}Your task is to scan this repository, discover ALL services, and call the \`skyramp_init_workspace\` tool with the discovered services array and the scanToken.
3
+
4
+ After scanning the workspace, before calling the \`skyramp_init_workspace\` tool, you MUST:
5
+
6
+ **1. Output a \`<thinking>\` block** to justify the reasoning behind each field mapping for every discovered service.
7
+
8
+ **2. Then output a Discovery Summary** with the exact services array you will pass to the tool:
9
+
10
+ \`\`\`json
11
+ [
12
+ {
13
+ "serviceName": "<name>",
14
+ "language": "<language>",
15
+ "framework": "<framework>",
16
+ "testDirectory": "<path>",
17
+ "api": { "schemaPath": "<path-or-url>", "baseUrl": "<url>", "authType": "<type>", "authHeader": "<header>" },
18
+ "runtimeDetails": { "runtime": "<runtime>", "serverStartCommand": "<command>", "dockerNetwork": "<network>" }
19
+ }
20
+ // ... one entry per discovered service
21
+ ]
22
+ \`\`\`
23
+
24
+ ## Step 1 — List ALL Top-Level Directories
25
+
26
+ Run a directory listing of the workspace root. Every top-level directory is a potential service. Common layouts:
27
+
28
+ | Layout | Example dirs | Expect |
29
+ |--------|-------------|--------|
30
+ | Monorepo | apps/web, apps/api, packages/shared | 1 service per app |
31
+ | Microservices | services/auth, services/orders | 1 service per service dir |
32
+ | Single service | src/, lib/ | 1 service (the root) |
33
+
34
+ ## Step 2 — Inspect EVERY Candidate Directory
35
+
36
+ For **each** top-level directory, check for service indicator files:
37
+
38
+ **Language indicators** (presence of ANY = independent service):
39
+ - package.json → typescript / javascript
40
+ - requirements.txt, pyproject.toml, Pipfile → python
41
+ - pom.xml, build.gradle → java
42
+
43
+ **Test framework** (look inside the service dir):
44
+ - playwright.config.* → playwright
45
+ - pytest.ini, conftest.py, pyproject.toml [tool.pytest] → pytest
46
+ - junit in pom.xml → junit
47
+
48
+ **API schemas** (look inside the service dir AND check known framework defaults):
49
+ - openapi.json/yaml, swagger.json/yaml → schema file path
50
+ - FastAPI projects → http://localhost:{port}/openapi.json
51
+ - Express with swagger-ui → http://localhost:{port}/api-docs
52
+ - Spring Boot → http://localhost:{port}/v3/api-docs
53
+ - Always use localhost URLs — NEVER use external or production URLs
54
+
55
+ ## Step 3 — Check Root-Level Runtime Config
56
+
57
+ Inspect the repo root (and subdirectories like .devcontainer/) for shared runtime configuration:
58
+ - docker-compose.yml → extract service names, ports, start commands
59
+ Docker Compose ALWAYS prefixes the network name with "<project-name>_".
60
+ If compose has "networks: { my-net: ... }" → actual network = "<project-name>_my-net".
61
+ If no explicit networks section → default network = "<project-name>_default".
62
+ Project name = basename of the CWD where docker compose runs.
63
+ - Makefile → extract start/dev targets
64
+ - Root package.json scripts → workspace-level commands
65
+
66
+ ## Step 4 — Build the Complete Services Array
67
+
68
+ Create one service entry per deployable unit. You MUST include:
69
+ - Every backend/API service (Python, Java, Go, Node.js)
70
+ - Every frontend service (React, Vue, Angular, Next.js)
71
+ - Set runtime fields from docker-compose.yml if present
72
+
73
+ **Basic fields:**
74
+ - \`serviceName\` *(required)* — unique identifier, e.g. "api-gateway", "user-service"
75
+ - \`language\` — \`python\` | \`typescript\` | \`javascript\` | \`java\`
76
+ Detect from: package.json → typescript/javascript | requirements.txt/pyproject.toml → python | pom.xml/build.gradle → java
77
+ - \`framework\` — \`playwright\` | \`pytest\` | \`robot\` | \`junit\`
78
+ Detect from: pytest.ini/playwright.config/jest.config/junit in pom.xml
79
+ MUST match the language: python → pytest or robot | typescript/javascript → playwright | java → junit
80
+ - \`testDirectory\` — path relative to repo root where tests exist or will be generated; prefer existing test dirs over source dirs, e.g. "tests", "api/tests", "test"
81
+
82
+ **API fields:**
83
+ - \`api.schemaPath\` — path or URL to OpenAPI/Protobuf/GraphQL schema
84
+ Search for: openapi.json, swagger.yaml, *.proto, *.graphql
85
+ Framework defaults: FastAPI → /openapi.json | Express → /api-docs | Spring → /v3/api-docs
86
+ ⚠️ NEVER use external or production URLs — always use localhost.
87
+ - \`api.baseUrl\` *(required)* — local base URL, e.g. "http://localhost:3000"
88
+ Derive from docker-compose ports, app config, or README.
89
+ ⚠️ MUST be a localhost URL. NEVER use external or production URLs.
90
+ - \`api.authType\` — \`bearer\` | \`basic\` | \`oauth\` | \`apiKey\` | \`none\`
91
+ Detect by checking in order:
92
+ 1. Dependencies: \`jsonwebtoken\`/\`passport-jwt\` → \`bearer\` | \`passport-http\` → \`basic\` | \`passport-oauth2\`/\`openid-client\` → \`oauth\`
93
+ 2. Env vars: \`JWT_SECRET\`/\`ACCESS_TOKEN\` → \`bearer\` | \`API_KEY\`/\`X_API_KEY\` → \`apiKey\` | \`CLIENT_ID\`+\`CLIENT_SECRET\` → \`oauth\`
94
+ 3. Middleware/source: \`req.headers.authorization\` + \`Bearer\` → \`bearer\` | custom header check → \`apiKey\`
95
+ 4. Fallback: frontend/UI service → \`none\` | backend API with no signals → \`bearer\`
96
+ - \`api.authHeader\` — header name, e.g. "Authorization" for bearer/basic/oauth, "X-API-Key" for apiKey, "" for none
97
+
98
+ **Runtime fields:**
99
+ - \`runtimeDetails.runtime\` — \`local\` | \`docker\` | \`k8s\`
100
+ Detect per service:
101
+ - Service listed in docker-compose.yml → \`"docker"\`
102
+ - Service has only a Dockerfile (no compose entry) → \`"local"\` or \`"docker"\`
103
+ - k8s manifests exist (charts/, k8s/, deploy/) → \`"k8s"\`
104
+ ⚠️ A repo may have MIXED runtimes — a backend in docker-compose.yml uses "docker" while a frontend run with pnpm/npm locally uses "local". Include ALL services regardless of runtime.
105
+
106
+ - \`runtimeDetails.serverStartCommand\` — command to start the service. MUST match runtime:
107
+ - \`"local"\` → application command: "uvicorn main:app", "npm run dev", "java -jar app.jar"
108
+ - \`"docker"\` → Docker command: "docker compose up -d \<service-name\>" ← prefer service-scoped
109
+ - \`"k8s"\` → k8s command: "kubectl apply -f deploy/", "helm install myrelease ."
110
+ ⚠️ NEVER mix (e.g. "uvicorn …" with runtime "docker" will cause errors).
111
+
112
+ - \`runtimeDetails.dockerNetwork\` — Docker network name. ONLY set when runtime is \`"docker"\`. NEVER set for "local" or "k8s".
113
+ - \`runtimeDetails.k8sNamespace\` — Kubernetes namespace. ONLY set when runtime is \`"k8s"\`. NEVER set for "local" or "docker".
114
+ - \`runtimeDetails.k8sContext\` — Kubernetes context. ONLY set when runtime is \`"k8s"\`. NEVER set for "local" or "docker".
115
+
116
+ ## Verification Steps
117
+
118
+ Before calling \`skyramp_init_workspace\`, confirm all of the following:
119
+ - ALWAYS SCAN REPO AND FIND SERVICES. A REPO SHOULD HAVE AT LEAST ONE SERVICE.
120
+ - **CRITICAL**: ALL services are included — backend AND frontend. The workspace config is a complete registry of the entire repo, not just the service relevant to your current task. A fullstack or monorepo MUST have multiple services — if you found only one, re-scan every top-level directory before proceeding.
121
+ - Services NOT in docker-compose.yml (e.g. a frontend run with pnpm/npm locally) MUST still be included with runtime "local".
122
+ - Every service has \`api.baseUrl\` set to a localhost URL — NEVER a production or external URL.
123
+ - \`framework\` matches \`language\` (python → pytest/robot | typescript/javascript → playwright | java → junit)
124
+ - \`serverStartCommand\` matches \`runtime\`
125
+ - For services in docker-compose.yml: runtime MUST be "docker" and command MUST be a docker command (e.g. "docker compose up -d <service-name>").
126
+ - NEVER use application-level commands (uvicorn, npm, node, python, java, etc.) with runtime "docker".
127
+ - \`dockerNetwork\` is set only when runtime is "docker"
128
+ - \`k8sNamespace\` and \`k8sContext\` are set only when runtime is "k8s"
129
+
130
+ Once verified, call \`skyramp_init_workspace\` with:
131
+ - \`workspacePath\`: the repository root path
132
+ - \`services\`: the array built above
133
+ - \`scanToken\`: the token returned by the earlier call to \`skyramp_init_scan\` (called with only workspacePath)
134
+ - \`force\`: defaults to false — only set to true if the user explicitly asks to overwrite an existing \`.skyramp/workspace.yml\``;
@@ -58,15 +58,22 @@ ${scannedSection}`;
58
58
  if (inlineMode) {
59
59
  // Testbot inline mode: all maintenance logic lives here so the testbot
60
60
  // prompt only orchestrates steps without duplicating rules.
61
- return `${buildActionDecisionMatrix()}
61
+ return `<drift_analysis_rules>
62
+ You are acting as a Skyramp Integration Architect.
63
+ For this maintenance step: assess each existing test against the diff returned by \`skyramp_analyze_changes\` and apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) directly — no separate analysis step.
64
+
65
+ ${buildActionDecisionMatrix()}
62
66
 
63
67
  ${buildUpdateExecutionRules()}
64
68
 
65
69
  ${buildDriftOutputChecklist(existingTests.length, newEndpointCount, inlineMode)}
66
70
 
67
- **Be brief.** Score each test, decide the action, and apply edits immediately. Do NOT write detailed analysis for IGNORE'd tests.`;
71
+ **Be brief.** Score each test, decide the action, and apply edits immediately. Do NOT write detailed analysis for IGNORE'd tests.
72
+ </drift_analysis_rules>`;
68
73
  }
69
- return `${contextSection}
74
+ return `You are acting as a Skyramp Integration Architect. Your responsibility is to assess each existing test against the branch diff and score it for drift. Apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) based on the scoring guide below.
75
+
76
+ ${contextSection}
70
77
  ${buildDriftScoringGuide()}
71
78
 
72
79
  ${buildActionDecisionMatrix()}
@@ -176,24 +176,24 @@ After completing all assessments above, call \`skyramp_actions\` with \`stateFil
176
176
  const existingTestSection = inlineMode
177
177
  ? `### Existing tests
178
178
  For each existing test reported by \`skyramp_analyze_changes\`:
179
- - **IGNORE/VERIFY tests**: list on a single line: \`<testFile> — IGNORE\` or \`<testFile> — VERIFY (score <N>)\`. Do NOT write detailed rationale.
179
+ - **IGNORE/VERIFY tests**: list on a single line: \`{testFile} — IGNORE\` or \`{testFile} — VERIFY (score {N})\`. Do NOT write detailed rationale.
180
180
  - **UPDATE/REGENERATE/DELETE tests**: output the full block:
181
181
  \`\`\`
182
- Test: <testFile>
183
- Drift Score: <0-100>
184
- Action: <UPDATE | REGENERATE | DELETE>
185
- Rationale: <1-2 sentence explanation>
182
+ Test: {testFile}
183
+ Drift Score: {0-100}
184
+ Action: {UPDATE | REGENERATE | DELETE}
185
+ Rationale: {1-2 sentence explanation}
186
186
  \`\`\`
187
187
  Focus your analysis on tests that need action — do not spend time analyzing unchanged tests.`
188
188
  : `### Existing tests (${existingTestCount} total)
189
189
  For each existing test:
190
- - **IGNORE/VERIFY tests**: list on a single line: \`<testFile> — IGNORE\` or \`<testFile> — VERIFY (score <N>)\`. Do NOT write detailed rationale.
190
+ - **IGNORE/VERIFY tests**: list on a single line: \`{testFile} — IGNORE\` or \`{testFile} — VERIFY (score {N})\`. Do NOT write detailed rationale.
191
191
  - **UPDATE/REGENERATE/DELETE tests**: output the full block:
192
192
  \`\`\`
193
- Test: <testFile>
194
- Drift Score: <0-100>
195
- Action: <UPDATE | REGENERATE | DELETE>
196
- Rationale: <1-2 sentence explanation>
193
+ Test: {testFile}
194
+ Drift Score: {0-100}
195
+ Action: {UPDATE | REGENERATE | DELETE}
196
+ Rationale: {1-2 sentence explanation}
197
197
  \`\`\``;
198
198
  const newEndpointSection = inlineMode
199
199
  ? ""
@@ -201,10 +201,10 @@ Rationale: <1-2 sentence explanation>
201
201
  ? `### New endpoints (${newEndpointCount} detected)
202
202
  For EACH new endpoint, output:
203
203
  \`\`\`
204
- Endpoint: <METHOD> <path>
204
+ Endpoint: {METHOD} {path}
205
205
  Action: ADD
206
- Test types: <contract | integration | smoke | ...>
207
- Rationale: <1 sentence>
206
+ Test types: {contract | integration | smoke | ...}
207
+ Rationale: {1 sentence}
208
208
  \`\`\``
209
209
  : `### New endpoints
210
210
  No new endpoints detected in this diff.`;
@@ -1,27 +1,32 @@
1
+ import { AnalysisScope } from "../../types/RepositoryAnalysis.js";
1
2
  function buildEnrichmentInstructions(p) {
2
- const isDiffScope = p.analysisScope === "current_branch_diff";
3
+ const isDiffScope = p.analysisScope === AnalysisScope.CurrentBranchDiff;
3
4
  const useHealthFlow = p.nextTool === "skyramp_analyze_test_health";
4
5
  if (!isDiffScope) {
5
6
  const nextStep = useHealthFlow
6
7
  ? `### Step 3: Identify tests at risk of drift
7
8
  Call \`skyramp_analyze_test_health\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\``
8
- : `### Step 3: Call recommend tests
9
- Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
10
- return `## Your Task — Enrich & Recommend (full repo)
9
+ : `### Step 3: Present the catalog
10
+ The ranked test recommendation catalog is pre-built and shown below (after the separator line).
11
+
12
+ **Your only job is to present it.**
13
+
14
+ 1. Fill in every \`<…from source>\` placeholder using the field names, computed formulas, and auth details you found in Steps 1–2.
15
+ 2. Output the completed catalog **exactly as formatted — grouped by test type (### E2E / ### UI / ### Integration / ### Contract)**. Do NOT restructure, reorder, rename sections, or generate a new format.
16
+ 3. Do NOT call any Skyramp generation tools. The catalog shows ready-to-use tool calls that can be executed on demand.
17
+
18
+ **If** Steps 1–2 revealed additional scenarios the catalog does not cover (e.g. a computed formula or FK relationship that was missed), you may optionally call \`skyramp_recommend_tests\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\` and \`enrichedScenarios\` to regenerate a more complete catalog — but only after presenting the current one.`;
19
+ return `## Your Task — Fill in and Present the Catalog (full repo)
11
20
 
12
21
  ### Step 1: Read key files
13
- Read \`package.json\` / \`requirements.txt\`, \`docker-compose.yml\`, route/controller files,
14
- and model/schema files (Zod schemas, Pydantic models, TypeScript interfaces, DTOs)
15
- to understand the tech stack, endpoint shapes, auth mechanisms, and request/response schemas.
16
-
17
- ### Step 2: Identify resource relationships and parameter locations
18
- Map how endpoints relate to each other — which POST creates resources consumed by other endpoints?
19
- **Resolve nested/sub-router paths** from the Router Mounting section above.
20
- **CRITICAL — Distinguish query params vs request body:** For each endpoint, determine whether
21
- parameters are sent as URL query params (typical for GET search/filter/list) or request body
22
- (typical for POST/PUT/PATCH). Look at FastAPI \`Query()\` annotations, Express \`req.query\` usage,
23
- Spring \`@RequestParam\`, Flask \`request.args\`, etc. Populate \`queryParams\` in interactions
24
- for GET endpoints that accept search/filter/pagination parameters.
22
+ Read route/controller files and model/schema files (Pydantic models, Zod schemas, DTOs)
23
+ to find: required request body fields, computed response fields and formulas, auth middleware type, storage backend, and how sub-routers are mounted (cross-check against Router Mounting section above).
24
+
25
+ ### Step 2: Map cross-resource relationships and resolve endpoint paths
26
+ (Distinct from Step 1 — Step 1 reads individual schemas; Step 2 maps how endpoints relate to each other.)
27
+ For each endpoint: which POST creates resources consumed by other endpoints?
28
+ **Resolve nested paths** from the Router Mounting section — a router mounted at \`/products/{product_id}/reviews\` means \`GET /\` in that file is actually \`GET /api/v1/products/{product_id}/reviews\`.
29
+ For GET list endpoints: identify query params (\`limit\`, \`offset\`, \`order\`, \`orderBy\`) from framework annotations (FastAPI \`Query()\`, Express \`req.query\`, etc.).
25
30
 
26
31
  ${nextStep}`;
27
32
  }
@@ -67,8 +72,20 @@ Draft multi-step scenarios simulating realistic user workflows:
67
72
  response data verification, actual field names for chaining.
68
73
  **Parameter placement:** GET search/filter endpoints MUST use \`queryParams\`, not \`requestBody\`.
69
74
 
75
+ **No duplicate scenarios.** Each scenario must cover a distinct code path (unique method + path + expected status). Do NOT draft two scenarios that differ only in request body values but hit the same code path (e.g. discount=10% vs discount=25% — both succeed with 200, same logic). A negative-case variant with a different expected status (e.g. discount=-10% → 422) IS a distinct scenario — use a single-step contract test for it (see below).
76
+
77
+ **For each new or modified endpoint, ensure at least one error-path scenario is drafted** — a single-step contract test that triggers a specific error (404 for a missing resource ID, 422 for an invalid field value) that the source code explicitly handles. One auth-boundary scenario (missing auth → 401/403) is enough across all endpoints — do not repeat it per endpoint.
78
+
79
+ **For every scenario you draft, fill \`bugCatchingTarget\`** with the specific formula, constraint, or failure mode the test is designed to expose. Examples:
80
+ - \`"discount formula: total_amount = subtotal * (1 - discount_value / 100) — wrong if addition is used instead of subtraction"\`
81
+ - \`"items not recalculated after PATCH — total_amount stays at old value if collection update is ignored"\`
82
+ - \`"missing 404 guard on resource ID — returns 500 instead of 404 for unknown IDs"\`
83
+ This field is used at test generation time to compute exact assertion values. Leave it empty only if no specific formula or constraint applies.
84
+
70
85
  ### Step 4: Call recommend tests
71
- Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
86
+ Call \`skyramp_recommend_tests\` with:
87
+ - \`stateFile: "${p.stateFile}"\`
88
+ - \`enrichedScenarios\`: (optional) JSON array of your Step 3 scenarios — see the tool's inputSchema for the exact shape. Your enriched scenarios override server-side ones with the same \`scenarioName\` and are prioritized in ranking. Omit if you drafted nothing in Step 3.`;
72
89
  return `## Your Task — Enrich & Recommend (PR-scoped)
73
90
 
74
91
  ### Step 1: Read the changed files
@@ -81,39 +98,19 @@ ${criticalPatternStep}
81
98
  ${step3Content}`;
82
99
  }
83
100
  export function buildAnalysisOutputText(p) {
84
- const isDiffScope = p.analysisScope === "current_branch_diff";
85
- const diffSection = p.parsedDiff
101
+ const isDiffScope = p.analysisScope === AnalysisScope.CurrentBranchDiff;
102
+ // Router mounting context is unique to this prompt (not in recommendationPrompt).
103
+ // Branch diff, endpoint catalog, auth config, and OpenAPI spec are omitted here
104
+ // because they are already present in the recommendation prompt that is
105
+ // concatenated in the same tool response.
106
+ const routerSection = !p.wsSchemaPath && p.routerMountContext
86
107
  ? `
87
- ## Branch Diff Context
88
- **Branch**: \`${p.parsedDiff.currentBranch}\` → base: \`${p.parsedDiff.baseBranch}\`
89
- **Changed Files** (${p.parsedDiff.changedFiles.length}): ${p.parsedDiff.changedFiles.join(", ")}
90
- **New Endpoints** (${p.parsedDiff.newEndpoints.length}): ${p.parsedDiff.newEndpoints.map((e) => `${e.method} ${e.path} (${e.sourceFile})`).join(", ") || "none"}
91
- **Modified Endpoints** (${p.parsedDiff.modifiedEndpoints.length}): ${p.parsedDiff.modifiedEndpoints.map((e) => `${e.method} ${e.path} (${e.sourceFile})`).join(", ") || "none"}
92
- **Affected Services**: ${p.parsedDiff.affectedServices.join(", ") || "none"}
93
- `
94
- : "";
95
- const endpointCatalog = p.scannedEndpoints.length > 0
96
- ? `
97
- ## Pre-Scanned Endpoint Catalog (${p.scannedEndpoints.length} routes)
98
- ${p.scannedEndpoints.map((ep) => ` ${ep.methods.join("|")} ${ep.path} (${ep.sourceFile})`).join("\n")}
99
- `
100
- : "";
101
- const wsLine = p.wsBaseUrl
102
- ? `**Base URL**: \`${p.wsBaseUrl}\`${p.wsAuthHeader ? ` | **Auth header**: \`${p.wsAuthHeader}\`` : ""}${p.wsAuthType ? ` | **Auth type**: \`${p.wsAuthType}\`` : ""}`
103
- : "";
104
- const specSection = p.wsSchemaPath
105
- ? `
106
- ## OpenAPI Spec Available
107
- Spec at \`${p.wsSchemaPath}\`. **Read it** for authoritative paths and schemas.
108
- Pass \`apiSchema: "${p.wsSchemaPath}"\` to ALL test generation tool calls.`
109
- : p.routerMountContext
110
- ? `
111
108
  ## Router Mounting / Nesting
112
109
  \`\`\`
113
110
  ${p.routerMountContext}
114
111
  \`\`\`
115
112
  Use this to resolve full URL paths for nested endpoints.`
116
- : "";
113
+ : "";
117
114
  const enrichment = buildEnrichmentInstructions(p);
118
115
  return `# Repository Analysis
119
116
 
@@ -121,12 +118,7 @@ Use this to resolve full URL paths for nested endpoints.`
121
118
  **Repository**: \`${p.repositoryPath}\`
122
119
  **Analysis Scope**: \`${p.analysisScope}\`
123
120
  ${isDiffScope ? `**Diff endpoints**: ${(p.parsedDiff?.newEndpoints.length ?? 0) + (p.parsedDiff?.modifiedEndpoints.length ?? 0)}` : `**Pre-scanned endpoints**: ${p.scannedEndpoints.length}`}
124
- ${wsLine}
125
- ${p.wsSchemaPath ? `**OpenAPI Spec**: \`${p.wsSchemaPath}\` (spec-based flow)` : "**Flow**: Code-scanning (may miss nesting)"}
126
-
127
- ${diffSection}
128
- ${endpointCatalog}
129
- ${specSection}
121
+ ${routerSection}
130
122
  ${enrichment}
131
123
 
132
124
  **CRITICAL**: No .json/.md file creation. Prioritize cross-resource workflows.`;
@@ -0,0 +1,125 @@
1
+ jest.mock("@skyramp/skyramp", () => ({ Skyramp: class {
2
+ } }));
3
+ import { mergeEnrichedScenarios } from "./registerRecommendTestsPrompt.js";
4
+ import { ScenarioSource } from "../../types/RepositoryAnalysis.js";
5
+ import { TestType } from "../../types/TestTypes.js";
6
+ function makeScenario(overrides = {}) {
7
+ return {
8
+ scenarioName: "base-scenario",
9
+ description: "base",
10
+ category: "crud",
11
+ priority: "medium",
12
+ steps: [{ order: 1, method: "GET", path: "/api/items", description: "list", interactionType: "success", expectedStatusCode: 200 }],
13
+ chainingKeys: [],
14
+ requiresAuth: true,
15
+ estimatedComplexity: "simple",
16
+ source: ScenarioSource.CodeInferred,
17
+ testType: TestType.CONTRACT,
18
+ ...overrides,
19
+ };
20
+ }
21
+ const VALID_STEP = { order: 1, method: "post", path: "/api/orders", expectedStatusCode: 201 };
22
+ describe("mergeEnrichedScenarios — happy path", () => {
23
+ it("merges a valid agent scenario into server scenarios", () => {
24
+ const server = [makeScenario({ scenarioName: "existing" })];
25
+ const raw = JSON.stringify([{
26
+ scenarioName: "new-orders-flow",
27
+ category: "business_rule",
28
+ steps: [VALID_STEP],
29
+ }]);
30
+ const { scenarios, rejectionNotes } = mergeEnrichedScenarios(server, raw);
31
+ expect(rejectionNotes).toHaveLength(0);
32
+ expect(scenarios.find(s => s.scenarioName === "new-orders-flow")).toBeDefined();
33
+ expect(scenarios.find(s => s.scenarioName === "existing")).toBeDefined();
34
+ expect(scenarios).toHaveLength(2);
35
+ });
36
+ it("overrides a server scenario when agent provides same scenarioName", () => {
37
+ const server = [makeScenario({ scenarioName: "orders-flow", description: "server version" })];
38
+ const raw = JSON.stringify([{
39
+ scenarioName: "orders-flow",
40
+ category: "business_rule",
41
+ description: "agent version",
42
+ steps: [VALID_STEP],
43
+ }]);
44
+ const { scenarios } = mergeEnrichedScenarios(server, raw);
45
+ expect(scenarios).toHaveLength(1);
46
+ expect(scenarios[0].description).toBe("agent version");
47
+ expect(scenarios[0].source).toBe("agent-enriched");
48
+ });
49
+ it("normalizes method to uppercase", () => {
50
+ const raw = JSON.stringify([{
51
+ scenarioName: "uppercase-test",
52
+ category: "crud",
53
+ steps: [{ order: 1, method: "post", path: "/api/items", expectedStatusCode: 201 }],
54
+ }]);
55
+ const { scenarios } = mergeEnrichedScenarios([], raw);
56
+ expect(scenarios[0].steps[0].method).toBe("POST");
57
+ });
58
+ it("preserves bugCatchingTarget when provided", () => {
59
+ const raw = JSON.stringify([{
60
+ scenarioName: "formula-test",
61
+ category: "business_rule",
62
+ bugCatchingTarget: "total = price * qty",
63
+ steps: [VALID_STEP],
64
+ }]);
65
+ const { scenarios } = mergeEnrichedScenarios([], raw);
66
+ expect(scenarios[0].bugCatchingTarget).toBe("total = price * qty");
67
+ });
68
+ it("falls back to server scenarios on empty agent array", () => {
69
+ const server = [makeScenario({ scenarioName: "server-only" })];
70
+ const { scenarios, rejectionNotes } = mergeEnrichedScenarios(server, "[]");
71
+ // Empty array → no agent scenarios, return server ones unchanged
72
+ expect(scenarios).toEqual(server);
73
+ expect(rejectionNotes).toHaveLength(0);
74
+ });
75
+ });
76
+ describe("mergeEnrichedScenarios — rejection cases", () => {
77
+ it("rejects scenario with missing scenarioName", () => {
78
+ const raw = JSON.stringify([{ category: "crud", steps: [VALID_STEP] }]);
79
+ const { scenarios, rejectionNotes } = mergeEnrichedScenarios([], raw);
80
+ expect(scenarios).toHaveLength(0);
81
+ expect(rejectionNotes[0]).toMatch(/missing scenarioName/);
82
+ });
83
+ it("rejects scenario with missing steps array", () => {
84
+ const raw = JSON.stringify([{ scenarioName: "no-steps", category: "crud" }]);
85
+ const { rejectionNotes } = mergeEnrichedScenarios([], raw);
86
+ expect(rejectionNotes[0]).toMatch(/missing or empty steps/);
87
+ });
88
+ it("rejects scenario with empty steps array", () => {
89
+ const raw = JSON.stringify([{ scenarioName: "empty-steps", category: "crud", steps: [] }]);
90
+ const { rejectionNotes } = mergeEnrichedScenarios([], raw);
91
+ expect(rejectionNotes[0]).toMatch(/missing or empty steps/);
92
+ });
93
+ it("rejects scenario with missing category", () => {
94
+ const raw = JSON.stringify([{ scenarioName: "no-cat", steps: [VALID_STEP] }]);
95
+ const { rejectionNotes } = mergeEnrichedScenarios([], raw);
96
+ expect(rejectionNotes[0]).toMatch(/missing category/);
97
+ });
98
+ it("rejects scenario with unknown category", () => {
99
+ const raw = JSON.stringify([{ scenarioName: "bad-cat", category: "not_a_real_category", steps: [VALID_STEP] }]);
100
+ const { rejectionNotes } = mergeEnrichedScenarios([], raw);
101
+ expect(rejectionNotes[0]).toMatch(/unknown category/);
102
+ });
103
+ it("falls back to server scenarios on invalid JSON", () => {
104
+ const server = [makeScenario()];
105
+ const { scenarios, rejectionNotes } = mergeEnrichedScenarios(server, "{ bad json");
106
+ expect(scenarios).toEqual(server);
107
+ expect(rejectionNotes[0]).toMatch(/invalid JSON/);
108
+ });
109
+ it("falls back to server scenarios when JSON is not an array", () => {
110
+ const server = [makeScenario()];
111
+ const { scenarios, rejectionNotes } = mergeEnrichedScenarios(server, JSON.stringify({ not: "array" }));
112
+ expect(scenarios).toEqual(server);
113
+ expect(rejectionNotes[0]).toMatch(/expected a JSON array/);
114
+ });
115
+ it("accepts valid scenarios and rejects invalid ones in the same batch", () => {
116
+ const raw = JSON.stringify([
117
+ { scenarioName: "valid-one", category: "crud", steps: [VALID_STEP] },
118
+ { category: "crud", steps: [VALID_STEP] }, // missing scenarioName
119
+ ]);
120
+ const { scenarios, rejectionNotes } = mergeEnrichedScenarios([], raw);
121
+ expect(scenarios).toHaveLength(1);
122
+ expect(scenarios[0].scenarioName).toBe("valid-one");
123
+ expect(rejectionNotes).toHaveLength(1);
124
+ });
125
+ });