@skyramp/mcp 0.1.0-rc.2 → 0.1.0-rc.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +17 -68
- package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js +134 -0
- package/build/prompts/test-maintenance/drift-analysis-prompt.js +10 -3
- package/build/prompts/test-maintenance/driftAnalysisSections.js +13 -13
- package/build/prompts/test-recommendation/recommendationSections.js +14 -25
- package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +1 -3
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +46 -59
- package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +2 -2
- package/build/prompts/testbot/testbot-prompts.js +10 -10
- package/build/prompts/testbot/testbot-prompts.test.js +29 -0
- package/build/services/TestExecutionService.js +2 -12
- package/build/tool-phases.js +2 -2
- package/build/tools/generate-tests/generateBatchScenarioRestTool.js +26 -20
- package/build/tools/generate-tests/generateBatchScenarioRestTool.test.js +88 -0
- package/build/tools/generate-tests/generateContractRestTool.js +5 -1
- package/build/tools/generate-tests/generateIntegrationRestTool.js +13 -4
- package/build/tools/submitReportTool.js +13 -4
- package/build/tools/submitReportTool.test.js +84 -6
- package/build/tools/workspace/initScanWorkspaceTool.js +76 -0
- package/build/tools/workspace/initializeWorkspaceTool.js +39 -119
- package/build/utils/docker.js +118 -0
- package/build/utils/docker.test.js +113 -0
- package/build/utils/initAgent.js +75 -13
- package/build/utils/skyrampMdContent.js +0 -1
- package/build/utils/versions.js +3 -0
- package/package.json +1 -1
- package/build/prompts/testGenerationPrompt.js +0 -207
- package/build/prompts/testHealthPrompt.js +0 -85
- package/build/services/DriftAnalysisService.js +0 -1075
- package/build/services/DriftAnalysisService.test.js +0 -168
- package/build/tools/generate-tests/generateScenarioRestTool.js +0 -131
package/build/index.js
CHANGED
|
@@ -2,11 +2,9 @@
|
|
|
2
2
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
3
3
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
4
4
|
import { registerStartTraceCollectionPrompt } from "./prompts/startTraceCollectionPrompts.js";
|
|
5
|
-
import { registerTestHealthPrompt } from "./prompts/testHealthPrompt.js";
|
|
6
5
|
import { registerTraceTool } from "./tools/trace/startTraceCollectionTool.js";
|
|
7
6
|
import { registerTraceStopTool } from "./tools/trace/stopTraceCollectionTool.js";
|
|
8
7
|
import { registerExecuteSkyrampTestTool } from "./tools/executeSkyrampTestTool.js";
|
|
9
|
-
import { registerTestGenerationPrompt } from "./prompts/testGenerationPrompt.js";
|
|
10
8
|
import { AUTH_PLACEHOLDER_TOKEN } from "./types/TestTypes.js";
|
|
11
9
|
import { logger } from "./utils/logger.js";
|
|
12
10
|
import { registerUITestTool } from "./tools/generate-tests/generateUIRestTool.js";
|
|
@@ -22,18 +20,18 @@ import { registerFixErrorTool } from "./tools/fixErrorTool.js";
|
|
|
22
20
|
import { registerRecommendTestsPrompt } from "./prompts/test-recommendation/registerRecommendTestsPrompt.js";
|
|
23
21
|
import { registerModularizationTool } from "./tools/code-refactor/modularizationTool.js";
|
|
24
22
|
import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
|
|
25
|
-
import { registerScenarioTestTool } from "./tools/generate-tests/generateScenarioRestTool.js";
|
|
26
23
|
import { registerBatchScenarioTestTool } from "./tools/generate-tests/generateBatchScenarioRestTool.js";
|
|
27
24
|
import { registerMockTool } from "./tools/generate-tests/generateMockRestTool.js";
|
|
28
25
|
import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerExecuteTestsTool, registerActionsTool, registerStateCleanupTool, } from "./tools/test-management/index.js";
|
|
29
26
|
import { registerTestbotPrompt, registerTestbotResource, } from "./prompts/testbot/testbot-prompts.js";
|
|
30
27
|
import { registerSubmitReportTool } from "./tools/submitReportTool.js";
|
|
31
28
|
import { registerInitializeWorkspaceTool } from "./tools/workspace/initializeWorkspaceTool.js";
|
|
29
|
+
import { registerInitScanWorkspaceTool } from "./tools/workspace/initScanWorkspaceTool.js";
|
|
32
30
|
import { registerOneClickTool } from "./tools/one-click/oneClickTool.js";
|
|
33
31
|
import { registerAnalysisResources } from "./resources/analysisResources.js";
|
|
34
32
|
import { registerProgressResource } from "./resources/progressResource.js";
|
|
35
33
|
import { AnalyticsService } from "./services/AnalyticsService.js";
|
|
36
|
-
import {
|
|
34
|
+
import { registerInitTriggerOnMCPInitialized } from "./utils/initAgent.js";
|
|
37
35
|
import { registerPlaywrightTools, registerTraceRecordingPrompt, getPlaywrightTraceService, } from "./playwright/index.js";
|
|
38
36
|
const server = new McpServer({
|
|
39
37
|
name: "Skyramp MCP Server",
|
|
@@ -52,6 +50,13 @@ const server = new McpServer({
|
|
|
52
50
|
},
|
|
53
51
|
instructions: `Skyramp MCP Server — generates and executes API tests (fuzz, contract, integration, E2E, UI).
|
|
54
52
|
|
|
53
|
+
## Workspace Initialization (REQUIRED before the first Skyramp tool call)
|
|
54
|
+
If the workspace root is a git repo AND \`.skyramp/workspace.yml\` does not exist:
|
|
55
|
+
1. Call \`skyramp_init_scan\` with \`workspacePath\` → follow the returned instructions to discover all services.
|
|
56
|
+
2. Call \`skyramp_init_workspace\` with \`workspacePath\`, \`services\`, and the \`scanToken\` from step 1.
|
|
57
|
+
3. Proceed with the originally requested tool.
|
|
58
|
+
Skip only if: not a git repo, \`.skyramp/workspace.yml\` already exists, or user explicitly declines.
|
|
59
|
+
|
|
55
60
|
## Rules
|
|
56
61
|
- NEVER show CLI commands. ALWAYS use the MCP tools provided.
|
|
57
62
|
- For UI and E2E tests, there are TWO recording modes:
|
|
@@ -81,23 +86,6 @@ After \`skyramp_analyze_changes\`, inspect enriched data via MCP Resources (use
|
|
|
81
86
|
- \`skyramp://analysis/{sessionId}/scenarios\` — drafted scenarios
|
|
82
87
|
- \`skyramp://analysis/{sessionId}/diff\` — branch diff context
|
|
83
88
|
|
|
84
|
-
## Workspace Initialization (before ANY other Skyramp tool)
|
|
85
|
-
Follow this flow EVERY time before calling any Skyramp tool:
|
|
86
|
-
|
|
87
|
-
1. **Check**: Is the workspace root a git repository? (i.e. does a \`.git\` directory exist at the root?)
|
|
88
|
-
- **If NO** → it is a non-git repo. Do NOT call \`skyramp_initialize_workspace\`. Proceed directly with the requested tool. STOP — do not continue to step 2.
|
|
89
|
-
- **If YES** → it is a git repo. Continue to step 2.
|
|
90
|
-
2. **Check**: Does .skyramp/workspace.yml exist at the workspace root?
|
|
91
|
-
- **If YES** → workspace is already initialized. Proceed with the requested tool. STOP here.
|
|
92
|
-
- **If NO** → you MUST call \`skyramp_initialize_workspace\` BEFORE doing anything else.
|
|
93
|
-
- Do NOT skip this step. Do NOT proceed to the requested tool first.
|
|
94
|
-
- Scan the repo for ALL services (see the tool description for detailed steps).
|
|
95
|
-
- A fullstack or monorepo MUST produce multiple services — never just one.
|
|
96
|
-
- After workspace init completes, THEN proceed with the originally requested tool.
|
|
97
|
-
3. **ONLY skip init in these two cases: non-git repo (step 1) or explicit user decline** (i.e. user EXPLICITLY says "no", "skip", "don't create workspace", or similar).
|
|
98
|
-
- A request like "execute tests" or "generate tests" is NOT a signal to skip init.
|
|
99
|
-
- If the user does decline, respect it — do NOT ask again, and proceed with the requested tool.
|
|
100
|
-
|
|
101
89
|
## Workspace Defaults for Test Generation (MANDATORY)
|
|
102
90
|
Before calling ANY test generation tool, you MUST follow this flow:
|
|
103
91
|
|
|
@@ -105,7 +93,7 @@ Before calling ANY test generation tool, you MUST follow this flow:
|
|
|
105
93
|
2. **Extract** the \`language\`, \`framework\`, \`outputDir\`, \`api.baseUrl\`, \`api.authHeader\`, and \`api.authType\` from the services section.
|
|
106
94
|
3. **Use those values** as defaults for the test generation tool call. Do NOT ask the user for these values if they are already configured in the workspace file.
|
|
107
95
|
4. **CRITICAL — endpointURL**: The \`endpointURL\` parameter MUST be the full URL to the specific endpoint being tested, NOT just the base URL. Construct it by combining \`api.baseUrl\` with the endpoint path. Example: if \`api.baseUrl\` is \`http://localhost:8000\` and the endpoint is \`/api/v1/products\`, pass \`endpointURL: "http://localhost:8000/api/v1/products"\`. NEVER pass just the base URL (e.g. \`http://localhost:8000\`) as \`endpointURL\`.
|
|
108
|
-
5. **CRITICAL — scenario generation**: When calling \`
|
|
96
|
+
5. **CRITICAL — scenario generation**: When calling \`skyramp_batch_scenario_test_generation\`, ALWAYS pass:
|
|
109
97
|
- \`baseURL\`: The full base URL from \`api.baseUrl\` (e.g., \`http://localhost:3000\`). This determines the scheme, host, and port in the generated trace. Without it, the trace defaults to https:443 which is almost always wrong for local development.
|
|
110
98
|
- \`authHeader\`: Which HTTP header carries the auth credential. Get it from \`api.authHeader\` in workspace config. Examples: \`Authorization\` (Bearer/Token auth), \`X-Api-Key\` (API key auth), \`Cookie\` (session/cookie auth like NextAuth). Pass \`""\` to skip auth entirely (unauthenticated endpoints or \`api.authType: "none"\`).
|
|
111
99
|
- \`authScheme\`: Only when \`authHeader\` is \`Authorization\`. The prefix before the token (e.g., \`"Bearer"\` → \`Authorization: Bearer <token>\`). **Derive from**: (1) OpenAPI spec \`securitySchemes\`/\`securityDefinitions\`, (2) source code auth middleware, (3) workspace \`api.authType\`. **Do NOT guess.**
|
|
@@ -118,54 +106,10 @@ Before calling ANY test generation tool, you MUST follow this flow:
|
|
|
118
106
|
8. The user can always override workspace defaults by explicitly specifying values in their request.
|
|
119
107
|
`,
|
|
120
108
|
});
|
|
121
|
-
// Check for first-time invocation after version update (runs in background, doesn't block)
|
|
122
|
-
let initCheckInFlight = false;
|
|
123
|
-
let initCheckDone = false;
|
|
124
|
-
const INIT_MESSAGE = "Skyramp init: Triggering pull of Skyramp worker and executor images if not present locally.";
|
|
125
|
-
const originalRegisterTool = server.registerTool.bind(server);
|
|
126
|
-
server.registerTool = function (name, definition, handler) {
|
|
127
|
-
const wrappedHandler = async (...args) => {
|
|
128
|
-
let triggeredInitThisCall = false;
|
|
129
|
-
if (!initCheckDone && !initCheckInFlight) {
|
|
130
|
-
// Guard with inFlight so concurrent tool calls don't each spawn a new initCheck(),
|
|
131
|
-
// but allow retry on failure (initCheckInFlight is reset to false on error).
|
|
132
|
-
// SkyrampClient constructor calls checkForUpdate("npm") via synchronous koffi FFI,
|
|
133
|
-
// which can block the event loop for up to 60 s if the update-check server is
|
|
134
|
-
// unreachable. Deferring via setImmediate ensures the tool response is written to
|
|
135
|
-
// stdout (and acknowledged by the MCP client) before any blocking FFI call runs.
|
|
136
|
-
initCheckInFlight = true;
|
|
137
|
-
triggeredInitThisCall = true;
|
|
138
|
-
setImmediate(() => {
|
|
139
|
-
initCheck()
|
|
140
|
-
.then(() => {
|
|
141
|
-
initCheckDone = true;
|
|
142
|
-
})
|
|
143
|
-
.catch((err) => {
|
|
144
|
-
logger.error("Background initialization check failed", { error: err });
|
|
145
|
-
})
|
|
146
|
-
.finally(() => {
|
|
147
|
-
initCheckInFlight = false;
|
|
148
|
-
});
|
|
149
|
-
});
|
|
150
|
-
}
|
|
151
|
-
const result = await handler(...args);
|
|
152
|
-
if (triggeredInitThisCall && result) {
|
|
153
|
-
const content = result.content ?? [];
|
|
154
|
-
result.content = [
|
|
155
|
-
{ type: "text", text: INIT_MESSAGE },
|
|
156
|
-
...content,
|
|
157
|
-
];
|
|
158
|
-
}
|
|
159
|
-
return result;
|
|
160
|
-
};
|
|
161
|
-
return originalRegisterTool(name, definition, wrappedHandler);
|
|
162
|
-
};
|
|
163
109
|
// Register prompts
|
|
164
110
|
logger.info("Starting prompt registration process");
|
|
165
111
|
const prompts = [
|
|
166
|
-
registerTestGenerationPrompt,
|
|
167
112
|
registerStartTraceCollectionPrompt,
|
|
168
|
-
registerTestHealthPrompt,
|
|
169
113
|
registerRecommendTestsPrompt,
|
|
170
114
|
registerTraceRecordingPrompt,
|
|
171
115
|
];
|
|
@@ -185,7 +129,7 @@ const testGenerationTools = [
|
|
|
185
129
|
registerIntegrationTestTool,
|
|
186
130
|
registerE2ETestTool,
|
|
187
131
|
registerUITestTool,
|
|
188
|
-
|
|
132
|
+
registerBatchScenarioTestTool,
|
|
189
133
|
registerMockTool,
|
|
190
134
|
];
|
|
191
135
|
testGenerationTools.forEach((registerTool) => registerTool(server));
|
|
@@ -206,6 +150,7 @@ registerExecuteTestsTool(server);
|
|
|
206
150
|
registerActionsTool(server);
|
|
207
151
|
registerStateCleanupTool(server);
|
|
208
152
|
// Register workspace management tools
|
|
153
|
+
registerInitScanWorkspaceTool(server);
|
|
209
154
|
registerInitializeWorkspaceTool(server);
|
|
210
155
|
// Register one-click orchestrated workflows
|
|
211
156
|
registerOneClickTool(server);
|
|
@@ -219,7 +164,6 @@ const infrastructureTools = [
|
|
|
219
164
|
];
|
|
220
165
|
if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
|
|
221
166
|
infrastructureTools.push(registerSubmitReportTool);
|
|
222
|
-
registerBatchScenarioTestTool(server);
|
|
223
167
|
logger.info("TestBot tools enabled via SKYRAMP_FEATURE_TESTBOT");
|
|
224
168
|
}
|
|
225
169
|
infrastructureTools.forEach((registerTool) => registerTool(server));
|
|
@@ -249,6 +193,11 @@ process.on("uncaughtException", async (error) => {
|
|
|
249
193
|
// Start MCP server
|
|
250
194
|
async function main() {
|
|
251
195
|
const transport = new StdioServerTransport();
|
|
196
|
+
server.server.oninitialized = () => {
|
|
197
|
+
registerInitTriggerOnMCPInitialized().catch((err) => {
|
|
198
|
+
logger.error("Failed to run MCP initialized trigger", { error: err });
|
|
199
|
+
});
|
|
200
|
+
};
|
|
252
201
|
await server.connect(transport);
|
|
253
202
|
logger.info("MCP Server started successfully");
|
|
254
203
|
// Listen for stdin closure (parent process disconnected)
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import { getPersonaPrefix } from "../architectPersona.js";
|
|
2
|
+
export const INIT_WORKSPACE_INSTRUCTIONS = `${getPersonaPrefix()}Your task is to scan this repository, discover ALL services, and call the \`skyramp_init_workspace\` tool with the discovered services array and the scanToken.
|
|
3
|
+
|
|
4
|
+
After scanning the workspace, before calling the \`skyramp_init_workspace\` tool, you MUST:
|
|
5
|
+
|
|
6
|
+
**1. Output a \`<thinking>\` block** to justify the reasoning behind each field mapping for every discovered service.
|
|
7
|
+
|
|
8
|
+
**2. Then output a Discovery Summary** with the exact services array you will pass to the tool:
|
|
9
|
+
|
|
10
|
+
\`\`\`json
|
|
11
|
+
[
|
|
12
|
+
{
|
|
13
|
+
"serviceName": "<name>",
|
|
14
|
+
"language": "<language>",
|
|
15
|
+
"framework": "<framework>",
|
|
16
|
+
"testDirectory": "<path>",
|
|
17
|
+
"api": { "schemaPath": "<path-or-url>", "baseUrl": "<url>", "authType": "<type>", "authHeader": "<header>" },
|
|
18
|
+
"runtimeDetails": { "runtime": "<runtime>", "serverStartCommand": "<command>", "dockerNetwork": "<network>" }
|
|
19
|
+
}
|
|
20
|
+
// ... one entry per discovered service
|
|
21
|
+
]
|
|
22
|
+
\`\`\`
|
|
23
|
+
|
|
24
|
+
## Step 1 — List ALL Top-Level Directories
|
|
25
|
+
|
|
26
|
+
Run a directory listing of the workspace root. Every top-level directory is a potential service. Common layouts:
|
|
27
|
+
|
|
28
|
+
| Layout | Example dirs | Expect |
|
|
29
|
+
|--------|-------------|--------|
|
|
30
|
+
| Monorepo | apps/web, apps/api, packages/shared | 1 service per app |
|
|
31
|
+
| Microservices | services/auth, services/orders | 1 service per service dir |
|
|
32
|
+
| Single service | src/, lib/ | 1 service (the root) |
|
|
33
|
+
|
|
34
|
+
## Step 2 — Inspect EVERY Candidate Directory
|
|
35
|
+
|
|
36
|
+
For **each** top-level directory, check for service indicator files:
|
|
37
|
+
|
|
38
|
+
**Language indicators** (presence of ANY = independent service):
|
|
39
|
+
- package.json → typescript / javascript
|
|
40
|
+
- requirements.txt, pyproject.toml, Pipfile → python
|
|
41
|
+
- pom.xml, build.gradle → java
|
|
42
|
+
|
|
43
|
+
**Test framework** (look inside the service dir):
|
|
44
|
+
- playwright.config.* → playwright
|
|
45
|
+
- pytest.ini, conftest.py, pyproject.toml [tool.pytest] → pytest
|
|
46
|
+
- junit in pom.xml → junit
|
|
47
|
+
|
|
48
|
+
**API schemas** (look inside the service dir AND check known framework defaults):
|
|
49
|
+
- openapi.json/yaml, swagger.json/yaml → schema file path
|
|
50
|
+
- FastAPI projects → http://localhost:{port}/openapi.json
|
|
51
|
+
- Express with swagger-ui → http://localhost:{port}/api-docs
|
|
52
|
+
- Spring Boot → http://localhost:{port}/v3/api-docs
|
|
53
|
+
- Always use localhost URLs — NEVER use external or production URLs
|
|
54
|
+
|
|
55
|
+
## Step 3 — Check Root-Level Runtime Config
|
|
56
|
+
|
|
57
|
+
Inspect the repo root (and subdirectories like .devcontainer/) for shared runtime configuration:
|
|
58
|
+
- docker-compose.yml → extract service names, ports, start commands
|
|
59
|
+
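By default, Docker Compose prefixes the network name with "<project-name>_" (a network that sets an explicit "name:" or is marked "external: true" keeps its literal name).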
|
|
60
|
+
If compose has "networks: { my-net: ... }" → actual network = "<project-name>_my-net".
|
|
61
|
+
If no explicit networks section → default network = "<project-name>_default".
|
|
62
|
+
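Project name = basename of the directory containing the compose file, unless overridden by the -p flag, COMPOSE_PROJECT_NAME, or a top-level "name:" in the compose file.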
|
|
63
|
+
- Makefile → extract start/dev targets
|
|
64
|
+
- Root package.json scripts → workspace-level commands
|
|
65
|
+
|
|
66
|
+
## Step 4 — Build the Complete Services Array
|
|
67
|
+
|
|
68
|
+
Create one service entry per deployable unit. You MUST include:
|
|
69
|
+
- Every backend/API service (Python, Java, Go, Node.js)
|
|
70
|
+
- Every frontend service (React, Vue, Angular, Next.js)
|
|
71
|
+
- Set runtime fields from docker-compose.yml if present
|
|
72
|
+
|
|
73
|
+
**Basic fields:**
|
|
74
|
+
- \`serviceName\` *(required)* — unique identifier, e.g. "api-gateway", "user-service"
|
|
75
|
+
- \`language\` — \`python\` | \`typescript\` | \`javascript\` | \`java\`
|
|
76
|
+
Detect from: package.json → typescript/javascript | requirements.txt/pyproject.toml → python | pom.xml/build.gradle → java
|
|
77
|
+
- \`framework\` — \`playwright\` | \`pytest\` | \`robot\` | \`junit\`
|
|
78
|
+
Detect from: pytest.ini/playwright.config/jest.config/junit in pom.xml
|
|
79
|
+
MUST match the language: python → pytest or robot | typescript/javascript → playwright | java → junit
|
|
80
|
+
- \`testDirectory\` — path relative to repo root where tests exist or will be generated; prefer existing test dirs over source dirs, e.g. "tests", "api/tests", "test"
|
|
81
|
+
|
|
82
|
+
**API fields:**
|
|
83
|
+
- \`api.schemaPath\` — path or URL to OpenAPI/Protobuf/GraphQL schema
|
|
84
|
+
Search for: openapi.json, swagger.yaml, *.proto, *.graphql
|
|
85
|
+
Framework defaults: FastAPI → /openapi.json | Express → /api-docs | Spring → /v3/api-docs
|
|
86
|
+
⚠️ NEVER use external or production URLs — always use localhost.
|
|
87
|
+
- \`api.baseUrl\` *(required)* — local base URL, e.g. "http://localhost:3000"
|
|
88
|
+
Derive from docker-compose ports, app config, or README.
|
|
89
|
+
⚠️ MUST be a localhost URL. NEVER use external or production URLs.
|
|
90
|
+
- \`api.authType\` — \`bearer\` | \`basic\` | \`oauth\` | \`apiKey\` | \`none\`
|
|
91
|
+
Detect by checking in order:
|
|
92
|
+
1. Dependencies: \`jsonwebtoken\`/\`passport-jwt\` → \`bearer\` | \`passport-http\` → \`basic\` | \`passport-oauth2\`/\`openid-client\` → \`oauth\`
|
|
93
|
+
2. Env vars: \`JWT_SECRET\`/\`ACCESS_TOKEN\` → \`bearer\` | \`API_KEY\`/\`X_API_KEY\` → \`apiKey\` | \`CLIENT_ID\`+\`CLIENT_SECRET\` → \`oauth\`
|
|
94
|
+
3. Middleware/source: \`req.headers.authorization\` + \`Bearer\` → \`bearer\` | custom header check → \`apiKey\`
|
|
95
|
+
4. Fallback: frontend/UI service → \`none\` | backend API with no signals → \`bearer\`
|
|
96
|
+
- \`api.authHeader\` — header name, e.g. "Authorization" for bearer/basic/oauth, "X-API-Key" for apiKey, "" for none
|
|
97
|
+
|
|
98
|
+
**Runtime fields:**
|
|
99
|
+
- \`runtimeDetails.runtime\` — \`local\` | \`docker\` | \`k8s\`
|
|
100
|
+
Detect per service:
|
|
101
|
+
- Service listed in docker-compose.yml → \`"docker"\`
|
|
102
|
+
- Service has only a Dockerfile (no compose entry) → \`"local"\` or \`"docker"\`
|
|
103
|
+
- k8s manifests exist (charts/, k8s/, deploy/) → \`"k8s"\`
|
|
104
|
+
⚠️ A repo may have MIXED runtimes — a backend in docker-compose.yml uses "docker" while a frontend run with pnpm/npm locally uses "local". Include ALL services regardless of runtime.
|
|
105
|
+
|
|
106
|
+
- \`runtimeDetails.serverStartCommand\` — command to start the service. MUST match runtime:
|
|
107
|
+
- \`"local"\` → application command: "uvicorn main:app", "npm run dev", "java -jar app.jar"
|
|
108
|
+
- \`"docker"\` → Docker command: "docker compose up -d \<service-name\>" ← prefer service-scoped
|
|
109
|
+
- \`"k8s"\` → k8s command: "kubectl apply -f deploy/", "helm install myrelease ."
|
|
110
|
+
⚠️ NEVER mix (e.g. "uvicorn …" with runtime "docker" will cause errors).
|
|
111
|
+
|
|
112
|
+
- \`runtimeDetails.dockerNetwork\` — Docker network name. ONLY set when runtime is \`"docker"\`. NEVER set for "local" or "k8s".
|
|
113
|
+
- \`runtimeDetails.k8sNamespace\` — Kubernetes namespace. ONLY set when runtime is \`"k8s"\`. NEVER set for "local" or "docker".
|
|
114
|
+
- \`runtimeDetails.k8sContext\` — Kubernetes context. ONLY set when runtime is \`"k8s"\`. NEVER set for "local" or "docker".
|
|
115
|
+
|
|
116
|
+
## Verification Steps
|
|
117
|
+
|
|
118
|
+
Before calling \`skyramp_init_workspace\`, confirm all of the following:
|
|
119
|
+
- ALWAYS SCAN REPO AND FIND SERVICES. A REPO SHOULD HAVE AT LEAST ONE SERVICE.
|
|
120
|
+
- **CRITICAL**: ALL services are included — backend AND frontend. The workspace config is a complete registry of the entire repo, not just the service relevant to your current task. A fullstack or monorepo MUST have multiple services — if you found only one, re-scan every top-level directory before proceeding.
|
|
121
|
+
- Services NOT in docker-compose.yml (e.g. a frontend run with pnpm/npm locally) MUST still be included with runtime "local".
|
|
122
|
+
- Every service has \`api.baseUrl\` set to a localhost URL — NEVER a production or external URL.
|
|
123
|
+
- \`framework\` matches \`language\` (python → pytest/robot | typescript/javascript → playwright | java → junit)
|
|
124
|
+
- \`serverStartCommand\` matches \`runtime\`
|
|
125
|
+
- For services in docker-compose.yml: runtime MUST be "docker" and command MUST be a docker command (e.g. "docker compose up -d <service-name>").
|
|
126
|
+
- NEVER use application-level commands (uvicorn, npm, node, python, java, etc.) with runtime "docker".
|
|
127
|
+
- \`dockerNetwork\` is set only when runtime is "docker"
|
|
128
|
+
- \`k8sNamespace\` and \`k8sContext\` are set only when runtime is "k8s"
|
|
129
|
+
|
|
130
|
+
Once verified, call \`skyramp_init_workspace\` with:
|
|
131
|
+
- \`workspacePath\`: the repository root path
|
|
132
|
+
- \`services\`: the array built above
|
|
133
|
+
- \`scanToken\`: the token returned by the preceding \`skyramp_init_scan\` call (made with \`workspacePath\`)
|
|
134
|
+
- \`force\`: defaults to false — only set to true if the user explicitly asks to overwrite an existing \`.skyramp/workspace.yml\``;
|
|
@@ -58,15 +58,22 @@ ${scannedSection}`;
|
|
|
58
58
|
if (inlineMode) {
|
|
59
59
|
// Testbot inline mode: all maintenance logic lives here so the testbot
|
|
60
60
|
// prompt only orchestrates steps without duplicating rules.
|
|
61
|
-
return
|
|
61
|
+
return `<drift_analysis_rules>
|
|
62
|
+
You are acting as a Skyramp Integration Architect.
|
|
63
|
+
For this maintenance step: assess each existing test against the diff returned by \`skyramp_analyze_changes\` and apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) directly — no separate analysis step.
|
|
64
|
+
|
|
65
|
+
${buildActionDecisionMatrix()}
|
|
62
66
|
|
|
63
67
|
${buildUpdateExecutionRules()}
|
|
64
68
|
|
|
65
69
|
${buildDriftOutputChecklist(existingTests.length, newEndpointCount, inlineMode)}
|
|
66
70
|
|
|
67
|
-
**Be brief.** Score each test, decide the action, and apply edits immediately. Do NOT write detailed analysis for IGNORE'd tests
|
|
71
|
+
**Be brief.** Score each test, decide the action, and apply edits immediately. Do NOT write detailed analysis for IGNORE'd tests.
|
|
72
|
+
</drift_analysis_rules>`;
|
|
68
73
|
}
|
|
69
|
-
return
|
|
74
|
+
return `You are acting as a Skyramp Integration Architect. Your responsibility is to assess each existing test against the branch diff and score it for drift. Apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) based on the scoring guide below.
|
|
75
|
+
|
|
76
|
+
${contextSection}
|
|
70
77
|
${buildDriftScoringGuide()}
|
|
71
78
|
|
|
72
79
|
${buildActionDecisionMatrix()}
|
|
@@ -176,24 +176,24 @@ After completing all assessments above, call \`skyramp_actions\` with \`stateFil
|
|
|
176
176
|
const existingTestSection = inlineMode
|
|
177
177
|
? `### Existing tests
|
|
178
178
|
For each existing test reported by \`skyramp_analyze_changes\`:
|
|
179
|
-
- **IGNORE/VERIFY tests**: list on a single line:
|
|
179
|
+
- **IGNORE/VERIFY tests**: list on a single line: \`{testFile} — IGNORE\` or \`{testFile} — VERIFY (score {N})\`. Do NOT write detailed rationale.
|
|
180
180
|
- **UPDATE/REGENERATE/DELETE tests**: output the full block:
|
|
181
181
|
\`\`\`
|
|
182
|
-
Test:
|
|
183
|
-
Drift Score:
|
|
184
|
-
Action:
|
|
185
|
-
Rationale:
|
|
182
|
+
Test: {testFile}
|
|
183
|
+
Drift Score: {0-100}
|
|
184
|
+
Action: {UPDATE | REGENERATE | DELETE}
|
|
185
|
+
Rationale: {1-2 sentence explanation}
|
|
186
186
|
\`\`\`
|
|
187
187
|
Focus your analysis on tests that need action — do not spend time analyzing unchanged tests.`
|
|
188
188
|
: `### Existing tests (${existingTestCount} total)
|
|
189
189
|
For each existing test:
|
|
190
|
-
- **IGNORE/VERIFY tests**: list on a single line:
|
|
190
|
+
- **IGNORE/VERIFY tests**: list on a single line: \`{testFile} — IGNORE\` or \`{testFile} — VERIFY (score {N})\`. Do NOT write detailed rationale.
|
|
191
191
|
- **UPDATE/REGENERATE/DELETE tests**: output the full block:
|
|
192
192
|
\`\`\`
|
|
193
|
-
Test:
|
|
194
|
-
Drift Score:
|
|
195
|
-
Action:
|
|
196
|
-
Rationale:
|
|
193
|
+
Test: {testFile}
|
|
194
|
+
Drift Score: {0-100}
|
|
195
|
+
Action: {UPDATE | REGENERATE | DELETE}
|
|
196
|
+
Rationale: {1-2 sentence explanation}
|
|
197
197
|
\`\`\``;
|
|
198
198
|
const newEndpointSection = inlineMode
|
|
199
199
|
? ""
|
|
@@ -201,10 +201,10 @@ Rationale: <1-2 sentence explanation>
|
|
|
201
201
|
? `### New endpoints (${newEndpointCount} detected)
|
|
202
202
|
For EACH new endpoint, output:
|
|
203
203
|
\`\`\`
|
|
204
|
-
Endpoint:
|
|
204
|
+
Endpoint: {METHOD} {path}
|
|
205
205
|
Action: ADD
|
|
206
|
-
Test types:
|
|
207
|
-
Rationale:
|
|
206
|
+
Test types: {contract | integration | smoke | ...}
|
|
207
|
+
Rationale: {1 sentence}
|
|
208
208
|
\`\`\``
|
|
209
209
|
: `### New endpoints
|
|
210
210
|
No new endpoints detected in this diff.`;
|
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
export const MAX_TESTS_TO_GENERATE = 3;
|
|
2
2
|
export const MAX_RECOMMENDATIONS = 20;
|
|
3
3
|
export const MAX_CRITICAL_TESTS = 3;
|
|
4
|
+
/**
|
|
5
|
+
* Error string emitted by skyramp_integration_test_generation when both
|
|
6
|
+
* an explicit authHeader and a workspace api.authType are passed simultaneously.
|
|
7
|
+
* Both the tool description and all prompt locations import this constant,
|
|
8
|
+
* so every occurrence is character-for-character identical by construction.
|
|
9
|
+
*/
|
|
10
|
+
export const AUTH_CONFLICT_ERROR_MSG = "Auth header and auth type cannot be supported at the same time.";
|
|
4
11
|
export function buildArchitectPreamble(isDiffScope) {
|
|
5
12
|
if (isDiffScope) {
|
|
6
13
|
return `You are acting as a Skyramp Integration Architect. You will receive a branch diff — changed endpoints, source code interactions, and existing tests. Your responsibility is to map test intent to the Skyramp generation spec with precision. No guessing — derive all parameters from the codebase, workspace config, and provided context only.
|
|
@@ -32,9 +39,9 @@ Before calling any tool, replace every \`<from source>\` placeholder in the tool
|
|
|
32
39
|
export function buildReasoningProtocol() {
|
|
33
40
|
return `<reasoning_protocol>
|
|
34
41
|
## Parameter Grounding Rule
|
|
35
|
-
Before each GENERATE tool call,
|
|
42
|
+
Before each GENERATE tool call, confirm WHERE each key value comes from:
|
|
36
43
|
|
|
37
|
-
- **requestBody / responseBody fields** → source code schema (Zod, Pydantic, DTO), enriched scenario, or OpenAPI spec —
|
|
44
|
+
- **requestBody / responseBody fields** → source code schema (Zod, Pydantic, DTO), enriched scenario, or OpenAPI spec. **The generation tool rejects empty \`{}\` request bodies for POST/PUT/PATCH** — read the source schema first if the fields are unknown.
|
|
38
45
|
- **endpointURL** → workspace \`baseUrl\` + endpoint path (both required — never path alone)
|
|
39
46
|
- **authHeader / authScheme** → workspace config or OpenAPI \`securitySchemes\`
|
|
40
47
|
- **FK path params** → chained from a prior step's response \`id\` field — not hardcoded
|
|
@@ -88,7 +95,7 @@ export function buildTestPatternGuidelines() {
|
|
|
88
95
|
- **Middleware chains**: If auth/rate-limit/logging middleware exists, test the chain (e.g., rate limit hit → auth still checked → correct error returned)
|
|
89
96
|
- **N+1 query risk**: If list endpoints join related data (e.g., orders with products), test with large datasets
|
|
90
97
|
- **State machines**: If resources have status transitions (draft→published→archived), test invalid transitions (e.g., archived→draft should fail)
|
|
91
|
-
- **Cascade deletes**:
|
|
98
|
+
- **Cascade deletes**: Only recommend after reading source code to confirm which resource holds the FK. The resource with the FK is the child; the one it points to is the parent. Example: if orders.product_id references products, then products is the parent — deleting a product tests whether orders are protected or cascade-deleted. Getting this backwards (treating the child as the parent) produces a nonsensical test.
|
|
92
99
|
- **Race conditions**: If concurrent writes are possible (inventory deduction, counter increment), test concurrent requests
|
|
93
100
|
- **Computed fields**: If response contains derived values (total, average, count), verify computation with known inputs (e.g., total_cost = compute_seconds * rate + memory_mb * rate + external_cost)
|
|
94
101
|
- **Mutation with collection modification**: If PUT/PATCH endpoints accept arrays of child items (e.g., order line items, cart products, invoice entries), test adding/removing items and verify that derived totals (e.g., total_amount, subtotal, item_count) are recalculated correctly. This is the most common source of user-reported bugs — always prioritize it for GENERATE over simple field-update tests.
|
|
@@ -104,7 +111,7 @@ export function buildTestQualityCriteria() {
|
|
|
104
111
|
**Integration tests** should demonstrate cross-resource data flow — step A creates data
|
|
105
112
|
that step B depends on (e.g., create product → create order referencing that product's ID →
|
|
106
113
|
verify order contains correct product). Single-resource CRUD alone is not an integration test.
|
|
107
|
-
Use
|
|
114
|
+
Use actual field names and values from the source code schema or OpenAPI schema (not \`{}\` or invented field names); verify response data, not just status codes.
|
|
108
115
|
When a PUT/PATCH updates a resource with child collections (e.g., order items), the request body
|
|
109
116
|
MUST include the child array with FK references chained from prior steps — and assertions MUST
|
|
110
117
|
verify the actual child items in the response (product_id, quantity, unit_price), not just
|
|
@@ -148,23 +155,6 @@ When no Playwright trace exists, use the Playwright browser tools (\`browser_nav
|
|
|
148
155
|
**No duplicate coverage.** If an existing test already covers an endpoint + test type,
|
|
149
156
|
recommend a different test that adds new coverage.`;
|
|
150
157
|
}
|
|
151
|
-
export function buildTestExamples() {
|
|
152
|
-
return `### Examples — what "good" looks like
|
|
153
|
-
|
|
154
|
-
**Impressive (these catch prod bugs):**
|
|
155
|
-
1. Cross-resource workflow: Register → login → create order → verify order appears in user's order list (category: workflow)
|
|
156
|
-
2. State machine + business rule: Create product with inventory=10 → place order qty=10 → verify inventory=0 → place another order → verify 409 out-of-stock (category: business_rule)
|
|
157
|
-
3. Computed field verification: POST /flow-costs with known compute_seconds/memory_mb/external_cost_usd → verify total_cost_usd = (compute_seconds × 0.00012) + (memory_mb × 0.000002 × compute_seconds) + external_cost_usd (category: business_rule)
|
|
158
|
-
4. Cross-user isolation: Create user A's resource → authenticate as user B → GET/PUT/DELETE user A's resource → verify 403 (category: security_boundary)
|
|
159
|
-
5. Cascade delete: Create parent → create child referencing parent → DELETE parent → GET child → verify 404 or 409 depending on FK policy (category: data_integrity)
|
|
160
|
-
6. Unique constraint with side-effect: POST /users with duplicate email → verify 409 → verify original user unchanged (category: business_rule)
|
|
161
|
-
7. Budget threshold: Create budget with alert_threshold=80% → record costs pushing spend to 85% → verify budget_warning=true on next cost record (category: business_rule)
|
|
162
|
-
|
|
163
|
-
**Deprioritise (low value):**
|
|
164
|
-
- GET /products → 200 (trivial health check, no assertions beyond status)
|
|
165
|
-
- Single-resource CRUD with no cross-resource or state verification
|
|
166
|
-
- POST with missing field → 422 (obvious validation, covered by contract tests)`;
|
|
167
|
-
}
|
|
168
158
|
export function buildVerificationChecklist(topN, maxGen) {
|
|
169
159
|
return `<verification>
|
|
170
160
|
Before finalizing your output, verify:
|
|
@@ -283,8 +273,8 @@ To skip auth for unauthenticated endpoints, pass \`authHeader: ""\`.`;
|
|
|
283
273
|
// and the executor sends the correct Authorization header at run time.
|
|
284
274
|
const authHeaderOnlyParams = serializeAuthCallParams({ authHeader: authParams.authHeader });
|
|
285
275
|
const integrationAuthNote = hasWorkspaceAuthType
|
|
286
|
-
? `omit auth params
|
|
287
|
-
: `pass \`${authHeaderOnlyParams}\` only (no \`authScheme\`).`;
|
|
276
|
+
? `omit ALL auth params (passing auth alongside workspace authType causes "${AUTH_CONFLICT_ERROR_MSG}").`
|
|
277
|
+
: `pass \`${authHeaderOnlyParams}\` only (no \`authScheme\`, no \`authToken\`).`;
|
|
288
278
|
const authHeaderLine = noAuth
|
|
289
279
|
? `**No Auth (from workspace config):** Workspace indicates no authentication. **Verify independently** — if you find auth in the OpenAPI spec or source code, override with the correct \`authHeader\` and \`authScheme\`.`
|
|
290
280
|
: `**Auth params:** \`${authCallParams}\` — pass to EVERY tool call below.`;
|
|
@@ -292,7 +282,7 @@ To skip auth for unauthenticated endpoints, pass \`authHeader: ""\`.`;
|
|
|
292
282
|
|
|
293
283
|
**Contract**: The following tool signatures are strict technical contracts. Every parameter should match the schema exactly. Omit optional parameters rather than guessing values. If a required field cannot be resolved, fetch context first.
|
|
294
284
|
|
|
295
|
-
**Before every tool call**:
|
|
285
|
+
**Before every tool call**: Confirm WHERE each key value comes from — source code schema, enriched scenario, or OpenAPI spec. See Mandatory Reasoning Protocol above.
|
|
296
286
|
|
|
297
287
|
${authHeaderLine}
|
|
298
288
|
${authGuidance}
|
|
@@ -300,7 +290,6 @@ ${authGuidance}
|
|
|
300
290
|
**For multi-endpoint workflows (integration tests) — Batch Scenario → Integration pipeline:**
|
|
301
291
|
1. Call \`skyramp_batch_scenario_test_generation\` with ALL steps in a single call: \`scenarioName\`, \`destination\`,
|
|
302
292
|
\`baseURL\`, \`${authCallParams}\`, and a \`steps\` array where each element has \`method\`, \`path\`, \`requestBody\` OR \`queryParams\`, \`responseBody\`, \`statusCode\`.
|
|
303
|
-
(Fallback: if batch tool is unavailable, call \`skyramp_scenario_test_generation\` once per step.)
|
|
304
293
|
\`statusCode\` is optional — defaults: POST→201, DELETE→204, GET/PUT/PATCH→200. Only override for non-standard codes.
|
|
305
294
|
**OpenAPI spec is NOT required.** \`apiSchema\` is OPTIONAL — omit it if no spec exists.
|
|
306
295
|
**CRITICAL — Query params vs request body:**
|
|
@@ -2,7 +2,6 @@ import { z } from "zod";
|
|
|
2
2
|
import { StateManager, hasSessionData, getSessionData, } from "../../utils/AnalysisStateManager.js";
|
|
3
3
|
import { logger } from "../../utils/logger.js";
|
|
4
4
|
import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
|
|
5
|
-
import { getPersonaPrefix } from "../architectPersona.js";
|
|
6
5
|
import { ScenarioSource, AnalysisScope } from "../../types/RepositoryAnalysis.js";
|
|
7
6
|
import { SCENARIO_CATEGORIES } from "../../types/TestRecommendation.js";
|
|
8
7
|
export function mergeEnrichedScenarios(serverScenarios, raw) {
|
|
@@ -87,8 +86,7 @@ export function mergeEnrichedScenarios(serverScenarios, raw) {
|
|
|
87
86
|
}
|
|
88
87
|
export function registerRecommendTestsPrompt(server) {
|
|
89
88
|
server.registerPrompt("skyramp_recommend_tests", {
|
|
90
|
-
description:
|
|
91
|
-
"Given the repository analysis in stateFile, produce ranked test recommendations split into " +
|
|
89
|
+
description: "Given the repository analysis in stateFile, produce ranked test recommendations split into " +
|
|
92
90
|
"GENERATE (call generation tools immediately) and ADDITIONAL (deferred, describe only).\n\n" +
|
|
93
91
|
"**Output contract:** Every GENERATE integration test targeting a business rule or formula " +
|
|
94
92
|
"MUST include a non-empty bugCatchingTarget. Parameters for generation tools must derive " +
|