npm - @skyramp/mcp - Versions diffs - 0.1.0-rc.2 → 0.1.0-rc.3 - Mend

@skyramp/mcp 0.1.0-rc.2 → 0.1.0-rc.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/build/index.js +17 -68
package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js +134 -0
package/build/prompts/test-maintenance/drift-analysis-prompt.js +10 -3
package/build/prompts/test-maintenance/driftAnalysisSections.js +13 -13
package/build/prompts/test-recommendation/recommendationSections.js +14 -25
package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +1 -3
package/build/prompts/test-recommendation/test-recommendation-prompt.js +46 -59
package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +2 -2
package/build/prompts/testbot/testbot-prompts.js +10 -10
package/build/prompts/testbot/testbot-prompts.test.js +29 -0
package/build/services/TestExecutionService.js +2 -12
package/build/tool-phases.js +2 -2
package/build/tools/generate-tests/generateBatchScenarioRestTool.js +26 -20
package/build/tools/generate-tests/generateBatchScenarioRestTool.test.js +88 -0
package/build/tools/generate-tests/generateContractRestTool.js +5 -1
package/build/tools/generate-tests/generateIntegrationRestTool.js +13 -4
package/build/tools/submitReportTool.js +13 -4
package/build/tools/submitReportTool.test.js +84 -6
package/build/tools/workspace/initScanWorkspaceTool.js +76 -0
package/build/tools/workspace/initializeWorkspaceTool.js +39 -119
package/build/utils/docker.js +118 -0
package/build/utils/docker.test.js +113 -0
package/build/utils/initAgent.js +75 -13
package/build/utils/skyrampMdContent.js +0 -1
package/build/utils/versions.js +3 -0
package/package.json +1 -1
package/build/prompts/testGenerationPrompt.js +0 -207
package/build/prompts/testHealthPrompt.js +0 -85
package/build/services/DriftAnalysisService.js +0 -1075
package/build/services/DriftAnalysisService.test.js +0 -168
package/build/tools/generate-tests/generateScenarioRestTool.js +0 -131

package/build/index.js CHANGED Viewed

@@ -2,11 +2,9 @@
 import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 import { registerStartTraceCollectionPrompt } from "./prompts/startTraceCollectionPrompts.js";
-import { registerTestHealthPrompt } from "./prompts/testHealthPrompt.js";
 import { registerTraceTool } from "./tools/trace/startTraceCollectionTool.js";
 import { registerTraceStopTool } from "./tools/trace/stopTraceCollectionTool.js";
 import { registerExecuteSkyrampTestTool } from "./tools/executeSkyrampTestTool.js";
-import { registerTestGenerationPrompt } from "./prompts/testGenerationPrompt.js";
 import { AUTH_PLACEHOLDER_TOKEN } from "./types/TestTypes.js";
 import { logger } from "./utils/logger.js";
 import { registerUITestTool } from "./tools/generate-tests/generateUIRestTool.js";
@@ -22,18 +20,18 @@ import { registerFixErrorTool } from "./tools/fixErrorTool.js";
 import { registerRecommendTestsPrompt } from "./prompts/test-recommendation/registerRecommendTestsPrompt.js";
 import { registerModularizationTool } from "./tools/code-refactor/modularizationTool.js";
 import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
-import { registerScenarioTestTool } from "./tools/generate-tests/generateScenarioRestTool.js";
 import { registerBatchScenarioTestTool } from "./tools/generate-tests/generateBatchScenarioRestTool.js";
 import { registerMockTool } from "./tools/generate-tests/generateMockRestTool.js";
 import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerExecuteTestsTool, registerActionsTool, registerStateCleanupTool, } from "./tools/test-management/index.js";
 import { registerTestbotPrompt, registerTestbotResource, } from "./prompts/testbot/testbot-prompts.js";
 import { registerSubmitReportTool } from "./tools/submitReportTool.js";
 import { registerInitializeWorkspaceTool } from "./tools/workspace/initializeWorkspaceTool.js";
+import { registerInitScanWorkspaceTool } from "./tools/workspace/initScanWorkspaceTool.js";
 import { registerOneClickTool } from "./tools/one-click/oneClickTool.js";
 import { registerAnalysisResources } from "./resources/analysisResources.js";
 import { registerProgressResource } from "./resources/progressResource.js";
 import { AnalyticsService } from "./services/AnalyticsService.js";
-import { initCheck } from "./utils/initAgent.js";
+import { registerInitTriggerOnMCPInitialized } from "./utils/initAgent.js";
 import { registerPlaywrightTools, registerTraceRecordingPrompt, getPlaywrightTraceService, } from "./playwright/index.js";
 const server = new McpServer({
     name: "Skyramp MCP Server",
@@ -52,6 +50,13 @@ const server = new McpServer({
     },
     instructions: `Skyramp MCP Server — generates and executes API tests (fuzz, contract, integration, E2E, UI).
+## Workspace Initialization (REQUIRED before the first Skyramp tool call)
+If the workspace root is a git repo AND \`.skyramp/workspace.yml\` does not exist:
+1. Call \`skyramp_init_scan\` with \`workspacePath\` → follow the returned instructions to discover all services.
+2. Call \`skyramp_init_workspace\` with \`workspacePath\`, \`services\`, and the \`scanToken\` from step 1.
+3. Proceed with the originally requested tool.
+Skip only if: not a git repo, \`.skyramp/workspace.yml\` already exists, or user explicitly declines.
 ## Rules
 - NEVER show CLI commands. ALWAYS use the MCP tools provided.
 - For UI and E2E tests, there are TWO recording modes:
@@ -81,23 +86,6 @@ After \`skyramp_analyze_changes\`, inspect enriched data via MCP Resources (use
 - \`skyramp://analysis/{sessionId}/scenarios\` — drafted scenarios
 - \`skyramp://analysis/{sessionId}/diff\` — branch diff context
-## Workspace Initialization (before ANY other Skyramp tool)
-Follow this flow EVERY time before calling any Skyramp tool:
-1. **Check**: Is the workspace root a git repository? (i.e. does a \`.git\` directory exist at the root?)
-   - **If NO** → it is a non-git repo. Do NOT call \`skyramp_initialize_workspace\`. Proceed directly with the requested tool. STOP — do not continue to step 2.
-   - **If YES** → it is a git repo. Continue to step 2.
-2. **Check**: Does .skyramp/workspace.yml exist at the workspace root?
-   - **If YES** → workspace is already initialized. Proceed with the requested tool. STOP here.
-   - **If NO** → you MUST call \`skyramp_initialize_workspace\` BEFORE doing anything else.
-     - Do NOT skip this step. Do NOT proceed to the requested tool first.
-     - Scan the repo for ALL services (see the tool description for detailed steps).
-     - A fullstack or monorepo MUST produce multiple services — never just one.
-     - After workspace init completes, THEN proceed with the originally requested tool.
-3. **ONLY skip init in these two cases: non-git repo (step 1) or explicit user decline** (i.e. user EXPLICITLY says "no", "skip", "don't create workspace", or similar).
-   - A request like "execute tests" or "generate tests" is NOT a signal to skip init.
-   - If the user does decline, respect it — do NOT ask again, and proceed with the requested tool.
 ## Workspace Defaults for Test Generation (MANDATORY)
 Before calling ANY test generation tool, you MUST follow this flow:
@@ -105,7 +93,7 @@ Before calling ANY test generation tool, you MUST follow this flow:
 2. **Extract** the \`language\`, \`framework\`, \`outputDir\`, \`api.baseUrl\`, \`api.authHeader\`, and \`api.authType\` from the services section.
 3. **Use those values** as defaults for the test generation tool call. Do NOT ask the user for these values if they are already configured in the workspace file.
 4. **CRITICAL — endpointURL**: The \`endpointURL\` parameter MUST be the full URL to the specific endpoint being tested, NOT just the base URL. Construct it by combining \`api.baseUrl\` with the endpoint path. Example: if \`api.baseUrl\` is \`http://localhost:8000\` and the endpoint is \`/api/v1/products\`, pass \`endpointURL: "http://localhost:8000/api/v1/products"\`. NEVER pass just the base URL (e.g. \`http://localhost:8000\`) as \`endpointURL\`.
-5. **CRITICAL — scenario generation**: When calling \`skyramp_scenario_test_generation\`, ALWAYS pass:
+5. **CRITICAL — scenario generation**: When calling \`skyramp_batch_scenario_test_generation\`, ALWAYS pass:
    - \`baseURL\`: The full base URL from \`api.baseUrl\` (e.g., \`http://localhost:3000\`). This determines the scheme, host, and port in the generated trace. Without it, the trace defaults to https:443 which is almost always wrong for local development.
    - \`authHeader\`: Which HTTP header carries the auth credential. Get it from \`api.authHeader\` in workspace config. Examples: \`Authorization\` (Bearer/Token auth), \`X-Api-Key\` (API key auth), \`Cookie\` (session/cookie auth like NextAuth). Pass \`""\` to skip auth entirely (unauthenticated endpoints or \`api.authType: "none"\`).
    - \`authScheme\`: Only when \`authHeader\` is \`Authorization\`. The prefix before the token (e.g., \`"Bearer"\` → \`Authorization: Bearer <token>\`). **Derive from**: (1) OpenAPI spec \`securitySchemes\`/\`securityDefinitions\`, (2) source code auth middleware, (3) workspace \`api.authType\`. **Do NOT guess.**
@@ -118,54 +106,10 @@ Before calling ANY test generation tool, you MUST follow this flow:
 8. The user can always override workspace defaults by explicitly specifying values in their request.
 `,
 });
-// Check for first-time invocation after version update (runs in background, doesn't block)
-let initCheckInFlight = false;
-let initCheckDone = false;
-const INIT_MESSAGE = "Skyramp init: Triggering pull of Skyramp worker and executor images if not present locally.";
-const originalRegisterTool = server.registerTool.bind(server);
-server.registerTool = function (name, definition, handler) {
-    const wrappedHandler = async (...args) => {
-        let triggeredInitThisCall = false;
-        if (!initCheckDone && !initCheckInFlight) {
-            // Guard with inFlight so concurrent tool calls don't each spawn a new initCheck(),
-            // but allow retry on failure (initCheckInFlight is reset to false on error).
-            // SkyrampClient constructor calls checkForUpdate("npm") via synchronous koffi FFI,
-            // which can block the event loop for up to 60 s if the update-check server is
-            // unreachable.  Deferring via setImmediate ensures the tool response is written to
-            // stdout (and acknowledged by the MCP client) before any blocking FFI call runs.
-            initCheckInFlight = true;
-            triggeredInitThisCall = true;
-            setImmediate(() => {
-                initCheck()
-                    .then(() => {
-                    initCheckDone = true;
-                })
-                    .catch((err) => {
-                    logger.error("Background initialization check failed", { error: err });
-                })
-                    .finally(() => {
-                    initCheckInFlight = false;
-                });
-            });
-        }
-        const result = await handler(...args);
-        if (triggeredInitThisCall && result) {
-            const content = result.content ?? [];
-            result.content = [
-                { type: "text", text: INIT_MESSAGE },
-                ...content,
-            ];
-        }
-        return result;
-    };
-    return originalRegisterTool(name, definition, wrappedHandler);
-};
 // Register prompts
 logger.info("Starting prompt registration process");
 const prompts = [
-    registerTestGenerationPrompt,
     registerStartTraceCollectionPrompt,
-    registerTestHealthPrompt,
     registerRecommendTestsPrompt,
     registerTraceRecordingPrompt,
 ];
@@ -185,7 +129,7 @@ const testGenerationTools = [
     registerIntegrationTestTool,
     registerE2ETestTool,
     registerUITestTool,
-    registerScenarioTestTool,
+    registerBatchScenarioTestTool,
     registerMockTool,
 ];
 testGenerationTools.forEach((registerTool) => registerTool(server));
@@ -206,6 +150,7 @@ registerExecuteTestsTool(server);
 registerActionsTool(server);
 registerStateCleanupTool(server);
 // Register workspace management tools
+registerInitScanWorkspaceTool(server);
 registerInitializeWorkspaceTool(server);
 // Register one-click orchestrated workflows
 registerOneClickTool(server);
@@ -219,7 +164,6 @@ const infrastructureTools = [
 ];
 if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
     infrastructureTools.push(registerSubmitReportTool);
-    registerBatchScenarioTestTool(server);
     logger.info("TestBot tools enabled via SKYRAMP_FEATURE_TESTBOT");
 }
 infrastructureTools.forEach((registerTool) => registerTool(server));
@@ -249,6 +193,11 @@ process.on("uncaughtException", async (error) => {
 // Start MCP server
 async function main() {
     const transport = new StdioServerTransport();
+    server.server.oninitialized = () => {
+        registerInitTriggerOnMCPInitialized().catch((err) => {
+            logger.error("Failed to run MCP initialized trigger", { error: err });
+        });
+    };
     await server.connect(transport);
     logger.info("MCP Server started successfully");
     // Listen for stdin closure (parent process disconnected)

package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js ADDED Viewed

@@ -0,0 +1,134 @@
+import { getPersonaPrefix } from "../architectPersona.js";
+export const INIT_WORKSPACE_INSTRUCTIONS = `${getPersonaPrefix()}Your task is to scan this repository, discover ALL services, and call the \`skyramp_init_workspace\` tool with the discovered services array and the scanToken.
+After scanning the workspace, before calling the \`skyramp_init_workspace\` tool, you MUST:
+**1. Output a \`<thinking>\` block** to justify the reasoning behind each field mapping for every discovered service.
+**2. Then output a Discovery Summary** with the exact services array you will pass to the tool:
+\`\`\`json
+[
+  {
+    "serviceName": "<name>",
+    "language": "<language>",
+    "framework": "<framework>",
+    "testDirectory": "<path>",
+    "api": { "schemaPath": "<path-or-url>", "baseUrl": "<url>", "authType": "<type>", "authHeader": "<header>" },
+    "runtimeDetails": { "runtime": "<runtime>", "serverStartCommand": "<command>", "dockerNetwork": "<network>" }
+  }
+  // ... one entry per discovered service
+]
+\`\`\`
+## Step 1 — List ALL Top-Level Directories
+Run a directory listing of the workspace root. Every top-level directory is a potential service. Common layouts:
+| Layout | Example dirs | Expect |
+|--------|-------------|--------|
+| Monorepo | apps/web, apps/api, packages/shared | 1 service per app |
+| Microservices | services/auth, services/orders | 1 service per service dir |
+| Single service | src/, lib/ | 1 service (the root) |
+## Step 2 — Inspect EVERY Candidate Directory
+For **each** top-level directory, check for service indicator files:
+**Language indicators** (presence of ANY = independent service):
+- package.json → typescript / javascript
+- requirements.txt, pyproject.toml, Pipfile → python
+- pom.xml, build.gradle → java
+**Test framework** (look inside the service dir):
+- playwright.config.* → playwright
+- pytest.ini, conftest.py, pyproject.toml [tool.pytest] → pytest
+- junit in pom.xml → junit
+**API schemas** (look inside the service dir AND check known framework defaults):
+- openapi.json/yaml, swagger.json/yaml → schema file path
+- FastAPI projects → http://localhost:{port}/openapi.json
+- Express with swagger-ui → http://localhost:{port}/api-docs
+- Spring Boot → http://localhost:{port}/v3/api-docs
+- Always use localhost URLs — NEVER use external or production URLs
+## Step 3 — Check Root-Level Runtime Config
+Inspect the repo root (and subdirectories like .devcontainer/) for shared runtime configuration:
+- docker-compose.yml → extract service names, ports, start commands
+  Docker Compose ALWAYS prefixes the network name with "<project-name>_".
+  If compose has "networks: { my-net: ... }" → actual network = "<project-name>_my-net".
+  If no explicit networks section → default network = "<project-name>_default".
+  Project name = basename of the CWD where docker compose runs.
+- Makefile → extract start/dev targets
+- Root package.json scripts → workspace-level commands
+## Step 4 — Build the Complete Services Array
+Create one service entry per deployable unit. You MUST include:
+- Every backend/API service (Python, Java, Go, Node.js)
+- Every frontend service (React, Vue, Angular, Next.js)
+- Set runtime fields from docker-compose.yml if present
+**Basic fields:**
+- \`serviceName\` *(required)* — unique identifier, e.g. "api-gateway", "user-service"
+- \`language\` — \`python\` | \`typescript\` | \`javascript\` | \`java\`
+  Detect from: package.json → typescript/javascript | requirements.txt/pyproject.toml → python | pom.xml/build.gradle → java
+- \`framework\` — \`playwright\` | \`pytest\` | \`robot\` | \`junit\`
+  Detect from: pytest.ini/playwright.config/jest.config/junit in pom.xml
+  MUST match the language: python → pytest or robot | typescript/javascript → playwright | java → junit
+- \`testDirectory\` — path relative to repo root where tests exist or will be generated; prefer existing test dirs over source dirs, e.g. "tests", "api/tests", "test"
+**API fields:**
+- \`api.schemaPath\` — path or URL to OpenAPI/Protobuf/GraphQL schema
+  Search for: openapi.json, swagger.yaml, *.proto, *.graphql
+  Framework defaults: FastAPI → /openapi.json | Express → /api-docs | Spring → /v3/api-docs
+  ⚠️  NEVER use external or production URLs — always use localhost.
+- \`api.baseUrl\` *(required)* — local base URL, e.g. "http://localhost:3000"
+  Derive from docker-compose ports, app config, or README.
+  ⚠️  MUST be a localhost URL. NEVER use external or production URLs.
+- \`api.authType\` — \`bearer\` | \`basic\` | \`oauth\` | \`apiKey\` | \`none\`
+  Detect by checking in order:
+  1. Dependencies: \`jsonwebtoken\`/\`passport-jwt\` → \`bearer\` | \`passport-http\` → \`basic\` | \`passport-oauth2\`/\`openid-client\` → \`oauth\`
+  2. Env vars: \`JWT_SECRET\`/\`ACCESS_TOKEN\` → \`bearer\` | \`API_KEY\`/\`X_API_KEY\` → \`apiKey\` | \`CLIENT_ID\`+\`CLIENT_SECRET\` → \`oauth\`
+  3. Middleware/source: \`req.headers.authorization\` + \`Bearer\` → \`bearer\` | custom header check → \`apiKey\`
+  4. Fallback: frontend/UI service → \`none\` | backend API with no signals → \`bearer\`
+- \`api.authHeader\` — header name, e.g. "Authorization" for bearer/basic/oauth, "X-API-Key" for apiKey, "" for none
+**Runtime fields:**
+- \`runtimeDetails.runtime\` — \`local\` | \`docker\` | \`k8s\`
+  Detect per service:
+  - Service listed in docker-compose.yml → \`"docker"\`
+  - Service has only a Dockerfile (no compose entry) → \`"local"\` or \`"docker"\`
+  - k8s manifests exist (charts/, k8s/, deploy/) → \`"k8s"\`
+  ⚠️  A repo may have MIXED runtimes — a backend in docker-compose.yml uses "docker" while a frontend run with pnpm/npm locally uses "local". Include ALL services regardless of runtime.
+- \`runtimeDetails.serverStartCommand\` — command to start the service. MUST match runtime:
+  - \`"local"\`  → application command: "uvicorn main:app", "npm run dev", "java -jar app.jar"
+  - \`"docker"\` → Docker command: "docker compose up -d \<service-name\>"  ← prefer service-scoped
+  - \`"k8s"\`    → k8s command: "kubectl apply -f deploy/", "helm install myrelease ."
+  ⚠️  NEVER mix (e.g. "uvicorn …" with runtime "docker" will cause errors).
+- \`runtimeDetails.dockerNetwork\` — Docker network name. ONLY set when runtime is \`"docker"\`. NEVER set for "local" or "k8s".
+- \`runtimeDetails.k8sNamespace\` — Kubernetes namespace. ONLY set when runtime is \`"k8s"\`. NEVER set for "local" or "docker".
+- \`runtimeDetails.k8sContext\` — Kubernetes context. ONLY set when runtime is \`"k8s"\`. NEVER set for "local" or "docker".
+## Verification Steps
+Before calling \`skyramp_init_workspace\`, confirm all of the following:
+- ALWAYS SCAN REPO AND FIND SERVICES. A REPO SHOULD HAVE AT LEAST ONE SERVICE.
+- **CRITICAL**: ALL services are included — backend AND frontend. The workspace config is a complete registry of the entire repo, not just the service relevant to your current task. A fullstack or monorepo MUST have multiple services — if you found only one, re-scan every top-level directory before proceeding.
+- Services NOT in docker-compose.yml (e.g. a frontend run with pnpm/npm locally) MUST still be included with runtime "local".
+- Every service has \`api.baseUrl\` set to a localhost URL — NEVER a production or external URL.
+- \`framework\` matches \`language\` (python → pytest/robot | typescript/javascript → playwright | java → junit)
+- \`serverStartCommand\` matches \`runtime\`
+- For services in docker-compose.yml: runtime MUST be "docker" and command MUST be a docker command (e.g. "docker compose up -d <service-name>").
+- NEVER use application-level commands (uvicorn, npm, node, python, java, etc.) with runtime "docker".
+- \`dockerNetwork\` is set only when runtime is "docker"
+- \`k8sNamespace\` and \`k8sContext\` are set only when runtime is "k8s"
+Once verified, call \`skyramp_init_workspace\` with:
+- \`workspacePath\`: the repository root path
+- \`services\`: the array built above
+- \`scanToken\`: the token returned by the earlier \`skyramp_init_scan\` call (made with only \`workspacePath\`)
+- \`force\`: defaults to false — only set to true if the user explicitly asks to overwrite an existing \`.skyramp/workspace.yml\``;

package/build/prompts/test-maintenance/drift-analysis-prompt.js CHANGED Viewed

@@ -58,15 +58,22 @@ ${scannedSection}`;
     if (inlineMode) {
         // Testbot inline mode: all maintenance logic lives here so the testbot
         // prompt only orchestrates steps without duplicating rules.
-        return `${buildActionDecisionMatrix()}
+        return `<drift_analysis_rules>
+You are acting as a Skyramp Integration Architect.
+For this maintenance step: assess each existing test against the diff returned by \`skyramp_analyze_changes\` and apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) directly — no separate analysis step.
+${buildActionDecisionMatrix()}
 ${buildUpdateExecutionRules()}
 ${buildDriftOutputChecklist(existingTests.length, newEndpointCount, inlineMode)}
-**Be brief.** Score each test, decide the action, and apply edits immediately. Do NOT write detailed analysis for IGNORE'd tests.`;
+**Be brief.** Score each test, decide the action, and apply edits immediately. Do NOT write detailed analysis for IGNORE'd tests.
+</drift_analysis_rules>`;
     }
-    return `${contextSection}
+    return `You are acting as a Skyramp Integration Architect. Your responsibility is to assess each existing test against the branch diff and score it for drift. Apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) based on the scoring guide below.
+${contextSection}
 ${buildDriftScoringGuide()}
 ${buildActionDecisionMatrix()}

package/build/prompts/test-maintenance/driftAnalysisSections.js CHANGED Viewed

@@ -176,24 +176,24 @@ After completing all assessments above, call \`skyramp_actions\` with \`stateFil
     const existingTestSection = inlineMode
         ? `### Existing tests
 For each existing test reported by \`skyramp_analyze_changes\`:
-- **IGNORE/VERIFY tests**: list on a single line: \`<testFile> — IGNORE\` or \`<testFile> — VERIFY (score <N>)\`. Do NOT write detailed rationale.
+- **IGNORE/VERIFY tests**: list on a single line: \`{testFile} — IGNORE\` or \`{testFile} — VERIFY (score {N})\`. Do NOT write detailed rationale.
 - **UPDATE/REGENERATE/DELETE tests**: output the full block:
 \`\`\`
-Test: <testFile>
-Drift Score: <0-100>
-Action: <UPDATE | REGENERATE | DELETE>
-Rationale: <1-2 sentence explanation>
+Test: {testFile}
+Drift Score: {0-100}
+Action: {UPDATE | REGENERATE | DELETE}
+Rationale: {1-2 sentence explanation}
 \`\`\`
 Focus your analysis on tests that need action — do not spend time analyzing unchanged tests.`
         : `### Existing tests (${existingTestCount} total)
 For each existing test:
-- **IGNORE/VERIFY tests**: list on a single line: \`<testFile> — IGNORE\` or \`<testFile> — VERIFY (score <N>)\`. Do NOT write detailed rationale.
+- **IGNORE/VERIFY tests**: list on a single line: \`{testFile} — IGNORE\` or \`{testFile} — VERIFY (score {N})\`. Do NOT write detailed rationale.
 - **UPDATE/REGENERATE/DELETE tests**: output the full block:
 \`\`\`
-Test: <testFile>
-Drift Score: <0-100>
-Action: <UPDATE | REGENERATE | DELETE>
-Rationale: <1-2 sentence explanation>
+Test: {testFile}
+Drift Score: {0-100}
+Action: {UPDATE | REGENERATE | DELETE}
+Rationale: {1-2 sentence explanation}
 \`\`\``;
     const newEndpointSection = inlineMode
         ? ""
@@ -201,10 +201,10 @@ Rationale: <1-2 sentence explanation>
             ? `### New endpoints (${newEndpointCount} detected)
 For EACH new endpoint, output:
 \`\`\`
-Endpoint: <METHOD> <path>
+Endpoint: {METHOD} {path}
 Action: ADD
-Test types: <contract | integration | smoke | ...>
-Rationale: <1 sentence>
+Test types: {contract | integration | smoke | ...}
+Rationale: {1 sentence}
 \`\`\``
             : `### New endpoints
 No new endpoints detected in this diff.`;

package/build/prompts/test-recommendation/recommendationSections.js CHANGED Viewed

@@ -1,6 +1,13 @@
 export const MAX_TESTS_TO_GENERATE = 3;
 export const MAX_RECOMMENDATIONS = 20;
 export const MAX_CRITICAL_TESTS = 3;
+/**
+ * Error string emitted by skyramp_integration_test_generation when both
+ * an explicit authHeader and a workspace api.authType are passed simultaneously.
+ * Both the tool description and all prompt locations import this constant,
+ * so every occurrence is character-for-character identical by construction.
+ */
+export const AUTH_CONFLICT_ERROR_MSG = "Auth header and auth type cannot be supported at the same time.";
 export function buildArchitectPreamble(isDiffScope) {
     if (isDiffScope) {
         return `You are acting as a Skyramp Integration Architect. You will receive a branch diff — changed endpoints, source code interactions, and existing tests. Your responsibility is to map test intent to the Skyramp generation spec with precision. No guessing — derive all parameters from the codebase, workspace config, and provided context only.
@@ -32,9 +39,9 @@ Before calling any tool, replace every \`<from source>\` placeholder in the tool
 export function buildReasoningProtocol() {
     return `<reasoning_protocol>
 ## Parameter Grounding Rule
-Before each GENERATE tool call, output a brief \`<thinking>\` block stating WHERE the key values come from:
+Before each GENERATE tool call, confirm WHERE each key value comes from:
-- **requestBody / responseBody fields** → source code schema (Zod, Pydantic, DTO), enriched scenario, or OpenAPI spec — state the file or schema name
+- **requestBody / responseBody fields** → source code schema (Zod, Pydantic, DTO), enriched scenario, or OpenAPI spec. **The generation tool rejects empty \`{}\` request bodies for POST/PUT/PATCH** — read the source schema first if the fields are unknown.
 - **endpointURL** → workspace \`baseUrl\` + endpoint path (both required — never path alone)
 - **authHeader / authScheme** → workspace config or OpenAPI \`securitySchemes\`
 - **FK path params** → chained from a prior step's response \`id\` field — not hardcoded
@@ -88,7 +95,7 @@ export function buildTestPatternGuidelines() {
 - **Middleware chains**: If auth/rate-limit/logging middleware exists, test the chain (e.g., rate limit hit → auth still checked → correct error returned)
 - **N+1 query risk**: If list endpoints join related data (e.g., orders with products), test with large datasets
 - **State machines**: If resources have status transitions (draft→published→archived), test invalid transitions (e.g., archived→draft should fail)
-- **Cascade deletes**: If deleting a parent removes children, verify cascade AND orphan prevention (delete product → orders referencing it get error or cascade)
+- **Cascade deletes**: Only recommend after reading source code to confirm which resource holds the FK. The resource with the FK is the child; the one it points to is the parent. Example: if orders.product_id references products, then products is the parent — deleting a product tests whether orders are protected or cascade-deleted. Getting this backwards (treating the child as the parent) produces a nonsensical test.
 - **Race conditions**: If concurrent writes are possible (inventory deduction, counter increment), test concurrent requests
 - **Computed fields**: If response contains derived values (total, average, count), verify computation with known inputs (e.g., total_cost = compute_seconds * rate + memory_mb * rate + external_cost)
 - **Mutation with collection modification**: If PUT/PATCH endpoints accept arrays of child items (e.g., order line items, cart products, invoice entries), test adding/removing items and verify that derived totals (e.g., total_amount, subtotal, item_count) are recalculated correctly. This is the most common source of user-reported bugs — always prioritize it for GENERATE over simple field-update tests.
@@ -104,7 +111,7 @@ export function buildTestQualityCriteria() {
 **Integration tests** should demonstrate cross-resource data flow — step A creates data
 that step B depends on (e.g., create product → create order referencing that product's ID →
 verify order contains correct product). Single-resource CRUD alone is not an integration test.
-Use realistic request bodies from source code schemas and verify response data, not just status codes.
+Use actual field names and values from the source code schema or OpenAPI schema (not \`{}\` or invented field names); verify response data, not just status codes.
 When a PUT/PATCH updates a resource with child collections (e.g., order items), the request body
 MUST include the child array with FK references chained from prior steps — and assertions MUST
 verify the actual child items in the response (product_id, quantity, unit_price), not just
@@ -148,23 +155,6 @@ When no Playwright trace exists, use the Playwright browser tools (\`browser_nav
 **No duplicate coverage.** If an existing test already covers an endpoint + test type,
 recommend a different test that adds new coverage.`;
 }
-export function buildTestExamples() {
-    return `### Examples — what "good" looks like
-**Impressive (these catch prod bugs):**
-1. Cross-resource workflow: Register → login → create order → verify order appears in user's order list (category: workflow)
-2. State machine + business rule: Create product with inventory=10 → place order qty=10 → verify inventory=0 → place another order → verify 409 out-of-stock (category: business_rule)
-3. Computed field verification: POST /flow-costs with known compute_seconds/memory_mb/external_cost_usd → verify total_cost_usd = (compute_seconds × 0.00012) + (memory_mb × 0.000002 × compute_seconds) + external_cost_usd (category: business_rule)
-4. Cross-user isolation: Create user A's resource → authenticate as user B → GET/PUT/DELETE user A's resource → verify 403 (category: security_boundary)
-5. Cascade delete: Create parent → create child referencing parent → DELETE parent → GET child → verify 404 or 409 depending on FK policy (category: data_integrity)
-6. Unique constraint with side-effect: POST /users with duplicate email → verify 409 → verify original user unchanged (category: business_rule)
-7. Budget threshold: Create budget with alert_threshold=80% → record costs pushing spend to 85% → verify budget_warning=true on next cost record (category: business_rule)
-**Deprioritise (low value):**
-- GET /products → 200 (trivial health check, no assertions beyond status)
-- Single-resource CRUD with no cross-resource or state verification
-- POST with missing field → 422 (obvious validation, covered by contract tests)`;
-}
 export function buildVerificationChecklist(topN, maxGen) {
     return `<verification>
 Before finalizing your output, verify:
@@ -283,8 +273,8 @@ To skip auth for unauthenticated endpoints, pass \`authHeader: ""\`.`;
     // and the executor sends the correct Authorization header at run time.
     const authHeaderOnlyParams = serializeAuthCallParams({ authHeader: authParams.authHeader });
     const integrationAuthNote = hasWorkspaceAuthType
-        ? `omit auth params entirely — workspace \`api.authType: "${authTypeValue}"\` handles it.`
-        : `pass \`${authHeaderOnlyParams}\` only (no \`authScheme\`).`;
+        ? `omit ALL auth params (passing auth alongside workspace authType causes "${AUTH_CONFLICT_ERROR_MSG}").`
+        : `pass \`${authHeaderOnlyParams}\` only (no \`authScheme\`, no \`authToken\`).`;
     const authHeaderLine = noAuth
         ? `**No Auth (from workspace config):** Workspace indicates no authentication. **Verify independently** — if you find auth in the OpenAPI spec or source code, override with the correct \`authHeader\` and \`authScheme\`.`
         : `**Auth params:** \`${authCallParams}\` — pass to EVERY tool call below.`;
@@ -292,7 +282,7 @@ To skip auth for unauthenticated endpoints, pass \`authHeader: ""\`.`;
 **Contract**: The following tool signatures are strict technical contracts. Every parameter should match the schema exactly. Omit optional parameters rather than guessing values. If a required field cannot be resolved, fetch context first.
-**Before every tool call**: Output a <thinking> block justifying the mapping of intent to endpoint to tool parameters. See Mandatory Reasoning Protocol above.
+**Before every tool call**: Confirm WHERE each key value comes from — source code schema, enriched scenario, or OpenAPI spec. See Mandatory Reasoning Protocol above.
 ${authHeaderLine}
 ${authGuidance}
@@ -300,7 +290,6 @@ ${authGuidance}
 **For multi-endpoint workflows (integration tests) — Batch Scenario → Integration pipeline:**
 1. Call \`skyramp_batch_scenario_test_generation\` with ALL steps in a single call: \`scenarioName\`, \`destination\`,
    \`baseURL\`, \`${authCallParams}\`, and a \`steps\` array where each element has \`method\`, \`path\`, \`requestBody\` OR \`queryParams\`, \`responseBody\`, \`statusCode\`.
-   (Fallback: if batch tool is unavailable, call \`skyramp_scenario_test_generation\` once per step.)
    \`statusCode\` is optional — defaults: POST→201, DELETE→204, GET/PUT/PATCH→200. Only override for non-standard codes.
    **OpenAPI spec is NOT required.** \`apiSchema\` is OPTIONAL — omit it if no spec exists.
    **CRITICAL — Query params vs request body:**

package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js CHANGED Viewed

@@ -2,7 +2,6 @@ import { z } from "zod";
 import { StateManager, hasSessionData, getSessionData, } from "../../utils/AnalysisStateManager.js";
 import { logger } from "../../utils/logger.js";
 import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
-import { getPersonaPrefix } from "../architectPersona.js";
 import { ScenarioSource, AnalysisScope } from "../../types/RepositoryAnalysis.js";
 import { SCENARIO_CATEGORIES } from "../../types/TestRecommendation.js";
 export function mergeEnrichedScenarios(serverScenarios, raw) {
@@ -87,8 +86,7 @@ export function mergeEnrichedScenarios(serverScenarios, raw) {
 }
 export function registerRecommendTestsPrompt(server) {
     server.registerPrompt("skyramp_recommend_tests", {
-        description: getPersonaPrefix() +
-            "Given the repository analysis in stateFile, produce ranked test recommendations split into " +
+        description: "Given the repository analysis in stateFile, produce ranked test recommendations split into " +
             "GENERATE (call generation tools immediately) and ADDITIONAL (deferred, describe only).\n\n" +
             "**Output contract:** Every GENERATE integration test targeting a business rule or formula " +
             "MUST include a non-empty bugCatchingTarget. Parameters for generation tools must derive " +