npm - @skyramp/mcp - Versions diffs - 0.1.0-rc.1 → 0.1.0-rc.3 - Mend

@skyramp/mcp 0.1.0-rc.1 → 0.1.0-rc.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

package/build/index.js +17 -68
package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js +134 -0
package/build/prompts/test-maintenance/drift-analysis-prompt.js +10 -3
package/build/prompts/test-maintenance/driftAnalysisSections.js +13 -13
package/build/prompts/test-recommendation/analysisOutputPrompt.js +42 -50
package/build/prompts/test-recommendation/mergeEnrichedScenarios.test.js +125 -0
package/build/prompts/test-recommendation/recommendationSections.js +131 -25
package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +149 -9
package/build/prompts/test-recommendation/test-recommendation-prompt.js +432 -111
package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +455 -63
package/build/prompts/testbot/testbot-prompts.js +34 -13
package/build/prompts/testbot/testbot-prompts.test.js +29 -0
package/build/resources/analysisResources.js +13 -5
package/build/services/TestExecutionService.js +2 -12
package/build/tool-phases.js +2 -2
package/build/tools/generate-tests/generateBatchScenarioRestTool.js +30 -23
package/build/tools/generate-tests/generateBatchScenarioRestTool.test.js +88 -0
package/build/tools/generate-tests/generateContractRestTool.js +5 -1
package/build/tools/generate-tests/generateIntegrationRestTool.js +13 -4
package/build/tools/submitReportTool.js +23 -5
package/build/tools/submitReportTool.test.js +84 -6
package/build/tools/test-management/analyzeChangesTool.js +24 -7
package/build/tools/workspace/initScanWorkspaceTool.js +76 -0
package/build/tools/workspace/initializeWorkspaceTool.js +39 -119
package/build/types/RepositoryAnalysis.js +25 -3
package/build/types/TestRecommendation.js +5 -4
package/build/types/TestTypes.js +28 -2
package/build/utils/AnalysisStateManager.js +30 -4
package/build/utils/docker.js +118 -0
package/build/utils/docker.test.js +113 -0
package/build/utils/initAgent.js +75 -13
package/build/utils/routeParsers.js +35 -0
package/build/utils/routeParsers.test.js +66 -1
package/build/utils/scenarioDrafting.js +207 -360
package/build/utils/scenarioDrafting.test.js +191 -256
package/build/utils/skyrampMdContent.js +0 -1
package/build/utils/trace-parser.js +24 -6
package/build/utils/trace-parser.test.js +140 -0
package/build/utils/versions.js +3 -0
package/package.json +1 -1
package/build/prompts/testGenerationPrompt.js +0 -207
package/build/prompts/testHealthPrompt.js +0 -85
package/build/services/DriftAnalysisService.js +0 -1075
package/build/services/DriftAnalysisService.test.js +0 -168
package/build/tools/generate-tests/generateScenarioRestTool.js +0 -131

package/build/index.js CHANGED Viewed

@@ -2,11 +2,9 @@
 import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 import { registerStartTraceCollectionPrompt } from "./prompts/startTraceCollectionPrompts.js";
-import { registerTestHealthPrompt } from "./prompts/testHealthPrompt.js";
 import { registerTraceTool } from "./tools/trace/startTraceCollectionTool.js";
 import { registerTraceStopTool } from "./tools/trace/stopTraceCollectionTool.js";
 import { registerExecuteSkyrampTestTool } from "./tools/executeSkyrampTestTool.js";
-import { registerTestGenerationPrompt } from "./prompts/testGenerationPrompt.js";
 import { AUTH_PLACEHOLDER_TOKEN } from "./types/TestTypes.js";
 import { logger } from "./utils/logger.js";
 import { registerUITestTool } from "./tools/generate-tests/generateUIRestTool.js";
@@ -22,18 +20,18 @@ import { registerFixErrorTool } from "./tools/fixErrorTool.js";
 import { registerRecommendTestsPrompt } from "./prompts/test-recommendation/registerRecommendTestsPrompt.js";
 import { registerModularizationTool } from "./tools/code-refactor/modularizationTool.js";
 import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
-import { registerScenarioTestTool } from "./tools/generate-tests/generateScenarioRestTool.js";
 import { registerBatchScenarioTestTool } from "./tools/generate-tests/generateBatchScenarioRestTool.js";
 import { registerMockTool } from "./tools/generate-tests/generateMockRestTool.js";
 import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerExecuteTestsTool, registerActionsTool, registerStateCleanupTool, } from "./tools/test-management/index.js";
 import { registerTestbotPrompt, registerTestbotResource, } from "./prompts/testbot/testbot-prompts.js";
 import { registerSubmitReportTool } from "./tools/submitReportTool.js";
 import { registerInitializeWorkspaceTool } from "./tools/workspace/initializeWorkspaceTool.js";
+import { registerInitScanWorkspaceTool } from "./tools/workspace/initScanWorkspaceTool.js";
 import { registerOneClickTool } from "./tools/one-click/oneClickTool.js";
 import { registerAnalysisResources } from "./resources/analysisResources.js";
 import { registerProgressResource } from "./resources/progressResource.js";
 import { AnalyticsService } from "./services/AnalyticsService.js";
-import { initCheck } from "./utils/initAgent.js";
+import { registerInitTriggerOnMCPInitialized } from "./utils/initAgent.js";
 import { registerPlaywrightTools, registerTraceRecordingPrompt, getPlaywrightTraceService, } from "./playwright/index.js";
 const server = new McpServer({
     name: "Skyramp MCP Server",
@@ -52,6 +50,13 @@ const server = new McpServer({
     },
     instructions: `Skyramp MCP Server — generates and executes API tests (fuzz, contract, integration, E2E, UI).
+## Workspace Initialization (REQUIRED before the first Skyramp tool call)
+If the workspace root is a git repo AND \`.skyramp/workspace.yml\` does not exist:
+1. Call \`skyramp_init_scan\` with \`workspacePath\` → follow the returned instructions to discover all services.
+2. Call \`skyramp_init_workspace\` with \`workspacePath\`, \`services\`, and the \`scanToken\` from step 1.
+3. Proceed with the originally requested tool.
+Skip only if: not a git repo, \`.skyramp/workspace.yml\` already exists, or user explicitly declines.
 ## Rules
 - NEVER show CLI commands. ALWAYS use the MCP tools provided.
 - For UI and E2E tests, there are TWO recording modes:
@@ -81,23 +86,6 @@ After \`skyramp_analyze_changes\`, inspect enriched data via MCP Resources (use
 - \`skyramp://analysis/{sessionId}/scenarios\` — drafted scenarios
 - \`skyramp://analysis/{sessionId}/diff\` — branch diff context
-## Workspace Initialization (before ANY other Skyramp tool)
-Follow this flow EVERY time before calling any Skyramp tool:
-1. **Check**: Is the workspace root a git repository? (i.e. does a \`.git\` directory exist at the root?)
-   - **If NO** → it is a non-git repo. Do NOT call \`skyramp_initialize_workspace\`. Proceed directly with the requested tool. STOP — do not continue to step 2.
-   - **If YES** → it is a git repo. Continue to step 2.
-2. **Check**: Does .skyramp/workspace.yml exist at the workspace root?
-   - **If YES** → workspace is already initialized. Proceed with the requested tool. STOP here.
-   - **If NO** → you MUST call \`skyramp_initialize_workspace\` BEFORE doing anything else.
-     - Do NOT skip this step. Do NOT proceed to the requested tool first.
-     - Scan the repo for ALL services (see the tool description for detailed steps).
-     - A fullstack or monorepo MUST produce multiple services — never just one.
-     - After workspace init completes, THEN proceed with the originally requested tool.
-3. **ONLY skip init in these two cases: non-git repo (step 1) or explicit user decline** (i.e. user EXPLICITLY says "no", "skip", "don't create workspace", or similar).
-   - A request like "execute tests" or "generate tests" is NOT a signal to skip init.
-   - If the user does decline, respect it — do NOT ask again, and proceed with the requested tool.
 ## Workspace Defaults for Test Generation (MANDATORY)
 Before calling ANY test generation tool, you MUST follow this flow:
@@ -105,7 +93,7 @@ Before calling ANY test generation tool, you MUST follow this flow:
 2. **Extract** the \`language\`, \`framework\`, \`outputDir\`, \`api.baseUrl\`, \`api.authHeader\`, and \`api.authType\` from the services section.
 3. **Use those values** as defaults for the test generation tool call. Do NOT ask the user for these values if they are already configured in the workspace file.
 4. **CRITICAL — endpointURL**: The \`endpointURL\` parameter MUST be the full URL to the specific endpoint being tested, NOT just the base URL. Construct it by combining \`api.baseUrl\` with the endpoint path. Example: if \`api.baseUrl\` is \`http://localhost:8000\` and the endpoint is \`/api/v1/products\`, pass \`endpointURL: "http://localhost:8000/api/v1/products"\`. NEVER pass just the base URL (e.g. \`http://localhost:8000\`) as \`endpointURL\`.
-5. **CRITICAL — scenario generation**: When calling \`skyramp_scenario_test_generation\`, ALWAYS pass:
+5. **CRITICAL — scenario generation**: When calling \`skyramp_batch_scenario_test_generation\`, ALWAYS pass:
    - \`baseURL\`: The full base URL from \`api.baseUrl\` (e.g., \`http://localhost:3000\`). This determines the scheme, host, and port in the generated trace. Without it, the trace defaults to https:443 which is almost always wrong for local development.
    - \`authHeader\`: Which HTTP header carries the auth credential. Get it from \`api.authHeader\` in workspace config. Examples: \`Authorization\` (Bearer/Token auth), \`X-Api-Key\` (API key auth), \`Cookie\` (session/cookie auth like NextAuth). Pass \`""\` to skip auth entirely (unauthenticated endpoints or \`api.authType: "none"\`).
    - \`authScheme\`: Only when \`authHeader\` is \`Authorization\`. The prefix before the token (e.g., \`"Bearer"\` → \`Authorization: Bearer <token>\`). **Derive from**: (1) OpenAPI spec \`securitySchemes\`/\`securityDefinitions\`, (2) source code auth middleware, (3) workspace \`api.authType\`. **Do NOT guess.**
@@ -118,54 +106,10 @@ Before calling ANY test generation tool, you MUST follow this flow:
 8. The user can always override workspace defaults by explicitly specifying values in their request.
 `,
 });
-// Check for first-time invocation after version update (runs in background, doesn't block)
-let initCheckInFlight = false;
-let initCheckDone = false;
-const INIT_MESSAGE = "Skyramp init: Triggering pull of Skyramp worker and executor images if not present locally.";
-const originalRegisterTool = server.registerTool.bind(server);
-server.registerTool = function (name, definition, handler) {
-    const wrappedHandler = async (...args) => {
-        let triggeredInitThisCall = false;
-        if (!initCheckDone && !initCheckInFlight) {
-            // Guard with inFlight so concurrent tool calls don't each spawn a new initCheck(),
-            // but allow retry on failure (initCheckInFlight is reset to false on error).
-            // SkyrampClient constructor calls checkForUpdate("npm") via synchronous koffi FFI,
-            // which can block the event loop for up to 60 s if the update-check server is
-            // unreachable.  Deferring via setImmediate ensures the tool response is written to
-            // stdout (and acknowledged by the MCP client) before any blocking FFI call runs.
-            initCheckInFlight = true;
-            triggeredInitThisCall = true;
-            setImmediate(() => {
-                initCheck()
-                    .then(() => {
-                    initCheckDone = true;
-                })
-                    .catch((err) => {
-                    logger.error("Background initialization check failed", { error: err });
-                })
-                    .finally(() => {
-                    initCheckInFlight = false;
-                });
-            });
-        }
-        const result = await handler(...args);
-        if (triggeredInitThisCall && result) {
-            const content = result.content ?? [];
-            result.content = [
-                { type: "text", text: INIT_MESSAGE },
-                ...content,
-            ];
-        }
-        return result;
-    };
-    return originalRegisterTool(name, definition, wrappedHandler);
-};
 // Register prompts
 logger.info("Starting prompt registration process");
 const prompts = [
-    registerTestGenerationPrompt,
     registerStartTraceCollectionPrompt,
-    registerTestHealthPrompt,
     registerRecommendTestsPrompt,
     registerTraceRecordingPrompt,
 ];
@@ -185,7 +129,7 @@ const testGenerationTools = [
     registerIntegrationTestTool,
     registerE2ETestTool,
     registerUITestTool,
-    registerScenarioTestTool,
+    registerBatchScenarioTestTool,
     registerMockTool,
 ];
 testGenerationTools.forEach((registerTool) => registerTool(server));
@@ -206,6 +150,7 @@ registerExecuteTestsTool(server);
 registerActionsTool(server);
 registerStateCleanupTool(server);
 // Register workspace management tools
+registerInitScanWorkspaceTool(server);
 registerInitializeWorkspaceTool(server);
 // Register one-click orchestrated workflows
 registerOneClickTool(server);
@@ -219,7 +164,6 @@ const infrastructureTools = [
 ];
 if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
     infrastructureTools.push(registerSubmitReportTool);
-    registerBatchScenarioTestTool(server);
     logger.info("TestBot tools enabled via SKYRAMP_FEATURE_TESTBOT");
 }
 infrastructureTools.forEach((registerTool) => registerTool(server));
@@ -249,6 +193,11 @@ process.on("uncaughtException", async (error) => {
 // Start MCP server
 async function main() {
     const transport = new StdioServerTransport();
+    server.server.oninitialized = () => {
+        registerInitTriggerOnMCPInitialized().catch((err) => {
+            logger.error("Failed to run MCP initialized trigger", { error: err });
+        });
+    };
     await server.connect(transport);
     logger.info("MCP Server started successfully");
     // Listen for stdin closure (parent process disconnected)

package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js ADDED Viewed

@@ -0,0 +1,134 @@
+import { getPersonaPrefix } from "../architectPersona.js";
+export const INIT_WORKSPACE_INSTRUCTIONS = `${getPersonaPrefix()}Your task is to scan this repository, discover ALL services, and call the \`skyramp_init_workspace\` tool with the discovered services array and the scanToken.
+After scanning the workspace, before calling the \`skyramp_init_workspace\` tool, you MUST:
+**1. Output a \`<thinking>\` block** to justify the reasoning behind each field mapping for every discovered service.
+**2. Then output a Discovery Summary** with the exact services array you will pass to the tool:
+\`\`\`json
+[
+  {
+    "serviceName": "<name>",
+    "language": "<language>",
+    "framework": "<framework>",
+    "testDirectory": "<path>",
+    "api": { "schemaPath": "<path-or-url>", "baseUrl": "<url>", "authType": "<type>", "authHeader": "<header>" },
+    "runtimeDetails": { "runtime": "<runtime>", "serverStartCommand": "<command>", "dockerNetwork": "<network>" }
+  }
+  // ... one entry per discovered service
+]
+\`\`\`
+## Step 1 — List ALL Top-Level Directories
+Run a directory listing of the workspace root. Every top-level directory is a potential service. Common layouts:
+| Layout | Example dirs | Expect |
+|--------|-------------|--------|
+| Monorepo | apps/web, apps/api, packages/shared | 1 service per app |
+| Microservices | services/auth, services/orders | 1 service per service dir |
+| Single service | src/, lib/ | 1 service (the root) |
+## Step 2 — Inspect EVERY Candidate Directory
+For **each** top-level directory, check for service indicator files:
+**Language indicators** (presence of ANY = independent service):
+- package.json → typescript / javascript
+- requirements.txt, pyproject.toml, Pipfile → python
+- pom.xml, build.gradle → java
+**Test framework** (look inside the service dir):
+- playwright.config.* → playwright
+- pytest.ini, conftest.py, pyproject.toml [tool.pytest] → pytest
+- junit in pom.xml → junit
+**API schemas** (look inside the service dir AND check known framework defaults):
+- openapi.json/yaml, swagger.json/yaml → schema file path
+- FastAPI projects → http://localhost:{port}/openapi.json
+- Express with swagger-ui → http://localhost:{port}/api-docs
+- Spring Boot → http://localhost:{port}/v3/api-docs
+- Always use localhost URLs — NEVER use external or production URLs
+## Step 3 — Check Root-Level Runtime Config
+Inspect the repo root (and subdirectories like .devcontainer/) for shared runtime configuration:
+- docker-compose.yml → extract service names, ports, start commands
+  Docker Compose ALWAYS prefixes the network name with "<project-name>_".
+  If compose has "networks: { my-net: ... }" → actual network = "<project-name>_my-net".
+  If no explicit networks section → default network = "<project-name>_default".
+  Project name = basename of the CWD where docker compose runs.
+- Makefile → extract start/dev targets
+- Root package.json scripts → workspace-level commands
+## Step 4 — Build the Complete Services Array
+Create one service entry per deployable unit. You MUST include:
+- Every backend/API service (Python, Java, Go, Node.js)
+- Every frontend service (React, Vue, Angular, Next.js)
+- Set runtime fields from docker-compose.yml if present
+**Basic fields:**
+- \`serviceName\` *(required)* — unique identifier, e.g. "api-gateway", "user-service"
+- \`language\` — \`python\` | \`typescript\` | \`javascript\` | \`java\`
+  Detect from: package.json → typescript/javascript | requirements.txt/pyproject.toml → python | pom.xml/build.gradle → java
+- \`framework\` — \`playwright\` | \`pytest\` | \`robot\` | \`junit\`
+  Detect from: pytest.ini/playwright.config/jest.config/junit in pom.xml
+  MUST match the language: python → pytest or robot | typescript/javascript → playwright | java → junit
+- \`testDirectory\` — path relative to repo root where tests exist or will be generated; prefer existing test dirs over source dirs, e.g. "tests", "api/tests", "test"
+**API fields:**
+- \`api.schemaPath\` — path or URL to OpenAPI/Protobuf/GraphQL schema
+  Search for: openapi.json, swagger.yaml, *.proto, *.graphql
+  Framework defaults: FastAPI → /openapi.json | Express → /api-docs | Spring → /v3/api-docs
+  ⚠️  NEVER use external or production URLs — always use localhost.
+- \`api.baseUrl\` *(required)* — local base URL, e.g. "http://localhost:3000"
+  Derive from docker-compose ports, app config, or README.
+  ⚠️  MUST be a localhost URL. NEVER use external or production URLs.
+- \`api.authType\` — \`bearer\` | \`basic\` | \`oauth\` | \`apiKey\` | \`none\`
+  Detect by checking in order:
+  1. Dependencies: \`jsonwebtoken\`/\`passport-jwt\` → \`bearer\` | \`passport-http\` → \`basic\` | \`passport-oauth2\`/\`openid-client\` → \`oauth\`
+  2. Env vars: \`JWT_SECRET\`/\`ACCESS_TOKEN\` → \`bearer\` | \`API_KEY\`/\`X_API_KEY\` → \`apiKey\` | \`CLIENT_ID\`+\`CLIENT_SECRET\` → \`oauth\`
+  3. Middleware/source: \`req.headers.authorization\` + \`Bearer\` → \`bearer\` | custom header check → \`apiKey\`
+  4. Fallback: frontend/UI service → \`none\` | backend API with no signals → \`bearer\`
+- \`api.authHeader\` — header name, e.g. "Authorization" for bearer/basic/oauth, "X-API-Key" for apiKey, "" for none
+**Runtime fields:**
+- \`runtimeDetails.runtime\` — \`local\` | \`docker\` | \`k8s\`
+  Detect per service:
+  - Service listed in docker-compose.yml → \`"docker"\`
+  - Service has only a Dockerfile (no compose entry) → \`"local"\` or \`"docker"\`
+  - k8s manifests exist (charts/, k8s/, deploy/) → \`"k8s"\`
+  ⚠️  A repo may have MIXED runtimes — a backend in docker-compose.yml uses "docker" while a frontend run with pnpm/npm locally uses "local". Include ALL services regardless of runtime.
+- \`runtimeDetails.serverStartCommand\` — command to start the service. MUST match runtime:
+  - \`"local"\`  → application command: "uvicorn main:app", "npm run dev", "java -jar app.jar"
+  - \`"docker"\` → Docker command: "docker compose up -d \<service-name\>"  ← prefer service-scoped
+  - \`"k8s"\`    → k8s command: "kubectl apply -f deploy/", "helm install myrelease ."
+  ⚠️  NEVER mix (e.g. "uvicorn …" with runtime "docker" will cause errors).
+- \`runtimeDetails.dockerNetwork\` — Docker network name. ONLY set when runtime is \`"docker"\`. NEVER set for "local" or "k8s".
+- \`runtimeDetails.k8sNamespace\` — Kubernetes namespace. ONLY set when runtime is \`"k8s"\`. NEVER set for "local" or "docker".
+- \`runtimeDetails.k8sContext\` — Kubernetes context. ONLY set when runtime is \`"k8s"\`. NEVER set for "local" or "docker".
+## Verification Steps
+Before calling \`skyramp_init_workspace\`, confirm all of the following:
+- ALWAYS SCAN REPO AND FIND SERVICES. A REPO SHOULD HAVE AT LEAST ONE SERVICE.
+- **CRITICAL**: ALL services are included — backend AND frontend. The workspace config is a complete registry of the entire repo, not just the service relevant to your current task. A fullstack or monorepo MUST have multiple services — if you found only one, re-scan every top-level directory before proceeding.
+- Services NOT in docker-compose.yml (e.g. a frontend run with pnpm/npm locally) MUST still be included with runtime "local".
+- Every service has \`api.baseUrl\` set to a localhost URL — NEVER a production or external URL.
+- \`framework\` matches \`language\` (python → pytest/robot | typescript/javascript → playwright | java → junit)
+- \`serverStartCommand\` matches \`runtime\`
+- For services in docker-compose.yml: runtime MUST be "docker" and command MUST be a docker command (e.g. "docker compose up -d <service-name>").
+- NEVER use application-level commands (uvicorn, npm, node, python, java, etc.) with runtime "docker".
+- \`dockerNetwork\` is set only when runtime is "docker"
+- \`k8sNamespace\` and \`k8sContext\` are set only when runtime is "k8s"
+Once verified, call \`skyramp_init_workspace\` with:
+- \`workspacePath\`: the repository root path
+- \`services\`: the array built above
+- \`scanToken\`: the token returned by \`skyramp_init_scan\` (called with only workspacePath)
+- \`force\`: defaults to false — only set to true if the user explicitly asks to overwrite an existing \`.skyramp/workspace.yml\``;

package/build/prompts/test-maintenance/drift-analysis-prompt.js CHANGED Viewed

@@ -58,15 +58,22 @@ ${scannedSection}`;
     if (inlineMode) {
         // Testbot inline mode: all maintenance logic lives here so the testbot
         // prompt only orchestrates steps without duplicating rules.
-        return `${buildActionDecisionMatrix()}
+        return `<drift_analysis_rules>
+You are acting as a Skyramp Integration Architect.
+For this maintenance step: assess each existing test against the diff returned by \`skyramp_analyze_changes\` and apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) directly — no separate analysis step.
+${buildActionDecisionMatrix()}
 ${buildUpdateExecutionRules()}
 ${buildDriftOutputChecklist(existingTests.length, newEndpointCount, inlineMode)}
-**Be brief.** Score each test, decide the action, and apply edits immediately. Do NOT write detailed analysis for IGNORE'd tests.`;
+**Be brief.** Score each test, decide the action, and apply edits immediately. Do NOT write detailed analysis for IGNORE'd tests.
+</drift_analysis_rules>`;
     }
-    return `${contextSection}
+    return `You are acting as a Skyramp Integration Architect. Your responsibility is to assess each existing test against the branch diff and score it for drift. Apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) based on the scoring guide below.
+${contextSection}
 ${buildDriftScoringGuide()}
 ${buildActionDecisionMatrix()}

package/build/prompts/test-maintenance/driftAnalysisSections.js CHANGED Viewed

@@ -176,24 +176,24 @@ After completing all assessments above, call \`skyramp_actions\` with \`stateFil
     const existingTestSection = inlineMode
         ? `### Existing tests
 For each existing test reported by \`skyramp_analyze_changes\`:
-- **IGNORE/VERIFY tests**: list on a single line: \`<testFile> — IGNORE\` or \`<testFile> — VERIFY (score <N>)\`. Do NOT write detailed rationale.
+- **IGNORE/VERIFY tests**: list on a single line: \`{testFile} — IGNORE\` or \`{testFile} — VERIFY (score {N})\`. Do NOT write detailed rationale.
 - **UPDATE/REGENERATE/DELETE tests**: output the full block:
 \`\`\`
-Test: <testFile>
-Drift Score: <0-100>
-Action: <UPDATE | REGENERATE | DELETE>
-Rationale: <1-2 sentence explanation>
+Test: {testFile}
+Drift Score: {0-100}
+Action: {UPDATE | REGENERATE | DELETE}
+Rationale: {1-2 sentence explanation}
 \`\`\`
 Focus your analysis on tests that need action — do not spend time analyzing unchanged tests.`
         : `### Existing tests (${existingTestCount} total)
 For each existing test:
-- **IGNORE/VERIFY tests**: list on a single line: \`<testFile> — IGNORE\` or \`<testFile> — VERIFY (score <N>)\`. Do NOT write detailed rationale.
+- **IGNORE/VERIFY tests**: list on a single line: \`{testFile} — IGNORE\` or \`{testFile} — VERIFY (score {N})\`. Do NOT write detailed rationale.
 - **UPDATE/REGENERATE/DELETE tests**: output the full block:
 \`\`\`
-Test: <testFile>
-Drift Score: <0-100>
-Action: <UPDATE | REGENERATE | DELETE>
-Rationale: <1-2 sentence explanation>
+Test: {testFile}
+Drift Score: {0-100}
+Action: {UPDATE | REGENERATE | DELETE}
+Rationale: {1-2 sentence explanation}
 \`\`\``;
     const newEndpointSection = inlineMode
         ? ""
@@ -201,10 +201,10 @@ Rationale: <1-2 sentence explanation>
             ? `### New endpoints (${newEndpointCount} detected)
 For EACH new endpoint, output:
 \`\`\`
-Endpoint: <METHOD> <path>
+Endpoint: {METHOD} {path}
 Action: ADD
-Test types: <contract | integration | smoke | ...>
-Rationale: <1 sentence>
+Test types: {contract | integration | smoke | ...}
+Rationale: {1 sentence}
 \`\`\``
             : `### New endpoints
 No new endpoints detected in this diff.`;

package/build/prompts/test-recommendation/analysisOutputPrompt.js CHANGED Viewed

@@ -1,27 +1,32 @@
+import { AnalysisScope } from "../../types/RepositoryAnalysis.js";
 function buildEnrichmentInstructions(p) {
-    const isDiffScope = p.analysisScope === "current_branch_diff";
+    const isDiffScope = p.analysisScope === AnalysisScope.CurrentBranchDiff;
     const useHealthFlow = p.nextTool === "skyramp_analyze_test_health";
     if (!isDiffScope) {
         const nextStep = useHealthFlow
             ? `### Step 3: Identify tests at risk of drift
 Call \`skyramp_analyze_test_health\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\``
-            : `### Step 3: Call recommend tests
-Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
-        return `## Your Task — Enrich & Recommend (full repo)
+            : `### Step 3: Present the catalog
+The ranked test recommendation catalog is pre-built and shown below (after the separator line).
+**Your only job is to present it.**
+1. Fill in every \`<…from source>\` placeholder using the field names, computed formulas, and auth details you found in Steps 1–2.
+2. Output the completed catalog **exactly as formatted — grouped by test type (### E2E / ### UI / ### Integration / ### Contract)**. Do NOT restructure, reorder, rename sections, or generate a new format.
+3. Do NOT call any Skyramp generation tools. The catalog shows ready-to-use tool calls that can be executed on demand.
+**If** Steps 1–2 revealed additional scenarios the catalog does not cover (e.g. a computed formula or FK relationship that was missed), you may optionally call \`skyramp_recommend_tests\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\` and \`enrichedScenarios\` to regenerate a more complete catalog — but only after presenting the current one.`;
+        return `## Your Task — Fill in and Present the Catalog (full repo)
 ### Step 1: Read key files
-Read \`package.json\` / \`requirements.txt\`, \`docker-compose.yml\`, route/controller files,
-and model/schema files (Zod schemas, Pydantic models, TypeScript interfaces, DTOs)
-to understand the tech stack, endpoint shapes, auth mechanisms, and request/response schemas.
-### Step 2: Identify resource relationships and parameter locations
-Map how endpoints relate to each other — which POST creates resources consumed by other endpoints?
-**Resolve nested/sub-router paths** from the Router Mounting section above.
-**CRITICAL — Distinguish query params vs request body:** For each endpoint, determine whether
-parameters are sent as URL query params (typical for GET search/filter/list) or request body
-(typical for POST/PUT/PATCH). Look at FastAPI \`Query()\` annotations, Express \`req.query\` usage,
-Spring \`@RequestParam\`, Flask \`request.args\`, etc. Populate \`queryParams\` in interactions
-for GET endpoints that accept search/filter/pagination parameters.
+Read route/controller files and model/schema files (Pydantic models, Zod schemas, DTOs)
+to find: required request body fields, computed response fields and formulas, auth middleware type, storage backend, and how sub-routers are mounted (cross-check against Router Mounting section above).
+### Step 2: Map cross-resource relationships and resolve endpoint paths
+(Distinct from Step 1 — Step 1 reads individual schemas; Step 2 maps how endpoints relate to each other.)
+For each endpoint: which POST creates resources consumed by other endpoints?
+**Resolve nested paths** from the Router Mounting section — a router mounted at \`/products/{product_id}/reviews\` means \`GET /\` in that file is actually \`GET /api/v1/products/{product_id}/reviews\`.
+For GET list endpoints: identify query params (\`limit\`, \`offset\`, \`order\`, \`orderBy\`) from framework annotations (FastAPI \`Query()\`, Express \`req.query\`, etc.).
 ${nextStep}`;
     }
@@ -67,8 +72,20 @@ Draft multi-step scenarios simulating realistic user workflows:
 response data verification, actual field names for chaining.
 **Parameter placement:** GET search/filter endpoints MUST use \`queryParams\`, not \`requestBody\`.
+**No duplicate scenarios.** Each scenario must cover a distinct code path (unique method + path + expected status). Do NOT draft two scenarios that differ only in request body values but hit the same code path (e.g. discount=10% vs discount=25% — both succeed with 200, same logic). A negative-case variant with a different expected status (e.g. discount=-10% → 422) IS a distinct scenario — use a single-step contract test for it (see below).
+**For each new or modified endpoint, ensure at least one error-path scenario is drafted** — a single-step contract test that triggers a specific error (404 for a missing resource ID, 422 for an invalid field value) that the source code explicitly handles. One auth-boundary scenario (missing auth → 401/403) is enough across all endpoints — do not repeat it per endpoint.
+**For every scenario you draft, fill \`bugCatchingTarget\`** with the specific formula, constraint, or failure mode the test is designed to expose. Examples:
+- \`"discount formula: total_amount = subtotal * (1 - discount_value / 100) — wrong if addition is used instead of subtraction"\`
+- \`"items not recalculated after PATCH — total_amount stays at old value if collection update is ignored"\`
+- \`"missing 404 guard on resource ID — returns 500 instead of 404 for unknown IDs"\`
+This field is used at test generation time to compute exact assertion values. Leave it empty only if no specific formula or constraint applies.
 ### Step 4: Call recommend tests
-Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
+Call \`skyramp_recommend_tests\` with:
+- \`stateFile: "${p.stateFile}"\`
+- \`enrichedScenarios\`: (optional) JSON array of your Step 3 scenarios — see the tool's inputSchema for the exact shape. Your enriched scenarios override server-side ones with the same \`scenarioName\` and are prioritized in ranking. Omit if you drafted nothing in Step 3.`;
     return `## Your Task — Enrich & Recommend (PR-scoped)
 ### Step 1: Read the changed files
@@ -81,39 +98,19 @@ ${criticalPatternStep}
 ${step3Content}`;
 }
 export function buildAnalysisOutputText(p) {
-    const isDiffScope = p.analysisScope === "current_branch_diff";
-    const diffSection = p.parsedDiff
+    const isDiffScope = p.analysisScope === AnalysisScope.CurrentBranchDiff;
+    // Router mounting context is unique to this prompt (not in recommendationPrompt).
+    // Branch diff, endpoint catalog, auth config, and OpenAPI spec are omitted here
+    // because they are already present in the recommendation prompt that is
+    // concatenated in the same tool response.
+    const routerSection = !p.wsSchemaPath && p.routerMountContext
         ? `
-## Branch Diff Context
-**Branch**: \`${p.parsedDiff.currentBranch}\` → base: \`${p.parsedDiff.baseBranch}\`
-**Changed Files** (${p.parsedDiff.changedFiles.length}): ${p.parsedDiff.changedFiles.join(", ")}
-**New Endpoints** (${p.parsedDiff.newEndpoints.length}): ${p.parsedDiff.newEndpoints.map((e) => `${e.method} ${e.path} (${e.sourceFile})`).join(", ") || "none"}
-**Modified Endpoints** (${p.parsedDiff.modifiedEndpoints.length}): ${p.parsedDiff.modifiedEndpoints.map((e) => `${e.method} ${e.path} (${e.sourceFile})`).join(", ") || "none"}
-**Affected Services**: ${p.parsedDiff.affectedServices.join(", ") || "none"}
-`
-        : "";
-    const endpointCatalog = p.scannedEndpoints.length > 0
-        ? `
-## Pre-Scanned Endpoint Catalog (${p.scannedEndpoints.length} routes)
-${p.scannedEndpoints.map((ep) => `  ${ep.methods.join("|")} ${ep.path} (${ep.sourceFile})`).join("\n")}
-`
-        : "";
-    const wsLine = p.wsBaseUrl
-        ? `**Base URL**: \`${p.wsBaseUrl}\`${p.wsAuthHeader ? ` | **Auth header**: \`${p.wsAuthHeader}\`` : ""}${p.wsAuthType ? ` | **Auth type**: \`${p.wsAuthType}\`` : ""}`
-        : "";
-    const specSection = p.wsSchemaPath
-        ? `
-## OpenAPI Spec Available
-Spec at \`${p.wsSchemaPath}\`. **Read it** for authoritative paths and schemas.
-Pass \`apiSchema: "${p.wsSchemaPath}"\` to ALL test generation tool calls.`
-        : p.routerMountContext
-            ? `
 ## Router Mounting / Nesting
 \`\`\`
 ${p.routerMountContext}
 \`\`\`
 Use this to resolve full URL paths for nested endpoints.`
-            : "";
+        : "";
     const enrichment = buildEnrichmentInstructions(p);
     return `# Repository Analysis
@@ -121,12 +118,7 @@ Use this to resolve full URL paths for nested endpoints.`
 **Repository**: \`${p.repositoryPath}\`
 **Analysis Scope**: \`${p.analysisScope}\`
 ${isDiffScope ? `**Diff endpoints**: ${(p.parsedDiff?.newEndpoints.length ?? 0) + (p.parsedDiff?.modifiedEndpoints.length ?? 0)}` : `**Pre-scanned endpoints**: ${p.scannedEndpoints.length}`}
-${wsLine}
-${p.wsSchemaPath ? `**OpenAPI Spec**: \`${p.wsSchemaPath}\` (spec-based flow)` : "**Flow**: Code-scanning (may miss nesting)"}
-${diffSection}
-${endpointCatalog}
-${specSection}
+${routerSection}
 ${enrichment}
 **CRITICAL**: No .json/.md file creation. Prioritize cross-resource workflows.`;

package/build/prompts/test-recommendation/mergeEnrichedScenarios.test.js ADDED Viewed

@@ -0,0 +1,125 @@
+jest.mock("@skyramp/skyramp", () => ({ Skyramp: class { // Replace the @skyramp/skyramp module with a stub exporting an empty Skyramp class.
+    } })); // NOTE(review): presumably keeps the real SDK from loading when the module under test is imported — confirm.
+import { mergeEnrichedScenarios } from "./registerRecommendTestsPrompt.js";
+import { ScenarioSource } from "../../types/RepositoryAnalysis.js";
+import { TestType } from "../../types/TestTypes.js";
+function makeScenario(overrides = {}) { // Fixture builder: returns a baseline scenario object; any key in `overrides` replaces the default.
+    return {
+        scenarioName: "base-scenario",
+        description: "base",
+        category: "crud",
+        priority: "medium",
+        steps: [{ order: 1, method: "GET", path: "/api/items", description: "list", interactionType: "success", expectedStatusCode: 200 }], // one GET step expecting 200
+        chainingKeys: [],
+        requiresAuth: true,
+        estimatedComplexity: "simple",
+        source: ScenarioSource.CodeInferred,
+        testType: TestType.CONTRACT,
+        ...overrides, // spread last so caller-supplied fields win over the defaults above
+    };
+}
+const VALID_STEP = { order: 1, method: "post", path: "/api/orders", expectedStatusCode: 201 }; // Minimal step reused by the tests below; note the method is lowercase "post".
+describe("mergeEnrichedScenarios — happy path", () => { // Cases where the agent JSON is accepted (or is an empty array) and combined with the server list.
+    it("merges a valid agent scenario into server scenarios", () => {
+        const server = [makeScenario({ scenarioName: "existing" })];
+        const raw = JSON.stringify([{
+                scenarioName: "new-orders-flow",
+                category: "business_rule",
+                steps: [VALID_STEP],
+            }]);
+        const { scenarios, rejectionNotes } = mergeEnrichedScenarios(server, raw); // second argument is the agent's scenarios as a raw JSON string
+        expect(rejectionNotes).toHaveLength(0);
+        expect(scenarios.find(s => s.scenarioName === "new-orders-flow")).toBeDefined();
+        expect(scenarios.find(s => s.scenarioName === "existing")).toBeDefined();
+        expect(scenarios).toHaveLength(2); // agent scenario is added alongside the existing server one
+    });
+    it("overrides a server scenario when agent provides same scenarioName", () => {
+        const server = [makeScenario({ scenarioName: "orders-flow", description: "server version" })];
+        const raw = JSON.stringify([{
+                scenarioName: "orders-flow",
+                category: "business_rule",
+                description: "agent version",
+                steps: [VALID_STEP],
+            }]);
+        const { scenarios } = mergeEnrichedScenarios(server, raw);
+        expect(scenarios).toHaveLength(1); // same scenarioName → one entry, not two
+        expect(scenarios[0].description).toBe("agent version");
+        expect(scenarios[0].source).toBe("agent-enriched"); // the surviving entry is tagged as agent-enriched
+    });
+    it("normalizes method to uppercase", () => {
+        const raw = JSON.stringify([{
+                scenarioName: "uppercase-test",
+                category: "crud",
+                steps: [{ order: 1, method: "post", path: "/api/items", expectedStatusCode: 201 }],
+            }]);
+        const { scenarios } = mergeEnrichedScenarios([], raw);
+        expect(scenarios[0].steps[0].method).toBe("POST"); // the input step used lowercase "post"
+    });
+    it("preserves bugCatchingTarget when provided", () => {
+        const raw = JSON.stringify([{
+                scenarioName: "formula-test",
+                category: "business_rule",
+                bugCatchingTarget: "total = price * qty",
+                steps: [VALID_STEP],
+            }]);
+        const { scenarios } = mergeEnrichedScenarios([], raw);
+        expect(scenarios[0].bugCatchingTarget).toBe("total = price * qty");
+    });
+    it("falls back to server scenarios on empty agent array", () => {
+        const server = [makeScenario({ scenarioName: "server-only" })];
+        const { scenarios, rejectionNotes } = mergeEnrichedScenarios(server, "[]");
+        // Empty array → no agent scenarios, return server ones unchanged
+        expect(scenarios).toEqual(server);
+        expect(rejectionNotes).toHaveLength(0);
+    });
+});
+describe("mergeEnrichedScenarios — rejection cases", () => { // Malformed agent input: bad entries produce rejection notes; unparseable or non-array input leaves the server list as-is.
+    it("rejects scenario with missing scenarioName", () => {
+        const raw = JSON.stringify([{ category: "crud", steps: [VALID_STEP] }]);
+        const { scenarios, rejectionNotes } = mergeEnrichedScenarios([], raw);
+        expect(scenarios).toHaveLength(0); // the only agent entry was rejected and the server list is empty
+        expect(rejectionNotes[0]).toMatch(/missing scenarioName/);
+    });
+    it("rejects scenario with missing steps array", () => {
+        const raw = JSON.stringify([{ scenarioName: "no-steps", category: "crud" }]);
+        const { rejectionNotes } = mergeEnrichedScenarios([], raw);
+        expect(rejectionNotes[0]).toMatch(/missing or empty steps/);
+    });
+    it("rejects scenario with empty steps array", () => {
+        const raw = JSON.stringify([{ scenarioName: "empty-steps", category: "crud", steps: [] }]);
+        const { rejectionNotes } = mergeEnrichedScenarios([], raw);
+        expect(rejectionNotes[0]).toMatch(/missing or empty steps/); // same note as the missing-steps case above
+    });
+    it("rejects scenario with missing category", () => {
+        const raw = JSON.stringify([{ scenarioName: "no-cat", steps: [VALID_STEP] }]);
+        const { rejectionNotes } = mergeEnrichedScenarios([], raw);
+        expect(rejectionNotes[0]).toMatch(/missing category/);
+    });
+    it("rejects scenario with unknown category", () => {
+        const raw = JSON.stringify([{ scenarioName: "bad-cat", category: "not_a_real_category", steps: [VALID_STEP] }]);
+        const { rejectionNotes } = mergeEnrichedScenarios([], raw);
+        expect(rejectionNotes[0]).toMatch(/unknown category/);
+    });
+    it("falls back to server scenarios on invalid JSON", () => {
+        const server = [makeScenario()];
+        const { scenarios, rejectionNotes } = mergeEnrichedScenarios(server, "{ bad json"); // deliberately unparseable input
+        expect(scenarios).toEqual(server);
+        expect(rejectionNotes[0]).toMatch(/invalid JSON/);
+    });
+    it("falls back to server scenarios when JSON is not an array", () => {
+        const server = [makeScenario()];
+        const { scenarios, rejectionNotes } = mergeEnrichedScenarios(server, JSON.stringify({ not: "array" })); // valid JSON, but an object rather than an array
+        expect(scenarios).toEqual(server);
+        expect(rejectionNotes[0]).toMatch(/expected a JSON array/);
+    });
+    it("accepts valid scenarios and rejects invalid ones in the same batch", () => {
+        const raw = JSON.stringify([
+            { scenarioName: "valid-one", category: "crud", steps: [VALID_STEP] },
+            { category: "crud", steps: [VALID_STEP] }, // missing scenarioName
+        ]);
+        const { scenarios, rejectionNotes } = mergeEnrichedScenarios([], raw);
+        expect(scenarios).toHaveLength(1);
+        expect(scenarios[0].scenarioName).toBe("valid-one");
+        expect(rejectionNotes).toHaveLength(1); // exactly one note, for the entry lacking scenarioName
+    });
+});