npm - @skyramp/mcp - Versions diffs - 0.1.0-rc.1 → 0.1.0-rc.2 - Mend

@skyramp/mcp 0.1.0-rc.1 → 0.1.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/build/prompts/test-recommendation/analysisOutputPrompt.js +42 -50
package/build/prompts/test-recommendation/mergeEnrichedScenarios.test.js +125 -0
package/build/prompts/test-recommendation/recommendationSections.js +121 -4
package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +151 -9
package/build/prompts/test-recommendation/test-recommendation-prompt.js +413 -79
package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +455 -63
package/build/prompts/testbot/testbot-prompts.js +24 -3
package/build/resources/analysisResources.js +13 -5
package/build/tools/generate-tests/generateBatchScenarioRestTool.js +8 -7
package/build/tools/submitReportTool.js +10 -1
package/build/tools/test-management/analyzeChangesTool.js +24 -7
package/build/types/RepositoryAnalysis.js +25 -3
package/build/types/TestRecommendation.js +5 -4
package/build/types/TestTypes.js +28 -2
package/build/utils/AnalysisStateManager.js +30 -4
package/build/utils/routeParsers.js +35 -0
package/build/utils/routeParsers.test.js +66 -1
package/build/utils/scenarioDrafting.js +207 -360
package/build/utils/scenarioDrafting.test.js +191 -256
package/build/utils/trace-parser.js +24 -6
package/build/utils/trace-parser.test.js +140 -0
package/package.json +1 -1

package/build/prompts/test-recommendation/analysisOutputPrompt.js CHANGED Viewed

@@ -1,27 +1,32 @@
+import { AnalysisScope } from "../../types/RepositoryAnalysis.js";
 function buildEnrichmentInstructions(p) {
-    const isDiffScope = p.analysisScope === "current_branch_diff";
+    const isDiffScope = p.analysisScope === AnalysisScope.CurrentBranchDiff;
     const useHealthFlow = p.nextTool === "skyramp_analyze_test_health";
     if (!isDiffScope) {
         const nextStep = useHealthFlow
             ? `### Step 3: Identify tests at risk of drift
 Call \`skyramp_analyze_test_health\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\``
-            : `### Step 3: Call recommend tests
-Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
-        return `## Your Task — Enrich & Recommend (full repo)
+            : `### Step 3: Present the catalog
+The ranked test recommendation catalog is pre-built and shown below (after the separator line).
+**Your only job is to present it.**
+1. Fill in every \`<…from source>\` placeholder using the field names, computed formulas, and auth details you found in Steps 1–2.
+2. Output the completed catalog **exactly as formatted — grouped by test type (### E2E / ### UI / ### Integration / ### Contract)**. Do NOT restructure, reorder, rename sections, or generate a new format.
+3. Do NOT call any Skyramp generation tools. The catalog shows ready-to-use tool calls that can be executed on demand.
+**If** Steps 1–2 revealed additional scenarios the catalog does not cover (e.g. a computed formula or FK relationship that was missed), you may optionally call \`skyramp_recommend_tests\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\` and \`enrichedScenarios\` to regenerate a more complete catalog — but only after presenting the current one.`;
+        return `## Your Task — Fill in and Present the Catalog (full repo)
 ### Step 1: Read key files
-Read \`package.json\` / \`requirements.txt\`, \`docker-compose.yml\`, route/controller files,
-and model/schema files (Zod schemas, Pydantic models, TypeScript interfaces, DTOs)
-to understand the tech stack, endpoint shapes, auth mechanisms, and request/response schemas.
-### Step 2: Identify resource relationships and parameter locations
-Map how endpoints relate to each other — which POST creates resources consumed by other endpoints?
-**Resolve nested/sub-router paths** from the Router Mounting section above.
-**CRITICAL — Distinguish query params vs request body:** For each endpoint, determine whether
-parameters are sent as URL query params (typical for GET search/filter/list) or request body
-(typical for POST/PUT/PATCH). Look at FastAPI \`Query()\` annotations, Express \`req.query\` usage,
-Spring \`@RequestParam\`, Flask \`request.args\`, etc. Populate \`queryParams\` in interactions
-for GET endpoints that accept search/filter/pagination parameters.
+Read route/controller files and model/schema files (Pydantic models, Zod schemas, DTOs)
+to find: required request body fields, computed response fields and formulas, auth middleware type, storage backend, and how sub-routers are mounted (cross-check against Router Mounting section above).
+### Step 2: Map cross-resource relationships and resolve endpoint paths
+(Distinct from Step 1 — Step 1 reads individual schemas; Step 2 maps how endpoints relate to each other.)
+For each endpoint: which POST creates resources consumed by other endpoints?
+**Resolve nested paths** from the Router Mounting section — a router mounted at \`/products/{product_id}/reviews\` means \`GET /\` in that file is actually \`GET /api/v1/products/{product_id}/reviews\`.
+For GET list endpoints: identify query params (\`limit\`, \`offset\`, \`order\`, \`orderBy\`) from framework annotations (FastAPI \`Query()\`, Express \`req.query\`, etc.).
 ${nextStep}`;
     }
@@ -67,8 +72,20 @@ Draft multi-step scenarios simulating realistic user workflows:
 response data verification, actual field names for chaining.
 **Parameter placement:** GET search/filter endpoints MUST use \`queryParams\`, not \`requestBody\`.
+**No duplicate scenarios.** Each scenario must cover a distinct code path (unique method + path + expected status). Do NOT draft two scenarios that differ only in request body values but hit the same code path (e.g. discount=10% vs discount=25% — both succeed with 200, same logic). A negative-case variant with a different expected status (e.g. discount=-10% → 422) IS a distinct scenario — use a single-step contract test for it (see below).
+**For each new or modified endpoint, ensure at least one error-path scenario is drafted** — a single-step contract test that triggers a specific error (404 for a missing resource ID, 422 for an invalid field value) that the source code explicitly handles. One auth-boundary scenario (missing auth → 401/403) is enough across all endpoints — do not repeat it per endpoint.
+**For every scenario you draft, fill \`bugCatchingTarget\`** with the specific formula, constraint, or failure mode the test is designed to expose. Examples:
+- \`"discount formula: total_amount = subtotal * (1 - discount_value / 100) — wrong if addition is used instead of subtraction"\`
+- \`"items not recalculated after PATCH — total_amount stays at old value if collection update is ignored"\`
+- \`"missing 404 guard on resource ID — returns 500 instead of 404 for unknown IDs"\`
+This field is used at test generation time to compute exact assertion values. Leave it empty only if no specific formula or constraint applies.
 ### Step 4: Call recommend tests
-Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
+Call \`skyramp_recommend_tests\` with:
+- \`stateFile: "${p.stateFile}"\`
+- \`enrichedScenarios\`: (optional) JSON array of your Step 3 scenarios — see the tool's inputSchema for the exact shape. Your enriched scenarios override server-side ones with the same \`scenarioName\` and are prioritized in ranking. Omit if you drafted nothing in Step 3.`;
     return `## Your Task — Enrich & Recommend (PR-scoped)
 ### Step 1: Read the changed files
@@ -81,39 +98,19 @@ ${criticalPatternStep}
 ${step3Content}`;
 }
 export function buildAnalysisOutputText(p) {
-    const isDiffScope = p.analysisScope === "current_branch_diff";
-    const diffSection = p.parsedDiff
+    const isDiffScope = p.analysisScope === AnalysisScope.CurrentBranchDiff;
+    // Router mounting context is unique to this prompt (not in recommendationPrompt).
+    // Branch diff, endpoint catalog, auth config, and OpenAPI spec are omitted here
+    // because they are already present in the recommendation prompt that is
+    // concatenated in the same tool response.
+    const routerSection = !p.wsSchemaPath && p.routerMountContext
         ? `
-## Branch Diff Context
-**Branch**: \`${p.parsedDiff.currentBranch}\` → base: \`${p.parsedDiff.baseBranch}\`
-**Changed Files** (${p.parsedDiff.changedFiles.length}): ${p.parsedDiff.changedFiles.join(", ")}
-**New Endpoints** (${p.parsedDiff.newEndpoints.length}): ${p.parsedDiff.newEndpoints.map((e) => `${e.method} ${e.path} (${e.sourceFile})`).join(", ") || "none"}
-**Modified Endpoints** (${p.parsedDiff.modifiedEndpoints.length}): ${p.parsedDiff.modifiedEndpoints.map((e) => `${e.method} ${e.path} (${e.sourceFile})`).join(", ") || "none"}
-**Affected Services**: ${p.parsedDiff.affectedServices.join(", ") || "none"}
-`
-        : "";
-    const endpointCatalog = p.scannedEndpoints.length > 0
-        ? `
-## Pre-Scanned Endpoint Catalog (${p.scannedEndpoints.length} routes)
-${p.scannedEndpoints.map((ep) => `  ${ep.methods.join("|")} ${ep.path} (${ep.sourceFile})`).join("\n")}
-`
-        : "";
-    const wsLine = p.wsBaseUrl
-        ? `**Base URL**: \`${p.wsBaseUrl}\`${p.wsAuthHeader ? ` | **Auth header**: \`${p.wsAuthHeader}\`` : ""}${p.wsAuthType ? ` | **Auth type**: \`${p.wsAuthType}\`` : ""}`
-        : "";
-    const specSection = p.wsSchemaPath
-        ? `
-## OpenAPI Spec Available
-Spec at \`${p.wsSchemaPath}\`. **Read it** for authoritative paths and schemas.
-Pass \`apiSchema: "${p.wsSchemaPath}"\` to ALL test generation tool calls.`
-        : p.routerMountContext
-            ? `
 ## Router Mounting / Nesting
 \`\`\`
 ${p.routerMountContext}
 \`\`\`
 Use this to resolve full URL paths for nested endpoints.`
-            : "";
+        : "";
     const enrichment = buildEnrichmentInstructions(p);
     return `# Repository Analysis
@@ -121,12 +118,7 @@ Use this to resolve full URL paths for nested endpoints.`
 **Repository**: \`${p.repositoryPath}\`
 **Analysis Scope**: \`${p.analysisScope}\`
 ${isDiffScope ? `**Diff endpoints**: ${(p.parsedDiff?.newEndpoints.length ?? 0) + (p.parsedDiff?.modifiedEndpoints.length ?? 0)}` : `**Pre-scanned endpoints**: ${p.scannedEndpoints.length}`}
-${wsLine}
-${p.wsSchemaPath ? `**OpenAPI Spec**: \`${p.wsSchemaPath}\` (spec-based flow)` : "**Flow**: Code-scanning (may miss nesting)"}
-${diffSection}
-${endpointCatalog}
-${specSection}
+${routerSection}
 ${enrichment}
 **CRITICAL**: No .json/.md file creation. Prioritize cross-resource workflows.`;

package/build/prompts/test-recommendation/mergeEnrichedScenarios.test.js ADDED Viewed

@@ -0,0 +1,125 @@
+jest.mock("@skyramp/skyramp", () => ({ Skyramp: class {
+    } }));
+import { mergeEnrichedScenarios } from "./registerRecommendTestsPrompt.js";
+import { ScenarioSource } from "../../types/RepositoryAnalysis.js";
+import { TestType } from "../../types/TestTypes.js";
+function makeScenario(overrides = {}) {
+    return {
+        scenarioName: "base-scenario",
+        description: "base",
+        category: "crud",
+        priority: "medium",
+        steps: [{ order: 1, method: "GET", path: "/api/items", description: "list", interactionType: "success", expectedStatusCode: 200 }],
+        chainingKeys: [],
+        requiresAuth: true,
+        estimatedComplexity: "simple",
+        source: ScenarioSource.CodeInferred,
+        testType: TestType.CONTRACT,
+        ...overrides,
+    };
+}
+const VALID_STEP = { order: 1, method: "post", path: "/api/orders", expectedStatusCode: 201 };
+describe("mergeEnrichedScenarios — happy path", () => {
+    it("merges a valid agent scenario into server scenarios", () => {
+        const server = [makeScenario({ scenarioName: "existing" })];
+        const raw = JSON.stringify([{
+                scenarioName: "new-orders-flow",
+                category: "business_rule",
+                steps: [VALID_STEP],
+            }]);
+        const { scenarios, rejectionNotes } = mergeEnrichedScenarios(server, raw);
+        expect(rejectionNotes).toHaveLength(0);
+        expect(scenarios.find(s => s.scenarioName === "new-orders-flow")).toBeDefined();
+        expect(scenarios.find(s => s.scenarioName === "existing")).toBeDefined();
+        expect(scenarios).toHaveLength(2);
+    });
+    it("overrides a server scenario when agent provides same scenarioName", () => {
+        const server = [makeScenario({ scenarioName: "orders-flow", description: "server version" })];
+        const raw = JSON.stringify([{
+                scenarioName: "orders-flow",
+                category: "business_rule",
+                description: "agent version",
+                steps: [VALID_STEP],
+            }]);
+        const { scenarios } = mergeEnrichedScenarios(server, raw);
+        expect(scenarios).toHaveLength(1);
+        expect(scenarios[0].description).toBe("agent version");
+        expect(scenarios[0].source).toBe("agent-enriched");
+    });
+    it("normalizes method to uppercase", () => {
+        const raw = JSON.stringify([{
+                scenarioName: "uppercase-test",
+                category: "crud",
+                steps: [{ order: 1, method: "post", path: "/api/items", expectedStatusCode: 201 }],
+            }]);
+        const { scenarios } = mergeEnrichedScenarios([], raw);
+        expect(scenarios[0].steps[0].method).toBe("POST");
+    });
+    it("preserves bugCatchingTarget when provided", () => {
+        const raw = JSON.stringify([{
+                scenarioName: "formula-test",
+                category: "business_rule",
+                bugCatchingTarget: "total = price * qty",
+                steps: [VALID_STEP],
+            }]);
+        const { scenarios } = mergeEnrichedScenarios([], raw);
+        expect(scenarios[0].bugCatchingTarget).toBe("total = price * qty");
+    });
+    it("falls back to server scenarios on empty agent array", () => {
+        const server = [makeScenario({ scenarioName: "server-only" })];
+        const { scenarios, rejectionNotes } = mergeEnrichedScenarios(server, "[]");
+        // Empty array → no agent scenarios, return server ones unchanged
+        expect(scenarios).toEqual(server);
+        expect(rejectionNotes).toHaveLength(0);
+    });
+});
+describe("mergeEnrichedScenarios — rejection cases", () => {
+    it("rejects scenario with missing scenarioName", () => {
+        const raw = JSON.stringify([{ category: "crud", steps: [VALID_STEP] }]);
+        const { scenarios, rejectionNotes } = mergeEnrichedScenarios([], raw);
+        expect(scenarios).toHaveLength(0);
+        expect(rejectionNotes[0]).toMatch(/missing scenarioName/);
+    });
+    it("rejects scenario with missing steps array", () => {
+        const raw = JSON.stringify([{ scenarioName: "no-steps", category: "crud" }]);
+        const { rejectionNotes } = mergeEnrichedScenarios([], raw);
+        expect(rejectionNotes[0]).toMatch(/missing or empty steps/);
+    });
+    it("rejects scenario with empty steps array", () => {
+        const raw = JSON.stringify([{ scenarioName: "empty-steps", category: "crud", steps: [] }]);
+        const { rejectionNotes } = mergeEnrichedScenarios([], raw);
+        expect(rejectionNotes[0]).toMatch(/missing or empty steps/);
+    });
+    it("rejects scenario with missing category", () => {
+        const raw = JSON.stringify([{ scenarioName: "no-cat", steps: [VALID_STEP] }]);
+        const { rejectionNotes } = mergeEnrichedScenarios([], raw);
+        expect(rejectionNotes[0]).toMatch(/missing category/);
+    });
+    it("rejects scenario with unknown category", () => {
+        const raw = JSON.stringify([{ scenarioName: "bad-cat", category: "not_a_real_category", steps: [VALID_STEP] }]);
+        const { rejectionNotes } = mergeEnrichedScenarios([], raw);
+        expect(rejectionNotes[0]).toMatch(/unknown category/);
+    });
+    it("falls back to server scenarios on invalid JSON", () => {
+        const server = [makeScenario()];
+        const { scenarios, rejectionNotes } = mergeEnrichedScenarios(server, "{ bad json");
+        expect(scenarios).toEqual(server);
+        expect(rejectionNotes[0]).toMatch(/invalid JSON/);
+    });
+    it("falls back to server scenarios when JSON is not an array", () => {
+        const server = [makeScenario()];
+        const { scenarios, rejectionNotes } = mergeEnrichedScenarios(server, JSON.stringify({ not: "array" }));
+        expect(scenarios).toEqual(server);
+        expect(rejectionNotes[0]).toMatch(/expected a JSON array/);
+    });
+    it("accepts valid scenarios and rejects invalid ones in the same batch", () => {
+        const raw = JSON.stringify([
+            { scenarioName: "valid-one", category: "crud", steps: [VALID_STEP] },
+            { category: "crud", steps: [VALID_STEP] }, // missing scenarioName
+        ]);
+        const { scenarios, rejectionNotes } = mergeEnrichedScenarios([], raw);
+        expect(scenarios).toHaveLength(1);
+        expect(scenarios[0].scenarioName).toBe("valid-one");
+        expect(rejectionNotes).toHaveLength(1);
+    });
+});

package/build/prompts/test-recommendation/recommendationSections.js CHANGED Viewed

@@ -1,6 +1,53 @@
 export const MAX_TESTS_TO_GENERATE = 3;
 export const MAX_RECOMMENDATIONS = 20;
 export const MAX_CRITICAL_TESTS = 3;
+export function buildArchitectPreamble(isDiffScope) {
+    if (isDiffScope) {
+        return `You are acting as a Skyramp Integration Architect. You will receive a branch diff — changed endpoints, source code interactions, and existing tests. Your responsibility is to map test intent to the Skyramp generation spec with precision. No guessing — derive all parameters from the codebase, workspace config, and provided context only.
+Your task:
+1. **Recommend AND generate tests for this PR's changes** — prioritize tests most likely to catch real production bugs (state machine violations, cross-resource data integrity, computed field errors, security boundary bypasses) over trivial coverage
+2. **Immediately generate the top-priority tests** by calling Skyramp MCP generation tools — only these types: **integration, contract, E2E, UI**. Never smoke. Never fuzz.
+3. **Avoid duplicate coverage** — if an existing test already covers an endpoint + test type, choose a different angle instead
+Every tool parameter must trace to a concrete source: repository analysis, source code, or OpenAPI spec. Do not invent field names, request shapes, base URLs, or auth values. If a required value is unknown, read the relevant source file before calling the tool.`;
+    }
+    return `You are acting as a Skyramp Integration Architect. You will receive a full repository analysis — all endpoints, source code interactions, and existing tests. Your responsibility is to map test intent to the Skyramp generation spec with precision. No guessing — derive all parameters from the codebase, workspace config, and provided context only.
+Your task:
+1. **Produce a comprehensive test recommendation catalog** grouped by test type (E2E → UI → Integration → Contract), ranked by production risk within each type
+2. **Present each recommendation with a complete, ready-to-use Skyramp tool call** — fill in all field names, endpoint URLs, request shapes, and auth parameters from source code so the catalog is immediately actionable
+3. **Do not call any generation tools** — your output is the catalog itself; it is executed on demand
+Replace every \`<…from source>\` placeholder with actual values before presenting. Do not invent values — read the source file if a value is unclear.`;
+}
+export function buildContextFetchingGuidance(sessionId) {
+    if (!sessionId)
+        return "";
+    return `<context_fetching_protocol>
+## Execution Plan Context
+Before calling any tool, replace every \`<from source>\` placeholder in the tool call parameters with actual values read from the relevant source file (handler, schema, or model). Do not proceed with placeholders still present — every parameter must trace to a concrete source.
+</context_fetching_protocol>`;
+}
+export function buildReasoningProtocol() {
+    return `<reasoning_protocol>
+## Parameter Grounding Rule
+Before each GENERATE tool call, output a brief \`<thinking>\` block stating WHERE the key values come from:
+- **requestBody / responseBody fields** → source code schema (Zod, Pydantic, DTO), enriched scenario, or OpenAPI spec — state the file or schema name
+- **endpointURL** → workspace \`baseUrl\` + endpoint path (both required — never path alone)
+- **authHeader / authScheme** → workspace config or OpenAPI \`securitySchemes\`
+- **FK path params** → chained from a prior step's response \`id\` field — not hardcoded
+- **Names / string values** → realistic; append timestamp suffix to avoid re-run conflicts
+## Ranking Rule
+For each GENERATE item, include one sentence in your output (before the tool calls) stating the specific bug or failure it targets — derived from \`bugCatchingTarget\` or your source-code reading. Example: "Targets: order total miscalculation — total_amount = sum(item.price × item.quantity) should recompute when items array changes."
+If \`bugCatchingTarget\` is empty for a GENERATE item, derive it from source code before including the item. A GENERATE slot without a specific bug target belongs in ADDITIONAL.
+If a value cannot be sourced, read the relevant source file before calling the tool. Do not proceed with invented values.
+</reasoning_protocol>`;
+}
 function serializeAuthCallParams(params) {
     const parts = [`authHeader: "${params.authHeader}"`];
     if (params.authScheme !== undefined) {
@@ -45,7 +92,7 @@ export function buildTestPatternGuidelines() {
 - **Race conditions**: If concurrent writes are possible (inventory deduction, counter increment), test concurrent requests
 - **Computed fields**: If response contains derived values (total, average, count), verify computation with known inputs (e.g., total_cost = compute_seconds * rate + memory_mb * rate + external_cost)
 - **Mutation with collection modification**: If PUT/PATCH endpoints accept arrays of child items (e.g., order line items, cart products, invoice entries), test adding/removing items and verify that derived totals (e.g., total_amount, subtotal, item_count) are recalculated correctly. This is the most common source of user-reported bugs — always prioritize it for GENERATE over simple field-update tests.
-    **CRITICAL**: The PATCH/PUT request body MUST include the child collection array field(s) defined for that endpoint (e.g., "items" with FK references like "product_id" and a quantity field) chained from prior POST responses. A PATCH that only sends metadata fields (e.g., discount_type, status, notes) without modifying the child collection is NOT a valid mutation-recalc test — it will pass even when the item/total logic is broken. Before writing assertions, inspect the source code or OpenAPI spec to identify (1) the actual child collection field name and its FK/quantity/price sub-fields, and (2) how derived totals are calculated (including any discounts, taxes, or fees). Then assert: the child FK fields match chained IDs, quantities match sent values, and totals match the computation from the source code
+    The PATCH/PUT request body should include the child collection array field(s) defined for that endpoint (e.g., "items" with FK references like "product_id" and a quantity field) chained from prior POST responses. A PATCH that only sends metadata fields (e.g., discount_type, status, notes) without modifying the child collection is NOT a valid mutation-recalc test — it will pass even when the item/total logic is broken. Before writing assertions, inspect the source code or OpenAPI spec to identify (1) the actual child collection field name and its FK/quantity/price sub-fields, and (2) how derived totals are calculated (including any discounts, taxes, or fees). Then assert: the child FK fields match chained IDs, quantities match sent values, and totals match the computation from the source code
 - **Webhook/event side effects**: If endpoints trigger async operations, test that side effects occur (e.g., POST /orders triggers notification)
 - **Cross-user isolation**: If resources are owned by users, test that user B cannot access/modify user A's resources (GET /users/{other_id}/data → 403 Forbidden)
 - **Range/boundary invariants**: If business rules cap values (max retries, min balance, discount ≤ subtotal), test the boundary (e.g., set retries to max+1 → expect rejection)
@@ -118,6 +165,72 @@ export function buildTestExamples() {
 - Single-resource CRUD with no cross-resource or state verification
 - POST with missing field → 422 (obvious validation, covered by contract tests)`;
 }
+export function buildVerificationChecklist(topN, maxGen) {
+    return `<verification>
+Before finalizing your output, verify:
+1. **Count**: Total recommendation count equals exactly ${topN} (${maxGen} GENERATE + ${topN - maxGen} ADDITIONAL). Not fewer.
+2. **Distinct paths**: Each GENERATE item targets a distinct code path — no two share the same HTTP method + endpoint + expected status.
+3. **Auth parameters are consistent** across all tool calls (same authHeader and authScheme).
+4. Every endpointURL includes both the base URL and the path (not just the base, e.g. \`http://host/api/v1/orders/{id}\`).
+5. **All \`<from source>\` placeholders** you received have been replaced with actual values derived from source code — no \`<...>\` remain in your output.
+6. **Real request shapes**: requestBody for POST/PUT/PATCH uses actual field names from source (not \`{}\`). GET search/filter uses \`queryParams\`, not \`requestBody\`.
+7. **scenarioFile**: \`skyramp_integration_test_generation\` uses the exact \`filePath\` returned by \`skyramp_batch_scenario_test_generation\` — not a guessed or hardcoded filename.
+8. **bugCatchingTarget**: Every GENERATE integration test that targets a business rule, formula, or constraint has a non-empty \`bugCatchingTarget\`.
+9. **FK chaining**: In multi-step integration tests, path params sourced from a prior step's response (e.g. \`order_id\` from step 1) use \`chainsFrom\` — not hardcoded IDs.
+</verification>`;
+}
+export function buildFewShotExamples() {
+    return `<examples>
+<example index="1" type="integration_recommendation">
+<thinking>
+**Parameter grounding**:
+- baseURL: "http://localhost:8000" (workspace api.baseUrl)
+- steps[0].requestBody fields "name", "price": ProductCreate schema fields (src/models/product.py)
+- steps[1].requestBody "product_id": FK to products — chained from step 0 response id
+- steps[1].requestBody "quantity": OrderCreate schema field (src/models/order.py)
+- responseBody "total_amount": 89.97 = 29.99 × 3 — from order total formula (src/services/order_service.py: total = sum(item.price * item.quantity))
+- authHeader/authScheme: workspace config (Authorization / Bearer)
+</thinking>
+**#1 — GENERATE** | integration | business_rule | new
+Scenario: orders-create-with-product-total-calc (3 steps)
+  1. POST /api/v1/products → 201: Create product with known price
+  2. POST /api/v1/orders → 201: Create order referencing product_id from step 1, quantity=3
+  3. GET /api/v1/orders/{order_id} → 200: Verify total_amount = 29.99 × 3 = 89.97
+bugCatchingTarget: "total_amount = sum(item.price × item.quantity) — wrong if multiplication is skipped or items list is ignored"
+Tool calls:
+  skyramp_batch_scenario_test_generation({ scenarioName: "orders-create-with-product-total-calc", destination: "localhost", baseURL: "http://localhost:8000", authHeader: "Authorization", authScheme: "Bearer", steps: [
+    { method: "POST", path: "/api/v1/products", statusCode: 201, requestBody: "{\"name\": \"Widget-1713000000\", \"price\": 29.99}" },
+    { method: "POST", path: "/api/v1/orders", statusCode: 201, requestBody: "{\"product_id\": \"chained\", \"quantity\": 3}" },
+    { method: "GET", path: "/api/v1/orders/{order_id}", statusCode: 200, responseBody: "{\"id\": \"chained\", \"total_amount\": 89.97, \"items\": [{\"product_id\": \"chained\", \"quantity\": 3, \"unit_price\": 29.99}]}" }
+  ] })
+  skyramp_integration_test_generation({ scenarioFile: "<filePath returned by skyramp_batch_scenario_test_generation above>" })
+Reasoning: Catches a broken total calculation before it ships — the most common source of order-related bug reports.
+</example>
+<example index="2" type="contract_recommendation">
+<thinking>
+**Parameter grounding**:
+- endpointURL: "http://localhost:8000/api/v1/products/{product_id}" (workspace baseUrl + path from endpoint listing)
+- method: "DELETE" (route definition, uppercase)
+- pathParams "product_id=<random-uuid-v4>": hardcoded non-existent ID — no setup step needed for a 404 test; use a fresh UUID v4, not all-zeros
+- authHeader/authScheme: workspace config (Authorization / Bearer)
+- No requestData — DELETE carries no body
+</thinking>
+**#2 — GENERATE** | contract | error_handling | new
+DELETE /api/v1/products/{product_id} → 404
+Tool: skyramp_contract_test_generation({ endpointURL: "http://localhost:8000/api/v1/products/{product_id}", method: "DELETE", authHeader: "Authorization", authScheme: "Bearer", pathParams: "product_id=<random-uuid-v4>" })
+Reasoning: Catches a missing 404 guard on DELETE — verifies the handler returns 404 for non-existent resources rather than a 500 or silent no-op.
+</example>
+<example index="3" type="additional_recommendation">
+#5 [ADDITIONAL] | integration | security_boundary | existing
+  Scenario: orders-unauthorized-cross-user-access (POST /api/v1/orders → GET /api/v1/orders/{order_id} as different user → 403)
+  Validates: Cross-user isolation — user B cannot read user A's orders.
+</example>
+</examples>`;
+}
 export function buildToolWorkflows(authHeaderValue, authTypeValue = "") {
     const isAuthorizationHeader = /^authorization$/i.test(authHeaderValue);
     const noAuth = !authHeaderValue;
@@ -177,6 +290,10 @@ To skip auth for unauthenticated endpoints, pass \`authHeader: ""\`.`;
         : `**Auth params:** \`${authCallParams}\` — pass to EVERY tool call below.`;
     return `## How to Generate Tests — Tool Workflows
+**Contract**: The following tool signatures are strict technical contracts. Every parameter should match the schema exactly. Omit optional parameters rather than guessing values. If a required field cannot be resolved, fetch context first.
+**Before every tool call**: Output a <thinking> block justifying the mapping of intent to endpoint to tool parameters. See Mandatory Reasoning Protocol above.
 ${authHeaderLine}
 ${authGuidance}
@@ -189,13 +306,13 @@ ${authGuidance}
    **CRITICAL — Query params vs request body:**
    - For **POST/PUT/PATCH**: use \`requestBody\` with realistic field values from source code schemas.
    - For **GET/DELETE with search/filter/pagination**: use \`queryParams\` (JSON string, e.g., \`{"q": "bear", "limit": 10}\`).
-     NEVER put query parameters in \`requestBody\` for GET requests — GET request bodies are non-standard and may be ignored or rejected.
+     Do not put query parameters in \`requestBody\` for GET requests — GET request bodies are non-standard and may be ignored or rejected.
    - For **GET by ID**: no \`requestBody\` or \`queryParams\` needed — the ID is in the path.
    \`responseBody\` should match the actual API response shape from source code (including all fields
    returned by the controller — e.g., \`id\`, \`ownerId\`, \`createdAt\`, included relations like \`collection\`, \`tags\`).
    Wrap in \`{"response": ...}\` if the API uses an envelope pattern. If omitted, a synthetic response is generated.
    Inspect the source code to determine the correct request AND response body shapes — avoid sending \`{}\`.
-   **CRITICAL for PATCH/PUT mutation-recalc scenarios:** The request body MUST include the child
+   **For PATCH/PUT mutation-recalc scenarios:** The request body should include the child
    collection array (e.g. \`"items": [{"product_id": <chained from prior POST>, "quantity": 2}]\`).
    Never send a PATCH that only modifies metadata (discount, status) without also including the
    items/products collection — such a test will not catch collection-level or total-recalculation bugs.
@@ -206,7 +323,7 @@ ${authGuidance}
    Do NOT pass \`chainingKey\` — defaults to \`response.id\`. After generation, the testbot
    will verify and fix path param chaining in the generated test.
-**For single-endpoint tests (contract/fuzz):**
+**For single-endpoint tests (contract):**
 \`skyramp_{type}_test_generation\` with \`endpointURL\` (full URL incl. base + path), \`method\`,
 \`${authCallParams}\`, and \`requestData\` from source code schemas.
 If an OpenAPI spec exists, ALSO pass \`apiSchema\` — it enables schema-aware validation

package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js CHANGED Viewed

@@ -1,11 +1,102 @@
 import { z } from "zod";
-import { StateManager, } from "../../utils/AnalysisStateManager.js";
+import { StateManager, hasSessionData, getSessionData, } from "../../utils/AnalysisStateManager.js";
 import { logger } from "../../utils/logger.js";
 import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
+import { getPersonaPrefix } from "../architectPersona.js";
+import { ScenarioSource, AnalysisScope } from "../../types/RepositoryAnalysis.js";
+import { SCENARIO_CATEGORIES } from "../../types/TestRecommendation.js";
+/**
+ * Merge agent-drafted ("enriched") scenarios into the server-side scenario list.
+ *
+ * `raw` is parsed as JSON and must be an array. Each entry is validated
+ * (scenarioName, non-empty steps array, known category, object-shaped steps)
+ * and normalised into a scenario tagged `ScenarioSource.AgentEnriched`.
+ * Invalid entries are skipped and described in `rejectionNotes` rather than
+ * throwing, so one malformed scenario cannot abort the whole merge.
+ * Agent scenarios replace server scenarios that share the same scenarioName.
+ *
+ * @param {Array<object>} serverScenarios - Scenarios drafted server-side; returned unchanged when nothing valid is supplied.
+ * @param {string} raw - JSON string expected to contain an array of agent scenarios.
+ * @returns {{ scenarios: Array<object>, rejectionNotes: string[] }} Merged scenarios plus one note per rejected entry.
+ */
+export function mergeEnrichedScenarios(serverScenarios, raw) {
+    const rejectionNotes = [];
+    let parsed;
+    try {
+        const result = JSON.parse(raw);
+        if (!Array.isArray(result)) {
+            // `typeof null` is "object"; report JSON null accurately in the note.
+            const got = result === null ? "null" : typeof result;
+            return { scenarios: serverScenarios, rejectionNotes: ["enrichedScenarios: expected a JSON array, got " + got] };
+        }
+        parsed = result;
+    }
+    catch {
+        logger.warning("enrichedScenarios: invalid JSON — using server-side scenarios only");
+        return { scenarios: serverScenarios, rejectionNotes: ["enrichedScenarios: invalid JSON — all scenarios skipped"] };
+    }
+    const agentScenarios = [];
+    for (const s of parsed) {
+        const name = s?.scenarioName ? String(s.scenarioName) : null;
+        const label = name ? `"${name}"` : "(unnamed)";
+        if (!name) {
+            rejectionNotes.push(`rejected ${label}: missing scenarioName`);
+            continue;
+        }
+        if (!Array.isArray(s?.steps) || s.steps.length === 0) {
+            rejectionNotes.push(`rejected ${label}: missing or empty steps array`);
+            continue;
+        }
+        if (!s?.category) {
+            rejectionNotes.push(`rejected ${label}: missing category`);
+            continue;
+        }
+        if (!SCENARIO_CATEGORIES.includes(s.category)) {
+            rejectionNotes.push(`rejected ${label}: unknown category "${s.category}" — valid: ${SCENARIO_CATEGORIES.join(", ")}`);
+            continue;
+        }
+        // A null or primitive step would throw on property access below;
+        // reject the scenario with a note instead of crashing the caller.
+        const badStepIdx = s.steps.findIndex((st) => st === null || typeof st !== "object");
+        if (badStepIdx !== -1) {
+            rejectionNotes.push(`rejected ${label}: step ${badStepIdx + 1} is not an object`);
+            continue;
+        }
+        agentScenarios.push({
+            scenarioName: name,
+            description: s.description ?? "",
+            category: s.category,
+            priority: s.priority ?? "high",
+            bugCatchingTarget: s.bugCatchingTarget,
+            testType: s.testType,
+            steps: s.steps.map((st, idx) => {
+                // Normalise once so the stored method/path, the default
+                // description and the default status code all agree.
+                const method = String(st.method ?? "GET").toUpperCase();
+                const path = String(st.path ?? "/");
+                return {
+                    order: st.order ?? idx + 1,
+                    method,
+                    path,
+                    description: st.description ?? `${method} ${path}`,
+                    interactionType: st.interactionType ?? "success",
+                    requestBody: st.requestBody,
+                    queryParams: st.queryParams,
+                    responseBody: st.responseBody,
+                    // Default status code by method if omitted to avoid `statusCode: undefined` in tool calls
+                    expectedStatusCode: st.expectedStatusCode ??
+                        (method === "POST" ? 201
+                            : method === "DELETE" ? 204
+                                : 200),
+                    expectedResponseFields: st.expectedResponseFields,
+                    bodyMustInclude: st.bodyMustInclude,
+                    chainsFrom: st.chainsFrom,
+                };
+            }),
+            chainingKeys: s.chainingKeys ?? [],
+            requiresAuth: s.requiresAuth ?? true,
+            estimatedComplexity: s.estimatedComplexity ?? "moderate",
+            source: ScenarioSource.AgentEnriched,
+        });
+    }
+    if (agentScenarios.length === 0) {
+        return { scenarios: serverScenarios, rejectionNotes };
+    }
+    // Keyed by scenarioName: an agent scenario overrides a server scenario of the same name.
+    const merged = new Map(serverScenarios.map(s => [s.scenarioName, s]));
+    for (const s of agentScenarios) {
+        merged.set(s.scenarioName, s);
+    }
+    logger.info("Merged agent-enriched scenarios", {
+        server: serverScenarios.length,
+        agent: agentScenarios.length,
+        total: merged.size,
+        rejected: rejectionNotes.length,
+    });
+    return { scenarios: Array.from(merged.values()), rejectionNotes };
+}
 export function registerRecommendTestsPrompt(server) {
     server.registerPrompt("skyramp_recommend_tests", {
-        description: "Generate ranked test recommendations from a test-management analysis. " +
-            "Provide a stateFile path from skyramp_analyze_changes.",
+        description: getPersonaPrefix() +
+            "Given the repository analysis in stateFile, produce ranked test recommendations split into " +
+            "GENERATE (call generation tools immediately) and ADDITIONAL (deferred, describe only).\n\n" +
+            "**Output contract:** Every GENERATE integration test targeting a business rule or formula " +
+            "MUST include a non-empty bugCatchingTarget. Parameters for generation tools must derive " +
+            "from the repository analysis, enrichedScenarios, or source code you read — no invented " +
+            "field names, no guessed URLs.\n\n" +
+            "**Ranking:** Prioritize business_rule and security_boundary over crud. Within a category, " +
+            "prefer tests that catch specific formulas, constraints, or state transitions over generic " +
+            "happy-path coverage. Provide a stateFile path from skyramp_analyze_changes.",
         argsSchema: {
             stateFile: z
                 .string()
@@ -17,28 +108,79 @@ export function registerRecommendTestsPrompt(server) {
                 .default(10)
                 .optional()
                 .describe("Maximum number of ranked recommendations to return (default: 10)"),
+            enrichedScenarios: z
+                .string()
+                .optional()
+                .refine((val) => {
+                if (val === undefined || val === "")
+                    return true;
+                try {
+                    const parsed = JSON.parse(val);
+                    return Array.isArray(parsed);
+                }
+                catch {
+                    return false;
+                }
+            }, { message: "enrichedScenarios must be a valid JSON array string (e.g. '[{\"scenarioName\":\"...\"}]')" })
+                .describe("JSON array of agent-drafted scenarios (DraftedScenario[]). Each must have " +
+                "scenarioName, category, and steps (array with method, path, order). " +
+                "Agent scenarios override server-side ones by scenarioName and are prioritized in ranking."),
         },
     }, async (args) => {
         const stateFile = args.stateFile;
         if (!stateFile) {
             throw new Error("stateFile is required");
         }
+        // Try in-memory session store first (fullAnalysis no longer on disk).
+        // Extract sessionId from the disk state, then look up the full analysis
+        // from process memory — same pattern as analysisResources.ts.
         const mgr = StateManager.fromStatePath(stateFile);
         if (!mgr.exists()) {
             throw new Error(`State file "${stateFile}" not found. Run skyramp_analyze_changes first.`);
         }
         const fullState = await mgr.readFullState();
         const state = fullState ?? null;
-        if (!state?.repositoryAnalysis?.fullAnalysis) {
+        if (!state?.repositoryAnalysis) {
             throw new Error(`State file "${stateFile}" has no analysis data. Re-run skyramp_analyze_changes.`);
         }
-        const { fullAnalysis, sessionId, wsAuthHeader, wsAuthType } = state.repositoryAnalysis;
+        const { sessionId, wsAuthHeader, wsAuthType } = state.repositoryAnalysis;
         const repositoryPath = fullState?.metadata?.repositoryPath ?? "";
+        // Resolve fullAnalysis: memory first, disk fallback for backward compat
+        let fullAnalysis;
+        if (sessionId && hasSessionData(sessionId)) {
+            const memData = getSessionData(sessionId);
+            if (memData?.analysis) {
+                fullAnalysis = memData.analysis;
+                logger.debug("Loaded fullAnalysis from process memory", { sessionId });
+            }
+        }
+        if (!fullAnalysis) {
+            fullAnalysis = state.repositoryAnalysis.fullAnalysis;
+        }
+        if (!fullAnalysis) {
+            throw new Error(`Analysis data for session not found in memory or on disk. Re-run skyramp_analyze_changes.`);
+        }
         const analysisScope = state.analysisScope === "branch_diff"
-            ? "current_branch_diff"
-            : "full_repo";
+            ? AnalysisScope.CurrentBranchDiff
+            : AnalysisScope.FullRepo;
         const effectiveTopN = args.topN;
-        const prompt = buildRecommendationPrompt(fullAnalysis, analysisScope, effectiveTopN, undefined, wsAuthHeader, wsAuthType);
+        const enrichedRaw = args.enrichedScenarios;
+        let mergedAnalysis = fullAnalysis;
+        let rejectionWarning = "";
+        if (enrichedRaw) {
+            const { scenarios, rejectionNotes } = mergeEnrichedScenarios(fullAnalysis.businessContext.draftedScenarios ?? [], enrichedRaw);
+            mergedAnalysis = {
+                ...fullAnalysis,
+                businessContext: {
+                    ...fullAnalysis.businessContext,
+                    draftedScenarios: scenarios,
+                },
+            };
+            if (rejectionNotes.length > 0) {
+                rejectionWarning = `\n⚠️ enrichedScenarios — ${rejectionNotes.length} scenario(s) rejected and not used in recommendations:\n${rejectionNotes.map(n => `  - ${n}`).join("\n")}\nFix and re-call skyramp_recommend_tests to incorporate corrections.\n`;
+            }
+        }
+        const prompt = buildRecommendationPrompt(mergedAnalysis, analysisScope, effectiveTopN, undefined, wsAuthHeader, wsAuthType, undefined, sessionId);
         logger.info("Serving recommendation prompt via MCP Prompt", {
             stateFile,
             analysisScope,
@@ -52,7 +194,7 @@ export function registerRecommendTestsPrompt(server) {
                     role: "user",
                     content: {
                         type: "text",
-                        text: `State file: ${stateFile}\nRepository: ${repositoryPath}\nScope: ${analysisScope}\n${resourceLinks}\n${prompt}`,
+                        text: `State file: ${stateFile}\nRepository: ${repositoryPath}\nScope: ${analysisScope}\n${resourceLinks}${rejectionWarning}\n${prompt}`,
                     },
                 },
             ],