npm - @skyramp/mcp - Versions diffs - 0.0.65 → 0.1.0-rc.2 - Mend

@skyramp/mcp 0.0.65 → 0.1.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/build/prompts/test-recommendation/recommendationSections.js CHANGED Viewed

@@ -1,6 +1,53 @@
 export const MAX_TESTS_TO_GENERATE = 3; // NOTE(review): presumably the cap on tests generated immediately; not referenced in the visible hunks, confirm against callers
 export const MAX_RECOMMENDATIONS = 20; // NOTE(review): presumably the upper bound on ranked recommendations; confirm against callers
 export const MAX_CRITICAL_TESTS = 3; // NOTE(review): meaning is not visible in this diff; confirm before relying on it
+export function buildArchitectPreamble(isDiffScope) { // Builds the opening "Skyramp Integration Architect" instructions as one string; isDiffScope picks between the two variants below.
+    if (isDiffScope) { // Truthy: branch-diff variant, which tells the model to recommend AND immediately generate tests.
+        return `You are acting as a Skyramp Integration Architect. You will receive a branch diff — changed endpoints, source code interactions, and existing tests. Your responsibility is to map test intent to the Skyramp generation spec with precision. No guessing — derive all parameters from the codebase, workspace config, and provided context only.
+Your task:
+1. **Recommend AND generate tests for this PR's changes** — prioritize tests most likely to catch real production bugs (state machine violations, cross-resource data integrity, computed field errors, security boundary bypasses) over trivial coverage
+2. **Immediately generate the top-priority tests** by calling Skyramp MCP generation tools — only these types: **integration, contract, E2E, UI**. Never smoke. Never fuzz.
+3. **Avoid duplicate coverage** — if an existing test already covers an endpoint + test type, choose a different angle instead
+Every tool parameter must trace to a concrete source: repository analysis, source code, or OpenAPI spec. Do not invent field names, request shapes, base URLs, or auth values. If a required value is unknown, read the relevant source file before calling the tool.`;
+    }
+    return `You are acting as a Skyramp Integration Architect. You will receive a full repository analysis — all endpoints, source code interactions, and existing tests. Your responsibility is to map test intent to the Skyramp generation spec with precision. No guessing — derive all parameters from the codebase, workspace config, and provided context only.
+Your task:
+1. **Produce a comprehensive test recommendation catalog** grouped by test type (E2E → UI → Integration → Contract), ranked by production risk within each type
+2. **Present each recommendation with a complete, ready-to-use Skyramp tool call** — fill in all field names, endpoint URLs, request shapes, and auth parameters from source code so the catalog is immediately actionable
+3. **Do not call any generation tools** — your output is the catalog itself; it is executed on demand
+Replace every \`<…from source>\` placeholder with actual values before presenting. Do not invent values — read the source file if a value is unclear.`; // Full-repository variant: asks for a catalog only and forbids calling generation tools.
+}
+export function buildContextFetchingGuidance(sessionId) { // Returns the <context_fetching_protocol> prompt section, or "" when no sessionId is supplied.
+    if (!sessionId)
+        return ""; // Any falsy sessionId (undefined, null, "") suppresses the section entirely.
+    return `<context_fetching_protocol>
+## Execution Plan Context
+Before calling any tool, replace every \`<from source>\` placeholder in the tool call parameters with actual values read from the relevant source file (handler, schema, or model). Do not proceed with placeholders still present — every parameter must trace to a concrete source.
+</context_fetching_protocol>`; // sessionId only gates the section; its value is not interpolated into the text.
+}
+export function buildReasoningProtocol() { // Returns the static <reasoning_protocol> prompt section; takes no input and interpolates nothing.
+    return `<reasoning_protocol>
+## Parameter Grounding Rule
+Before each GENERATE tool call, output a brief \`<thinking>\` block stating WHERE the key values come from:
+- **requestBody / responseBody fields** → source code schema (Zod, Pydantic, DTO), enriched scenario, or OpenAPI spec — state the file or schema name
+- **endpointURL** → workspace \`baseUrl\` + endpoint path (both required — never path alone)
+- **authHeader / authScheme** → workspace config or OpenAPI \`securitySchemes\`
+- **FK path params** → chained from a prior step's response \`id\` field — not hardcoded
+- **Names / string values** → realistic; append timestamp suffix to avoid re-run conflicts
+## Ranking Rule
+For each GENERATE item, include one sentence in your output (before the tool calls) stating the specific bug or failure it targets — derived from \`bugCatchingTarget\` or your source-code reading. Example: "Targets: order total miscalculation — total_amount = sum(item.price × item.quantity) should recompute when items array changes."
+If \`bugCatchingTarget\` is empty for a GENERATE item, derive it from source code before including the item. A GENERATE slot without a specific bug target belongs in ADDITIONAL.
+If a value cannot be sourced, read the relevant source file before calling the tool. Do not proceed with invented values.
+</reasoning_protocol>`; // Two rules are emitted: parameter grounding (a <thinking> block before each GENERATE call) and ranking (a stated bug target per GENERATE item).
+}
 function serializeAuthCallParams(params) {
     const parts = [`authHeader: "${params.authHeader}"`];
     if (params.authScheme !== undefined) {
@@ -45,7 +92,7 @@ export function buildTestPatternGuidelines() {
 - **Race conditions**: If concurrent writes are possible (inventory deduction, counter increment), test concurrent requests
 - **Computed fields**: If response contains derived values (total, average, count), verify computation with known inputs (e.g., total_cost = compute_seconds * rate + memory_mb * rate + external_cost)
 - **Mutation with collection modification**: If PUT/PATCH endpoints accept arrays of child items (e.g., order line items, cart products, invoice entries), test adding/removing items and verify that derived totals (e.g., total_amount, subtotal, item_count) are recalculated correctly. This is the most common source of user-reported bugs — always prioritize it for GENERATE over simple field-update tests.
-    **CRITICAL**: The PATCH/PUT request body MUST include the child collection array field(s) defined for that endpoint (e.g., "items" with FK references like "product_id" and a quantity field) chained from prior POST responses. A PATCH that only sends metadata fields (e.g., discount_type, status, notes) without modifying the child collection is NOT a valid mutation-recalc test — it will pass even when the item/total logic is broken. Before writing assertions, inspect the source code or OpenAPI spec to identify (1) the actual child collection field name and its FK/quantity/price sub-fields, and (2) how derived totals are calculated (including any discounts, taxes, or fees). Then assert: the child FK fields match chained IDs, quantities match sent values, and totals match the computation from the source code
+    The PATCH/PUT request body should include the child collection array field(s) defined for that endpoint (e.g., "items" with FK references like "product_id" and a quantity field) chained from prior POST responses. A PATCH that only sends metadata fields (e.g., discount_type, status, notes) without modifying the child collection is NOT a valid mutation-recalc test — it will pass even when the item/total logic is broken. Before writing assertions, inspect the source code or OpenAPI spec to identify (1) the actual child collection field name and its FK/quantity/price sub-fields, and (2) how derived totals are calculated (including any discounts, taxes, or fees). Then assert: the child FK fields match chained IDs, quantities match sent values, and totals match the computation from the source code
 - **Webhook/event side effects**: If endpoints trigger async operations, test that side effects occur (e.g., POST /orders triggers notification)
 - **Cross-user isolation**: If resources are owned by users, test that user B cannot access/modify user A's resources (GET /users/{other_id}/data → 403 Forbidden)
 - **Range/boundary invariants**: If business rules cap values (max retries, min balance, discount ≤ subtotal), test the boundary (e.g., set retries to max+1 → expect rejection)
@@ -118,6 +165,72 @@ export function buildTestExamples() {
 - Single-resource CRUD with no cross-resource or state verification
 - POST with missing field → 422 (obvious validation, covered by contract tests)`;
 }
+export function buildVerificationChecklist(topN, maxGen) { // Returns the <verification> checklist text; topN is printed as the required total and maxGen as the GENERATE share of it.
+    return `<verification>
+Before finalizing your output, verify:
+1. **Count**: Total recommendation count equals exactly ${topN} (${maxGen} GENERATE + ${topN - maxGen} ADDITIONAL). Not fewer.
+2. **Distinct paths**: Each GENERATE item targets a distinct code path — no two share the same HTTP method + endpoint + expected status.
+3. **Auth parameters are consistent** across all tool calls (same authHeader and authScheme).
+4. Every endpointURL includes both the base URL and the path (not just the base, e.g. \`http://host/api/v1/orders/{id}\`).
+5. **All \`<from source>\` placeholders** you received have been replaced with actual values derived from source code — no \`<...>\` remain in your output.
+6. **Real request shapes**: requestBody for POST/PUT/PATCH uses actual field names from source (not \`{}\`). GET search/filter uses \`queryParams\`, not \`requestBody\`.
+7. **scenarioFile**: \`skyramp_integration_test_generation\` uses the exact \`filePath\` returned by \`skyramp_batch_scenario_test_generation\` — not a guessed or hardcoded filename.
+8. **bugCatchingTarget**: Every GENERATE integration test that targets a business rule, formula, or constraint has a non-empty \`bugCatchingTarget\`.
+9. **FK chaining**: In multi-step integration tests, path params sourced from a prior step's response (e.g. \`order_id\` from step 1) use \`chainsFrom\` — not hardcoded IDs.
+</verification>`; // NOTE(review): item 1 prints topN - maxGen unclamped, so maxGen > topN yields a negative ADDITIONAL count (and a non-numeric topN yields NaN); confirm callers guarantee maxGen <= topN.
+}
+export function buildFewShotExamples() { // Returns the static <examples> prompt section with three worked samples: an integration GENERATE item, a contract GENERATE item, and an ADDITIONAL item.
+    return `<examples>
+<example index="1" type="integration_recommendation">
+<thinking>
+**Parameter grounding**:
+- baseURL: "http://localhost:8000" (workspace api.baseUrl)
+- steps[0].requestBody fields "name", "price": ProductCreate schema fields (src/models/product.py)
+- steps[1].requestBody "product_id": FK to products — chained from step 0 response id
+- steps[1].requestBody "quantity": OrderCreate schema field (src/models/order.py)
+- responseBody "total_amount": 89.97 = 29.99 × 3 — from order total formula (src/services/order_service.py: total = sum(item.price * item.quantity))
+- authHeader/authScheme: workspace config (Authorization / Bearer)
+</thinking>
+**#1 — GENERATE** | integration | business_rule | new
+Scenario: orders-create-with-product-total-calc (3 steps)
+  1. POST /api/v1/products → 201: Create product with known price
+  2. POST /api/v1/orders → 201: Create order referencing product_id from step 1, quantity=3
+  3. GET /api/v1/orders/{order_id} → 200: Verify total_amount = 29.99 × 3 = 89.97
+bugCatchingTarget: "total_amount = sum(item.price × item.quantity) — wrong if multiplication is skipped or items list is ignored"
+Tool calls:
+  skyramp_batch_scenario_test_generation({ scenarioName: "orders-create-with-product-total-calc", destination: "localhost", baseURL: "http://localhost:8000", authHeader: "Authorization", authScheme: "Bearer", steps: [
+    { method: "POST", path: "/api/v1/products", statusCode: 201, requestBody: "{\"name\": \"Widget-1713000000\", \"price\": 29.99}" },
+    { method: "POST", path: "/api/v1/orders", statusCode: 201, requestBody: "{\"product_id\": \"chained\", \"quantity\": 3}" },
+    { method: "GET", path: "/api/v1/orders/{order_id}", statusCode: 200, responseBody: "{\"id\": \"chained\", \"total_amount\": 89.97, \"items\": [{\"product_id\": \"chained\", \"quantity\": 3, \"unit_price\": 29.99}]}" }
+  ] })
+  skyramp_integration_test_generation({ scenarioFile: "<filePath returned by skyramp_batch_scenario_test_generation above>" })
+Reasoning: Catches a broken total calculation before it ships — the most common source of order-related bug reports.
+</example>
+<example index="2" type="contract_recommendation">
+<thinking>
+**Parameter grounding**:
+- endpointURL: "http://localhost:8000/api/v1/products/{product_id}" (workspace baseUrl + path from endpoint listing)
+- method: "DELETE" (route definition, uppercase)
+- pathParams "product_id=<random-uuid-v4>": hardcoded non-existent ID — no setup step needed for a 404 test; use a fresh UUID v4, not all-zeros
+- authHeader/authScheme: workspace config (Authorization / Bearer)
+- No requestData — DELETE carries no body
+</thinking>
+**#2 — GENERATE** | contract | error_handling | new
+DELETE /api/v1/products/{product_id} → 404
+Tool: skyramp_contract_test_generation({ endpointURL: "http://localhost:8000/api/v1/products/{product_id}", method: "DELETE", authHeader: "Authorization", authScheme: "Bearer", pathParams: "product_id=<random-uuid-v4>" })
+Reasoning: Catches a missing 404 guard on DELETE — verifies the handler returns 404 for non-existent resources rather than a 500 or silent no-op.
+</example>
+<example index="3" type="additional_recommendation">
+#5 [ADDITIONAL] | integration | security_boundary | existing
+  Scenario: orders-unauthorized-cross-user-access (POST /api/v1/orders → GET /api/v1/orders/{order_id} as different user → 403)
+  Validates: Cross-user isolation — user B cannot read user A's orders.
+</example>
+</examples>`; // NOTE(review): a backslash-quote sequence inside a template literal evaluates to a bare double quote, so the sample requestBody/responseBody strings are emitted without backslashes; confirm that is the intended prompt text.
+}
 export function buildToolWorkflows(authHeaderValue, authTypeValue = "") {
     const isAuthorizationHeader = /^authorization$/i.test(authHeaderValue);
     const noAuth = !authHeaderValue;
@@ -177,6 +290,10 @@ To skip auth for unauthenticated endpoints, pass \`authHeader: ""\`.`;
         : `**Auth params:** \`${authCallParams}\` — pass to EVERY tool call below.`;
     return `## How to Generate Tests — Tool Workflows
+**Contract**: The following tool signatures are strict technical contracts. Every parameter should match the schema exactly. Omit optional parameters rather than guessing values. If a required field cannot be resolved, fetch context first.
+**Before every tool call**: Output a <thinking> block justifying the mapping of intent to endpoint to tool parameters. See Mandatory Reasoning Protocol above.
 ${authHeaderLine}
 ${authGuidance}
@@ -189,13 +306,13 @@ ${authGuidance}
    **CRITICAL — Query params vs request body:**
    - For **POST/PUT/PATCH**: use \`requestBody\` with realistic field values from source code schemas.
    - For **GET/DELETE with search/filter/pagination**: use \`queryParams\` (JSON string, e.g., \`{"q": "bear", "limit": 10}\`).
-     NEVER put query parameters in \`requestBody\` for GET requests — GET request bodies are non-standard and may be ignored or rejected.
+     Do not put query parameters in \`requestBody\` for GET requests — GET request bodies are non-standard and may be ignored or rejected.
    - For **GET by ID**: no \`requestBody\` or \`queryParams\` needed — the ID is in the path.
    \`responseBody\` should match the actual API response shape from source code (including all fields
    returned by the controller — e.g., \`id\`, \`ownerId\`, \`createdAt\`, included relations like \`collection\`, \`tags\`).
    Wrap in \`{"response": ...}\` if the API uses an envelope pattern. If omitted, a synthetic response is generated.
    Inspect the source code to determine the correct request AND response body shapes — avoid sending \`{}\`.
-   **CRITICAL for PATCH/PUT mutation-recalc scenarios:** The request body MUST include the child
+   **For PATCH/PUT mutation-recalc scenarios:** The request body should include the child
    collection array (e.g. \`"items": [{"product_id": <chained from prior POST>, "quantity": 2}]\`).
    Never send a PATCH that only modifies metadata (discount, status) without also including the
    items/products collection — such a test will not catch collection-level or total-recalculation bugs.
@@ -206,7 +323,7 @@ ${authGuidance}
    Do NOT pass \`chainingKey\` — defaults to \`response.id\`. After generation, the testbot
    will verify and fix path param chaining in the generated test.
-**For single-endpoint tests (contract/fuzz):**
+**For single-endpoint tests (contract):**
 \`skyramp_{type}_test_generation\` with \`endpointURL\` (full URL incl. base + path), \`method\`,
 \`${authCallParams}\`, and \`requestData\` from source code schemas.
 If an OpenAPI spec exists, ALSO pass \`apiSchema\` — it enables schema-aware validation

package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js CHANGED Viewed

@@ -1,11 +1,102 @@
 import { z } from "zod";
-import { StateManager, } from "../../utils/AnalysisStateManager.js";
+import { StateManager, hasSessionData, getSessionData, } from "../../utils/AnalysisStateManager.js";
 import { logger } from "../../utils/logger.js";
 import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
+import { getPersonaPrefix } from "../architectPersona.js";
+import { ScenarioSource, AnalysisScope } from "../../types/RepositoryAnalysis.js";
+import { SCENARIO_CATEGORIES } from "../../types/TestRecommendation.js";
+export function mergeEnrichedScenarios(serverScenarios, raw) {
+    const rejectionNotes = [];
+    let parsed;
+    try {
+        const result = JSON.parse(raw);
+        if (!Array.isArray(result)) {
+            return { scenarios: serverScenarios, rejectionNotes: ["enrichedScenarios: expected a JSON array, got " + typeof result] };
+        }
+        parsed = result;
+    }
+    catch {
+        logger.warning("enrichedScenarios: invalid JSON — using server-side scenarios only");
+        return { scenarios: serverScenarios, rejectionNotes: ["enrichedScenarios: invalid JSON — all scenarios skipped"] };
+    }
+    const agentScenarios = [];
+    for (const s of parsed) {
+        const name = s?.scenarioName ? String(s.scenarioName) : null;
+        const label = name ? `"${name}"` : "(unnamed)";
+        if (!name) {
+            rejectionNotes.push(`rejected ${label}: missing scenarioName`);
+            continue;
+        }
+        if (!Array.isArray(s?.steps) || s.steps.length === 0) {
+            rejectionNotes.push(`rejected ${label}: missing or empty steps array`);
+            continue;
+        }
+        if (!s?.category) {
+            rejectionNotes.push(`rejected ${label}: missing category`);
+            continue;
+        }
+        if (!SCENARIO_CATEGORIES.includes(s.category)) {
+            rejectionNotes.push(`rejected ${label}: unknown category "${s.category}" — valid: ${SCENARIO_CATEGORIES.join(", ")}`);
+            continue;
+        }
+        agentScenarios.push({
+            scenarioName: name,
+            description: s.description ?? "",
+            category: s.category,
+            priority: s.priority ?? "high",
+            bugCatchingTarget: s.bugCatchingTarget,
+            testType: s.testType,
+            steps: s.steps.map((st, idx) => ({
+                order: st.order ?? idx + 1,
+                method: String(st.method ?? "GET").toUpperCase(),
+                path: String(st.path ?? "/"),
+                description: st.description ?? `${st.method} ${st.path}`,
+                interactionType: st.interactionType ?? "success",
+                requestBody: st.requestBody,
+                queryParams: st.queryParams,
+                responseBody: st.responseBody,
+                // Default status code by method if omitted to avoid `statusCode: undefined` in tool calls
+                expectedStatusCode: st.expectedStatusCode ??
+                    (String(st.method ?? "").toUpperCase() === "POST" ? 201
+                        : String(st.method ?? "").toUpperCase() === "DELETE" ? 204
+                            : 200),
+                expectedResponseFields: st.expectedResponseFields,
+                bodyMustInclude: st.bodyMustInclude,
+                chainsFrom: st.chainsFrom,
+            })),
+            chainingKeys: s.chainingKeys ?? [],
+            requiresAuth: s.requiresAuth ?? true,
+            estimatedComplexity: s.estimatedComplexity ?? "moderate",
+            source: ScenarioSource.AgentEnriched,
+        });
+    }
+    if (agentScenarios.length === 0) {
+        return { scenarios: serverScenarios, rejectionNotes };
+    }
+    const merged = new Map(serverScenarios.map(s => [s.scenarioName, s]));
+    for (const s of agentScenarios) {
+        merged.set(s.scenarioName, s);
+    }
+    logger.info("Merged agent-enriched scenarios", {
+        server: serverScenarios.length,
+        agent: agentScenarios.length,
+        total: merged.size,
+        rejected: rejectionNotes.length,
+    });
+    return { scenarios: Array.from(merged.values()), rejectionNotes };
+}
 export function registerRecommendTestsPrompt(server) {
     server.registerPrompt("skyramp_recommend_tests", {
-        description: "Generate ranked test recommendations from a test-management analysis. " +
-            "Provide a stateFile path from skyramp_analyze_changes.",
+        description: getPersonaPrefix() +
+            "Given the repository analysis in stateFile, produce ranked test recommendations split into " +
+            "GENERATE (call generation tools immediately) and ADDITIONAL (deferred, describe only).\n\n" +
+            "**Output contract:** Every GENERATE integration test targeting a business rule or formula " +
+            "MUST include a non-empty bugCatchingTarget. Parameters for generation tools must derive " +
+            "from the repository analysis, enrichedScenarios, or source code you read — no invented " +
+            "field names, no guessed URLs.\n\n" +
+            "**Ranking:** Prioritize business_rule and security_boundary over crud. Within a category, " +
+            "prefer tests that catch specific formulas, constraints, or state transitions over generic " +
+            "happy-path coverage. Provide a stateFile path from skyramp_analyze_changes.",
         argsSchema: {
             stateFile: z
                 .string()
@@ -17,28 +108,79 @@ export function registerRecommendTestsPrompt(server) {
                 .default(10)
                 .optional()
                 .describe("Maximum number of ranked recommendations to return (default: 10)"),
+            enrichedScenarios: z
+                .string()
+                .optional()
+                .refine((val) => {
+                if (val === undefined || val === "")
+                    return true;
+                try {
+                    const parsed = JSON.parse(val);
+                    return Array.isArray(parsed);
+                }
+                catch {
+                    return false;
+                }
+            }, { message: "enrichedScenarios must be a valid JSON array string (e.g. '[{\"scenarioName\":\"...\"}]')" })
+                .describe("JSON array of agent-drafted scenarios (DraftedScenario[]). Each must have " +
+                "scenarioName, category, and steps (array with method, path, order). " +
+                "Agent scenarios override server-side ones by scenarioName and are prioritized in ranking."),
         },
     }, async (args) => {
         const stateFile = args.stateFile;
         if (!stateFile) {
             throw new Error("stateFile is required");
         }
+        // Try in-memory session store first (fullAnalysis no longer on disk).
+        // Extract sessionId from the disk state, then look up the full analysis
+        // from process memory — same pattern as analysisResources.ts.
         const mgr = StateManager.fromStatePath(stateFile);
         if (!mgr.exists()) {
             throw new Error(`State file "${stateFile}" not found. Run skyramp_analyze_changes first.`);
         }
         const fullState = await mgr.readFullState();
         const state = fullState ?? null;
-        if (!state?.repositoryAnalysis?.fullAnalysis) {
+        if (!state?.repositoryAnalysis) {
             throw new Error(`State file "${stateFile}" has no analysis data. Re-run skyramp_analyze_changes.`);
         }
-        const { fullAnalysis, sessionId, wsAuthHeader, wsAuthType } = state.repositoryAnalysis;
+        const { sessionId, wsAuthHeader, wsAuthType } = state.repositoryAnalysis;
         const repositoryPath = fullState?.metadata?.repositoryPath ?? "";
+        // Resolve fullAnalysis: memory first, disk fallback for backward compat
+        let fullAnalysis;
+        if (sessionId && hasSessionData(sessionId)) {
+            const memData = getSessionData(sessionId);
+            if (memData?.analysis) {
+                fullAnalysis = memData.analysis;
+                logger.debug("Loaded fullAnalysis from process memory", { sessionId });
+            }
+        }
+        if (!fullAnalysis) {
+            fullAnalysis = state.repositoryAnalysis.fullAnalysis;
+        }
+        if (!fullAnalysis) {
+            throw new Error(`Analysis data for session not found in memory or on disk. Re-run skyramp_analyze_changes.`);
+        }
         const analysisScope = state.analysisScope === "branch_diff"
-            ? "current_branch_diff"
-            : "full_repo";
+            ? AnalysisScope.CurrentBranchDiff
+            : AnalysisScope.FullRepo;
         const effectiveTopN = args.topN;
-        const prompt = buildRecommendationPrompt(fullAnalysis, analysisScope, effectiveTopN, undefined, wsAuthHeader, wsAuthType);
+        const enrichedRaw = args.enrichedScenarios;
+        let mergedAnalysis = fullAnalysis;
+        let rejectionWarning = "";
+        if (enrichedRaw) {
+            const { scenarios, rejectionNotes } = mergeEnrichedScenarios(fullAnalysis.businessContext.draftedScenarios ?? [], enrichedRaw);
+            mergedAnalysis = {
+                ...fullAnalysis,
+                businessContext: {
+                    ...fullAnalysis.businessContext,
+                    draftedScenarios: scenarios,
+                },
+            };
+            if (rejectionNotes.length > 0) {
+                rejectionWarning = `\n⚠️ enrichedScenarios — ${rejectionNotes.length} scenario(s) rejected and not used in recommendations:\n${rejectionNotes.map(n => `  - ${n}`).join("\n")}\nFix and re-call skyramp_recommend_tests to incorporate corrections.\n`;
+            }
+        }
+        const prompt = buildRecommendationPrompt(mergedAnalysis, analysisScope, effectiveTopN, undefined, wsAuthHeader, wsAuthType, undefined, sessionId);
         logger.info("Serving recommendation prompt via MCP Prompt", {
             stateFile,
             analysisScope,
@@ -52,7 +194,7 @@ export function registerRecommendTestsPrompt(server) {
                     role: "user",
                     content: {
                         type: "text",
-                        text: `State file: ${stateFile}\nRepository: ${repositoryPath}\nScope: ${analysisScope}\n${resourceLinks}\n${prompt}`,
+                        text: `State file: ${stateFile}\nRepository: ${repositoryPath}\nScope: ${analysisScope}\n${resourceLinks}${rejectionWarning}\n${prompt}`,
                     },
                 },
             ],