npm - @skyramp/mcp - Versions diffs - 0.1.4 → 0.1.6 - Mend

@skyramp/mcp 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/build/index.js CHANGED Viewed

@@ -35,6 +35,7 @@ import { registerAnalysisResources } from "./resources/analysisResources.js";
 import { registerProgressResource } from "./resources/progressResource.js";
 import { AnalyticsService } from "./services/AnalyticsService.js";
 import { registerInitTriggerOnMCPInitialized } from "./utils/initAgent.js";
+import { isTestbotEnabled } from "./utils/featureFlags.js";
 import { registerPlaywrightTools, registerTraceRecordingPrompt, getPlaywrightTraceService, } from "./playwright/index.js";
 const oneClickEnabled = process.env.SKYRAMP_FEATURE_ONE_CLICK === "1";
 const oneClickInstructions = oneClickEnabled
@@ -95,8 +96,8 @@ After \`skyramp_analyze_changes\`, inspect enriched data via MCP Resources (use
 Before calling ANY test generation tool, you MUST follow this flow:
 1. **Read** the .skyramp/workspace.yml file to get the configured defaults.
-2. **Extract** the \`language\`, \`framework\`, \`outputDir\`, \`api.baseUrl\`, \`api.authHeader\`, and \`api.authType\` from the services section.
-3. **Use those values** as defaults for the test generation tool call. Do NOT ask the user for these values if they are already configured in the workspace file.
+2. **Extract** the \`language\`, \`framework\`, \`testDirectory\`, \`api.baseUrl\`, \`api.authHeader\`, and \`api.authType\` from the matching service in the services section.
+3. **Use those values** as defaults for the test generation tool call. Pass the service \`testDirectory\` as the generation tool \`outputDir\`. Do NOT ask the user for these values if they are already configured in the workspace file.
 4. **CRITICAL — endpointURL**: The \`endpointURL\` parameter MUST be the full URL to the specific endpoint being tested, NOT just the base URL. Construct it by combining \`api.baseUrl\` with the endpoint path. Example: if \`api.baseUrl\` is \`http://localhost:8000\` and the endpoint is \`/api/v1/products\`, pass \`endpointURL: "http://localhost:8000/api/v1/products"\`. NEVER pass just the base URL (e.g. \`http://localhost:8000\`) as \`endpointURL\`.
 5. **CRITICAL — scenario generation**: When calling \`skyramp_batch_scenario_test_generation\`, ALWAYS pass:
    - \`baseURL\`: The full base URL from \`api.baseUrl\` (e.g., \`http://localhost:3000\`). This determines the scheme, host, and port in the generated trace. Without it, the trace defaults to https:443 which is almost always wrong for local development.
@@ -107,7 +108,7 @@ Before calling ANY test generation tool, you MUST follow this flow:
 6. **CRITICAL — integration test from scenario**: When calling \`skyramp_integration_test_generation\` with a \`scenarioFile\`:
    - If workspace has \`api.authType\` set: omit auth params entirely — passing auth here alongside workspace \`authType\` causes "${AUTH_CONFLICT_ERROR_MSG}".
    - If workspace has no \`api.authType\`: pass \`authHeader\` only (no \`authScheme\`).
-7. **If the workspace file does not exist**, or the needed values (language, framework, outputDir) are missing from the workspace config, ASK the user which language and framework they want before calling the tool.
+7. **If the workspace file does not exist**, or the needed values (language, framework, testDirectory) are missing from the workspace config, ASK the user which language, framework, and outputDir they want before calling the tool.
 8. The user can always override workspace defaults by explicitly specifying values in their request.
 `,
 });
@@ -118,7 +119,7 @@ const prompts = [
     registerRecommendTestsPrompt,
     registerTraceRecordingPrompt,
 ];
-if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
+if (isTestbotEnabled()) {
     prompts.push(registerTestbotPrompt);
     registerTestbotResource(server);
     logger.info("TestBot prompt enabled via SKYRAMP_FEATURE_TESTBOT");
@@ -169,7 +170,7 @@ const infrastructureTools = [
     registerTraceTool,
     registerTraceStopTool,
 ];
-if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
+if (isTestbotEnabled()) {
     infrastructureTools.push(registerSubmitReportTool);
     logger.info("TestBot tools enabled via SKYRAMP_FEATURE_TESTBOT");
 }

package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js CHANGED Viewed

@@ -77,11 +77,15 @@ Create one service entry per deployable unit. You MUST include:
 - \`framework\` — \`playwright\` | \`pytest\` | \`robot\` | \`junit\`
   Detect from: pytest.ini/playwright.config/jest.config/junit in pom.xml
   MUST match the language: python → pytest or robot | typescript/javascript → playwright | java → junit
-- \`testDirectory\` — path relative to repo root where generated tests will be placed. **MUST match the test framework's configured test directory**:
-  - **Playwright**: Read \`playwright.config.ts\` (or \`.js\`/\`.mjs\`) and extract the \`testDir\` value. If no \`testDir\` is specified, common defaults: "tests/", "test/".
-  - **pytest**: Read \`pytest.ini\`, \`pyproject.toml [tool.pytest.ini_options]\`, or \`setup.cfg [tool:pytest]\` for \`testpaths\`. Common defaults: "tests/", "test/".
-  - **JUnit**: Usually "src/test/java" — check \`pom.xml\` or \`build.gradle\` for custom test source directories.
-  ⚠️ **CRITICAL**: If the framework config specifies a test directory, you MUST use that exact path
+- testDirectory — stable path relative to repo root where generated tests for this service will be placed.
+  - For each service, use the test directory configured by that service's test framework when one is discoverable:
+    - Playwright: Read playwright.config.ts (or .js/.mjs) and extract the testDir value.
+    - pytest: Read pytest.ini, pyproject.toml [tool.pytest.ini_options], or setup.cfg [tool:pytest] for testpaths.
+    - JUnit: Usually src/test/java — check pom.xml or build.gradle for custom test source directories.
+  - If no framework-configured test directory is available, use the Skyramp deterministic fallback:
+    - Single generated-test service: set testDirectory to tests/.
+    - Multiple generated-test services: set testDirectory to tests/<serviceName>, where <serviceName> is the exact serviceName with path separators and whitespace replaced by -.
+  Framework config precedence: If framework config specifies a test directory, use that exact path. Use the Skyramp deterministic fallback only when no framework-configured test directory is available.
 **API fields:**
 - \`api.schemaPath\` — path or URL to OpenAPI/Protobuf/GraphQL schema
@@ -154,12 +158,12 @@ Create one service entry per deployable unit. You MUST include:
 Before calling \`skyramp_init_workspace\`, confirm all of the following:
 - ALWAYS SCAN REPO AND FIND SERVICES. A REPO SHOULD HAVE AT LEAST ONE SERVICE.
-- **CRITICAL**: ALL services are included — backend AND frontend. The workspace config is a complete registry of the entire repo, not just the service relevant to your current task. A fullstack or monorepo MUST have multiple services — if you found only one, re-scan every top-level directory before proceeding.
+- CRITICAL: ALL services are included — backend AND frontend. The workspace config is a complete registry of the entire repo, not just the service relevant to your current task. A fullstack or monorepo MUST have multiple services — if you found only one, re-scan every top-level directory before proceeding.
 - Services NOT in docker-compose.yml (e.g. a frontend run with pnpm/npm locally) MUST still be included with runtime "local".
 - Every service has \`api.baseUrl\` set to a valid, discoverable URL — localhost for local services, or the actual deployment URL for cloud/external services. Never fabricate a URL.
 - Every service with \`authType: apiKey\` has \`authHeader\` explicitly set to the actual custom header name (e.g. \`"X-API-Key"\`, \`"X-Admin-Key"\`). If you cannot find the header name in the source code, env vars, or README, do NOT use \`authType: apiKey\` — use \`authType: none\` and add a YAML comment explaining auth is unresolved.
 - \`framework\` matches \`language\` (python → pytest/robot | typescript/javascript → playwright | java → junit)
-- \`testDirectory\` matches the framework's config file (Playwright: \`testDir\` in playwright.config.ts | pytest: \`testpaths\` in pytest.ini/pyproject.toml | JUnit: test source dir in pom.xml/build.gradle). If no config file is found, use the common defaults: "tests/", "test/".
+- \`testDirectory\` follows the stable resolution rules above: framework config file when present (Playwright: \`testDir\` in playwright.config.ts | pytest: \`testpaths\` in pytest.ini/pyproject.toml | JUnit: test source dir in pom.xml/build.gradle); otherwise the deterministic default (\`tests/\` for a single service, \`tests/<serviceName>\` for multiple services).
 - \`serverStartCommand\` matches \`runtime\`
 - For services in docker-compose.yml: runtime MUST be "docker" and command MUST be a docker command (e.g. "docker compose up -d <service-name>").
 - NEVER use application-level commands (uvicorn, npm, node, python, java, etc.) with runtime "docker".

package/build/prompts/personas.js CHANGED Viewed

@@ -1,3 +1,4 @@
+import { isTestbotEnabled } from "../utils/featureFlags.js";
 /**
  * Skyramp personas injected into tool descriptions and prompts.
  *
@@ -19,5 +20,5 @@ export const SKYRAMP_QA_PERSONA = `You are acting as a Skyramp QA Automation Eng
  * avoid duplicating it in every tool description.
  */
 export function getPersonaPrefix() {
-    return process.env.SKYRAMP_FEATURE_TESTBOT ? '' : `${SKYRAMP_QA_PERSONA}\n\n`;
+    return isTestbotEnabled() ? '' : `${SKYRAMP_QA_PERSONA}\n\n`;
 }

package/build/prompts/test-maintenance/drift-analysis-prompt.js CHANGED Viewed

@@ -74,8 +74,9 @@ ${candidateFilesSection}`;
     if (inlineMode) {
         // Testbot inline mode: all maintenance logic lives here so the testbot
         // prompt only orchestrates steps without duplicating rules.
+        // No persona statement here — the outer testbot prompt already establishes
+        // the agent's context; a nested identity statement causes role confusion.
         return `<drift_analysis_rules>
-You are acting as a Skyramp Integration Architect.
 For this maintenance step: assess each existing test against the diff returned by \`skyramp_analyze_changes\` and apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) directly — no separate analysis step.
 ${buildActionDecisionMatrix()}

package/build/prompts/test-maintenance/drift-analysis-prompt.test.js CHANGED Viewed

@@ -1,4 +1,32 @@
 import { buildDriftAnalysisPrompt } from "./drift-analysis-prompt.js";
+describe("buildDriftAnalysisPrompt - inline mode (no stateFile)", () => {
+    function inlinePrompt() {
+        return buildDriftAnalysisPrompt({
+            existingTests: [],
+            scannedEndpoints: [],
+            repositoryPath: "/repo",
+            // stateFile omitted → inline mode
+        });
+    }
+    it("wraps inline rules in drift_analysis_rules XML tags", () => {
+        const prompt = inlinePrompt();
+        expect(prompt).toContain("<drift_analysis_rules>");
+        expect(prompt).toContain("</drift_analysis_rules>");
+    });
+    it("does not contain the persona statement", () => {
+        const prompt = inlinePrompt();
+        expect(prompt).not.toContain("You are acting as a Skyramp Integration Architect");
+    });
+    it("does not contain the standalone Test Health Analysis header", () => {
+        const prompt = inlinePrompt();
+        expect(prompt).not.toContain("# Test Health Analysis");
+    });
+    it("does not contain the skyramp_actions CTA (that belongs to standalone mode)", () => {
+        const prompt = inlinePrompt();
+        // Inline mode final step directs applying changes directly, not calling skyramp_actions
+        expect(prompt).not.toContain("call `skyramp_actions`");
+    });
+});
 describe("buildDriftAnalysisPrompt - scanned endpoints rendering", () => {
     // Reproduces the [object Object] bug: skeletonEndpoints from analyzeChangesTool
     // stores methods as objects { method: string, ... }, not plain strings.

package/build/prompts/test-maintenance/driftAnalysisSections.js CHANGED Viewed

@@ -143,8 +143,8 @@ When a diff adds a new HTTP method to a resource, UPDATE covers **all** existing
 ### PATCH/PUT with child collections (MANDATORY)
 When updating a contract or integration test for a PATCH or PUT endpoint whose request/response includes a child collection array (e.g. \`items\`, \`products\`, \`line_items\`):
-1. The request body MUST include the child array with at least one item containing the Foreign Key field (e.g. \`product_id\`) and a \`quantity\` field.
-2. Assert each item's Foreign Key field and \`quantity\` match the sent values.
+1. The request body MUST include the child array with at least one item containing the FK field (e.g. \`product_id\`) and a \`quantity\` field.
+2. Assert each item's FK field and \`quantity\` match the sent values.
 3. Assert the top-level computed total (e.g. \`total_amount\`) equals the expected math from the items.
 A test that only sends/asserts metadata (discount, status, notes) without asserting the items array is INCOMPLETE and will produce false passes even when the items/total logic is broken.

package/build/prompts/test-recommendation/analysisOutputPrompt.js CHANGED Viewed

@@ -12,12 +12,22 @@ const FRONTEND_EXT = /\.(tsx?|jsx?|vue|svelte|css|scss|less|html|svg)$/i;
  * Returned as an empty string when no router context is available.
  */
 function buildPathResolutionTableStep(p) {
-    if (!p.routerMountContext.length || p.wsSchemaPath)
-        return "";
-    return `### Step 1.5: Build path resolution table
-The **Routing entry-point files** section above lists the files to read.
-**Read each of those files** and trace every router mount call to understand nesting — the pattern varies by framework but the structure is universal: a parent attaches a child router with an optional extra prefix segment. If a prefix is a variable (e.g. \`prefix=api_prefix\`), resolve the variable's value by reading the assignment or the config/settings file it comes from. Examples of what to look for (non-exhaustive):
+    // Case A: spec was fetched successfully — instruct LLM to validate paths against it
+    if (p.wsSchemaPath && p.specFetchSucceeded) {
+        return `### Step 1.5: Validate all endpoint paths against the OpenAPI spec
+Fetch \`${p.wsSchemaPath}\` and extract all keys from \`spec.paths\`.
+**Before placing any path in a tool call**, confirm it exists in that list.
+If a path is NOT in the spec **and it did not come from the PR diff**, find the correct spelling by matching resource name — do NOT use it unverified.
+Paths the PR explicitly added or modified may not yet appear in the spec (spec lag) — treat those as valid.
+`;
+    }
+    // Case B: no spec (or spec unreachable) but router mount context available
+    if (p.routerMountContext.length) {
+        const hasInlined = (p.routerFileContents?.length ?? 0) > 0;
+        return `### Step 1.5: Build path resolution table
+${hasInlined
+            ? "The **Routing entry-point files** section above contains the inlined file contents — use them directly to trace every router mount call"
+            : "The **Routing entry-point files** section above lists the files to read.\n\n**Read each of those files** and trace every router mount call"} to understand nesting — the pattern varies by framework but the structure is universal: a parent attaches a child router with an optional extra prefix segment. If a prefix is a variable (e.g. \`prefix=api_prefix\`), resolve the variable's value by reading the assignment or the config/settings file it comes from. Examples of what to look for (non-exhaustive):
 - Python (FastAPI/Flask): \`parent.include_router(child, prefix="...")\`, \`app.register_blueprint(...)\`
 - JS/TS (Express/Fastify/Hapi): \`app.use('/path', childRouter)\`, \`router.use('/path', sub)\`
 - NestJS: \`@Module({ imports: [FeatureModule] })\` — trace the module import chain; each \`@Controller('prefix')\` contributes a segment
@@ -33,6 +43,20 @@ Chain all segments from the app root down through every intermediate mount to ea
 **This table is authoritative.** Before placing any URL in a tool call, look up the source file. If the pre-built catalog shows a different path, use the table value.
+`;
+    }
+    // Case C: no spec AND no router context — source-verify fallback
+    // Note: also fires when a spec was configured (wsSchemaPath set) but could not be
+    // fetched at analysis time (specFetchSucceeded = false). When that happens the LLM
+    // should know a spec was expected so it can be extra-skeptical about path correctness.
+    const specFailedNote = p.wsSchemaPath && !p.specFetchSucceeded
+        ? `\n> ⚠️ A spec was configured (\`${p.wsSchemaPath}\`) but could not be loaded at analysis time — treat all paths as unverified until confirmed against source.`
+        : "";
+    return `### Step 1.5: Verify endpoint paths from source files
+The endpoint catalog below was produced by static regex analysis and is **unverified**.
+Before using any path in a tool call, read the route definition file identified in the "Source" column and confirm the path string exactly.
+Pay special attention to mount prefixes — a router at \`/api/v1\` + route \`/version\` → path is \`/api/v1/version\`, not \`/api/server-version\`.
+${specFailedNote}
 `;
 }
 // Inline note added to any step where the LLM reads Java source files. Java Spring
@@ -52,10 +76,10 @@ The ranked test recommendation catalog is pre-built and shown below (after the s
 **Your only job is to present it.**
 1. Fill in every \`<…from source>\` placeholder using the field names, computed formulas, and auth details you found in Steps 1–2.
-2. Output the completed catalog **exactly as formatted**, preserving whatever test-type section headings are already present in the catalog. Do NOT restructure, reorder, rename sections, invent missing sections, or generate a new format.
+2. Output the completed catalog **exactly as formatted — grouped by test type (### E2E / ### UI / ### Integration / ### Contract)**. Do NOT restructure, reorder, rename sections, or generate a new format.
 3. Do NOT call any Skyramp generation tools. The catalog shows ready-to-use tool calls that can be executed on demand.
-**If** Steps 1–2 revealed additional scenarios the catalog does not cover (e.g. a computed formula or Foreign Key relationship that was missed), you may optionally call \`skyramp_recommend_tests\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\` and \`enrichedScenarios\` to regenerate a more complete catalog — but only after presenting the current one.`;
+**If** Steps 1–2 revealed additional scenarios the catalog does not cover (e.g. a computed formula or FK relationship that was missed), you may optionally call \`skyramp_recommend_tests\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\` and \`enrichedScenarios\` to regenerate a more complete catalog — but only after presenting the current one.`;
         const hasJavaFiles = p.candidateRouteFiles?.some(f => /\.(java|kt)$/.test(f)) ?? false;
         const routeFilesSection = p.candidateRouteFiles && p.candidateRouteFiles.length > 0
             ? `\nRoute/controller files found by static scan (read these to discover endpoints — the regex-based catalog below may be incomplete for your framework):\n${p.candidateRouteFiles.map(f => `- ${f}`).join("\n")}\n`
@@ -125,6 +149,33 @@ No diff was available — read the changed source files listed above directly to
 ${diffHasJavaFiles ? JAVA_SPRING_NOTE : ""}
 For each endpoint found: note the HTTP method, full path, and source file.
 Also compare against the endpoint catalog to identify any endpoints that appear in the catalog but are no longer present in the source files — these are removed endpoints.`;
+    // Step 2.3: Caller-tracing instruction — only emitted when the PR touches backend code
+    // files that contain no route annotations (utilities, helpers, services). Tells the LLM
+    // to search for callers of the changed functions to find the actual HTTP surface
+    // rather than falling back to the proximity-scanned CRUD endpoints. (Bug 5 fix)
+    //
+    // We filter out:
+    //   - Frontend component files (.jsx/.tsx/.vue/.svelte) — UI changes have no callers
+    //     in the HTTP graph; emitting this block for them produces irrelevant instructions.
+    //   - Non-code files (docs, config, assets, lockfiles) — they have no "changed symbols"
+    //     to trace and listing them as bullets is misleading.
+    const BACKEND_CODE_EXT = /\.(ts|js|mjs|cjs|py|java|kt|rb|go|cs|php|rs|scala|swift|c|cpp|h|hpp)$/i;
+    const traceableUnmatched = (p.unmatchedFiles ?? []).filter(f => BACKEND_CODE_EXT.test(f));
+    const callerTracingStep = isDiffScope && !isUIOnly && traceableUnmatched.length > 0
+        ? `
+### Step 2.3: Trace callers of changed non-route files
+The following changed files contain **no HTTP endpoint registrations** (no route annotations, controller mappings, or handler decorators). Their changes will only be tested if you find and target the HTTP endpoints that *call* them:
+${traceableUnmatched.map(f => `- \`${f}\``).join("\n")}
+For each file above:
+1. **Find the changed symbols** — read the diff (or the file) to identify which functions, methods, or classes were modified.
+2. **Search for callers** — look for import statements and call sites of those symbols across service, handler, and controller files. Use fully qualified names (e.g. \`DataUtils.addFileData\`, not just \`addFileData\`) to avoid false matches in large monorepos.
+3. **Trace to HTTP registration** — from each caller, follow up to the route/controller registration (Spring \`@PostMapping\`, Express \`router.post\`, FastAPI \`@router.post\`, etc.) to identify the endpoint(s) that invoke the changed logic.
+4. **Augment the endpoint list** from Step 2 with these execution-path endpoints.
+5. If an execution or processing endpoint is found (path ending in \`/execute\`, \`/run\`, \`/trigger\`, \`/process\`, \`/invoke\`, or similar), it **MUST** be included in the test candidates. Do not produce coverage consisting solely of CRUD endpoints when an execution-path endpoint was found — CRUD tests may still be included but must not be the only coverage.
+`
+        : "";
     const criticalPatternStep = `### Step 2.5: Identify critical patterns for test categorization
 Look for these patterns in model/schema/handler files to inform test recommendations:
 - **Unique constraints**: \`@unique\`, \`unique: true\`, unique indexes, \`.refine()\` uniqueness checks, \`UNIQUE\` in SQL migrations
@@ -168,22 +219,29 @@ Call \`skyramp_recommend_tests\` with:
 ### Step 1: Read the changed files and diff
 ${changedFiles}${diffFileRef}
 ${buildPathResolutionTableStep(p)}${step2}
+${callerTracingStep}
 ${criticalPatternStep}
 ${step3Content}`;
 }
 export function buildAnalysisOutputText(p) {
     const isDiffScope = p.analysisScope === AnalysisScope.CurrentBranchDiff;
-    // Router mounting context is unique to this prompt (not in recommendationPrompt).
-    // Branch diff, endpoint catalog, auth config, and OpenAPI spec are omitted here
-    // because they are already present in the recommendation prompt that is
-    // concatenated in the same tool response.
-    const routerSection = !p.wsSchemaPath && p.routerMountContext.length
+    // Router mounting context is unique to this prompt; shown whenever mount context
+    // is available, regardless of whether a spec is configured.
+    const routerSection = p.routerMountContext.length
         ? `
 ## Routing entry-point files
-Read these in Step 1.5 to trace the full router/module hierarchy:
-${p.routerMountContext.map(f => `- \`${f}\``).join("\n")}`
+${p.routerFileContents?.length
+            ? p.routerFileContents.map(({ file, content }) => `### \`${file}\`\n\`\`\`\n${content}\n\`\`\``)
+                .join("\n\n") + (p.routerMountContext.length > (p.routerFileContents?.length ?? 0)
+                ? `\n\nAdditional files (too large to inline — read manually if needed):\n` +
+                    p.routerMountContext
+                        .filter(f => !(p.routerFileContents ?? []).some(r => r.file === f))
+                        .map(f => `- \`${f}\``)
+                        .join("\n")
+                : "")
+            : `Read these in Step 1.5 to trace the full router/module hierarchy:\n` +
+                p.routerMountContext.map(f => `- \`${f}\``).join("\n")}`
         : "";
     const enrichment = buildEnrichmentInstructions(p);
     return `# Repository Analysis

package/build/prompts/test-recommendation/analysisOutputPrompt.test.js ADDED Viewed

@@ -0,0 +1,154 @@
+jest.mock("@skyramp/skyramp", () => ({
+    WorkspaceConfigManager: { create: jest.fn() },
+}));
+import { buildAnalysisOutputText } from "./analysisOutputPrompt.js";
+import { AnalysisScope } from "../../types/RepositoryAnalysis.js";
+// ---------------------------------------------------------------------------
+// Minimal fixture factory
+// ---------------------------------------------------------------------------
+function baseParams(overrides = {}) {
+    return {
+        sessionId: "test-session-id",
+        repositoryPath: "/repo",
+        analysisScope: AnalysisScope.CurrentBranchDiff,
+        scannedEndpoints: [],
+        wsBaseUrl: "http://localhost:3000",
+        wsAuthHeader: "Authorization",
+        wsAuthType: "",
+        wsSchemaPath: "",
+        routerMountContext: [],
+        parsedDiff: {
+            changedFiles: [],
+            newEndpoints: [],
+            modifiedEndpoints: [],
+        },
+        ...overrides,
+    };
+}
+// ---------------------------------------------------------------------------
+// Step 2.3 caller-tracing block
+// ---------------------------------------------------------------------------
+describe("buildAnalysisOutputText — unmatchedFiles / Step 2.3 caller-tracing", () => {
+    it("includes Step 2.3 block when unmatchedFiles is non-empty and scope is CurrentBranchDiff", () => {
+        const params = baseParams({
+            unmatchedFiles: [
+                "server/src/main/java/helpers/DataUtils.java",
+                "server/src/main/java/helpers/MustacheHelper.java",
+            ],
+        });
+        const output = buildAnalysisOutputText(params);
+        expect(output).toContain("### Step 2.3: Trace callers of changed non-route files");
+        expect(output).toContain("DataUtils.java");
+        expect(output).toContain("MustacheHelper.java");
+        expect(output).toContain("/execute");
+    });
+    it("lists each unmatched file as a bullet in the Step 2.3 block", () => {
+        const params = baseParams({
+            unmatchedFiles: ["src/services/OrderService.ts", "src/utils/pricingHelper.ts"],
+        });
+        const output = buildAnalysisOutputText(params);
+        expect(output).toContain("- `src/services/OrderService.ts`");
+        expect(output).toContain("- `src/utils/pricingHelper.ts`");
+    });
+    it("omits Step 2.3 block when unmatchedFiles is empty", () => {
+        const params = baseParams({ unmatchedFiles: [] });
+        const output = buildAnalysisOutputText(params);
+        expect(output).not.toContain("Step 2.3");
+        expect(output).not.toContain("Trace callers of changed non-route files");
+    });
+    it("omits Step 2.3 block when unmatchedFiles is undefined", () => {
+        const params = baseParams({ unmatchedFiles: undefined });
+        const output = buildAnalysisOutputText(params);
+        expect(output).not.toContain("Step 2.3");
+    });
+    it("omits Step 2.3 block when scope is full_repo even if unmatchedFiles is non-empty", () => {
+        const params = baseParams({
+            analysisScope: AnalysisScope.FullRepo,
+            unmatchedFiles: ["src/services/SomeService.ts"],
+        });
+        const output = buildAnalysisOutputText(params);
+        expect(output).not.toContain("Step 2.3");
+    });
+    it("Step 2.3 appears before Step 2.5 in the output", () => {
+        const params = baseParams({
+            unmatchedFiles: ["src/utils/helper.ts"],
+        });
+        const output = buildAnalysisOutputText(params);
+        const pos23 = output.indexOf("Step 2.3");
+        const pos25 = output.indexOf("Step 2.5");
+        expect(pos23).toBeGreaterThan(-1);
+        expect(pos25).toBeGreaterThan(-1);
+        expect(pos23).toBeLessThan(pos25);
+    });
+    it("Step 2.5 critical-patterns block is always present regardless of unmatchedFiles", () => {
+        const withUnmatched = buildAnalysisOutputText(baseParams({ unmatchedFiles: ["src/utils/foo.ts"] }));
+        const withoutUnmatched = buildAnalysisOutputText(baseParams({ unmatchedFiles: [] }));
+        expect(withUnmatched).toContain("Step 2.5: Identify critical patterns");
+        expect(withoutUnmatched).toContain("Step 2.5: Identify critical patterns");
+    });
+    it("omits Step 2.3 block when unmatchedFiles contains only frontend component files (UI-only PR)", () => {
+        // Frontend files (.tsx, .jsx, .vue, .svelte) end up in unmatchedFiles because they
+        // have no route annotations, but they have no HTTP callers to trace — emitting
+        // Step 2.3 for them would produce irrelevant instructions. (Copilot review fix)
+        const params = baseParams({
+            unmatchedFiles: [
+                "src/components/Button.tsx",
+                "src/pages/Dashboard.jsx",
+                "src/views/UserProfile.vue",
+                "src/routes/Settings.svelte",
+            ],
+        });
+        const output = buildAnalysisOutputText(params);
+        expect(output).not.toContain("Step 2.3");
+        expect(output).not.toContain("Trace callers of changed non-route files");
+    });
+    it("omits Step 2.3 block when unmatchedFiles contains only non-code files (docs/config)", () => {
+        // README.md, package.json, etc. have no changed symbols to trace — listing them
+        // in Step 2.3 is misleading. (Copilot review fix)
+        const params = baseParams({
+            unmatchedFiles: [
+                "README.md",
+                "package.json",
+                "docker-compose.yml",
+                ".github/workflows/ci.yml",
+            ],
+        });
+        const output = buildAnalysisOutputText(params);
+        expect(output).not.toContain("Step 2.3");
+        expect(output).not.toContain("Trace callers of changed non-route files");
+    });
+    it("emits Step 2.3 for backend code files but excludes frontend/non-code siblings", () => {
+        // Mixed PR: one Java helper + one React component + one config file.
+        // Only the Java file should appear in the Step 2.3 bullets.
+        const params = baseParams({
+            unmatchedFiles: [
+                "server/helpers/DataUtils.java",
+                "client/components/ActionButton.tsx",
+                "package.json",
+            ],
+        });
+        const output = buildAnalysisOutputText(params);
+        expect(output).toContain("Step 2.3");
+        expect(output).toContain("DataUtils.java");
+        expect(output).not.toContain("ActionButton.tsx");
+        expect(output).not.toContain("package.json");
+    });
+    it("omits Step 2.3 when unmatchedFiles contains .ts/.js frontend files but isUIOnly is true", () => {
+        // Angular services, React hooks, Vue composables — all .ts/.js — pass the
+        // BACKEND_CODE_EXT filter but belong to a UI-only PR. The !isUIOnly guard
+        // prevents Step 2.3 from emitting contradictory caller-tracing instructions
+        // alongside the UI-only Step 2 guidance. (Copilot review fix)
+        const params = baseParams({
+            // parsedDiff.changedFiles drives isUIOnly detection; all frontend-ext → isUIOnly=true
+            parsedDiff: {
+                changedFiles: ["src/services/auth.service.ts", "src/hooks/useAuth.ts"],
+                newEndpoints: [],
+                modifiedEndpoints: [],
+            },
+            unmatchedFiles: ["src/services/auth.service.ts", "src/hooks/useAuth.ts"],
+        });
+        const output = buildAnalysisOutputText(params);
+        expect(output).not.toContain("Step 2.3");
+        expect(output).not.toContain("Trace callers of changed non-route files");
+    });
+});

package/build/prompts/test-recommendation/recommendationSections.js CHANGED Viewed

@@ -1,7 +1,9 @@
-import { isContractConsumerModeEnabled, resolveServiceDetailsRef } from "../../utils/featureFlags.js";
+import { isContractConsumerModeEnabled } from "../../utils/featureFlags.js";
+import { resolveServiceDetailsRef } from "../../utils/utils.js";
 import { WorkspaceAuthType, getAuthScheme, isAuthorizationHeaderName, AUTH_MIDDLEWARE_PATTERNS_STR } from "../../utils/workspaceAuth.js";
-// Cached at module-load — the flag is process-wide and cannot change per call.
+// Cached at module-load — flags are process-wide and cannot change per call.
 const CONSUMER_MODE_ENABLED = isContractConsumerModeEnabled();
+const SERVICE_REFS = resolveServiceDetailsRef();
 export const MAX_TESTS_TO_GENERATE = 3;
 export const MAX_RECOMMENDATIONS = 20;
 export const MAX_CRITICAL_TESTS = 3;
@@ -42,45 +44,13 @@ Before calling any tool, replace every \`<from source>\` placeholder in the tool
 }
 export function buildReasoningProtocol() {
     return `<reasoning_protocol>
-## Coverage Reasoning Block (MANDATORY — complete BEFORE your Budget Plan)
-Before committing to a Budget Plan and test list, produce a <thinking> block that enumerates ALL testable surfaces introduced or affected by this PR. This prevents narrow focus on a single endpoint/method.
-**For backend-only PRs**, your thinking MUST cover:
-1. **All HTTP methods affected** — if a new validation/service method is added, trace ALL callers (not just createOne — also updateOne, updateMany, deleteOne). List every HTTP method × endpoint pair.
-2. **Error paths per method** — for each endpoint-method, what error codes does the source code return? (400, 401, 403, 404, 409, 422). Each distinct error path is a potential test.
-3. **Cross-service impact** — does the change affect other services that import the modified module? Those endpoints need coverage too.
-4. **Data migrations** — if a migration exists, can its effect be verified via an API call? (e.g. backfill → GET should return the backfilled value)
-**For frontend-only PRs**, your thinking MUST cover:
-1. **Component integration** — which routes render the changed component? Each route is a test target.
-2. **User interactions** — what actions can a user perform on the changed component? (click, type, select, drag). Each distinct action flow is a test.
-3. **State variations** — what different states does the component render? (empty, loading, error, populated, edge values)
-**For mixed (frontend + backend) PRs**, your thinking MUST cover:
-1. All backend surfaces (methods 1–4 above)
-2. All frontend surfaces (methods 1–3 above)
-3. **E2E bridges** — which frontend components call the changed backend endpoints? Those are E2E test candidates that cover both layers in one test.
-**Output format in your thinking block:**
-\`\`\`
-Testable surfaces:
-- POST /permissions → happy path (201), invalid fields (422), missing collection (400)
-- PATCH /permissions/:id → update with valid fields (200), update with invalid fields (422)
-- GET /items/:collection?aggregate → with allowed fields (200), with forbidden fields (403)
-- UI: permissions field selector → add field, remove field, wildcard toggle
-Total distinct surfaces: N
-\`\`\`
-Your Budget Plan total MUST be ≥ the number of GENERATE slots and reflect the breadth of surfaces found. If you found 8 distinct surfaces but only budget 3 tests, you are under-covering the PR.
 ## Parameter Grounding Rule
 Before each GENERATE tool call, confirm WHERE each key value comes from:
 - **requestBody / responseBody fields** → source code schema (Zod, Pydantic, DTO), enriched scenario, or OpenAPI spec. **The generation tool rejects empty \`{}\` request bodies for POST/PUT/PATCH** — read the source schema first if the fields are unknown.
 - **endpointURL** → workspace \`baseUrl\` + endpoint path (both required — never path alone)
 - **authHeader / authScheme** → workspace config or OpenAPI \`securitySchemes\`
-- **Foreign Key path params** → chained from a prior step's response (check the actual field name — it may be \`id\`, \`uuid\`, \`_id\`, or a resource-specific \`*_id\` field). The chaining source can be a response body (POST or GET), a response header (e.g. \`Location\`), or a cookie — not hardcoded
+- **FK path params** → chained from a prior step's response (check the actual field name — it may be \`id\`, \`uuid\`, \`_id\`, or a resource-specific \`*_id\` field). The chaining source can be a response body (POST or GET), a response header (e.g. \`Location\`), or a cookie — not hardcoded
 - **Names / string values** → realistic; append timestamp suffix to avoid re-run conflicts
 ## Ranking Rule
@@ -142,11 +112,11 @@ export function buildTestPatternGuidelines() {
 - **Middleware chains**: If auth/rate-limit/logging middleware exists, test the chain (e.g., rate limit hit → auth still checked → correct error returned)
 - **N+1 query risk**: If list endpoints join related data (e.g., orders with products), test with large datasets
 - **State machines**: If resources have status transitions (draft→published→archived), test invalid transitions (e.g., archived→draft should fail)
-- **Cascade deletes**: Only recommend after reading source code to confirm which resource holds the Foreign Key. The resource with the Foreign Key is the child; the one it points to is the parent. Example: if orders.product_id references products, then products is the parent — deleting a product tests whether orders are protected or cascade-deleted. Getting this backwards (treating the child as the parent) produces a nonsensical test.
+- **Cascade deletes**: Only recommend after reading source code to confirm which resource holds the FK. The resource with the FK is the child; the one it points to is the parent. Example: if orders.product_id references products, then products is the parent — deleting a product tests whether orders are protected or cascade-deleted. Getting this backwards (treating the child as the parent) produces a nonsensical test.
 - **Race conditions**: If concurrent writes are possible (inventory deduction, counter increment), test concurrent requests
 - **Computed fields**: If response contains derived values (total, average, count), verify computation with known inputs (e.g., total_cost = compute_seconds * rate + memory_mb * rate + external_cost)
 - **Mutation with collection modification**: If PUT/PATCH endpoints accept arrays of child items (e.g., order line items, cart products, invoice entries), test adding/removing items and verify that derived totals (e.g., total_amount, subtotal, item_count) are recalculated correctly. This is the most common source of user-reported bugs — always prioritize it for GENERATE over simple field-update tests.
-    The PATCH/PUT request body should include the child collection array field(s) defined for that endpoint (e.g., "items" with Foreign Key references like "product_id" and a quantity field) chained from prior POST responses. A PATCH that only sends metadata fields (e.g., discount_type, status, notes) without modifying the child collection is NOT a valid mutation-recalc test — it will pass even when the item/total logic is broken. Before writing assertions, inspect the source code or OpenAPI spec to identify (1) the actual child collection field name and its Foreign Key/quantity/price sub-fields, and (2) how derived totals are calculated (including any discounts, taxes, or fees). Then assert: the child Foreign Key fields match chained IDs, quantities match sent values, and totals match the computation from the source code
+    The PATCH/PUT request body should include the child collection array field(s) defined for that endpoint (e.g., "items" with FK references like "product_id" and a quantity field) chained from prior POST responses. A PATCH that only sends metadata fields (e.g., discount_type, status, notes) without modifying the child collection is NOT a valid mutation-recalc test — it will pass even when the item/total logic is broken. Before writing assertions, inspect the source code or OpenAPI spec to identify (1) the actual child collection field name and its FK/quantity/price sub-fields, and (2) how derived totals are calculated (including any discounts, taxes, or fees). Then assert: the child FK fields match chained IDs, quantities match sent values, and totals match the computation from the source code
 - **Webhook/event side effects**: If endpoints trigger async operations, test that side effects occur (e.g., POST /orders triggers notification)
 - **Cross-user isolation**: If resources are owned by users, test that user B cannot access/modify user A's resources (GET /users/{other_id}/data → 403 Forbidden)
 - **Range/boundary invariants**: If business rules cap values (max retries, min balance, discount ≤ subtotal), test the boundary (e.g., set retries to max+1 → expect rejection)
@@ -160,7 +130,7 @@ that step B depends on (e.g., create product → create order referencing that p
 verify order contains correct product). Single-resource CRUD alone is not an integration test.
 Use actual field names and values from the source code schema or OpenAPI schema (not \`{}\` or invented field names); verify response data, not just status codes.
 When a PUT/PATCH updates a resource with child collections (e.g., order items), the request body
-MUST include the child array with Foreign Key references chained from prior steps — and assertions MUST
+MUST include the child array with FK references chained from prior steps — and assertions MUST
 verify the actual child items in the response (product_id, quantity, unit_price), not just
 top-level metadata like discount or status.
@@ -214,7 +184,7 @@ Before finalizing your output, verify:
 6. **Real request shapes**: requestBody for POST/PUT/PATCH uses actual field names from source (not \`{}\`). GET search/filter uses \`queryParams\`, not \`requestBody\`.
 7. **scenarioFile**: \`skyramp_integration_test_generation\` uses the exact \`filePath\` returned by \`skyramp_batch_scenario_test_generation\` — not a guessed or hardcoded filename.
 8. **bugCatchingTarget**: Every GENERATE integration test that targets a business rule, formula, or constraint has a non-empty \`bugCatchingTarget\`.
-9. **Foreign Key chaining**: In multi-step integration tests, path params sourced from a prior step's response (e.g. \`order_id\` from step 1) use \`chainsFrom\` — not hardcoded IDs.
+9. **FK chaining**: In multi-step integration tests, path params sourced from a prior step's response (e.g. \`order_id\` from step 1) use \`chainsFrom\` — not hardcoded IDs.
 10. **Concrete scenario names**: No GENERATE item uses a placeholder name ending in a numeric suffix (e.g. \`ui-test-for-changed-component-1\`, \`ui-test-from-trace-2\`). Derive the name from the actual changed component or flow: if the diff touches \`LinkCard.tsx\`, the scenario name should be \`link-card-pin-toggle\` or \`link-card-edit-description\`, not \`ui-test-for-changed-component-1\`. The changed file list is available above — use it.
 </verification>`;
 }
@@ -225,7 +195,7 @@ export function buildFewShotExamples() {
 **Parameter grounding**:
 - baseURL: "http://localhost:8000" (workspace api.baseUrl)
 - steps[0].requestBody fields "name", "price": ProductCreate schema fields (src/models/product.py)
-- steps[1].requestBody "product_id": Foreign Key to products — chained from step 0 response id
+- steps[1].requestBody "product_id": FK to products — chained from step 0 response id
 - steps[1].requestBody "quantity": OrderCreate schema field (src/models/order.py)
 - responseBody "total_amount": 89.97 = 29.99 × 3 — from order total formula (src/services/order_service.py: total = sum(item.price * item.quantity))
 - authHeader/authScheme: workspace config (Authorization / Bearer)
@@ -343,7 +313,7 @@ ${authGuidance}
 **For multi-endpoint workflows (integration tests) — Batch Scenario → Integration pipeline:**
 1. Call \`skyramp_batch_scenario_test_generation\` with ALL steps in a single call: \`scenarioName\`, \`destination\`,
    \`baseURL\`, \`${authCallParams}\`, and a \`steps\` array where each element has \`method\`, \`path\`, \`requestBody\` OR \`queryParams\`, \`responseBody\`, \`statusCode\`.
-   \`statusCode\` is required — determine the expected status code from the source code for each step.
+   \`statusCode\` is optional — defaults: POST→201, DELETE→204, GET/PUT/PATCH→200. Only override for non-standard codes.
    **OpenAPI spec is NOT required.** \`apiSchema\` is OPTIONAL — omit it if no spec exists.
    **CRITICAL — Query params vs request body:**
    - For **POST/PUT/PATCH**: use \`requestBody\` with realistic field values from source code schemas.
@@ -383,12 +353,12 @@ ${CONSUMER_MODE_ENABLED ? `**Contract test mode selection — set based on this
 Only provider-side contract tests are supported. Pass \`providerMode: true\` for new or modified endpoints this codebase owns.`}
 **For UI tests:**
-1. \`browser_navigate\` to the target URL (from ${resolveServiceDetailsRef().baseUrlRef})
+1. \`browser_navigate\` to the target URL (from workspace \`api.baseUrl\`)
 2. \`browser_snapshot\` to see the page (ARIA tree)
 3. Interact using \`browser_click\`, \`browser_type\`, \`browser_fill_form\`, etc.
 4. \`browser_snapshot\` after each interaction that changes the page
 5. \`skyramp_export_zip\` with an **absolute** output path: \`<repositoryPath>/.skyramp/<test_name>_trace.zip\`
-6. \`skyramp_ui_test_generation\` with \`playwrightInput\` = the **absolute** path of the exported zip, and \`outputDir\` = ${resolveServiceDetailsRef().frontendTestDirRef} (e.g. \`frontend/tests\`). Do NOT use the backend service's testDirectory — UI tests must go in the frontend service's test directory.
+6. \`skyramp_ui_test_generation\` with \`playwrightInput\` = the **absolute** path of the exported zip, and \`outputDir\` = ${SERVICE_REFS.frontendTestDirRef} (e.g. \`frontend/tests\`). Do NOT use the backend service's testDirectory — UI tests must go in the frontend service's test directory.
 Tips: For custom dropdowns (Radix, MUI): click combobox → snapshot → click option (NOT \`browser_select_option\`).

package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js CHANGED Viewed

@@ -55,6 +55,7 @@ export function mergeEnrichedScenarios(serverScenarios, raw) {
                 requestBody: st.requestBody,
                 queryParams: st.queryParams,
                 responseBody: st.responseBody,
+                // Default status code by method if omitted to avoid `statusCode: undefined` in tool calls
                 expectedStatusCode: st.expectedStatusCode ?? inferExpectedStatus(String(st.method ?? "GET")),
                 expectedResponseFields: st.expectedResponseFields,
                 bodyMustInclude: st.bodyMustInclude,
@@ -150,11 +151,29 @@ export function registerRecommendTestsPrompt(server) {
             }
         }
         if (!fullAnalysis) {
+            if (sessionId) {
+                logger.warning(`Session not found in memory (sessionId=${sessionId}) — server may have restarted; falling back to state file`);
+            }
             fullAnalysis = state.repositoryAnalysis.fullAnalysis;
         }
         if (!fullAnalysis) {
             throw new Error(`Analysis data for session not found in memory or on disk. Re-run skyramp_analyze_changes.`);
         }
+        // Hydrate testLocations from the disk-persisted field when fullAnalysis came from disk
+        // (after a server restart, fullAnalysis is loaded from state.repositoryAnalysis.fullAnalysis
+        // but testLocations was persisted separately under state.repositoryAnalysis.testLocations)
+        if (fullAnalysis.existingTests &&
+            !fullAnalysis.existingTests.testLocations &&
+            state.repositoryAnalysis.testLocations) {
+            fullAnalysis = {
+                ...fullAnalysis,
+                existingTests: {
+                    ...fullAnalysis.existingTests,
+                    testLocations: state.repositoryAnalysis.testLocations,
+                },
+            };
+            logger.debug("Hydrated existingTests.testLocations from disk-persisted state", { sessionId });
+        }
         // Normalize legacy state files: before AnalysisScope enum normalization, state stored
         // the user-facing param value "branch_diff". Map it explicitly so diff-mode detection
         // works correctly on state created before this deployment (2-hour TTL window).