npm - @skyramp/mcp - Versions diffs - 0.1.5 → 0.1.6 - Mend

@skyramp/mcp 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

package/build/index.js CHANGED Viewed

@@ -35,6 +35,7 @@ import { registerAnalysisResources } from "./resources/analysisResources.js";
 import { registerProgressResource } from "./resources/progressResource.js";
 import { AnalyticsService } from "./services/AnalyticsService.js";
 import { registerInitTriggerOnMCPInitialized } from "./utils/initAgent.js";
+import { isTestbotEnabled } from "./utils/featureFlags.js";
 import { registerPlaywrightTools, registerTraceRecordingPrompt, getPlaywrightTraceService, } from "./playwright/index.js";
 const oneClickEnabled = process.env.SKYRAMP_FEATURE_ONE_CLICK === "1";
 const oneClickInstructions = oneClickEnabled
@@ -95,8 +96,8 @@ After \`skyramp_analyze_changes\`, inspect enriched data via MCP Resources (use
 Before calling ANY test generation tool, you MUST follow this flow:
 1. **Read** the .skyramp/workspace.yml file to get the configured defaults.
-2. **Extract** the \`language\`, \`framework\`, \`outputDir\`, \`api.baseUrl\`, \`api.authHeader\`, and \`api.authType\` from the services section.
-3. **Use those values** as defaults for the test generation tool call. Do NOT ask the user for these values if they are already configured in the workspace file.
+2. **Extract** the \`language\`, \`framework\`, \`testDirectory\`, \`api.baseUrl\`, \`api.authHeader\`, and \`api.authType\` from the matching service in the services section.
+3. **Use those values** as defaults for the test generation tool call. Pass the service \`testDirectory\` as the generation tool \`outputDir\`. Do NOT ask the user for these values if they are already configured in the workspace file.
 4. **CRITICAL — endpointURL**: The \`endpointURL\` parameter MUST be the full URL to the specific endpoint being tested, NOT just the base URL. Construct it by combining \`api.baseUrl\` with the endpoint path. Example: if \`api.baseUrl\` is \`http://localhost:8000\` and the endpoint is \`/api/v1/products\`, pass \`endpointURL: "http://localhost:8000/api/v1/products"\`. NEVER pass just the base URL (e.g. \`http://localhost:8000\`) as \`endpointURL\`.
 5. **CRITICAL — scenario generation**: When calling \`skyramp_batch_scenario_test_generation\`, ALWAYS pass:
    - \`baseURL\`: The full base URL from \`api.baseUrl\` (e.g., \`http://localhost:3000\`). This determines the scheme, host, and port in the generated trace. Without it, the trace defaults to https:443 which is almost always wrong for local development.
@@ -107,7 +108,7 @@ Before calling ANY test generation tool, you MUST follow this flow:
 6. **CRITICAL — integration test from scenario**: When calling \`skyramp_integration_test_generation\` with a \`scenarioFile\`:
    - If workspace has \`api.authType\` set: omit auth params entirely — passing auth here alongside workspace \`authType\` causes "${AUTH_CONFLICT_ERROR_MSG}".
    - If workspace has no \`api.authType\`: pass \`authHeader\` only (no \`authScheme\`).
-7. **If the workspace file does not exist**, or the needed values (language, framework, outputDir) are missing from the workspace config, ASK the user which language and framework they want before calling the tool.
+7. **If the workspace file does not exist**, or the needed values (language, framework, testDirectory) are missing from the workspace config, ASK the user which language, framework, and outputDir they want before calling the tool.
 8. The user can always override workspace defaults by explicitly specifying values in their request.
 `,
 });
@@ -118,7 +119,7 @@ const prompts = [
     registerRecommendTestsPrompt,
     registerTraceRecordingPrompt,
 ];
-if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
+if (isTestbotEnabled()) {
     prompts.push(registerTestbotPrompt);
     registerTestbotResource(server);
     logger.info("TestBot prompt enabled via SKYRAMP_FEATURE_TESTBOT");
@@ -169,7 +170,7 @@ const infrastructureTools = [
     registerTraceTool,
     registerTraceStopTool,
 ];
-if (process.env.SKYRAMP_FEATURE_TESTBOT === "1") {
+if (isTestbotEnabled()) {
     infrastructureTools.push(registerSubmitReportTool);
     logger.info("TestBot tools enabled via SKYRAMP_FEATURE_TESTBOT");
 }

package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js CHANGED Viewed

@@ -77,11 +77,15 @@ Create one service entry per deployable unit. You MUST include:
 - \`framework\` — \`playwright\` | \`pytest\` | \`robot\` | \`junit\`
   Detect from: pytest.ini/playwright.config/jest.config/junit in pom.xml
   MUST match the language: python → pytest or robot | typescript/javascript → playwright | java → junit
-- \`testDirectory\` — path relative to repo root where generated tests will be placed. **MUST match the test framework's configured test directory**:
-  - **Playwright**: Read \`playwright.config.ts\` (or \`.js\`/\`.mjs\`) and extract the \`testDir\` value. If no \`testDir\` is specified, common defaults: "tests/", "test/".
-  - **pytest**: Read \`pytest.ini\`, \`pyproject.toml [tool.pytest.ini_options]\`, or \`setup.cfg [tool:pytest]\` for \`testpaths\`. Common defaults: "tests/", "test/".
-  - **JUnit**: Usually "src/test/java" — check \`pom.xml\` or \`build.gradle\` for custom test source directories.
-  ⚠️ **CRITICAL**: If the framework config specifies a test directory, you MUST use that exact path
+- testDirectory — stable path relative to repo root where generated tests for this service will be placed.
+  - For each service, use the test directory configured by that service's test framework when one is discoverable:
+    - Playwright: Read playwright.config.ts (or .js/.mjs) and extract the testDir value.
+    - pytest: Read pytest.ini, pyproject.toml [tool.pytest.ini_options], or setup.cfg [tool:pytest] for testpaths.
+    - JUnit: Usually src/test/java — check pom.xml or build.gradle for custom test source directories.
+  - If no framework-configured test directory is available, use the Skyramp deterministic fallback:
+    - Single generated-test service: set testDirectory to tests/.
+    - Multiple generated-test services: set testDirectory to tests/<serviceName>, where <serviceName> is the exact serviceName with path separators and whitespace replaced by -.
+  Framework config precedence: If framework config specifies a test directory, use that exact path. Use the Skyramp deterministic fallback only when no framework-configured test directory is available.
 **API fields:**
 - \`api.schemaPath\` — path or URL to OpenAPI/Protobuf/GraphQL schema
@@ -154,12 +158,12 @@ Create one service entry per deployable unit. You MUST include:
 Before calling \`skyramp_init_workspace\`, confirm all of the following:
 - ALWAYS SCAN REPO AND FIND SERVICES. A REPO SHOULD HAVE AT LEAST ONE SERVICE.
-- **CRITICAL**: ALL services are included — backend AND frontend. The workspace config is a complete registry of the entire repo, not just the service relevant to your current task. A fullstack or monorepo MUST have multiple services — if you found only one, re-scan every top-level directory before proceeding.
+- CRITICAL: ALL services are included — backend AND frontend. The workspace config is a complete registry of the entire repo, not just the service relevant to your current task. A fullstack or monorepo MUST have multiple services — if you found only one, re-scan every top-level directory before proceeding.
 - Services NOT in docker-compose.yml (e.g. a frontend run with pnpm/npm locally) MUST still be included with runtime "local".
 - Every service has \`api.baseUrl\` set to a valid, discoverable URL — localhost for local services, or the actual deployment URL for cloud/external services. Never fabricate a URL.
 - Every service with \`authType: apiKey\` has \`authHeader\` explicitly set to the actual custom header name (e.g. \`"X-API-Key"\`, \`"X-Admin-Key"\`). If you cannot find the header name in the source code, env vars, or README, do NOT use \`authType: apiKey\` — use \`authType: none\` and add a YAML comment explaining auth is unresolved.
 - \`framework\` matches \`language\` (python → pytest/robot | typescript/javascript → playwright | java → junit)
-- \`testDirectory\` matches the framework's config file (Playwright: \`testDir\` in playwright.config.ts | pytest: \`testpaths\` in pytest.ini/pyproject.toml | JUnit: test source dir in pom.xml/build.gradle). If no config file is found, use the common defaults: "tests/", "test/".
+- \`testDirectory\` follows the stable resolution rules above: framework config file when present (Playwright: \`testDir\` in playwright.config.ts | pytest: \`testpaths\` in pytest.ini/pyproject.toml | JUnit: test source dir in pom.xml/build.gradle); otherwise the deterministic default (\`tests/\` for a single service, \`tests/<serviceName>\` for multiple services).
 - \`serverStartCommand\` matches \`runtime\`
 - For services in docker-compose.yml: runtime MUST be "docker" and command MUST be a docker command (e.g. "docker compose up -d <service-name>").
 - NEVER use application-level commands (uvicorn, npm, node, python, java, etc.) with runtime "docker".

package/build/prompts/personas.js CHANGED Viewed

@@ -1,3 +1,4 @@
+import { isTestbotEnabled } from "../utils/featureFlags.js";
 /**
  * Skyramp personas injected into tool descriptions and prompts.
  *
@@ -19,5 +20,5 @@ export const SKYRAMP_QA_PERSONA = `You are acting as a Skyramp QA Automation Eng
  * avoid duplicating it in every tool description.
  */
 export function getPersonaPrefix() {
-    return process.env.SKYRAMP_FEATURE_TESTBOT ? '' : `${SKYRAMP_QA_PERSONA}\n\n`;
+    return isTestbotEnabled() ? '' : `${SKYRAMP_QA_PERSONA}\n\n`;
 }

package/build/prompts/test-maintenance/drift-analysis-prompt.js CHANGED Viewed

@@ -74,8 +74,9 @@ ${candidateFilesSection}`;
     if (inlineMode) {
         // Testbot inline mode: all maintenance logic lives here so the testbot
         // prompt only orchestrates steps without duplicating rules.
+        // No persona statement here — the outer testbot prompt already establishes
+        // the agent's context; a nested identity statement causes role confusion.
         return `<drift_analysis_rules>
-You are acting as a Skyramp Integration Architect.
 For this maintenance step: assess each existing test against the diff returned by \`skyramp_analyze_changes\` and apply the correct action (IGNORE, UPDATE, REGENERATE, or DELETE) directly — no separate analysis step.
 ${buildActionDecisionMatrix()}

package/build/prompts/test-maintenance/drift-analysis-prompt.test.js CHANGED Viewed

@@ -1,4 +1,32 @@
 import { buildDriftAnalysisPrompt } from "./drift-analysis-prompt.js";
+describe("buildDriftAnalysisPrompt - inline mode (no stateFile)", () => {
+    function inlinePrompt() {
+        return buildDriftAnalysisPrompt({
+            existingTests: [],
+            scannedEndpoints: [],
+            repositoryPath: "/repo",
+            // stateFile omitted → inline mode
+        });
+    }
+    it("wraps inline rules in drift_analysis_rules XML tags", () => {
+        const prompt = inlinePrompt();
+        expect(prompt).toContain("<drift_analysis_rules>");
+        expect(prompt).toContain("</drift_analysis_rules>");
+    });
+    it("does not contain the persona statement", () => {
+        const prompt = inlinePrompt();
+        expect(prompt).not.toContain("You are acting as a Skyramp Integration Architect");
+    });
+    it("does not contain the standalone Test Health Analysis header", () => {
+        const prompt = inlinePrompt();
+        expect(prompt).not.toContain("# Test Health Analysis");
+    });
+    it("does not contain the skyramp_actions CTA (that belongs to standalone mode)", () => {
+        const prompt = inlinePrompt();
+        // Inline mode final step directs applying changes directly, not calling skyramp_actions
+        expect(prompt).not.toContain("call `skyramp_actions`");
+    });
+});
 describe("buildDriftAnalysisPrompt - scanned endpoints rendering", () => {
     // Reproduces the [object Object] bug: skeletonEndpoints from analyzeChangesTool
     // stores methods as objects { method: string, ... }, not plain strings.

package/build/prompts/test-recommendation/analysisOutputPrompt.js CHANGED Viewed

@@ -12,12 +12,22 @@ const FRONTEND_EXT = /\.(tsx?|jsx?|vue|svelte|css|scss|less|html|svg)$/i;
  * Returned as an empty string when no router context is available.
  */
 function buildPathResolutionTableStep(p) {
-    if (!p.routerMountContext.length || p.wsSchemaPath)
-        return "";
-    return `### Step 1.5: Build path resolution table
-The **Routing entry-point files** section above lists the files to read.
-**Read each of those files** and trace every router mount call to understand nesting — the pattern varies by framework but the structure is universal: a parent attaches a child router with an optional extra prefix segment. If a prefix is a variable (e.g. \`prefix=api_prefix\`), resolve the variable's value by reading the assignment or the config/settings file it comes from. Examples of what to look for (non-exhaustive):
+    // Case A: spec was fetched successfully — instruct LLM to validate paths against it
+    if (p.wsSchemaPath && p.specFetchSucceeded) {
+        return `### Step 1.5: Validate all endpoint paths against the OpenAPI spec
+Fetch \`${p.wsSchemaPath}\` and extract all keys from \`spec.paths\`.
+**Before placing any path in a tool call**, confirm it exists in that list.
+If a path is NOT in the spec **and it did not come from the PR diff**, find the correct spelling by matching resource name — do NOT use it unverified.
+Paths the PR explicitly added or modified may not yet appear in the spec (spec lag) — treat those as valid.
+`;
+    }
+    // Case B: no spec (or spec unreachable) but router mount context available
+    if (p.routerMountContext.length) {
+        const hasInlined = (p.routerFileContents?.length ?? 0) > 0;
+        return `### Step 1.5: Build path resolution table
+${hasInlined
+            ? "The **Routing entry-point files** section above contains the inlined file contents — use them directly to trace every router mount call"
+            : "The **Routing entry-point files** section above lists the files to read.\n\n**Read each of those files** and trace every router mount call"} to understand nesting — the pattern varies by framework but the structure is universal: a parent attaches a child router with an optional extra prefix segment. If a prefix is a variable (e.g. \`prefix=api_prefix\`), resolve the variable's value by reading the assignment or the config/settings file it comes from. Examples of what to look for (non-exhaustive):
 - Python (FastAPI/Flask): \`parent.include_router(child, prefix="...")\`, \`app.register_blueprint(...)\`
 - JS/TS (Express/Fastify/Hapi): \`app.use('/path', childRouter)\`, \`router.use('/path', sub)\`
 - NestJS: \`@Module({ imports: [FeatureModule] })\` — trace the module import chain; each \`@Controller('prefix')\` contributes a segment
@@ -33,6 +43,20 @@ Chain all segments from the app root down through every intermediate mount to ea
 **This table is authoritative.** Before placing any URL in a tool call, look up the source file. If the pre-built catalog shows a different path, use the table value.
+`;
+    }
+    // Case C: no spec AND no router context — source-verify fallback
+    // Note: also fires when a spec was configured (wsSchemaPath set) but could not be
+    // fetched at analysis time (specFetchSucceeded = false). When that happens the LLM
+    // should know a spec was expected so it can be extra-skeptical about path correctness.
+    const specFailedNote = p.wsSchemaPath && !p.specFetchSucceeded
+        ? `\n> ⚠️ A spec was configured (\`${p.wsSchemaPath}\`) but could not be loaded at analysis time — treat all paths as unverified until confirmed against source.`
+        : "";
+    return `### Step 1.5: Verify endpoint paths from source files
+The endpoint catalog below was produced by static regex analysis and is **unverified**.
+Before using any path in a tool call, read the route definition file identified in the "Source" column and confirm the path string exactly.
+Pay special attention to mount prefixes — a router at \`/api/v1\` + route \`/version\` → path is \`/api/v1/version\`, not \`/api/server-version\`.
+${specFailedNote}
 `;
 }
 // Inline note added to any step where the LLM reads Java source files. Java Spring
@@ -125,6 +149,33 @@ No diff was available — read the changed source files listed above directly to
 ${diffHasJavaFiles ? JAVA_SPRING_NOTE : ""}
 For each endpoint found: note the HTTP method, full path, and source file.
 Also compare against the endpoint catalog to identify any endpoints that appear in the catalog but are no longer present in the source files — these are removed endpoints.`;
+    // Step 2.3: Caller-tracing instruction — only emitted when the PR touches backend code
+    // files that contain no route annotations (utilities, helpers, services). Tells the LLM
+    // to search for callers of the changed functions to find the actual HTTP surface
+    // rather than falling back to the proximity-scanned CRUD endpoints. (Bug 5 fix)
+    //
+    // We filter out:
+    //   - Frontend component files (.jsx/.tsx/.vue/.svelte) — UI changes have no callers
+    //     in the HTTP graph; emitting this block for them produces irrelevant instructions.
+    //   - Non-code files (docs, config, assets, lockfiles) — they have no "changed symbols"
+    //     to trace and listing them as bullets is misleading.
+    const BACKEND_CODE_EXT = /\.(ts|js|mjs|cjs|py|java|kt|rb|go|cs|php|rs|scala|swift|c|cpp|h|hpp)$/i;
+    const traceableUnmatched = (p.unmatchedFiles ?? []).filter(f => BACKEND_CODE_EXT.test(f));
+    const callerTracingStep = isDiffScope && !isUIOnly && traceableUnmatched.length > 0
+        ? `
+### Step 2.3: Trace callers of changed non-route files
+The following changed files contain **no HTTP endpoint registrations** (no route annotations, controller mappings, or handler decorators). Their changes will only be tested if you find and target the HTTP endpoints that *call* them:
+${traceableUnmatched.map(f => `- \`${f}\``).join("\n")}
+For each file above:
+1. **Find the changed symbols** — read the diff (or the file) to identify which functions, methods, or classes were modified.
+2. **Search for callers** — look for import statements and call sites of those symbols across service, handler, and controller files. Use fully qualified names (e.g. \`DataUtils.addFileData\`, not just \`addFileData\`) to avoid false matches in large monorepos.
+3. **Trace to HTTP registration** — from each caller, follow up to the route/controller registration (Spring \`@PostMapping\`, Express \`router.post\`, FastAPI \`@router.post\`, etc.) to identify the endpoint(s) that invoke the changed logic.
+4. **Augment the endpoint list** from Step 2 with these execution-path endpoints.
+5. If an execution or processing endpoint is found (path ending in \`/execute\`, \`/run\`, \`/trigger\`, \`/process\`, \`/invoke\`, or similar), it **MUST** be included in the test candidates. Do not produce coverage consisting solely of CRUD endpoints when an execution-path endpoint was found — CRUD tests may still be included but must not be the only coverage.
+`
+        : "";
     const criticalPatternStep = `### Step 2.5: Identify critical patterns for test categorization
 Look for these patterns in model/schema/handler files to inform test recommendations:
 - **Unique constraints**: \`@unique\`, \`unique: true\`, unique indexes, \`.refine()\` uniqueness checks, \`UNIQUE\` in SQL migrations
@@ -168,22 +219,29 @@ Call \`skyramp_recommend_tests\` with:
 ### Step 1: Read the changed files and diff
 ${changedFiles}${diffFileRef}
 ${buildPathResolutionTableStep(p)}${step2}
+${callerTracingStep}
 ${criticalPatternStep}
 ${step3Content}`;
 }
 export function buildAnalysisOutputText(p) {
     const isDiffScope = p.analysisScope === AnalysisScope.CurrentBranchDiff;
-    // Router mounting context is unique to this prompt (not in recommendationPrompt).
-    // Branch diff, endpoint catalog, auth config, and OpenAPI spec are omitted here
-    // because they are already present in the recommendation prompt that is
-    // concatenated in the same tool response.
-    const routerSection = !p.wsSchemaPath && p.routerMountContext.length
+    // Router mounting context is unique to this prompt; shown whenever mount context
+    // is available, regardless of whether a spec is configured.
+    const routerSection = p.routerMountContext.length
         ? `
 ## Routing entry-point files
-Read these in Step 1.5 to trace the full router/module hierarchy:
-${p.routerMountContext.map(f => `- \`${f}\``).join("\n")}`
+${p.routerFileContents?.length
+            ? p.routerFileContents.map(({ file, content }) => `### \`${file}\`\n\`\`\`\n${content}\n\`\`\``)
+                .join("\n\n") + (p.routerMountContext.length > (p.routerFileContents?.length ?? 0)
+                ? `\n\nAdditional files (too large to inline — read manually if needed):\n` +
+                    p.routerMountContext
+                        .filter(f => !(p.routerFileContents ?? []).some(r => r.file === f))
+                        .map(f => `- \`${f}\``)
+                        .join("\n")
+                : "")
+            : `Read these in Step 1.5 to trace the full router/module hierarchy:\n` +
+                p.routerMountContext.map(f => `- \`${f}\``).join("\n")}`
         : "";
     const enrichment = buildEnrichmentInstructions(p);
     return `# Repository Analysis

package/build/prompts/test-recommendation/analysisOutputPrompt.test.js ADDED Viewed

@@ -0,0 +1,154 @@
+jest.mock("@skyramp/skyramp", () => ({
+    WorkspaceConfigManager: { create: jest.fn() },
+}));
+import { buildAnalysisOutputText } from "./analysisOutputPrompt.js";
+import { AnalysisScope } from "../../types/RepositoryAnalysis.js";
+// ---------------------------------------------------------------------------
+// Minimal fixture factory
+// ---------------------------------------------------------------------------
+function baseParams(overrides = {}) {
+    return {
+        sessionId: "test-session-id",
+        repositoryPath: "/repo",
+        analysisScope: AnalysisScope.CurrentBranchDiff,
+        scannedEndpoints: [],
+        wsBaseUrl: "http://localhost:3000",
+        wsAuthHeader: "Authorization",
+        wsAuthType: "",
+        wsSchemaPath: "",
+        routerMountContext: [],
+        parsedDiff: {
+            changedFiles: [],
+            newEndpoints: [],
+            modifiedEndpoints: [],
+        },
+        ...overrides,
+    };
+}
+// ---------------------------------------------------------------------------
+// Step 2.3 caller-tracing block
+// ---------------------------------------------------------------------------
+describe("buildAnalysisOutputText — unmatchedFiles / Step 2.3 caller-tracing", () => {
+    it("includes Step 2.3 block when unmatchedFiles is non-empty and scope is CurrentBranchDiff", () => {
+        const params = baseParams({
+            unmatchedFiles: [
+                "server/src/main/java/helpers/DataUtils.java",
+                "server/src/main/java/helpers/MustacheHelper.java",
+            ],
+        });
+        const output = buildAnalysisOutputText(params);
+        expect(output).toContain("### Step 2.3: Trace callers of changed non-route files");
+        expect(output).toContain("DataUtils.java");
+        expect(output).toContain("MustacheHelper.java");
+        expect(output).toContain("/execute");
+    });
+    it("lists each unmatched file as a bullet in the Step 2.3 block", () => {
+        const params = baseParams({
+            unmatchedFiles: ["src/services/OrderService.ts", "src/utils/pricingHelper.ts"],
+        });
+        const output = buildAnalysisOutputText(params);
+        expect(output).toContain("- `src/services/OrderService.ts`");
+        expect(output).toContain("- `src/utils/pricingHelper.ts`");
+    });
+    it("omits Step 2.3 block when unmatchedFiles is empty", () => {
+        const params = baseParams({ unmatchedFiles: [] });
+        const output = buildAnalysisOutputText(params);
+        expect(output).not.toContain("Step 2.3");
+        expect(output).not.toContain("Trace callers of changed non-route files");
+    });
+    it("omits Step 2.3 block when unmatchedFiles is undefined", () => {
+        const params = baseParams({ unmatchedFiles: undefined });
+        const output = buildAnalysisOutputText(params);
+        expect(output).not.toContain("Step 2.3");
+    });
+    it("omits Step 2.3 block when scope is full_repo even if unmatchedFiles is non-empty", () => {
+        const params = baseParams({
+            analysisScope: AnalysisScope.FullRepo,
+            unmatchedFiles: ["src/services/SomeService.ts"],
+        });
+        const output = buildAnalysisOutputText(params);
+        expect(output).not.toContain("Step 2.3");
+    });
+    it("Step 2.3 appears before Step 2.5 in the output", () => {
+        const params = baseParams({
+            unmatchedFiles: ["src/utils/helper.ts"],
+        });
+        const output = buildAnalysisOutputText(params);
+        const pos23 = output.indexOf("Step 2.3");
+        const pos25 = output.indexOf("Step 2.5");
+        expect(pos23).toBeGreaterThan(-1);
+        expect(pos25).toBeGreaterThan(-1);
+        expect(pos23).toBeLessThan(pos25);
+    });
+    it("Step 2.5 critical-patterns block is always present regardless of unmatchedFiles", () => {
+        const withUnmatched = buildAnalysisOutputText(baseParams({ unmatchedFiles: ["src/utils/foo.ts"] }));
+        const withoutUnmatched = buildAnalysisOutputText(baseParams({ unmatchedFiles: [] }));
+        expect(withUnmatched).toContain("Step 2.5: Identify critical patterns");
+        expect(withoutUnmatched).toContain("Step 2.5: Identify critical patterns");
+    });
+    it("omits Step 2.3 block when unmatchedFiles contains only frontend component files (UI-only PR)", () => {
+        // Frontend files (.tsx, .jsx, .vue, .svelte) end up in unmatchedFiles because they
+        // have no route annotations, but they have no HTTP callers to trace — emitting
+        // Step 2.3 for them would produce irrelevant instructions. (Copilot review fix)
+        const params = baseParams({
+            unmatchedFiles: [
+                "src/components/Button.tsx",
+                "src/pages/Dashboard.jsx",
+                "src/views/UserProfile.vue",
+                "src/routes/Settings.svelte",
+            ],
+        });
+        const output = buildAnalysisOutputText(params);
+        expect(output).not.toContain("Step 2.3");
+        expect(output).not.toContain("Trace callers of changed non-route files");
+    });
+    it("omits Step 2.3 block when unmatchedFiles contains only non-code files (docs/config)", () => {
+        // README.md, package.json, etc. have no changed symbols to trace — listing them
+        // in Step 2.3 is misleading. (Copilot review fix)
+        const params = baseParams({
+            unmatchedFiles: [
+                "README.md",
+                "package.json",
+                "docker-compose.yml",
+                ".github/workflows/ci.yml",
+            ],
+        });
+        const output = buildAnalysisOutputText(params);
+        expect(output).not.toContain("Step 2.3");
+        expect(output).not.toContain("Trace callers of changed non-route files");
+    });
+    it("emits Step 2.3 for backend code files but excludes frontend/non-code siblings", () => {
+        // Mixed PR: one Java helper + one React component + one config file.
+        // Only the Java file should appear in the Step 2.3 bullets.
+        const params = baseParams({
+            unmatchedFiles: [
+                "server/helpers/DataUtils.java",
+                "client/components/ActionButton.tsx",
+                "package.json",
+            ],
+        });
+        const output = buildAnalysisOutputText(params);
+        expect(output).toContain("Step 2.3");
+        expect(output).toContain("DataUtils.java");
+        expect(output).not.toContain("ActionButton.tsx");
+        expect(output).not.toContain("package.json");
+    });
+    it("omits Step 2.3 when unmatchedFiles contains .ts/.js frontend files but isUIOnly is true", () => {
+        // Angular services, React hooks, Vue composables — all .ts/.js — pass the
+        // BACKEND_CODE_EXT filter but belong to a UI-only PR. The !isUIOnly guard
+        // prevents Step 2.3 from emitting contradictory caller-tracing instructions
+        // alongside the UI-only Step 2 guidance. (Copilot review fix)
+        const params = baseParams({
+            // parsedDiff.changedFiles drives isUIOnly detection; all frontend-ext → isUIOnly=true
+            parsedDiff: {
+                changedFiles: ["src/services/auth.service.ts", "src/hooks/useAuth.ts"],
+                newEndpoints: [],
+                modifiedEndpoints: [],
+            },
+            unmatchedFiles: ["src/services/auth.service.ts", "src/hooks/useAuth.ts"],
+        });
+        const output = buildAnalysisOutputText(params);
+        expect(output).not.toContain("Step 2.3");
+        expect(output).not.toContain("Trace callers of changed non-route files");
+    });
+});

package/build/prompts/test-recommendation/recommendationSections.js CHANGED Viewed

@@ -1,7 +1,9 @@
 import { isContractConsumerModeEnabled } from "../../utils/featureFlags.js";
+import { resolveServiceDetailsRef } from "../../utils/utils.js";
 import { WorkspaceAuthType, getAuthScheme, isAuthorizationHeaderName, AUTH_MIDDLEWARE_PATTERNS_STR } from "../../utils/workspaceAuth.js";
-// Cached at module-load — the flag is process-wide and cannot change per call.
+// Cached at module-load — flags are process-wide and cannot change per call.
 const CONSUMER_MODE_ENABLED = isContractConsumerModeEnabled();
+const SERVICE_REFS = resolveServiceDetailsRef();
 export const MAX_TESTS_TO_GENERATE = 3;
 export const MAX_RECOMMENDATIONS = 20;
 export const MAX_CRITICAL_TESTS = 3;
@@ -356,7 +358,7 @@ Only provider-side contract tests are supported. Pass \`providerMode: true\` for
 3. Interact using \`browser_click\`, \`browser_type\`, \`browser_fill_form\`, etc.
 4. \`browser_snapshot\` after each interaction that changes the page
 5. \`skyramp_export_zip\` with an **absolute** output path: \`<repositoryPath>/.skyramp/<test_name>_trace.zip\`
-6. \`skyramp_ui_test_generation\` with \`playwrightInput\` = the **absolute** path of the exported zip, and \`outputDir\` = the **frontend** service's \`testDirectory\` from workspace.yml (e.g. \`frontend/tests\`). Do NOT use the backend service's testDirectory — UI tests must go in the frontend service's test directory.
+6. \`skyramp_ui_test_generation\` with \`playwrightInput\` = the **absolute** path of the exported zip, and \`outputDir\` = ${SERVICE_REFS.frontendTestDirRef} (e.g. \`frontend/tests\`). Do NOT use the backend service's testDirectory — UI tests must go in the frontend service's test directory.
 Tips: For custom dropdowns (Radix, MUI): click combobox → snapshot → click option (NOT \`browser_select_option\`).

package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { logger } from "../../utils/logger.js";
 import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
 import { ScenarioSource, AnalysisScope } from "../../types/RepositoryAnalysis.js";
 import { SCENARIO_CATEGORIES } from "../../types/TestRecommendation.js";
+import { inferExpectedStatus } from "../../utils/httpDefaults.js";
 export function mergeEnrichedScenarios(serverScenarios, raw) {
     const rejectionNotes = [];
     let parsed;
@@ -55,10 +56,7 @@ export function mergeEnrichedScenarios(serverScenarios, raw) {
                 queryParams: st.queryParams,
                 responseBody: st.responseBody,
                 // Default status code by method if omitted to avoid `statusCode: undefined` in tool calls
-                expectedStatusCode: st.expectedStatusCode ??
-                    (String(st.method ?? "").toUpperCase() === "POST" ? 201
-                        : String(st.method ?? "").toUpperCase() === "DELETE" ? 204
-                            : 200),
+                expectedStatusCode: st.expectedStatusCode ?? inferExpectedStatus(String(st.method ?? "GET")),
                 expectedResponseFields: st.expectedResponseFields,
                 bodyMustInclude: st.bodyMustInclude,
                 chainsFrom: st.chainsFrom,
@@ -153,11 +151,29 @@ export function registerRecommendTestsPrompt(server) {
             }
         }
         if (!fullAnalysis) {
+            if (sessionId) {
+                logger.warning(`Session not found in memory (sessionId=${sessionId}) — server may have restarted; falling back to state file`);
+            }
             fullAnalysis = state.repositoryAnalysis.fullAnalysis;
         }
         if (!fullAnalysis) {
             throw new Error(`Analysis data for session not found in memory or on disk. Re-run skyramp_analyze_changes.`);
         }
+        // Hydrate testLocations from the disk-persisted field when fullAnalysis came from disk
+        // (after a server restart, fullAnalysis is loaded from state.repositoryAnalysis.fullAnalysis
+        // but testLocations was persisted separately under state.repositoryAnalysis.testLocations)
+        if (fullAnalysis.existingTests &&
+            !fullAnalysis.existingTests.testLocations &&
+            state.repositoryAnalysis.testLocations) {
+            fullAnalysis = {
+                ...fullAnalysis,
+                existingTests: {
+                    ...fullAnalysis.existingTests,
+                    testLocations: state.repositoryAnalysis.testLocations,
+                },
+            };
+            logger.debug("Hydrated existingTests.testLocations from disk-persisted state", { sessionId });
+        }
         // Normalize legacy state files: before AnalysisScope enum normalization, state stored
         // the user-facing param value "branch_diff". Map it explicitly so diff-mode detection
         // works correctly on state created before this deployment (2-hour TTL window).

package/build/prompts/test-recommendation/test-recommendation-prompt.js CHANGED Viewed

@@ -6,6 +6,9 @@ import { extractResourceFromPath } from "../../utils/routeParsers.js";
 import { buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildToolWorkflows, buildTestPatternGuidelines, buildTestQualityCriteria, buildFewShotExamples, buildVerificationChecklist, buildGenerationRules, getAuthSnippets, MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, } from "./recommendationSections.js";
 import { CATEGORY_PRIORITY, TEST_CATEGORIES } from "../../types/TestRecommendation.js";
 import { buildScopeAssessmentSection, isFrontendFile } from "./scopeAssessment.js";
+import { resolveServiceDetailsRef } from "../../utils/utils.js";
+// Cached at module-load — flag is process-wide and cannot change per call.
+const SERVICE_REFS = resolveServiceDetailsRef();
 function formatTestLocations(locs) {
     const entries = Object.entries(locs || {});
     if (entries.length === 0)
@@ -448,7 +451,7 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
                 ? (`**#${rank} — GENERATE** | ui | workflow | new\n` +
                     `Scenario: ui-test-from-trace-${rank} (rename from the actual changed component/flow)\n` +
                     `Validates: UI interactions for a changed frontend component or flow.\n\n` +
-                    `**Tool**: \`skyramp_ui_test_generation({ playwrightInput: "<discovered_trace_file_path>", outputDir: "<frontend service testDirectory from workspace.yml e.g. frontend/tests>" })\``)
+                    `**Tool**: \`skyramp_ui_test_generation({ playwrightInput: "<discovered_trace_file_path>", outputDir: "<frontend_output_dir>" })\` — set \`outputDir\` to ${SERVICE_REFS.frontendTestDirRef}`)
                 : (`**#${rank} — GENERATE** | ui | workflow | new\n` +
                     `Scenario: ui-test-for-changed-component-${rank} (rename from the actual changed component/flow)\n` +
                     `Validates: UI interactions for changed frontend component/flow ${rank}.\n\n` +
@@ -457,7 +460,7 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
                     `  2. Interact with the changed component (read the diff to identify which component changed and what interactions it supports)\n` +
                     `  3. \`browser_snapshot()\` after each key interaction\n` +
                     `  4. \`skyramp_export_zip({ outputPath: "${zipPath}" })\` — absolute path\n` +
-                    `  5. \`skyramp_ui_test_generation({ playwrightInput: "${zipPath}", outputDir: "<frontend service testDirectory from workspace.yml e.g. frontend/tests>" })\`\n\n` +
+                    `  5. \`skyramp_ui_test_generation({ playwrightInput: "${zipPath}", outputDir: "<frontend_output_dir>" })\` — set \`outputDir\` to ${SERVICE_REFS.frontendTestDirRef}\n\n` +
                     `Each item must target a distinct changed component or user flow.`);
         }).join("\n\n")
         : "";
@@ -469,7 +472,7 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
             ? (`**#${uiRank} — GENERATE** | ui | workflow | new\n` +
                 `Scenario: ui-test-for-changed-components (rename from the actual changed component/flow)\n` +
                 `Validates: UI interactions for the changed frontend components in this PR.\n\n` +
-                `**Tool**: \`skyramp_ui_test_generation({ playwrightInput: "<discovered_trace_file_path>", outputDir: "<frontend service testDirectory from workspace.yml e.g. frontend/tests>" })\``)
+                `**Tool**: \`skyramp_ui_test_generation({ playwrightInput: "<discovered_trace_file_path>", outputDir: "<frontend_output_dir>" })\` — set \`outputDir\` to ${SERVICE_REFS.frontendTestDirRef}`)
             : (`**#${uiRank} — GENERATE** | ui | workflow | new\n` +
                 `Scenario: ui-test-for-changed-components (rename from the actual changed component/flow)\n` +
                 `Validates: UI interactions for the changed frontend components in this PR.\n\n` +
@@ -478,7 +481,7 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
                 `  2. Interact with the changed component (read the diff to identify which component changed and what interactions it supports)\n` +
                 `  3. \`browser_snapshot()\` after each key interaction\n` +
                 `  4. \`skyramp_export_zip({ outputPath: "<repositoryPath>/.skyramp/ui_mixed_pr_trace.zip" })\` — absolute path\n` +
-                `  5. \`skyramp_ui_test_generation({ playwrightInput: "<repositoryPath>/.skyramp/ui_mixed_pr_trace.zip", outputDir: "<frontend service testDirectory from workspace.yml e.g. frontend/tests>" })\`\n\n` +
+                `  5. \`skyramp_ui_test_generation({ playwrightInput: "<repositoryPath>/.skyramp/ui_mixed_pr_trace.zip", outputDir: "<frontend_output_dir>" })\` — set \`outputDir\` to ${SERVICE_REFS.frontendTestDirRef}\n\n` +
                 `Derive scenario name and steps from the actual changed frontend files.`)
         : "";
     const generateBlocks = generateItems.map((item, i) => {
@@ -571,7 +574,7 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
     const uiGuidance = !isUIOnlyPR ? `
 **UI/E2E tests (add per your Budget Plan):** If your Budget Plan requires UI/E2E items beyond what is already in your GENERATE list, append an [ADDITIONAL] entry for each. If a UI test already occupies a GENERATE slot above, that slot satisfies your UI/E2E generate count — do NOT add it again to ADDITIONAL. Tool workflow for each new item:
 - **E2E**: ${hasTraces ? "Use discovered trace/recording files with `skyramp_e2e_test_generation`." : "Add to additionalRecommendations with a note that both a backend API trace (`skyramp_start_trace_collection` / `skyramp_stop_trace_collection`) and a browser Playwright recording must be collected in a live environment first. Do NOT attempt `skyramp_e2e_test_generation` without both traces present."}
-- **UI**: ${hasTraces ? "Use an existing Playwright `.zip` trace with `skyramp_ui_test_generation`." : "Record a trace using `browser_navigate` + `browser_snapshot` + `skyramp_export_zip`, then call `skyramp_ui_test_generation({ playwrightInput: \"<zip_path>\", outputDir: \"<frontend testDirectory from workspace.yml>\" })`."}
+- **UI**: ${hasTraces ? "Use an existing Playwright `.zip` trace with `skyramp_ui_test_generation`." : `Record a trace using \`browser_navigate\` + \`browser_snapshot\` + \`skyramp_export_zip\`, then call \`skyramp_ui_test_generation({ playwrightInput: "<zip_path>", outputDir: "<frontend_output_dir>" })\` — set \`outputDir\` to ${SERVICE_REFS.frontendTestDirRef}.`}
 Derive scenario names and steps from the actual changed frontend files. If your Budget Plan calls for 0% UI/E2E, omit this entirely.` : "";
     const supplementNote = `\n**If your Budget Plan total exceeds the pre-ranked items listed above:** draft additional tests from source-code enrichment (Step 1). For each new or changed endpoint, identify boundary or variation scenarios — formula parameters, search/filter constraints, required field validation. Only after exhausting PR-specific scenarios, add generic patterns (auth boundary → 401, non-existent ID → 404). Do NOT supplement with tests whose endpoint + test type match a GENERATE item.`;
     // ── PR / branch-diff mode: execution plan ────────────────────────────────
@@ -753,7 +756,7 @@ Output should be concise and immediately actionable.`
                 changedLines.push(`  ${m.method} ${ep.path} [removed]`);
             }
         }
-        endpointLines = `**Changed in this PR:**\n${changedLines.join("\n") || "  none"}\n\n**Other endpoints (reference only — do not prioritize for testing):**\n${otherLines.join("\n") || "  none"}`;
+        endpointLines = `**Likely changed in this PR (from static file→endpoint mapping — verify against diff in Step 2):**\n${changedLines.join("\n") || "  none"}\n\n**Other endpoints (reference only):**\n${otherLines.join("\n") || "  none"}`;
     }
     else {
         endpointLines = allEndpoints
@@ -826,7 +829,7 @@ Framework: ${analysis.projectClassification.primaryFramework} (${analysis.projec
 Project type: ${analysis.projectClassification.projectType}
 Auth: ${authMethod} (header: ${authHeaderValue}${authTypeValue ? `, type: ${authTypeValue}` : ""})
 Base URL: ${analysis.apiEndpoints.baseUrl}
-Endpoints (${analysis.apiEndpoints.totalCount}):
+Candidate endpoints from static scan — unverified, confirm paths against spec or source before use (${analysis.apiEndpoints.totalCount}):
 ${endpointLines}${testFingerprint}
 `.trim();
     // ── Branch diff ──
@@ -847,7 +850,7 @@ Affected services: ${diffContext.affectedServices.join(", ") || "N/A"}
 Focus on tests that validate these changes and how they interact with existing resources.
 For removed endpoints: verify they now return 404 or the appropriate deprecation status code.
-Allocate your test budget to endpoints listed under "Changed in this PR". Use other endpoints only as setup steps (e.g. creating a resource before testing its deletion).
+Allocate your test budget to endpoints listed under "Likely changed in this PR". Use other endpoints only as setup steps (e.g. creating a resource before testing its deletion).
 `;
     }
     // ── Interactions ──

package/build/prompts/test-recommendation/test-recommendation-prompt.test.js CHANGED Viewed

@@ -934,9 +934,9 @@ describe("buildRecommendationPrompt — multi-method endpoint partitioning", ()
         });
         const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 10);
         // Both GET and POST for /api/products should be in the "Likely changed in this PR" section
-        expect(prompt).toContain("Changed in this PR");
-        expect(prompt).toMatch(/Changed in this PR:[\s\S]*GET \/api\/products/);
-        expect(prompt).toMatch(/Changed in this PR:[\s\S]*POST \/api\/products/);
+        expect(prompt).toContain("Likely changed in this PR");
+        expect(prompt).toMatch(/Likely changed in this PR[\s\S]*GET \/api\/products/);
+        expect(prompt).toMatch(/Likely changed in this PR[\s\S]*POST \/api\/products/);
         // /api/items should NOT be in changed section
         expect(prompt).toMatch(/Other endpoints[\s\S]*GET \/api\/items/);
     });
@@ -983,8 +983,8 @@ describe("buildRecommendationPrompt — multi-method endpoint partitioning", ()
         });
         const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 10);
         // Both products and orders should be in changed section
-        expect(prompt).toMatch(/Changed in this PR:[\s\S]*GET \/api\/products/);
-        expect(prompt).toMatch(/Changed in this PR:[\s\S]*POST \/api\/orders/);
+        expect(prompt).toMatch(/Likely changed in this PR[\s\S]*GET \/api\/products/);
+        expect(prompt).toMatch(/Likely changed in this PR[\s\S]*POST \/api\/orders/);
     });
 });
 // ---------------------------------------------------------------------------
@@ -1021,7 +1021,7 @@ describe("buildRecommendationPrompt — removed endpoint listing", () => {
         });
         const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 10);
         expect(prompt).toContain("DELETE /api/legacy [removed]");
-        expect(prompt).toContain("Changed in this PR");
+        expect(prompt).toContain("Likely changed in this PR");
     });
 });
 // ---------------------------------------------------------------------------