npm - @skyramp/mcp - Versions diffs - 0.1.2 → 0.1.4 - Mend

@skyramp/mcp 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

package/build/prompts/test-maintenance/driftAnalysisSections.js +2 -2
package/build/prompts/test-recommendation/analysisOutputPrompt.js +26 -21
package/build/prompts/test-recommendation/recommendationSections.js +42 -10
package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +2 -5
package/build/prompts/test-recommendation/test-recommendation-prompt.js +114 -157
package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +250 -18
package/build/prompts/testbot/testbot-prompts.js +17 -9
package/build/services/ScenarioGenerationService.js +2 -1
package/build/services/TestDiscoveryService.js +22 -7
package/build/services/TestDiscoveryService.test.js +44 -0
package/build/tools/generate-tests/generateBatchScenarioRestTool.js +3 -4
package/build/tools/generate-tests/generateBatchScenarioRestTool.test.js +9 -0
package/build/tools/submitReportTool.js +4 -3
package/build/tools/submitReportTool.test.js +16 -2
package/build/tools/test-management/analyzeChangesTool.js +264 -140
package/build/tools/test-management/analyzeChangesTool.test.js +3 -1
package/build/tools/test-management/analyzeTestHealthTool.js +5 -0
package/build/types/RepositoryAnalysis.js +8 -0
package/build/types/TestRecommendation.js +2 -0
package/build/utils/branchDiff.js +24 -8
package/build/utils/featureFlags.js +25 -0
package/build/utils/httpDefaults.js +12 -0
package/build/utils/repoScanner.js +16 -2
package/build/utils/routeParsers.js +79 -79
package/build/utils/routeParsers.test.js +192 -66
package/build/utils/scenarioDrafting.js +116 -497
package/build/utils/scenarioDrafting.test.js +260 -480
package/package.json +1 -1

package/build/prompts/test-recommendation/test-recommendation-prompt.test.js CHANGED

@@ -459,7 +459,7 @@ describe("buildRecommendationPrompt — maxGenerateOverride", () => {
         expect(prompt).toContain("Test type mix — MANDATORY");
         expect(prompt).toContain("Present up to 6 recommendations.");
     });
-    it("full_repo mode pre-allocates E2E and UI sections for full-stack repos", () => {
+    it("full_repo mode includes E2E/UI guidance for full-stack repos via Budget Plan", () => {
         const fullStackAnalysis = minimalAnalysis({
             projectClassification: {
                 projectType: "full-stack",
@@ -476,11 +476,11 @@ describe("buildRecommendationPrompt — maxGenerateOverride", () => {
             },
         });
         const prompt = buildRecommendationPrompt(fullStackAnalysis, AnalysisScope.FullRepo, 10);
-        // E2E and UI sections must be present even though scenarioDrafting only produces backend types
-        expect(prompt).toContain("### E2E");
-        expect(prompt).toContain("### UI");
+        // E2E/UI split is now driven by LLM's Budget Plan, not hardcoded pre-allocation.
+        // The prompt must still reference the tools and provide guidance.
         expect(prompt).toContain("skyramp_e2e_test_generation");
         expect(prompt).toContain("skyramp_ui_test_generation");
+        expect(prompt).toContain("Budget Plan");
         // Backend sections should still be present
         expect(prompt).toContain("### Integration");
     });
@@ -689,7 +689,7 @@ describe("buildRecommendationPrompt — GENERATE slot allocation", () => {
     function makeScenario(name) {
         return minimalScenario({ scenarioName: name, category: "new_endpoint" });
     }
-    it("UI-only PR: all GENERATE slots are UI placeholders (no backend)", () => {
+    it("UI-only PR: provides UI guidance with tool workflow (LLM derives scenarios)", () => {
         const analysis = minimalAnalysis({
             businessContext: { mainPurpose: "Test", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [] },
             branchDiffContext: {
@@ -702,15 +702,14 @@ describe("buildRecommendationPrompt — GENERATE slot allocation", () => {
             },
         });
         const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 10);
-        // The GENERATE slots are the UI placeholder blocks — check their specific scenario names
-        expect(prompt).toContain("#1 — GENERATE** | ui | workflow | new");
-        expect(prompt).toContain("ui-test-for-changed-component-1");
-        expect(prompt).toContain("ui_test_1_trace.zip");
+        // UI-only PR: guidance tells LLM to derive UI tests from changed files
+        expect(prompt).toContain("UI-only PR");
         expect(prompt).toContain("skyramp_ui_test_generation");
-        // Each slot targets a distinct changed component/flow
+        expect(prompt).toContain("skyramp_export_zip");
+        // Each item must be distinct
         expect(prompt).toContain("distinct changed component or user flow");
     });
-    it("mixed PR: last GENERATE slot is UI, preceding slots are backend scenarios", () => {
+    it("mixed PR: all GENERATE slots are backend; UI/E2E added per Budget Plan", () => {
         const scenarios = [
             makeScenario("orders-create"),
             makeScenario("orders-update"),
@@ -728,14 +727,12 @@ describe("buildRecommendationPrompt — GENERATE slot allocation", () => {
             },
         });
         const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 10);
-        // Last GENERATE slot is UI (not E2E)
-        expect(prompt).toContain("— GENERATE** | ui");
-        expect(prompt).toContain("skyramp_ui_test_generation");
-        expect(prompt).not.toContain("— GENERATE** | e2e");
-        // At least one backend scenario in GENERATE (#1 or #2)
+        // Backend scenarios fill GENERATE slots (no hardcoded UI placeholder)
         expect(prompt).toContain("#1 — GENERATE** | integration");
-        // Scenario name from the pre-ranked list (orders-create or orders-update)
         expect(prompt).toContain("orders-create");
+        // UI/E2E guidance is present for the LLM to add per its Budget Plan
+        expect(prompt).toContain("UI/E2E tests (add per your Budget Plan)");
+        expect(prompt).toContain("skyramp_ui_test_generation");
     });
     it("backend-only PR: all GENERATE slots are backend scenarios (no E2E injection)", () => {
         const scenarios = [makeScenario("items-create"), makeScenario("items-get"), makeScenario("items-delete")];
@@ -842,12 +839,21 @@ describe("buildRecommendationPrompt — Mandatory Reasoning Protocol", () => {
         expect(protocol).toContain("requestBody");
         expect(protocol).toContain("endpointURL");
         expect(protocol).toContain("authHeader");
-        expect(protocol).toContain("FK path params");
+        expect(protocol).toContain("Foreign Key path params");
     });
     it("reasoning protocol instructs to read source file when value cannot be sourced", () => {
         const protocol = buildReasoningProtocol();
         expect(protocol).toContain("read the relevant source file");
     });
+    it("reasoning protocol includes Coverage Reasoning Block for all 3 PR types", () => {
+        const protocol = buildReasoningProtocol();
+        expect(protocol).toContain("Coverage Reasoning Block");
+        expect(protocol).toContain("backend-only PRs");
+        expect(protocol).toContain("frontend-only PRs");
+        expect(protocol).toContain("mixed (frontend + backend) PRs");
+        expect(protocol).toContain("All HTTP methods affected");
+        expect(protocol).toContain("Testable surfaces:");
+    });
 });
 // ---------------------------------------------------------------------------
 // Tests — Context Fetching Guidance
@@ -886,6 +892,145 @@ describe("buildRecommendationPrompt — Tool Contract Framing", () => {
     });
 });
 // ---------------------------------------------------------------------------
+// Tests — Multi-method endpoint partitioning
+// ---------------------------------------------------------------------------
+describe("buildRecommendationPrompt — multi-method endpoint partitioning", () => {
+    it("classifies all methods of a changed endpoint as changed", () => {
+        // When classifyEndpointsByChangedFiles identifies a file as changed,
+        // all methods from that endpoint's scanned catalog entry are included
+        // with concrete methods (no MULTI sentinels).
+        const analysis = minimalAnalysis({
+            apiEndpoints: {
+                totalCount: 2,
+                baseUrl: "http://localhost:3000",
+                endpoints: [
+                    {
+                        path: "/api/products",
+                        resourceGroup: "products",
+                        pathParams: [],
+                        methods: [
+                            { method: "GET", description: "List products", queryParams: [], authRequired: false, sourceFile: "app/api/products/route.ts", interactions: [] },
+                            { method: "POST", description: "Create product", queryParams: [], authRequired: false, sourceFile: "app/api/products/route.ts", interactions: [] },
+                        ],
+                    },
+                    {
+                        path: "/api/items",
+                        resourceGroup: "items",
+                        pathParams: [],
+                        methods: [
+                            { method: "GET", description: "List items", queryParams: [], authRequired: false, sourceFile: "routes/items.ts", interactions: [] },
+                        ],
+                    },
+                ],
+            },
+            branchDiffContext: {
+                baseBranch: "main",
+                currentBranch: "feature/products",
+                changedFiles: ["app/api/products/route.ts"],
+                newEndpoints: [{
+                        path: "/api/products",
+                        methods: [
+                            { method: "GET", sourceFile: "app/api/products/route.ts", interactionCount: 0 },
+                            { method: "POST", sourceFile: "app/api/products/route.ts", interactionCount: 0 },
+                        ],
+                    }],
+                modifiedEndpoints: [],
+                affectedServices: [],
+            },
+        });
+        const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 10);
+        // Both GET and POST for /api/products should be in "Changed in this PR"
+        expect(prompt).toContain("Changed in this PR");
+        expect(prompt).toMatch(/Changed in this PR:[\s\S]*GET \/api\/products/);
+        expect(prompt).toMatch(/Changed in this PR:[\s\S]*POST \/api\/products/);
+        // /api/items should NOT be in changed section
+        expect(prompt).toMatch(/Other endpoints[\s\S]*GET \/api\/items/);
+    });
+    it("handles mix of new and modified endpoints with concrete methods", () => {
+        const analysis = minimalAnalysis({
+            apiEndpoints: {
+                totalCount: 2,
+                baseUrl: "http://localhost:3000",
+                endpoints: [
+                    {
+                        path: "/api/products",
+                        resourceGroup: "products",
+                        pathParams: [],
+                        methods: [
+                            { method: "GET", description: "List", queryParams: [], authRequired: false, sourceFile: "routes.ts", interactions: [] },
+                            { method: "POST", description: "Create", queryParams: [], authRequired: false, sourceFile: "routes.ts", interactions: [] },
+                        ],
+                    },
+                    {
+                        path: "/api/orders",
+                        resourceGroup: "orders",
+                        pathParams: [],
+                        methods: [
+                            { method: "POST", description: "Create order", queryParams: [], authRequired: false, sourceFile: "routes.ts", interactions: [] },
+                        ],
+                    },
+                ],
+            },
+            branchDiffContext: {
+                baseBranch: "main",
+                currentBranch: "feature/mix",
+                changedFiles: ["routes.ts"],
+                newEndpoints: [
+                    { path: "/api/products", methods: [
+                            { method: "GET", sourceFile: "routes.ts", interactionCount: 0 },
+                            { method: "POST", sourceFile: "routes.ts", interactionCount: 0 },
+                        ] },
+                ],
+                modifiedEndpoints: [
+                    { path: "/api/orders", methods: [{ method: "POST", sourceFile: "routes.ts", changeType: "modified" }] },
+                ],
+                affectedServices: [],
+            },
+        });
+        const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 10);
+        // Both products and orders should be in changed section
+        expect(prompt).toMatch(/Changed in this PR:[\s\S]*GET \/api\/products/);
+        expect(prompt).toMatch(/Changed in this PR:[\s\S]*POST \/api\/orders/);
+    });
+});
+// ---------------------------------------------------------------------------
+// Tests — Removed endpoint [removed] marker in prompt (Fix 3 verification)
+// ---------------------------------------------------------------------------
+describe("buildRecommendationPrompt — removed endpoint listing", () => {
+    it("appends [removed] marker for removed endpoints not in current catalog", () => {
+        const analysis = minimalAnalysis({
+            apiEndpoints: {
+                totalCount: 1,
+                baseUrl: "http://localhost:3000",
+                endpoints: [{
+                        path: "/api/items",
+                        resourceGroup: "items",
+                        pathParams: [],
+                        methods: [{
+                                method: "GET", description: "List items", queryParams: [],
+                                authRequired: false, sourceFile: "routes.ts", interactions: [],
+                            }],
+                    }],
+            },
+            branchDiffContext: {
+                baseBranch: "main",
+                currentBranch: "feature/remove",
+                changedFiles: ["routes.ts"],
+                newEndpoints: [],
+                modifiedEndpoints: [],
+                removedEndpoints: [{
+                        path: "/api/legacy",
+                        methods: [{ method: "DELETE", sourceFile: "routes.ts", changeType: "removed" }],
+                    }],
+                affectedServices: [],
+            },
+        });
+        const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 10);
+        expect(prompt).toContain("DELETE /api/legacy [removed]");
+        expect(prompt).toContain("Changed in this PR");
+    });
+});
+// ---------------------------------------------------------------------------
 // Tests — Long-context best practices: XML tags structure
 // ---------------------------------------------------------------------------
 describe("buildRecommendationPrompt — XML tag structure (long-context best practice)", () => {
@@ -1373,3 +1518,90 @@ describe("externalDedupKey", () => {
         expect(externalDedupKey(scenario)).toBe("POST::orders::contract");
     });
 });
+// ---------------------------------------------------------------------------
+// Tests — UI-only PR classification fix
+// ---------------------------------------------------------------------------
+describe("buildRecommendationPrompt — isUIOnlyPR classification", () => {
+    it("does not classify as UI-only when backend service files changed but no endpoints detected", () => {
+        const analysis = minimalAnalysis({
+            branchDiffContext: {
+                baseBranch: "main",
+                currentBranch: "feature/field-rbac",
+                changedFiles: [
+                    "api/src/services/items.ts",
+                    "api/src/services/permissions.ts",
+                    "api/src/middleware/validate-access.ts",
+                    "app/src/components/fields.vue",
+                ],
+                newEndpoints: [],
+                modifiedEndpoints: [],
+                affectedServices: ["items", "permissions"],
+            },
+        });
+        const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 6);
+        expect(prompt).toContain("Endpoint Discovery Required");
+        expect(prompt).not.toContain("UI-only PR");
+        expect(prompt).not.toContain("frontend-only PR — set **100% UI/E2E**");
+    });
+    it("correctly classifies as UI-only when only frontend files changed", () => {
+        const analysis = minimalAnalysis({
+            branchDiffContext: {
+                baseBranch: "main",
+                currentBranch: "feature/ui-tweak",
+                changedFiles: [
+                    "app/src/components/fields.vue",
+                    "app/src/views/settings.vue",
+                ],
+                newEndpoints: [],
+                modifiedEndpoints: [],
+                affectedServices: [],
+            },
+        });
+        const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 6);
+        expect(prompt).toContain("UI-only PR");
+        expect(prompt).not.toContain("Endpoint Discovery Required");
+    });
+    it("does not classify as UI-only when endpoints are directly detected", () => {
+        const analysis = minimalAnalysis({
+            branchDiffContext: {
+                baseBranch: "main",
+                currentBranch: "feature/new-route",
+                changedFiles: [
+                    "api/src/routes/items.ts",
+                    "app/src/components/fields.vue",
+                ],
+                newEndpoints: [{
+                        path: "/api/items",
+                        methods: [{ method: "POST", sourceFile: "api/src/routes/items.ts", interactionCount: 1 }],
+                    }],
+                modifiedEndpoints: [],
+                affectedServices: ["items"],
+            },
+        });
+        const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 6);
+        expect(prompt).not.toContain("UI-only PR");
+        expect(prompt).toContain("Mixed PR");
+    });
+    it("backend-only PR: no UI-only or Mixed classification in mode preamble", () => {
+        const analysis = minimalAnalysis({
+            branchDiffContext: {
+                baseBranch: "main",
+                currentBranch: "feature/add-endpoint",
+                changedFiles: [
+                    "api/src/routes/orders.ts",
+                    "api/src/services/orders.ts",
+                ],
+                newEndpoints: [{
+                        path: "/api/orders",
+                        methods: [{ method: "POST", sourceFile: "api/src/routes/orders.ts", interactionCount: 2 }],
+                    }],
+                modifiedEndpoints: [],
+                affectedServices: ["orders"],
+            },
+        });
+        const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 6);
+        // Mode preamble should NOT label this as UI-only or Mixed
+        expect(prompt).not.toContain("**UI-only PR**");
+        expect(prompt).not.toContain("**Mixed PR**");
+    });
+});

package/build/prompts/testbot/testbot-prompts.js CHANGED

@@ -4,7 +4,7 @@ import { AnalyticsService } from "../../services/AnalyticsService.js";
 import { MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, PATH_PARAM_UUID_GUIDANCE, AUTH_CONFLICT_ERROR_MSG, } from "../test-recommendation/recommendationSections.js";
 import { buildDriftAnalysisPrompt } from "../test-maintenance/drift-analysis-prompt.js";
 import { getTraceRecordingPromptText } from "../../playwright/traceRecordingPrompt.js";
-import { isContractConsumerModeEnabled } from "../../utils/featureFlags.js";
+import { isContractConsumerModeEnabled, resolveServiceDetailsRef } from "../../utils/featureFlags.js";
 import { readWorkspaceConfigRaw } from "../../utils/workspaceAuth.js";
 // Cached at module-load — the flag is process-wide and cannot change per call.
 const CONSUMER_MODE_ENABLED = isContractConsumerModeEnabled();
@@ -100,16 +100,23 @@ ${userPrompt ? "Generate only the tests that the user requested from the Additio
   Keep advancing until you have created exactly ${maxGenerate} new test files OR exhausted all candidates.
 - Example: If enrichment reveals that sending \`discount_value\` without \`discount_type\` silently orphans the value (a concrete bug), complete all planned GENERATE items first, then generate this discovered scenario as an extra test and report it in \`newTestsCreated\`.
 - Total generated: Follow the "Budget: N generate" line in the Execution Plan. Process every GENERATE-tagged item in order. Backfill from ADDITIONAL candidates (highest-ranked first) until \`newTestsCreated\` reaches ${maxGenerate} or all candidates are exhausted.
-- **UI test priority**: If the diff contains frontend/UI changes (e.g. \`.tsx\`, \`.jsx\`, \`.vue\`, \`.svelte\` files), you MUST attempt to generate at least one UI test. Use \`browser_navigate\` to the app's base URL — if the app responds, record a trace and generate the test.
-  **Skip only if one of these conditions is met:**
+- **Backend tests come first (MANDATORY):** All pre-ranked GENERATE items are backend tests (contract/integration). You MUST generate them before spending budget on UI tests. UI/E2E tests fill the Budget Plan's UI% allocation AFTER backend GENERATE items are complete — they do NOT replace backend tests.
+- **Backfill priority (MANDATORY):** When filling budget slots beyond the pre-ranked GENERATE items, follow this order strictly:
+  1. PR-endpoint edge cases — error paths (404, 422), auth boundary (401/403), validation for endpoints changed in this PR
+  2. Same-resource alternative flows — different HTTP methods or state variations on the same resource
+  3. Cross-resource workflows involving a PR endpoint
+  4. UI/E2E tests per your Budget Plan's UI% allocation
+  5. Unrelated endpoint coverage — NEVER backfill with tests for endpoints or pages not touched by this PR unless ALL options 1–4 are exhausted
+- **UI test generation** (only when Budget Plan allocates UI% > 0): Use \`browser_navigate\` to the app's base URL — if the app responds, record a trace and generate the test.
+  **Skip UI only if one of these conditions is met:**
   - **(a) App is unreachable** — \`browser_navigate\` fails or connection is refused.
-  - **(b) Unintegrated non-route component** — the changed file is a leaf component (not a framework route/entrypoint) that has no integration point in the running app. To confirm:
+  - **(b) Budget Plan allocates 0% UI/E2E** (backend-only PR with no frontend files changed).
+  - **(c) Unintegrated non-route component** — the changed file is a leaf component (not a framework route/entrypoint) that has no integration point in the running app. To confirm:
     1. Grep for the component's exported name AND its module path/filename across all production source files (excluding \`*.test.*\`, \`*.spec.*\`, \`*.stories.*\`, \`__tests__/\` directories — only production code imports count).
     2. If no production file imports, re-exports, or renders it, the component has no DOM node in the running app → unintegrated.
     3. **Exception**: if the same PR also adds a route/page file (e.g. under Next.js \`pages/\` or \`app/\`) that imports the component, the route IS the integration point — test through it.
   **Never** apply the unintegrated heuristic to framework route/entrypoint files themselves — those are always reachable by convention.
   **Never** generate tests for unrelated pages as a substitute for an unintegrated component.
-  This rule takes priority over generating additional backend-only tests.
 - **Always generate a test for critical bugs, even if it will fail.** When a GENERATE-tagged item targets a page or endpoint with a known bug, do NOT skip it because you expect the test to fail — a failing test that documents a bug is more valuable than a text-only description. This applies within the existing GENERATE budget; do not add extra tests beyond the plan.
    - For UI rendering bugs: navigate to the broken page and add a \`browser_assert\` that verifies the page rendered its expected content (e.g. assert the page heading is visible). The assertion will fail on the broken page, which is the correct outcome — it documents the bug as a failing test.
    - The assertion MUST target the broken page itself, not a different page that works. If \`/orders/{id}/edit\` crashes, assert on \`/orders/{id}/edit\` (e.g. "Edit Order" heading visible), NOT on \`/orders\`.
@@ -121,8 +128,8 @@ ${userPrompt ? "Generate only the tests that the user requested from the Additio
 - Critical-category tests are already ranked first by the pre-computed scores — follow the plan order.
 **Auth — determine ONCE, apply to EVERY tool call:**
-1. Read auth params from the Execution Plan returned by \`skyramp_analyze_changes\` — they are resolved directly from workspace.yml. **Use these as-is; do not infer or override.**
-2. If workspace shows \`authType: none\` or \`authHeader: ""\` → proceed with no auth (\`authHeader: ""\`). If tests fail due to 401/403, add to \`issuesFound\`: "Auth may be required — update \`api.authType\` in workspace.yml."
+1. Read auth params from the Execution Plan returned by \`skyramp_analyze_changes\` — they are pre-resolved from ${resolveServiceDetailsRef().authSourceRef}. **Use these as-is; do not infer or override.**
+2. If workspace shows \`authType: none\` or \`authHeader: ""\` → proceed with no auth (\`authHeader: ""\`). If tests fail due to 401/403, add to \`issuesFound\`: "Auth may be required — update \`api.authType\` in ${resolveServiceDetailsRef().authSourceRef}."
 3. **Auth params by header type — quick reference:**
    | \`authHeader\` | \`authType\` examples | \`skyramp_batch_scenario_*\` / \`skyramp_contract_*\` | \`skyramp_integration_test_generation\` (scenarioFile) |
@@ -133,7 +140,7 @@ ${userPrompt ? "Generate only the tests that the user requested from the Additio
    | none / \`""\` | \`none\` | \`authHeader: ""\` only when endpoint confirmed unauthenticated | \`authHeader: ""\` |
    **Omit \`authToken\` entirely** — \`SKYRAMP_PLACEHOLDER_TOKEN\` is auto-inserted at execution time.
-   The \`authScheme\` for \`Authorization\` headers is pre-resolved in the Execution Plan — use it exactly (e.g. \`"Bearer"\`, \`"Token"\`, or a custom scheme from \`api.authScheme\` in workspace.yml).
+   The \`authScheme\` for \`Authorization\` headers is pre-resolved in the Execution Plan — use it exactly (e.g. \`"Bearer"\`, \`"Token"\`, or a custom scheme from ${resolveServiceDetailsRef().authSourceRef}).
    Passing auth alongside workspace \`authType\` on \`skyramp_integration_test_generation\` causes "${AUTH_CONFLICT_ERROR_MSG}" — follow the table.
 4. Only pass \`authHeader: ""\` if you can confirm the endpoint is truly unauthenticated.
@@ -141,7 +148,7 @@ ${userPrompt ? "Generate only the tests that the user requested from the Additio
 **How to generate each type (for ADD):**
 - **Integration**: call \`skyramp_batch_scenario_test_generation\` with ALL steps in a single call (pass the \`steps\` array with method, path, requestBody, statusCode for each step). Then call \`skyramp_integration_test_generation\` with the returned scenario file.
   **Use the pre-built scenario JSON from the Execution Plan** — pass the steps array directly. Do NOT read source code models to construct request bodies if the plan already provides them.
-  Scenario JSON and test files go in the \`testDirectory\` from \`workspace.yml\` (visible in the service context block at the top of this prompt). Do NOT create a new \`tests/\` directory at the repo root — use the path the workspace config specifies. If no \`testDirectory\` is configured, default to the language-conventional location (e.g. \`src/test/java/...\` for Java, \`tests/\` for Python).
+  Scenario JSON and test files go in ${resolveServiceDetailsRef().testDirRef}. Do NOT create a new \`tests/\` directory at the repo root — use that path. If not configured, default to the language-conventional location (e.g. \`src/test/java/...\` for Java, \`tests/\` for Python).
   **Pipeline for speed**: Call ALL \`skyramp_batch_scenario_test_generation\` calls in one batch. When they return, call ALL \`skyramp_integration_test_generation\` calls in the next batch. Do NOT serialize per-scenario (batch→integration→batch→integration) — batch ALL scenarios first, then generate ALL integration tests.
 - **Contract**: call \`skyramp_contract_test_generation\` with \`endpointURL\`, \`method\`, and \`requestData\` for POST/PUT/PATCH.
   Pass \`apiSchema\` if an OpenAPI spec exists.
@@ -149,6 +156,7 @@ ${CONTRACT_MODE_GUIDANCE}
 - ${PATH_PARAM_UUID_GUIDANCE}
 - **UI**: First check for existing Playwright trace \`.zip\` files in the repo (Testbot scans recursively up to 5 directory levels — the per-service output directories, \`frontend/\`, \`public/\`, \`.skyramp/\`, or any subdirectory).
   If a relevant trace exists (covers the UI changes in this PR), use it directly with \`skyramp_ui_test_generation\` and \`modularizeCode: false\`.
+  **Output directory**: When calling \`skyramp_ui_test_generation\`, set \`outputDir\` to ${resolveServiceDetailsRef().frontendTestDirRef} — NOT \`.skyramp/\` (that directory is only for trace \`.zip\` files and workspace config).
   If NO relevant trace exists, **you MUST write out your full trace plan as text BEFORE calling \`browser_navigate\`**. Do not touch the browser until the plan is written.
   **Browser authentication (check BEFORE navigating)**: If \`<ui-credentials>\` appears in your context above, the app requires login. Parse the credentials — each line is \`username:password\`. Type the values verbatim (they are not encoded or escaped). Before navigating to ANY feature URL:

package/build/services/ScenarioGenerationService.js CHANGED

@@ -1,5 +1,6 @@
 import { AUTH_PLACEHOLDER_TOKEN } from "../types/TestTypes.js";
 import { isAuthorizationHeaderName } from "../utils/workspaceAuth.js";
+import { inferExpectedStatus } from "../utils/httpDefaults.js";
 import { logger } from "../utils/logger.js";
 import fs from "fs";
 import path from "path";
@@ -124,7 +125,7 @@ ${JSON.stringify(traceRequest, null, 2)}
         }
         const timestamp = new Date().toISOString();
         const method = params.method;
-        const statusCode = params.statusCode ?? (method === "POST" ? 201 : method === "DELETE" ? 204 : 200);
+        const statusCode = params.statusCode ?? inferExpectedStatus(method);
         const requestBody = params.requestBody ||
             (method === "GET" || method === "DELETE" ? "" : "{}");
         const responseHeaders = params.responseHeaders

package/build/services/TestDiscoveryService.js CHANGED

@@ -62,10 +62,11 @@ export class TestDiscoveryService {
      * Uses fast-glob for cross-platform file scanning, then classifies discovered files
      * as Skyramp-generated tests, external tests, or not-a-test during processing.
      *
-     * When `options.changedResources` is provided (PR mode), external files are partitioned
-     * by relevance: files whose path/name overlaps with the changed resource names get full
-     * endpoint extraction; low-relevance files are returned as name-only entries (no reads).
-     * This eliminates the old hard cap while keeping state file size bounded.
+     * External test handling depends on `options.changedResources`:
+     * - `string[]` with entries (PR mode, endpoints detected): partition by relevance.
+     * - `[]` empty array (PR mode, scanner found no endpoints): skip external tests entirely
+     *   rather than flooding context with irrelevant files.
+     * - `undefined` (full-repo mode, no diff): cap at MAX_EXTERNAL_FULL_REPO.
      */
     async discoverTests(repositoryPath, options = {}) {
         logger.info(`Starting test discovery in: ${repositoryPath}`);
@@ -86,23 +87,37 @@ export class TestDiscoveryService {
         skyrampTests.forEach(t => { t.source = TestSource.Skyramp; });
         // Partition external tests into relevant (full extraction) and low-relevance (name-only).
         //
-        // PR mode (changedResources provided):
+        // PR mode + endpoints detected (changedResources is non-empty array):
         //   Files whose path/name token-overlaps with the changed resource names are "relevant".
         //   Only they get full endpoint extraction. Low-relevance files get name-only entries.
         //   No hard cap — the relevance filter naturally bounds the read set to PR scope.
+        //   The sentinel ["unknown"] falls into this branch — most files score 0 (low-relevance)
+        //   and get name-only entries, so external coverage is preserved without context flood.
         //
-        // Full-repo mode (no changedResources):
+        // PR mode + truly no endpoints (changedResources is empty array []):
+        //   Diff contained no endpoints at all (new, modified, or removed) — skip external
+        //   tests entirely rather than flooding the prompt with hundreds of irrelevant files.
+        //
+        // Full-repo mode (changedResources is undefined):
         //   No diff context — all external files treated as potentially relevant.
         //   Cap at MAX_EXTERNAL_FULL_REPO to avoid reading hundreds of files.
         const { changedResources } = options;
         let relevantExternal;
         let otherExternal;
         if (changedResources?.length) {
+            // PR mode with detected endpoints — partition by relevance
             ({ relevant: relevantExternal, other: otherExternal } =
                 this.partitionByRelevance(classified.external, changedResources));
         }
+        else if (changedResources !== undefined) {
+            // PR mode with an explicit empty endpoint list from diff parsing — don't flood
+            // context with irrelevant external tests. The LLM will work from Skyramp tests
+            // and scanned endpoints only.
+            relevantExternal = [];
+            otherExternal = [];
+        }
         else {
-            // Full-repo mode: cap full-extraction set, remaining become name-only
+            // Full-repo mode (no diff context): cap full-extraction set, remaining become name-only
             relevantExternal = classified.external.slice(0, this.MAX_EXTERNAL_FULL_REPO);
             otherExternal = classified.external.slice(this.MAX_EXTERNAL_FULL_REPO);
         }

package/build/services/TestDiscoveryService.test.js CHANGED Viewed

@@ -348,6 +348,50 @@ describe("TestDiscoveryService", () => {
             const withEndpoints = externalTests.filter(t => t.apiEndpoint !== "");
             expect(withEndpoints.length).toBe(12);
         });
+        it("returns zero external tests when changedResources is empty array (PR mode, no endpoints)", async () => {
+            // Simulate PR mode where a parsed diff produced no detected endpoints:
+            // newEndpoints=[], modifiedEndpoints=[], and removedEndpoints=[] → changedResources = []
+            writeFile("test_orders_api.py", 'import requests\nrequests.get("/api/orders")');
+            writeFile("test_products_api.py", 'import requests\nrequests.get("/api/products")');
+            const result = await service.discoverTests(tmpDir, { changedResources: [] });
+            const externalTests = result.tests.filter(t => t.source === TestSource.External);
+            // Empty changedResources = PR mode with no detected endpoints → zero external tests
+            expect(externalTests.length).toBe(0);
+            expect(result.relevantExternalTestPaths.length).toBe(0);
+        });
+        it("still returns external tests in full-repo mode (changedResources undefined)", async () => {
+            // Full-repo mode: changedResources not provided → should use capped full-repo behavior
+            writeFile("test_orders_api.py", 'import requests\nrequests.get("/api/orders")');
+            writeFile("test_products_api.py", 'import requests\nrequests.get("/api/products")');
+            const result = await service.discoverTests(tmpDir); // no options → undefined
+            const externalTests = result.tests.filter(t => t.source === TestSource.External);
+            expect(externalTests.length).toBe(2);
+            expect(result.relevantExternalTestPaths.length).toBe(2);
+        });
+        it("Skyramp tests are unaffected by empty changedResources", async () => {
+            writeFile("tests/test_orders_smoke.py", '# Generated by Skyramp\nskyramp generate smoke rest');
+            writeFile("test_external.py", 'import pytest\ndef test(): pass');
+            const result = await service.discoverTests(tmpDir, { changedResources: [] });
+            const skyrampTests = result.tests.filter(t => t.source === TestSource.Skyramp);
+            const externalTests = result.tests.filter(t => t.source === TestSource.External);
+            // Skyramp tests always discovered regardless of changedResources
+            expect(skyrampTests.length).toBe(1);
+            // External tests suppressed in PR-mode-no-endpoints
+            expect(externalTests.length).toBe(0);
+        });
+        it("returns external tests as name-only with ['unknown'] sentinel (unresolvable resources)", async () => {
+            // When diff endpoints exist but all paths resolve to "unknown" (e.g. decorator-relative
+            // paths like "/{order_id}"), changedResources = ["unknown"]. External tests should be
+            // discovered (not skipped) but scored as low-relevance since "unknown" won't match filenames.
+            writeFile("test_orders_api.py", 'import requests\nrequests.get("/api/orders")');
+            writeFile("test_products_api.py", 'import requests\nrequests.get("/api/products")');
+            const result = await service.discoverTests(tmpDir, { changedResources: ["unknown"] });
+            const externalTests = result.tests.filter(t => t.source === TestSource.External);
+            // External tests discovered (not skipped like empty array)
+            expect(externalTests.length).toBe(2);
+            // But all are low-relevance (name-only) since "unknown" doesn't match any filename tokens
+            expect(result.relevantExternalTestPaths.length).toBe(0);
+        });
         it("low-relevance files have empty apiEndpoint and empty framework in PR mode", async () => {
             writeFile("test_orders_api.py", 'import requests\nrequests.get("/api/orders")');
             writeFile("test_products_api.py", 'import requests\nrequests.get("/api/products")');

package/build/tools/generate-tests/generateBatchScenarioRestTool.js CHANGED Viewed

@@ -57,8 +57,7 @@ export const stepSchema = z.object({
         .describe("JSON string of the expected response body"),
     statusCode: z
         .number()
-        .optional()
-        .describe("Expected HTTP status code. Defaults: POST→201, DELETE→204, GET/PUT/PATCH→200."),
+        .describe("Expected HTTP status code — determine from the source code (e.g. 200, 201, 204)."),
     responseHeaders: z
         .record(z.array(z.string()))
         .optional()
@@ -141,7 +140,7 @@ export function registerBatchScenarioTestTool(server) {
 This tool accepts AI-parsed structured parameters from a natural language scenario description. Analyze the scenario and provide structured parameters for all steps.
 1. **Dynamic context**: If \`skyramp_analyze_changes\` has already run and returned a \`sessionId\`, fetch endpoint detail before building each step: \`skyramp://analysis/{sessionId}/endpoints/{path}/{method}\`. This gives you exact request body fields, types, and required vs optional — use it instead of guessing from field names.
 2. **Endpoints**: Confirm each step's method + path exists as a real endpoint (from OpenAPI spec, source code routes, or skyramp_analyze_changes output). Do NOT invent paths.
-3. **Status codes**: Confirm expected status code per step (defaults: POST→201, DELETE→204, GET/PUT/PATCH→200 — note if non-standard).
+3. **HTTP Status Codes**: Determine expected HTTP status code per step from the source code — do not assume conventions.
 4. **Request bodies**: Identify each field and its source (schema / prior step response / user input). For GET/DELETE steps, confirm filters go in queryParams — NEVER in requestBody.
 5. **Chaining**: For steps that use an ID from a prior step, use the same concrete value in the path (e.g. \`/api/v1/orders/1\`) that will appear in the prior step's response body (e.g. \`{"id": 1}\`). The backend auto-detects chaining by matching values across step responses.
 6. **Echo-back fields**: Identify which request body fields will be returned unchanged in the response — these will need exact-value assertions in the generated test.
@@ -259,7 +258,7 @@ Call \`skyramp_integration_test_generation\` with the returned \`scenarioFile\`
                     type: "text",
                     text: `**Batch Scenario Generated — ${stepCount} steps**\n\n`
                         + `**Scenario:** ${params.scenarioName}\n`
-                        + `**Steps:**\n${steps.map((s, i) => `  ${i + 1}. ${s.method} ${s.path} → ${s.statusCode ?? "default"}`).join("\n")}\n\n`
+                        + `**Steps:**\n${steps.map((s, i) => `  ${i + 1}. ${s.method} ${s.path} → ${s.statusCode ?? ""}`).join("\n")}\n\n`
                         + `**File:** ${filePath}\n\n`
                         + `**Next:** Call \`skyramp_integration_test_generation\` with \`scenarioFile: "${filePath}"\``,
                 },