npm - @skyramp/mcp - Versions diffs - 0.0.64-rc.4 → 0.0.64-rc.6 - Mend

@skyramp/mcp 0.0.64-rc.4 → 0.0.64-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/build/prompts/test-recommendation/test-recommendation-prompt.js +182 -16
package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +580 -11
package/build/prompts/testbot/testbot-prompts.js +6 -6
package/build/utils/trace-parser.js +21 -3
package/build/utils/trace-parser.test.js +140 -0
package/package.json +1 -1

package/build/prompts/test-recommendation/test-recommendation-prompt.js CHANGED Viewed

@@ -44,7 +44,16 @@ function computeTiebreakerSeed(endpoints, diffFiles) {
     return crypto.createHash("sha256").update(canonical).digest("hex").slice(0, 8);
 }
 // ── Execution Plan (replaces pre-ranked + scenarios + heuristic sections) ──
-function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false) {
+function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false, isDiffScope = false, isFrontendProject = false, isFrontendOnlyProject = false) {
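+    // Full-repo mode only — percentage-based UI/E2E slot targets for frontend projects:
+    // 15% of topN each, at least 1 before capping (both are 0 for non-frontend projects).
+    // E2E is then capped at floor(topN / 2) and UI at the slots left after E2E, so
+    // E2E+UI together never exceed topN (for topN < 2 the E2E target drops to 0).
+    // Referenced in supplementNote and in the full-repo "Test type mix" text below; the
+    // ternary conditions that use them (`isFrontendProject && !isDiffScope`) are always
+    // false in PR/diff mode, so there the values are computed but never rendered.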
+    const rawE2E = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
+    const rawUI = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
+    const slotsFloor = Math.floor(topN / 2);
+    const minE2ESlots = Math.min(rawE2E, slotsFloor);
+    const minUISlots = Math.min(rawUI, Math.max(0, topN - minE2ESlots));
     const generateItems = scored.slice(0, Math.min(maxGen, scored.length));
     const additionalItems = scored.slice(maxGen, topN);
     const authRef = authHeaderValue
@@ -109,9 +118,10 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
                 prereqNote);
         }
     }).join("\n\n");
-    // For mixed PRs, always reserve slots for UI and E2E recommendations regardless of whether
-    // traces already exist — the user can record them later or the bot can record during the run.
-    const needsE2ESlot = hasFrontendChanges && !isUIOnlyPR;
+    // Reserve slots for UI/E2E ADDITIONAL recommendations on mixed PRs.
+    // E2E requires traces to generate — only reserve the slot when traces are available.
+    // UI can be recommended without traces (agent can record inline).
+    const needsE2ESlot = hasFrontendChanges && !isUIOnlyPR && hasTraces;
     const needsUISlot = hasFrontendChanges && !isUIOnlyPR;
     const frontendSlots = (needsE2ESlot ? 1 : 0) + (needsUISlot ? 1 : 0);
     const backendAdditionalItems = frontendSlots > 0
@@ -143,8 +153,159 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
     })() : "";
     const supplementCount = topN - generateItems.length - backendAdditionalItems.length - frontendSlots;
     const supplementNote = supplementCount > 0
-        ? `\n**REQUIRED — You MUST add ${supplementCount} more to reach the total of ${topN}.** Draft them from endpoint interactions and source code patterns not yet covered. Use the same 5-dimension rubric and quality gate to assign priority (HIGH/MEDIUM/LOW), testType, and category.${hasFrontendChanges && !isUIOnlyPR ? " Since this PR has frontend changes, at least 1 of these should be a UI or E2E test targeting the changed components." : ""} Do NOT produce fewer than ${topN} total.`
+        ? `\n**REQUIRED — You MUST add ${supplementCount} more to reach the total of ${topN}.** Draft them in this priority order — exhaust each tier before moving to the next:\n\n**Tier 1:** Edge-case and error-path tests for endpoints already in the ${isDiffScope ? "GENERATE set" : "list"} — boundary values for numeric fields (e.g. 0%, 100%, >100% discount), invalid/non-existent IDs (→ 404), empty arrays where a minimum is required, missing required fields (→ 422), auth boundary (call without Authorization header → 403/401).\n\n**Tier 2:** Auth-boundary contract tests for any endpoint not yet covered.\n\n**Tier 3:** Cross-resource integration tests — ONLY when one resource's POST body contains the other's \`_id\` field. NEVER pair resources where neither POST body has the other's ID.\n\n**Tier 4:** CRUD lifecycle tests for any resource not yet covered.\n\nUse the same 5-dimension rubric to assign priority (HIGH/MEDIUM/LOW), testType, and category. For each supplement item, apply the same source-code enrichment from Step 1 — use real field names from the route handler, not generic placeholders.${isFrontendOnlyProject && !isDiffScope ? ` Since this is a frontend repo, the supplement MUST include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`). Do NOT add integration or contract tests.` : isFrontendProject && !isDiffScope ? ` Since this is a full-stack repo, the supplement MUST include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\` — full browser-to-backend flow) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\` — component-level interaction flows). Add these before exhausting backend tiers.` : hasFrontendChanges && !isUIOnlyPR ? 
" Since this PR has frontend changes, at least 1 of these should be a UI or E2E test targeting the changed components." : ""} Do NOT produce fewer than ${topN} total.`
         : "";
+    // ── Full-repo mode: recommendations only, no execution ──────────────────
+    if (!isDiffScope) {
+        const toTitle = (name) => name.replace(/-/g, " ").replace(/\b\w/g, c => c.toUpperCase());
+        // Coverage ranking (highest to lowest breadth):
+        // E2E first:          full browser-to-backend flow — exercises both frontend and backend.
+        // UI second:          frontend components call backend APIs — also exercises backend.
+        // Integration third:  backend API chains validated directly.
+        // Contract last:      single-endpoint boundary only.
+        const TYPE_ORDER = ["e2e", "ui", "integration", "contract"];
+        const TYPE_LABEL = {
+            e2e: "E2E", ui: "UI", integration: "Integration", contract: "Contract",
+        };
+        // All scored items up to topN, already sorted by priority/novelty
+        const allItems = scored.slice(0, topN);
+        // Group by test type while preserving priority ordering within each group
+        const byType = new Map();
+        for (const t of TYPE_ORDER)
+            byType.set(t, []);
+        for (const item of allItems) {
+            const t = item.scenario.testType ?? (item.scenario.steps.length === 1 ? "contract" : "integration");
+            if (!byType.has(t))
+                byType.set(t, []);
+            byType.get(t).push(item);
+        }
+        const renderItem = (item, rank) => {
+            const s = item.scenario;
+            const testType = s.testType ?? (s.steps.length === 1 ? "contract" : "integration");
+            const title = toTitle(s.scenarioName);
+            if (testType === "contract") {
+                const step = s.steps[0];
+                const endpointURL = `${baseUrl}${step.path}`;
+                const isBodyMethod = ["POST", "PUT", "PATCH"].includes(step.method);
+                const dataParam = isBodyMethod
+                    ? `, requestData: <${step.method} ${step.path} required fields from source code>`
+                    : "";
+                return [
+                    `**${rank}. ${title}**`,
+                    `  ${s.description}`,
+                    `  ${step.method} ${step.path} → ${step.expectedStatusCode}`,
+                    `  Tool: \`skyramp_contract_test_generation({ endpointURL: "${endpointURL}", method: "${step.method}"${authRef}${dataParam} })\``,
+                    `  From source: fill in requestData field names and the specific production boundary this validates`,
+                ].join("\n");
+            }
+            else {
+                const stepLines = s.steps.map(st => {
+                    const isBody = ["POST", "PUT", "PATCH"].includes(st.method);
+                    const bodyHint = isBody ? ` — body: <${st.method} ${st.path} required fields from source>` : "";
+                    return `  ${st.order}. ${st.method} ${st.path} → ${st.expectedStatusCode}: ${st.description}${bodyHint}`;
+                }).join("\n");
+                const toolCalls = s.steps.map(st => {
+                    const isBody = ["POST", "PUT", "PATCH"].includes(st.method);
+                    const dataParam = isBody
+                        ? `, requestBody: <${st.method} ${st.path} required fields from source>`
+                        : "";
+                    return `    skyramp_scenario_test_generation({ scenarioName: "${s.scenarioName}", destination: "${s.scenarioName}", baseURL: "${baseUrl}", method: "${st.method}", path: "${st.path}", statusCode: ${st.expectedStatusCode}${scenarioAuthRef}${dataParam} })`;
+                }).join("\n");
+                // E2E and UI use trace-based generation, not the scenario pipeline.
+                // Only emit per-step skyramp_scenario_test_generation calls for integration type.
+                const isTraceBased = testType === "e2e" || testType === "ui";
+                const finalTool = testType === "e2e"
+                    ? `skyramp_e2e_test_generation({ playwrightZip: "<trace zip path>", traceFile: "<backend trace path>"${authHeaderOnlyRef} })`
+                    : testType === "ui"
+                        ? `skyramp_ui_test_generation({ playwrightZip: "<trace zip path>"${authHeaderOnlyRef} })`
+                        : `skyramp_integration_test_generation({ scenarioFile: "scenario_${s.scenarioName}.json"${authHeaderOnlyRef} })`;
+                const toolCallsBlock = isTraceBased
+                    ? `    ${finalTool}`
+                    : `${toolCalls}\n    ${finalTool}`;
+                return [
+                    `**${rank}. ${title}**`,
+                    `  ${s.description}`,
+                    `  Steps:`,
+                    stepLines,
+                    `  Tool calls:`,
+                    toolCallsBlock,
+                    `  From source: fill in requestBody field values and assert all computed response fields`,
+                ].join("\n");
+            }
+        };
+        const sections = TYPE_ORDER
+            .filter(t => (byType.get(t) ?? []).length > 0)
+            .map(t => {
+            const items = byType.get(t);
+            const label = TYPE_LABEL[t];
+            let globalRank = 0;
+            for (const prev of TYPE_ORDER) {
+                if (prev === t)
+                    break;
+                globalRank += (byType.get(prev) ?? []).length;
+            }
+            const entries = items.map((item, i) => renderItem(item, globalRank + i + 1)).join("\n\n");
+            return `### ${label} (${items.length})\n\n${entries}`;
+        })
+            .join("\n\n");
+        const repoSupplementNote = supplementNote; // already built above with isDiffScope=false tier ordering
+        return `## Test Recommendations (${topN} total)
+> **Repo mode — no tests are executed.** Use the tool calls below to generate any recommendation on demand.
+> Highest-value tests appear first within each type. Use the "From source" hint in each item to fill in field names and assertions before calling the tool.
+**Step 1 — Source-Code Enrichment (MANDATORY before presenting anything)**
+For each endpoint listed in the Repository Context above, read the route handler source code. Look for:
+- **All required request body fields** (names and types) for POST/PUT/PATCH — use in step descriptions and tool call params
+- **Computed/derived response fields** (e.g. \`total_amount\`, \`discount_percent\`) and their formulas — assert these
+- **Auth middleware** (HTTPBearer, \`Depends(get_current_user)\`, \`@UseGuards\`, \`jwt.verify\`) — set \`authHeader\`/\`authScheme\`; FastAPI HTTPBearer → **403** (not 401)
+- **Storage backend** — if Redis or schema-less, discard unique-constraint and cascade-delete scenarios
+- **Delete behavior** — hard-delete → 204; soft-delete/cancel → 200
+Do NOT present generic placeholders. Replace every \`<... from source>\` hint with actual field names and realistic values.
+${buildTestPatternGuidelines()}
+${buildTestExamples()}
+**Cascade vs referential integrity:** Keep only the scenario that matches the actual FK delete policy in source (CASCADE → cascade-delete; RESTRICT/no cascade → delete-blocked). Remove the other silently — do NOT create a "Removed Recommendations" or "Not Applicable" section.
+**Unique constraints:** If the storage backend is Redis, in-memory, or schema-less with no explicit \`UNIQUE\` index, discard unique-constraint scenarios entirely and replace them with a different high-value test. Do NOT list removed scenarios.
+**NEVER create a "Removed Recommendations", "Not Applicable", or similar section.** If a scenario is inapplicable, silently replace it with an equivalent-priority scenario from the supplement tiers. The output must contain ONLY the ${topN} recommendations.
+${buildTestQualityCriteria()}
+**5-dimension rubric — use to assign priority for supplement items:**
+| Dimension | What to assess |
+| Production Safety | Guards a critical boundary (auth, unique constraint, cascade delete, data integrity, breaking migration)? → HIGH |
+| Bug-Finding Potential | Targets a known failure mode (race condition, data consistency, state transition, cascade effect)? → HIGH |
+| User Journey Relevance | Reflects how real users interact (from traces, business flows, critical paths)? → HIGH or MEDIUM |
+| Coverage Gap | Addresses an area with zero existing test coverage? → bump up one tier |
+| Code Insight | Derived from actual implementation (spotted middleware pattern, N+1 risk, unique constraint)? → bump up one tier |
+**Per-recommendation format (apply to ALL items):**
+- Title and one-sentence description of what it validates (business rule, not just "tests the endpoint")
+- Steps with concrete field names and realistic values derived from source code
+- Ready-to-use tool call — replace all \`<...>\` placeholders with real values before presenting
+- "From source" note — the specific production risk or business rule this prevents
+**MANDATORY: Every pre-ranked item listed above MUST appear in your output — do not drop or skip any.**
+${sections}
+${repoSupplementNote}
+**Test type mix — MANDATORY:**
+${isFrontendOnlyProject
+            ? `This is a frontend repo. Focus on E2E and UI tests only — E2E covers the full browser-to-backend flow (highest coverage), UI exercises frontend components that call backend APIs. Include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`). Do NOT add integration or contract tests.`
+            : isFrontendProject
+                ? `This is a full-stack repo. Coverage ranking: E2E (full browser-to-backend flow) > UI (frontend exercises backend APIs) > Integration (backend chains) > Contract (single endpoint). Include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`), in addition to backend integration and contract tests.`
+                : `Focus on integration and contract tests for all API endpoints.`}
+**No smoke tests. No fuzz tests.**
+**You MUST present EXACTLY ${topN} recommendations. Do NOT execute any tests. Do NOT produce fewer than ${topN}.**`;
+    }
+    // ── PR / branch-diff mode: execution plan ────────────────────────────────
     return `## Execution Plan
 Seed: ${seed} | Endpoints: ${endpointCount} | Budget: ${generateItems.length} generate + ${Math.max(topN - generateItems.length, 0)} additional = ${topN} total
@@ -370,7 +531,7 @@ ${detailBlocks}
             const errorA = a.scenario.steps.some(s => s.interactionType === "error" || s.interactionType === "edge-case") ? 1 : 0;
             const errorB = b.scenario.steps.some(s => s.interactionType === "error" || s.interactionType === "edge-case") ? 1 : 0;
             if (errorB !== errorA)
-                return errorB - errorA;
+                return errorA - errorB;
             // Use locale-independent comparison to avoid runtime-locale non-determinism
             const nameA = a.scenario.scenarioName;
             const nameB = b.scenario.scenarioName;
@@ -427,7 +588,10 @@ Do not churn recommendations without cause.
 `;
     }
     else if (scored.length > 0) {
-        mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces);
+        const projectType = analysis.projectClassification.projectType;
+        const isFrontendProject = projectType === "full-stack" || projectType === "frontend";
+        const isFrontendOnlyProject = projectType === "frontend";
+        mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces, isDiffScope, isFrontendProject, isFrontendOnlyProject);
     }
     else {
         mainSection = `
@@ -501,13 +665,12 @@ and adjust the test approach if needed.
             historyBody += `
 ### Previously Recommended (not generated)
 ${recLines}
-**Stability rule**: If a previously recommended test still applies to the current code
-(the endpoint exists, the business logic hasn't changed), carry it forward in your
-additionalRecommendations — match by scenarioName (for multi-step scenarios) or by
-endpoint (for single-endpoint tests). Re-derive category and priority from the test
-content. Do NOT drop a previous recommendation unless the underlying code was removed
-or the test is now covered by a generated test.
-Only add NEW recommendations for code paths introduced in the latest commit.
+**Stability rule**: Carry forward previously recommended tests unchanged in
+additionalRecommendations if they still apply — match by scenarioName (for multi-step
+scenarios) or by endpoint (for single-endpoint tests). Re-derive category and priority
+from the test content. Drop only if the underlying endpoint was removed, business logic
+changed, or the test is now covered by a generated test.
+GENERATE items are always executed regardless of prior recommendations — do not suppress them.
 `;
         }
         prHistorySection = `
@@ -515,8 +678,11 @@ Only add NEW recommendations for code paths introduced in the latest commit.
 Tests from prior bot runs are still in the working tree — the maintenance pipeline
 (Task 2) keeps them up to date. Use the history below to **avoid duplicating** existing
 coverage and to fill gaps:
-- **Do NOT re-recommend** tests listed under "Previously Generated Tests" — they already
-  exist and are maintained automatically.
+- **GENERATE section is unaffected by prior history** — always execute ALL pre-ranked
+  GENERATE items regardless of what was generated in prior runs. The execution pipeline
+  handles deduplication at the file level.
+- Tests listed under "Previously Generated Tests" are maintained automatically by Task 2 —
+  do NOT include them in additionalRecommendations.
 - **Carry forward** previously recommended-but-not-generated tests unchanged in
   additionalRecommendations if they still apply. Promote the highest-priority ones
   into generation slots if capacity allows.

package/build/prompts/test-recommendation/test-recommendation-prompt.test.js CHANGED Viewed

@@ -202,14 +202,15 @@ describe("buildRecommendationPrompt — PR History section", () => {
         expect(prompt).toContain("Promote the highest-priority ones");
         expect(prompt).toContain("into generation slots if capacity allows");
     });
-    it("includes do-not-re-recommend instruction for implemented tests", () => {
+    it("instructs that GENERATE is unaffected by prior history for implemented tests (Gap 4)", () => {
         const ctx = makePRContext({
             previousRecommendations: [
                 { testType: "contract", endpoint: "GET /api/items", status: "implemented", commentId: "1" },
             ],
         });
         const prompt = buildRecommendationPrompt(minimalAnalysis(), "current_branch_diff", 10, ctx);
-        expect(prompt).toContain("Do NOT re-recommend");
+        expect(prompt).toContain("GENERATE section is unaffected by prior history");
+        expect(prompt).not.toContain("Do NOT re-recommend");
         expect(prompt).toContain("Previously Generated Tests");
     });
     it("de-duplicates multi-step scenario entries to one line per scenario", () => {
@@ -281,29 +282,31 @@ function minimalScenario(overrides = {}) {
     };
 }
 describe("buildRecommendationPrompt — Stability and supplement section", () => {
-    it("includes Recommendation Stability section in output when scenarios exist", () => {
+    // Recommendation Stability is a PR-mode (branch_diff) concept — carry-forward across bot runs.
+    // Full-repo mode is presentation-only; there is no previous-run state to carry forward.
+    it("includes Recommendation Stability section in output when scenarios exist (PR mode)", () => {
         const analysis = minimalAnalysis({
             businessContext: { mainPurpose: "Test API", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [minimalScenario()] },
         });
-        const prompt = buildRecommendationPrompt(analysis, "full_repo", 10);
+        const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 10);
         expect(prompt).toContain("## Recommendation Stability");
     });
-    it("stability section uses scenarioName/endpoint matching strategy", () => {
+    it("stability section uses scenarioName/endpoint matching strategy (PR mode)", () => {
         const analysis = minimalAnalysis({
             businessContext: { mainPurpose: "Test API", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [minimalScenario()] },
         });
-        const prompt = buildRecommendationPrompt(analysis, "full_repo", 10);
+        const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 10);
         const stabilityStart = prompt.indexOf("## Recommendation Stability");
         const stabilityBlock = prompt.slice(stabilityStart, stabilityStart + 500);
         expect(stabilityBlock).toContain("scenarioName");
         expect(stabilityBlock).toContain("endpoint");
         expect(stabilityBlock).toContain("Re-derive category and priority");
     });
-    it("stability section specifies when to drop a recommendation", () => {
+    it("stability section specifies when to drop a recommendation (PR mode)", () => {
         const analysis = minimalAnalysis({
             businessContext: { mainPurpose: "Test API", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [minimalScenario()] },
         });
-        const prompt = buildRecommendationPrompt(analysis, "full_repo", 10);
+        const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 10);
         expect(prompt).toContain("target endpoint was removed");
         expect(prompt).toContain("business logic changed");
         expect(prompt).toContain("covered by a generated test");
@@ -321,12 +324,12 @@ describe("buildRecommendationPrompt — Stability and supplement section", () =>
     it("MAX_TESTS_TO_GENERATE is 3", () => {
         expect(MAX_TESTS_TO_GENERATE).toBe(3);
     });
-    it("uses MAX_CRITICAL_TESTS in category-aware selection rules", () => {
+    it("uses MAX_CRITICAL_TESTS in category-aware selection rules (PR mode)", () => {
         const analysis = minimalAnalysis({
             businessContext: { mainPurpose: "Test API", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [minimalScenario()] },
         });
-        const prompt = buildRecommendationPrompt(analysis, "full_repo", 10);
-        // The critical-category minimum line references MAX_CRITICAL_TESTS (= 3)
+        // MAX_CRITICAL_TESTS applies to PR mode (GENERATE items) — full_repo mode only presents, does not execute
+        const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 10);
         expect(prompt).toContain("GENERATE items MUST be from HIGH-priority categories");
     });
 });
@@ -369,3 +372,569 @@ describe("PATH_PARAM_UUID_GUIDANCE — no hardcoded UUID anchor", () => {
         expect(prompt).not.toMatch(UUID_V4_REGEX);
     });
 });
+// ---------------------------------------------------------------------------
+// Regression tests — PR #110 quality baseline
+//
+// Guard against regressions in recommendation quality. These tests assert that
+// the key signals that made PR #110's recommendations excellent are present in
+// both full_repo and branch_diff (PR) modes.
+// Baseline: https://github.com/letsramp/demoshop-fullstack/pull/110
+// ---------------------------------------------------------------------------
+function mockDiffScenario(overrides = {}) {
+    return {
+        scenarioName: "orders-update-with-discount",
+        description: "PUT /api/v1/orders/{order_id} with discount_percent — verifies total_amount formula",
+        category: "business_rule",
+        priority: "high",
+        steps: [
+            { order: 1, method: "POST", path: "/api/v1/products", expectedStatusCode: 201, description: "Create product", interactionType: "success" },
+            { order: 2, method: "POST", path: "/api/v1/orders", expectedStatusCode: 201, description: "Create order", interactionType: "success", chainsFrom: { sourceField: "id", sourceStep: 1, sourceLocation: "body", targetParam: "product_id", targetLocation: "body" } },
+            { order: 3, method: "PUT", path: "/api/v1/orders/{order_id}", expectedStatusCode: 200, description: "Apply discount", interactionType: "success", chainsFrom: { sourceField: "order_id", sourceStep: 2, sourceLocation: "body", targetParam: "order_id", targetLocation: "path" } },
+        ],
+        chainingKeys: ["id", "order_id"],
+        requiresAuth: true,
+        estimatedComplexity: "moderate",
+        testType: "integration",
+        ...overrides,
+    };
+}
+function analysisWithScenario(scope) {
+    const base = minimalAnalysis({
+        businessContext: {
+            mainPurpose: "E-commerce demo",
+            userFlows: [],
+            dataFlows: [],
+            integrationPatterns: [],
+            draftedScenarios: [mockDiffScenario()],
+        },
+    });
+    if (scope === "current_branch_diff") {
+        return {
+            ...base,
+            branchDiffContext: {
+                currentBranch: "shiny/edit-order",
+                baseBranch: "main",
+                changedFiles: ["backend/app/routers/orders.py"],
+                newEndpoints: [{
+                        path: "/api/v1/orders/{order_id}",
+                        methods: [{ method: "PUT", sourceFile: "orders.py", interactionCount: 3 }],
+                    }],
+                modifiedEndpoints: [],
+                affectedServices: ["orders"],
+            },
+        };
+    }
+    return base;
+}
+describe("PR #110 quality baseline — full_repo mode", () => {
+    let prompt;
+    beforeAll(() => { prompt = buildRecommendationPrompt(analysisWithScenario("full_repo"), "full_repo", 20); });
+    it("source enrichment targets each endpoint's route handler, not 'changed files'", () => {
+        expect(prompt).toContain("Source-Code Enrichment");
+        expect(prompt).toContain("route handler");
+        expect(prompt).not.toContain("Read the source code for ALL changed files");
+    });
+    it("includes test pattern guidelines for quality anchoring", () => {
+        expect(prompt).toContain("Test Pattern Guidelines");
+    });
+    it("includes concrete impressive/deprioritise examples", () => {
+        expect(prompt).toContain("Impressive (these catch prod bugs)");
+        expect(prompt).toContain("Deprioritise");
+    });
+    it("supplement ordering puts edge cases before cross-resource (Tier 1 before Tier 3)", () => {
+        const tier1Idx = prompt.indexOf("Tier 1");
+        const tier3Idx = prompt.indexOf("Tier 3");
+        expect(tier1Idx).toBeGreaterThan(-1);
+        expect(tier3Idx).toBeGreaterThan(-1);
+        expect(tier1Idx).toBeLessThan(tier3Idx);
+    });
+    it("supplement Tier 1 calls out boundary values and invalid IDs explicitly", () => {
+        expect(prompt).toMatch(/Tier 1.*boundary values/s);
+        expect(prompt).toMatch(/Tier 1.*invalid.*non-existent IDs/s);
+    });
+    it("includes 5-dimension quality rubric", () => {
+        expect(prompt).toContain("Production Safety");
+        expect(prompt).toContain("Bug-Finding Potential");
+        expect(prompt).toContain("Coverage Gap");
+    });
+    it("includes per-recommendation format instruction", () => {
+        // Full-repo mode hides category/priority from user output — check for format label and key fields
+        expect(prompt).toContain("Per-recommendation format");
+        expect(prompt).toContain("tool call");
+        expect(prompt).toContain("From source");
+    });
+    it("includes unique-constraint storage gating for Redis", () => {
+        expect(prompt).toContain("Unique constraints");
+        expect(prompt).toContain("Redis");
+    });
+});
+// ---------------------------------------------------------------------------
+// Tests — full_repo output format and execution guardrails
+//
+// Guard that full_repo mode:
+//   - never emits execution/GENERATE language
+//   - groups items by test type with section headers
+//   - hides category/priority labels from user-facing rendered items
+//   - emits "Do NOT execute any tests"
+//   - renders pre-ranked item names
+//   - includes cascade guidance
+//   - scopes Tier 1 supplement to "list" (not "GENERATE set")
+// ---------------------------------------------------------------------------
+function fullRepoAnalysisWithScenarios(overrides = {}, scenarios = []) {
+    return minimalAnalysis({
+        businessContext: {
+            mainPurpose: "E-commerce API",
+            userFlows: [],
+            dataFlows: [],
+            integrationPatterns: [],
+            draftedScenarios: scenarios.length > 0 ? scenarios : [mockDiffScenario()],
+        },
+        ...overrides,
+    });
+}
+function makeContractScenario() {
+    return {
+        scenarioName: "create-product-contract",
+        description: "POST /api/v1/products auth boundary",
+        category: "security_boundary",
+        priority: "high",
+        steps: [{ order: 1, method: "POST", path: "/api/v1/products", expectedStatusCode: 201, description: "Create product", interactionType: "success" }],
+        chainingKeys: [],
+        requiresAuth: true,
+        estimatedComplexity: "simple",
+        testType: "contract",
+    };
+}
+describe("full_repo mode — output format and execution guardrails", () => {
+    let prompt;
+    beforeAll(() => {
+        prompt = buildRecommendationPrompt(fullRepoAnalysisWithScenarios({}, [mockDiffScenario(), makeContractScenario()]), "full_repo", 10);
+    });
+    it("does NOT contain GENERATE execution language", () => {
+        expect(prompt).not.toContain("### GENERATE");
+        expect(prompt).not.toContain("execute these in order");
+        expect(prompt).not.toContain("one retry on failure then skip");
+    });
+    it("does NOT contain the PR-mode ADDITIONAL section header", () => {
+        // The '### ADDITIONAL (list in additionalRecommendations...)' header is a PR-mode structural
+        // concept; it must not appear in the full_repo grouped output.
+        expect(prompt).not.toContain("### ADDITIONAL (list in additionalRecommendations");
+    });
+    it("contains explicit 'Do NOT execute any tests' instruction", () => {
+        expect(prompt).toContain("Do NOT execute any tests");
+    });
+    it("contains 'Repo mode' header or preamble", () => {
+        expect(prompt).toContain("Repo mode");
+    });
+    it("groups items by test type — Integration section header present", () => {
+        expect(prompt).toMatch(/### (Integration|Contract)/);
+    });
+    it("E2E section appears before Integration section (E2E ranked highest coverage)", () => {
+        const e2eIdx = prompt.indexOf("### E2E");
+        const integrationIdx = prompt.indexOf("### Integration");
+        // Ordering is only asserted when both headers are present; if either is missing this check is skipped
+        if (e2eIdx !== -1 && integrationIdx !== -1) {
+            expect(e2eIdx).toBeLessThan(integrationIdx);
+        }
+        // Same conditional ordering check against the Contract section
+        const contractIdx = prompt.indexOf("### Contract");
+        if (e2eIdx !== -1 && contractIdx !== -1) {
+            expect(e2eIdx).toBeLessThan(contractIdx);
+        }
+    });
+    it("UI section appears before Integration and Contract sections", () => {
+        const uiIdx = prompt.indexOf("### UI");
+        const integrationIdx = prompt.indexOf("### Integration");
+        const contractIdx = prompt.indexOf("### Contract");
+        if (uiIdx !== -1 && integrationIdx !== -1) {
+            expect(uiIdx).toBeLessThan(integrationIdx);
+        }
+        if (uiIdx !== -1 && contractIdx !== -1) {
+            expect(uiIdx).toBeLessThan(contractIdx);
+        }
+    });
+    it("prompt forbids the LLM from creating a 'Removed Recommendations' section", () => {
+        // The prompt must contain the 'NEVER create' instruction so the LLM doesn't add such a section
+        expect(prompt).toContain("NEVER create a");
+        // The prompt must NOT have an actual section heading titled 'Removed Recommendations'
+        // (it may contain the phrase inside the NEVER instruction itself, which is expected)
+        expect(prompt).not.toMatch(/^##+ Removed Recommendations/m);
+        expect(prompt).not.toMatch(/^##+ Not Applicable/m);
+    });
+    it("rendered item does NOT contain 'priority=' label visible to user", () => {
+        // priority= is a PR-mode label; must not appear in rendered sections
+        expect(prompt).not.toMatch(/priority=(HIGH|MEDIUM|LOW|CRITICAL)/);
+    });
+    it("rendered item does NOT contain pipe-delimited category label", () => {
+        // | category | pattern used in PR-mode GENERATE blocks
+        expect(prompt).not.toMatch(/\| (security_boundary|business_rule|data_integrity|crud|workflow) \|/);
+    });
+    it("renders the pre-ranked scenario name in the output", () => {
+        expect(prompt).toContain("orders-update-with-discount");
+    });
+    it("includes cascade vs referential integrity guidance", () => {
+        expect(prompt).toContain("Cascade vs referential integrity");
+    });
+    it("supplement Tier 1 is scoped to 'list' (not 'GENERATE set') in full_repo", () => {
+        // In full_repo there is no GENERATE set — supplement references the pre-ranked list
+        expect(prompt).toMatch(/Tier 1.*list/s);
+        expect(prompt).not.toMatch(/Tier 1.*GENERATE set/s);
+    });
+    it("supplement note references 5-dimension rubric for priority assignment", () => {
+        expect(prompt).toContain("5-dimension rubric");
+    });
+    it("cascade guidance instructs silent removal — no 'Removed Recommendations' section", () => {
+        // The cascade guidance must say to remove silently, not to list removed items
+        expect(prompt).toContain("silently");
+        expect(prompt).toContain("Do NOT list removed scenarios");
+    });
+});
+// ---------------------------------------------------------------------------
+// Tests — full_repo mode: full-stack vs backend-only test mix
+// ---------------------------------------------------------------------------
+describe("full_repo mode — full-stack repo test mix", () => {
+    function fullStackAnalysis() {
+        return fullRepoAnalysisWithScenarios({
+            projectClassification: {
+                projectType: "full-stack",
+                primaryLanguage: "TypeScript",
+                primaryFramework: "Next.js",
+                deploymentPattern: "full-stack",
+            },
+        });
+    }
+    function backendOnlyAnalysis() {
+        return fullRepoAnalysisWithScenarios({
+            projectClassification: {
+                projectType: "rest-api",
+                primaryLanguage: "Python",
+                primaryFramework: "FastAPI",
+                deploymentPattern: "traditional",
+            },
+        });
+    }
+    // topN=10 → 15% × 10 = 1.5 → round → 2 for both E2E and UI
+    it("full-stack repo mandates percentage-based UI slots (topN=10 → ≥2)", () => {
+        const prompt = buildRecommendationPrompt(fullStackAnalysis(), "full_repo", 10);
+        expect(prompt).toContain("skyramp_ui_test_generation");
+        expect(prompt).toMatch(/at least 2 UI test/);
+    });
+    it("full-stack repo mandates percentage-based E2E slots (topN=10 → ≥2)", () => {
+        const prompt = buildRecommendationPrompt(fullStackAnalysis(), "full_repo", 10);
+        expect(prompt).toContain("skyramp_e2e_test_generation");
+        expect(prompt).toMatch(/at least 2 E2E test/);
+    });
+    // topN=20 → 15% × 20 = 3 for both E2E and UI (scales up vs fixed ≥1/≥2)
+    it("full-stack repo scales to ≥3 E2E and ≥3 UI at topN=20", () => {
+        const prompt = buildRecommendationPrompt(fullStackAnalysis(), "full_repo", 20);
+        expect(prompt).toMatch(/at least 3 E2E test/);
+        expect(prompt).toMatch(/at least 3 UI test/);
+    });
+    // topN=5 → 15% × 5 = 0.75 → round → 1, floor at 1
+    it("full-stack repo floors at ≥1 E2E and ≥1 UI for small topN=5", () => {
+        const prompt = buildRecommendationPrompt(fullStackAnalysis(), "full_repo", 5);
+        expect(prompt).toMatch(/at least 1 E2E test/);
+        expect(prompt).toMatch(/at least 1 UI test/);
+    });
+    it("full-stack repo explicitly excludes smoke and fuzz tests", () => {
+        const prompt = buildRecommendationPrompt(fullStackAnalysis(), "full_repo", 10);
+        expect(prompt).toContain("No smoke tests");
+        expect(prompt).toContain("No fuzz tests");
+    });
+    it("backend-only (rest-api) repo does NOT mandate UI/E2E tests", () => {
+        const prompt = buildRecommendationPrompt(backendOnlyAnalysis(), "full_repo", 10);
+        // Tool names appear in generic buildToolWorkflows docs — check for the mandate text instead
+        expect(prompt).not.toMatch(/at least \d+ (UI|E2E) test/);
+        expect(prompt).not.toContain("supplement MUST include");
+        expect(prompt).not.toContain("full-stack repo");
+    });
+    it("backend-only repo focuses on integration and contract tests", () => {
+        const prompt = buildRecommendationPrompt(backendOnlyAnalysis(), "full_repo", 10);
+        expect(prompt).toContain("integration and contract tests");
+    });
+    it("backend-only repo still excludes smoke and fuzz tests", () => {
+        const prompt = buildRecommendationPrompt(backendOnlyAnalysis(), "full_repo", 10);
+        expect(prompt).toContain("No smoke tests");
+        expect(prompt).toContain("No fuzz tests");
+    });
+    it("'frontend' project type focuses on UI/E2E only — NOT backend tests", () => {
+        const frontendAnalysis = fullRepoAnalysisWithScenarios({
+            projectClassification: {
+                projectType: "frontend",
+                primaryLanguage: "TypeScript",
+                primaryFramework: "React",
+                deploymentPattern: "traditional",
+            },
+        });
+        const prompt = buildRecommendationPrompt(frontendAnalysis, "full_repo", 10);
+        // topN=10 → 15% × 10 = 1.5 → round → 2 for both
+        expect(prompt).toMatch(/at least 2 UI test/);
+        expect(prompt).toMatch(/at least 2 E2E test/);
+        // Should NOT say "in addition to backend integration and contract tests"
+        expect(prompt).not.toContain("in addition to backend integration and contract tests");
+        // Should explicitly say no integration/contract
+        expect(prompt).toContain("Do NOT add integration or contract tests");
+    });
+    it("'frontend' project type says 'frontend repo' not 'full-stack repo'", () => {
+        const frontendAnalysis = fullRepoAnalysisWithScenarios({
+            projectClassification: {
+                projectType: "frontend",
+                primaryLanguage: "TypeScript",
+                primaryFramework: "React",
+                deploymentPattern: "traditional",
+            },
+        });
+        const prompt = buildRecommendationPrompt(frontendAnalysis, "full_repo", 10);
+        expect(prompt).toContain("frontend repo");
+        expect(prompt).not.toContain("full-stack repo");
+    });
+    it("'full-stack' project type includes BOTH backend and frontend tests", () => {
+        const prompt = buildRecommendationPrompt(fullStackAnalysis(), "full_repo", 10);
+        expect(prompt).toContain("full-stack repo");
+        expect(prompt).toContain("in addition to backend integration and contract tests");
+    });
+    it("full-stack repo explains E2E > UI > Integration > Contract coverage ranking", () => {
+        const prompt = buildRecommendationPrompt(fullStackAnalysis(), "full_repo", 10);
+        expect(prompt).toContain("Coverage ranking");
+        expect(prompt).toContain("E2E");
+        expect(prompt).toContain("UI");
+    });
+    // Critical: scenarioDrafting.ts NEVER generates UI or E2E testType —
+    // they only come from the LLM supplement. The supplement note MUST
+    // explicitly tell the LLM to add UI/E2E for full-stack repos, otherwise
+    // the LLM fills the supplement with backend-only tiers (edge cases, CRUD)
+    // and never produces UI/E2E recommendations (PR #110 regression risk).
+    it("full-stack supplement note explicitly mandates UI and E2E with percentage-based counts (PR #110 regression guard)", () => {
+        // topN=20, 15% → 3 E2E + 3 UI mandated in the supplement note
+        const analysis = fullRepoAnalysisWithScenarios({
+            projectClassification: {
+                projectType: "full-stack",
+                primaryLanguage: "TypeScript",
+                primaryFramework: "Next.js",
+                deploymentPattern: "full-stack",
+            },
+        });
+        const prompt = buildRecommendationPrompt(analysis, "full_repo", 20);
+        // The first mention of the E2E tool name must come after the REQUIRED supplement marker
+        const requiredIdx = prompt.indexOf("REQUIRED — You MUST add");
+        const e2eIdx = prompt.indexOf("skyramp_e2e_test_generation");
+        expect(requiredIdx).toBeGreaterThan(-1);
+        expect(e2eIdx).toBeGreaterThan(-1);
+        expect(e2eIdx).toBeGreaterThan(requiredIdx); // first E2E tool mention follows the REQUIRED marker
+        // Percentage-based count: topN=20 → 3
+        expect(prompt).toMatch(/at least 3 E2E test/);
+        expect(prompt).toMatch(/at least 3 UI test/);
+    });
+    it("backend-only repo supplement note does NOT add UI/E2E mandate", () => {
+        const analysis = fullRepoAnalysisWithScenarios({
+            projectClassification: {
+                projectType: "rest-api",
+                primaryLanguage: "Python",
+                primaryFramework: "FastAPI",
+                deploymentPattern: "traditional",
+            },
+        });
+        const prompt = buildRecommendationPrompt(analysis, "full_repo", 20);
+        const requiredIdx = prompt.indexOf("REQUIRED — You MUST add");
+        if (requiredIdx === -1)
+            return; // no supplement needed
+        const supplementBlock = prompt.slice(requiredIdx, requiredIdx + 800);
+        // Backend-only repos should NOT mandate UI/E2E in the supplement tiers
+        expect(supplementBlock).not.toContain("full-stack repo, the supplement MUST include");
+    });
+});
+// ---------------------------------------------------------------------------
+// Tests — full_repo mode: PR mode must NOT be affected by these changes
+// ---------------------------------------------------------------------------
+describe("full_repo mode — PR mode unchanged by full_repo changes", () => {
+    let prPrompt;
+    beforeAll(() => {
+        prPrompt = buildRecommendationPrompt(analysisWithScenario("current_branch_diff"), "current_branch_diff", 10);
+    });
+    it("PR mode still contains GENERATE execution language", () => {
+        expect(prPrompt).toContain("### GENERATE");
+    });
+    it("PR mode still shows priority= labels on GENERATE items", () => {
+        expect(prPrompt).toMatch(/priority=(HIGH|MEDIUM|LOW|CRITICAL)/);
+    });
+    it("PR mode does not show 'Do NOT execute any tests'", () => {
+        expect(prPrompt).not.toContain("Do NOT execute any tests");
+    });
+    it("PR mode does not show 'Repo mode' preamble", () => {
+        expect(prPrompt).not.toContain("Repo mode — no tests are executed");
+    });
+});
+describe("PR #110 quality baseline — branch_diff (PR) mode", () => {
+    let prompt;
+    beforeAll(() => { prompt = buildRecommendationPrompt(analysisWithScenario("current_branch_diff"), "current_branch_diff", 20); });
+    it("source enrichment references changed files (not 'each endpoint')", () => {
+        expect(prompt).toContain("Source-Code Enrichment");
+        expect(prompt).toContain("changed files");
+        expect(prompt).not.toContain("For each endpoint listed in the Repository Context above, read the route handler");
+    });
+    it("supplement Tier 1 scoped to GENERATE set", () => {
+        expect(prompt).toMatch(/Tier 1.*GENERATE set/s);
+    });
+    it("supplement ordering puts edge cases before cross-resource", () => {
+        const tier1Idx = prompt.indexOf("Tier 1");
+        const tier3Idx = prompt.indexOf("Tier 3");
+        expect(tier1Idx).toBeGreaterThan(-1);
+        expect(tier3Idx).toBeGreaterThan(-1);
+        expect(tier1Idx).toBeLessThan(tier3Idx);
+    });
+    it("includes cascade vs referential integrity guidance", () => {
+        expect(prompt).toContain("Cascade vs referential integrity");
+    });
+    it("includes per-recommendation format requirements", () => {
+        expect(prompt).toContain("Per-recommendation format");
+    });
+    it("GENERATE block present for the business_rule scenario", () => {
+        expect(prompt).toContain("GENERATE");
+        expect(prompt).toContain("orders-update-with-discount");
+    });
+});
+// ---------------------------------------------------------------------------
+// Regression tests — v3 gap fixes
+// ---------------------------------------------------------------------------
+describe("Gap 1 — happy-path ranking: success scenarios ranked before error/edge-case scenarios", () => {
+    function makeScenarioByInteraction(name, interactionType) {
+        return mockDiffScenario({
+            scenarioName: name,
+            steps: [
+                { order: 1, method: "POST", path: "/api/items", expectedStatusCode: interactionType === "success" ? 201 : 404, description: "step", interactionType },
+                { order: 2, method: "GET", path: "/api/items/{id}", expectedStatusCode: interactionType === "success" ? 200 : 404, description: "verify", interactionType },
+                { order: 3, method: "DELETE", path: "/api/items/{id}", expectedStatusCode: interactionType === "success" ? 204 : 404, description: "cleanup", interactionType },
+            ],
+        });
+    }
+    it("happy-path scenario ranked before error-path scenario in GENERATE block", () => {
+        const analysis = {
+            ...analysisWithScenario("current_branch_diff"),
+            businessContext: {
+                mainPurpose: "Test",
+                userFlows: [], dataFlows: [], integrationPatterns: [],
+                draftedScenarios: [
+                    makeScenarioByInteraction("error-path-scenario", "error"),
+                    makeScenarioByInteraction("happy-path-scenario", "success"),
+                ],
+            },
+        };
+        const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 5);
+        const happyIdx = prompt.indexOf("happy-path-scenario");
+        const errorIdx = prompt.indexOf("error-path-scenario");
+        expect(happyIdx).toBeGreaterThan(-1);
+        expect(errorIdx).toBeGreaterThan(-1);
+        // Happy path should appear first (lower index = earlier in the output)
+        expect(happyIdx).toBeLessThan(errorIdx);
+    });
+});
+describe("Gap 2 — E2E ADDITIONAL slot gated on hasTraces", () => {
+    function makeMixedPRAnalysis(hasTraceFiles) {
+        // Needs draftedScenarios so scored.length > 0 and buildExecutionPlan is reached
+        const base = analysisWithScenario("current_branch_diff");
+        return {
+            ...base,
+            artifacts: {
+                openApiSpecs: [],
+                playwrightRecordings: [],
+                traceFiles: hasTraceFiles ? [{ path: "/repo/tests/trace.json", format: "skyramp" }] : [],
+                notFound: [],
+            },
+            branchDiffContext: {
+                currentBranch: "test",
+                baseBranch: "main",
+                // A .tsx file under frontend/components/ triggers hasFrontendChanges; newEndpoints makes it a mixed PR (not UI-only)
+                changedFiles: ["frontend/components/App.tsx", "backend/routers/orders.py"],
+                newEndpoints: [{ path: "/api/v1/orders/{order_id}", methods: [{ method: "PUT", sourceFile: "orders.py", interactionCount: 3 }] }],
+                modifiedEndpoints: [],
+                affectedServices: ["orders"],
+            },
+        };
+    }
+    it("E2E [ADDITIONAL] slot present when hasTraces=true and frontend+API changes exist", () => {
+        const prompt = buildRecommendationPrompt(makeMixedPRAnalysis(true), "current_branch_diff", 10);
+        expect(prompt).toMatch(/\[ADDITIONAL\].*E2E/s);
+    });
+    it("E2E [ADDITIONAL] slot absent when hasTraces=false and frontend+API changes exist", () => {
+        const prompt = buildRecommendationPrompt(makeMixedPRAnalysis(false), "current_branch_diff", 10);
+        // UI slot should still be present, E2E slot should not
+        expect(prompt).toMatch(/\[ADDITIONAL\].*UI/s);
+        // [ADDITIONAL] E2E label must not appear (tool docs contain "E2E" but not as [ADDITIONAL] label)
+        expect(prompt).not.toContain("[ADDITIONAL] | E2E |");
+    });
+});
+describe("Gap 4 — PR history does NOT suppress GENERATE items on 2nd+ run", () => {
+    it("prompt contains GENERATE-unaffected instruction when prior history exists", () => {
+        const ctx = makePRContext({
+            previousRecommendations: [
+                { testType: "integration", endpoint: "POST /api/v1/orders", scenarioName: "orders-update-with-discount", status: "implemented", commentId: "1" },
+            ],
+        });
+        const prompt = buildRecommendationPrompt(analysisWithScenario("current_branch_diff"), "current_branch_diff", 5, ctx);
+        expect(prompt).toContain("GENERATE section is unaffected by prior history");
+    });
+    it("prompt does NOT contain old suppression text 'Do NOT re-recommend'", () => {
+        const ctx = makePRContext({
+            previousRecommendations: [
+                { testType: "integration", endpoint: "POST /api/v1/orders", status: "implemented", commentId: "1" },
+            ],
+        });
+        const prompt = buildRecommendationPrompt(analysisWithScenario("current_branch_diff"), "current_branch_diff", 5, ctx);
+        expect(prompt).not.toContain("Do NOT re-recommend");
+    });
+});
+describe("renderItem — correct tool for E2E and UI testTypes in full_repo mode", () => {
+    function makeTypedScenario(testType) {
+        return mockDiffScenario({
+            scenarioName: `${testType}-scenario`,
+            testType,
+            steps: [
+                { order: 1, method: "GET", path: "/api/items", expectedStatusCode: 200, description: "list items", interactionType: "success" },
+                { order: 2, method: "POST", path: "/api/items", expectedStatusCode: 201, description: "create item", interactionType: "success" },
+            ],
+        });
+    }
+    it("integration scenario uses skyramp_integration_test_generation in full_repo", () => {
+        const analysis = minimalAnalysis({
+            businessContext: { mainPurpose: "Test", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [makeTypedScenario("integration")] },
+        });
+        const prompt = buildRecommendationPrompt(analysis, "full_repo", 5);
+        expect(prompt).toContain("skyramp_integration_test_generation");
+    });
+    it("e2e scenario uses skyramp_e2e_test_generation and omits scenario step calls in full_repo", () => {
+        const analysis = minimalAnalysis({
+            businessContext: { mainPurpose: "Test", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [makeTypedScenario("e2e")] },
+        });
+        const prompt = buildRecommendationPrompt(analysis, "full_repo", 5);
+        // Extract recommendation content only (before Tool Workflows docs which list all tools)
+        const toolWorkflowsIdx = prompt.indexOf("## How to Generate Tests");
+        const mainContent = toolWorkflowsIdx > 0 ? prompt.slice(0, toolWorkflowsIdx) : prompt;
+        expect(mainContent).toContain("skyramp_e2e_test_generation");
+        expect(mainContent).not.toContain("skyramp_integration_test_generation");
+        // E2E does not use per-step scenario pipeline
+        expect(mainContent).not.toContain("skyramp_scenario_test_generation");
+    });
+    it("ui scenario uses skyramp_ui_test_generation and omits scenario step calls in full_repo", () => {
+        const analysis = minimalAnalysis({
+            businessContext: { mainPurpose: "Test", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [makeTypedScenario("ui")] },
+        });
+        const prompt = buildRecommendationPrompt(analysis, "full_repo", 5);
+        // Extract recommendation content only (before Tool Workflows docs which list all tools)
+        const toolWorkflowsIdx = prompt.indexOf("## How to Generate Tests");
+        const mainContent = toolWorkflowsIdx > 0 ? prompt.slice(0, toolWorkflowsIdx) : prompt;
+        expect(mainContent).toContain("skyramp_ui_test_generation");
+        expect(mainContent).not.toContain("skyramp_integration_test_generation");
+        // UI does not use per-step scenario pipeline
+        expect(mainContent).not.toContain("skyramp_scenario_test_generation");
+    });
+    it("integration scenario still emits per-step skyramp_scenario_test_generation calls in full_repo", () => {
+        const analysis = minimalAnalysis({
+            businessContext: { mainPurpose: "Test", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [makeTypedScenario("integration")] },
+        });
+        const prompt = buildRecommendationPrompt(analysis, "full_repo", 5);
+        const toolWorkflowsIdx = prompt.indexOf("## How to Generate Tests");
+        const mainContent = toolWorkflowsIdx > 0 ? prompt.slice(0, toolWorkflowsIdx) : prompt;
+        expect(mainContent).toContain("skyramp_scenario_test_generation");
+        expect(mainContent).toContain("skyramp_integration_test_generation");
+    });
+});

package/build/prompts/testbot/testbot-prompts.js CHANGED Viewed

@@ -120,6 +120,7 @@ Generate a net-new test. Use a unique descriptive filename to avoid overwriting
 **How to generate each type (for ADD and REGENERATE):**
 - **Integration**: call \`skyramp_scenario_test_generation\` per step (sequentially), then \`skyramp_integration_test_generation\` with the scenario file.
   Scenario JSON goes in the same \`outputDir\` (e.g. \`tests/scenario_<name>.json\`), not \`.skyramp/\`.
+  **Required fields (MANDATORY before generating any scenario step):** For every POST/PUT/PATCH step — including prerequisite/setup steps (e.g. create a product before creating an order) — read the route handler source code or OpenAPI schema to identify ALL required request body fields. Include every required field with a realistic value. Do NOT omit fields just because they are not the focus of the test.
 - **Contract**: call \`skyramp_contract_test_generation\` with \`endpointURL\`, \`method\`, and \`requestData\` for POST/PUT/PATCH.
   Pass \`apiSchema\` if an OpenAPI spec exists.
   For internal/microservice APIs: add \`providerMode: true\` to verify implementation matches the contract.
@@ -159,11 +160,10 @@ await page.waitForTimeout(1500);
 \`\`\`
 Then re-run the test. This is a common issue with SSR/SPA frameworks where the DOM is rendered but not yet interactive.
-**After generation, fix chaining and enhance assertions only:**
-- Path params like \`id = 'id'\` → \`skyramp.get_response_value(prev_response, "id")\`
-- Hardcoded IDs in request bodies → dynamic values from prior response
-- **Integration tests and contract provider tests**: after the test generation, you MUST enhance response body assertions as instructed in the tool output.
-- Change ONLY chaining values and enhance assertions. Preserve everything else exactly as generated.
+**After generation, you MUST do exactly two things — nothing more, nothing less:**
+1. **Fix chaining**: replace hardcoded IDs with dynamic response values — path params like \`id = 'id'\` → \`skyramp.get_response_value(prev_response, "id")\`, and hardcoded IDs in request bodies → dynamic values from prior responses.
+2. **Enhance assertions**: for integration tests and contract provider tests, follow the assertion enhancement instructions returned in the tool output. Add response body assertions for every request. This step is MANDATORY — do NOT skip it even if chaining is already correct.
+Do not make any other changes to the generated test file.
 After all actions, execute ONLY the test files you created (ADD), regenerated (REGENERATE),
 or edited (UPDATE). Do NOT execute VERIFY'd tests — they are unaffected by the diff and do not
@@ -191,7 +191,7 @@ Call \`skyramp_submit_report\` with \`summaryOutputFile\`: "${summaryOutputFile}
    VERIFY: note that the test was verified as unaffected by the diff — no file changes made.
    Do NOT include files that were newly created in this run (those go in \`newTestsCreated\`).
-**additionalRecommendations** — items you could not act on (quota exceeded, missing traces, etc.):
+**additionalRecommendations** — remaining recommendations from the ranked list (MUST contain EXACTLY ${maxRecommendations - maxGenerate} items):
    \`testId\` (human-readable kebab-case, e.g. \`integration-products-orders-workflow\`), \`testType\`, \`category\`, \`scenarioName\`, \`priority\` (high/medium/low — used for sorting, not displayed), \`description\`, \`steps\`, \`reasoning\`
    Keep each \`description\` to one sentence. Omit \`requestBody\` and \`responseBody\` from steps.
    Include at most 3 steps per recommendation.

package/build/utils/trace-parser.js CHANGED Viewed

@@ -156,6 +156,8 @@ export async function parseTraceFile(filePath) {
     return { entries, userFlows, format };
 }
 const SKIP_DIRS = new Set(["node_modules", ".git", "dist", "build", ".next", ".nuxt", "coverage", "__pycache__", ".venv", "venv"]);
+/** Known test-artifact directories where testbot-generated traces are written. */
+const TRACE_SCAN_DIRS = [".skyramp", "tests", "test", "e2e", "playwright"];
 /**
  * Recursively scan a directory for files matching a predicate, up to maxDepth levels.
  */
@@ -180,6 +182,22 @@ function scanDir(dir, predicate, maxDepth, results) {
         }
     }
 }
+/**
+ * Scan only known test-artifact directories for trace files.
+ * The repository root itself is scanned with maxDepth 0 (root-level files only); each
+ * directory named in TRACE_SCAN_DIRS that exists under the root is scanned with maxDepth 5.
+ * This prevents picking up committed demo assets (e.g. frontend/public/traces/).
+ *
+ * @param repositoryPath - repository root; TRACE_SCAN_DIRS names are joined onto it
+ * @param predicate - filter forwarded unchanged to scanDir
+ * @param results - collection forwarded unchanged to scanDir
+ */
+function scanTraceArtifactDirs(repositoryPath, predicate, results) {
+    // Root-level files only (depth 0)
+    scanDir(repositoryPath, predicate, 0, results);
+    // Named test-artifact subdirectories (maxDepth 5)
+    for (const dir of TRACE_SCAN_DIRS) {
+        const full = path.join(repositoryPath, dir);
+        if (fs.existsSync(full)) {
+            scanDir(full, predicate, 5, results);
+        }
+    }
+}
 /**
  * Discover trace JSON files in a repository path.
  */
@@ -191,12 +209,12 @@ export function discoverTraceFiles(repositoryPath) {
         if (fs.existsSync(full))
             found.push(full);
     }
-    // Recursive scan: any *trace*.json|har, but exclude scenario files and test output files
+    // Recursive scan scoped to test-artifact dirs: any *trace*.json|har, excluding scenario/test output files
     const isTraceJson = (name) => /\.(json|har)$/i.test(name) &&
         /trace/i.test(name) &&
         !/^scenario_/i.test(name) &&
         !/_test\.(json|har)$/i.test(name);
-    scanDir(repositoryPath, isTraceJson, 5, found);
+    scanTraceArtifactDirs(repositoryPath, isTraceJson, found);
     // Deduplicate and sort for deterministic ordering
     return [...new Set(found)].sort();
 }
@@ -209,6 +227,6 @@ export function discoverPlaywrightZips(repositoryPath) {
     const isPlaywrightZip = (name) => /\.zip$/i.test(name) && (/playwright/i.test(name) ||
         /_trace\.zip$/i.test(name) ||
         name.toLowerCase() === "trace.zip");
-    scanDir(repositoryPath, isPlaywrightZip, 5, found);
+    scanTraceArtifactDirs(repositoryPath, isPlaywrightZip, found);
     return [...new Set(found)].sort();
 }

package/build/utils/trace-parser.test.js ADDED Viewed

@@ -0,0 +1,140 @@
+/**
+ * Unit tests for trace-parser.ts — specifically the scanTraceArtifactDirs scoping
+ * introduced to prevent demo/fixture files (e.g. frontend/public/traces/) from being
+ * misidentified as testbot-generated traces.
+ */
+import * as fs from "fs";
+import * as os from "os";
+import * as path from "path";
+import { discoverTraceFiles, discoverPlaywrightZips } from "./trace-parser.js";
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+function mkdirp(dir) { // Create `dir` together with any missing parent directories.
+    fs.mkdirSync(dir, { recursive: true });
+}
+function touch(file) { // Write an empty file at `file`, creating its parent directory first.
+    mkdirp(path.dirname(file));
+    fs.writeFileSync(file, ""); // Empty content: the tests only care about file names and locations.
+}
+function withTempRepo(fn) { // Run `fn(dir)` against a fresh temp directory; `fn` is called synchronously and not awaited.
+    const dir = fs.mkdtempSync(path.join(os.tmpdir(), "trace-parser-test-"));
+    try {
+        fn(dir);
+    }
+    finally { // Always remove the temp directory, even when `fn` throws.
+        fs.rmSync(dir, { recursive: true, force: true });
+    }
+}
+// ---------------------------------------------------------------------------
+// discoverPlaywrightZips — scoping tests
+// ---------------------------------------------------------------------------
+describe("discoverPlaywrightZips — scanTraceArtifactDirs scoping", () => { // Zips under tests/, .skyramp/, e2e/, playwright/ are found; zips under frontend/ or src/ are not.
+    it("does NOT discover playwright zip in frontend/public/traces/ (demo fixture dir)", () => {
+        withTempRepo(repo => {
+            touch(path.join(repo, "frontend", "public", "traces", "ui_test_playwright.zip"));
+            expect(discoverPlaywrightZips(repo)).toEqual([]);
+        });
+    });
+    it("discovers playwright zip in tests/ (test-artifact dir)", () => {
+        withTempRepo(repo => {
+            const zip = path.join(repo, "tests", "ui_test_playwright.zip");
+            touch(zip);
+            expect(discoverPlaywrightZips(repo)).toContain(zip);
+        });
+    });
+    it("discovers playwright zip in .skyramp/ (test-artifact dir)", () => {
+        withTempRepo(repo => {
+            const zip = path.join(repo, ".skyramp", "recording_playwright.zip");
+            touch(zip);
+            expect(discoverPlaywrightZips(repo)).toContain(zip);
+        });
+    });
+    it("discovers playwright zip in e2e/ (test-artifact dir)", () => {
+        withTempRepo(repo => {
+            const zip = path.join(repo, "e2e", "flow_playwright.zip");
+            touch(zip);
+            expect(discoverPlaywrightZips(repo)).toContain(zip);
+        });
+    });
+    it("discovers playwright zip in playwright/ (test-artifact dir)", () => {
+        withTempRepo(repo => {
+            const zip = path.join(repo, "playwright", "trace.zip");
+            touch(zip);
+            expect(discoverPlaywrightZips(repo)).toContain(zip);
+        });
+    });
+    it("does NOT discover zip in src/ (not a test-artifact dir)", () => {
+        withTempRepo(repo => {
+            touch(path.join(repo, "src", "recordings", "ui_playwright.zip"));
+            expect(discoverPlaywrightZips(repo)).toEqual([]);
+        });
+    });
+    it("does NOT discover zip in deeply nested non-test dir", () => {
+        withTempRepo(repo => {
+            touch(path.join(repo, "frontend", "src", "assets", "demo_playwright.zip"));
+            expect(discoverPlaywrightZips(repo)).toEqual([]);
+        });
+    });
+});
+// ---------------------------------------------------------------------------
+// discoverTraceFiles — scoping tests
+// ---------------------------------------------------------------------------
+describe("discoverTraceFiles — scanTraceArtifactDirs scoping", () => { // Covers directory scoping, root-level discovery, predicate exclusions, and de-duplication.
+    it("does NOT discover trace.json nested under frontend/public/traces/", () => {
+        withTempRepo(repo => {
+            touch(path.join(repo, "frontend", "public", "traces", "backend_trace.json"));
+            const found = discoverTraceFiles(repo);
+            // The fixed-name root candidates don't match "backend_trace.json", and the scoped scan won't reach frontend/
+            expect(found.some(f => f.includes("frontend"))).toBe(false);
+        });
+    });
+    it("discovers trace.json in tests/ dir", () => {
+        withTempRepo(repo => {
+            const f = path.join(repo, "tests", "backend_trace.json");
+            touch(f);
+            expect(discoverTraceFiles(repo)).toContain(f);
+        });
+    });
+    it("discovers trace.json in .skyramp/ dir", () => {
+        withTempRepo(repo => {
+            const f = path.join(repo, ".skyramp", "skyramp_trace.json");
+            touch(f);
+            expect(discoverTraceFiles(repo)).toContain(f);
+        });
+    });
+    it("discovers root-level trace.json", () => {
+        withTempRepo(repo => {
+            const f = path.join(repo, "trace.json");
+            touch(f);
+            expect(discoverTraceFiles(repo)).toContain(f);
+        });
+    });
+    it("discovers root-level skyramp_traces.json via fixed-name check", () => {
+        withTempRepo(repo => {
+            const f = path.join(repo, "skyramp_traces.json");
+            touch(f);
+            expect(discoverTraceFiles(repo)).toContain(f);
+        });
+    });
+    it("does NOT discover scenario_ json files (excluded by predicate)", () => {
+        withTempRepo(repo => {
+            touch(path.join(repo, "tests", "scenario_orders_trace.json")); // Placed inside a scanned dir so only the predicate can exclude it.
+            expect(discoverTraceFiles(repo)).toEqual([]);
+        });
+    });
+    it("does NOT discover _test.json files (excluded by predicate)", () => {
+        withTempRepo(repo => {
+            touch(path.join(repo, "tests", "orders_trace_test.json")); // Placed inside a scanned dir so only the predicate can exclude it.
+            expect(discoverTraceFiles(repo)).toEqual([]);
+        });
+    });
+    it("results are deduplicated when fixed-name and scan both find the same root file", () => {
+        withTempRepo(repo => {
+            const f = path.join(repo, "trace.json");
+            touch(f);
+            const found = discoverTraceFiles(repo);
+            expect(found.filter(x => x === f)).toHaveLength(1); // Exactly one entry for the path, however many code paths found it.
+        });
+    });
+});

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@skyramp/mcp",
-  "version": "0.0.64-rc.4",
+  "version": "0.0.64-rc.6",
   "main": "build/index.js",
   "type": "module",
   "bin": {