npm - @skyramp/mcp - Versions diffs - 0.0.65 → 0.1.0-rc.2 - Mend

@skyramp/mcp 0.0.65 → 0.1.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/build/prompts/test-recommendation/test-recommendation-prompt.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import * as crypto from "crypto";
+import { AnalysisScope, isDiff, } from "../../types/RepositoryAnalysis.js";
 import { WorkspaceAuthType } from "../../utils/workspaceAuth.js";
-import { buildToolWorkflows, buildTestPatternGuidelines, buildTestQualityCriteria, buildTestExamples, buildGenerationRules, getAuthSnippets, MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, } from "./recommendationSections.js";
+import { buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildToolWorkflows, buildTestPatternGuidelines, buildTestQualityCriteria, buildFewShotExamples, buildVerificationChecklist, buildGenerationRules, getAuthSnippets, MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, } from "./recommendationSections.js";
 import { CATEGORY_PRIORITY, TEST_CATEGORIES } from "../../types/TestRecommendation.js";
 function formatTestLocations(locs) {
     const entries = Object.entries(locs || {});
@@ -55,15 +56,281 @@ function scenarioCoverageKey(scenario) {
     const resource = extractResourceFromPath(primaryStep?.path ?? "");
     return `${resource}::${testType}`;
 }
+// ── Shared budget mandate: one bold prompt line stating the total, to-generate and additional recommendation counts ──
+function buildBudgetMandate(topN, generateCount) {
+    const additionalCount = Math.max(topN - generateCount, 0); // clamped to 0 when generateCount exceeds topN
+    return `**Produce ${topN} total recommendations: ${generateCount} to generate + ${additionalCount} as additionalRecommendations. Generate recommendations now.**`;
+}
 // ── Execution Plan (replaces pre-ranked + scenarios + heuristic sections) ──
+function buildFullRepoRecommendations(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false, isFrontendProject = false, isFrontendOnlyProject = false) { // NOTE(review): maxGen, seed, endpointCount, isUIOnlyPR, hasFrontendChanges and hasTraces are not referenced in this body
+    // Full-repo mode only: returns the "## Test Recommendations" markdown prompt section as one string. UI/E2E slot targets are percentage-based (15% of topN each, floor 1) and are 0 unless isFrontendProject is true.
+    const rawE2E = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
+    const rawUI = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
+    const slotsFloor = Math.floor(topN / 2); // upper bound for E2E slots: half of topN, rounded down
+    const minE2ESlots = Math.min(rawE2E, slotsFloor);
+    const minUISlots = Math.min(rawUI, Math.max(0, topN - minE2ESlots)); // UI slots cannot exceed what remains after the E2E slots
+    const authRef = authHeaderValue // known header value (plus scheme snippet), otherwise a placeholder telling the agent where to look
+        ? `, authHeader: "${authHeaderValue}"${authSchemeSnippet}`
+        : `, authHeader: <check OpenAPI securitySchemes or auth middleware; "" if confirmed unauthenticated>`;
+    const hasWorkspaceAuthType = !!authTypeValue && authTypeValue !== "none"; // truthy auth type that is not the literal "none"
+    const scenarioAuthRef = authRef; // alias used in the batch scenario tool call below
+    const authHeaderOnlyRef = hasWorkspaceAuthType // emitted as "" when a workspace auth type is set; never includes authSchemeSnippet
+        ? ""
+        : authHeaderValue
+            ? `, authHeader: "${authHeaderValue}"`
+            : `, authHeader: <check OpenAPI securitySchemes or auth middleware; "" if confirmed unauthenticated>`;
+    // Supplement count for full-repo mode: topN minus the number of scored items (capped at topN). NOTE(review): the E2E/UI placeholder slots added below are not subtracted here — confirm that is intended.
+    const supplementCount = topN - Math.min(scored.length, topN);
+    const toTitle = (name) => name.replace(/-/g, " ").replace(/\b\w/g, c => c.toUpperCase()); // e.g. "my-scenario" -> "My Scenario"
+    const TYPE_ORDER = ["e2e", "ui", "integration", "contract"]; // order in which sections are emitted and ranks are assigned
+    const TYPE_LABEL = {
+        e2e: "E2E", ui: "UI", integration: "Integration", contract: "Contract",
+    };
+    // For full-stack repos, carve out E2E and UI slots before filling with backend tests (the remaining slots are taken from the front of `scored`).
+    const backendSlotCount = isFrontendProject
+        ? Math.max(0, topN - minE2ESlots - minUISlots)
+        : topN;
+    const allItems = scored.slice(0, backendSlotCount); // presumably `scored` is already ordered best-first — TODO confirm against caller
+    const byType = new Map(); // testType -> items; seeded with every TYPE_ORDER key
+    for (const t of TYPE_ORDER)
+        byType.set(t, []);
+    for (const item of allItems) {
+        const t = item.scenario.testType ?? (item.scenario.steps.length === 1 ? "contract" : "integration"); // no testType: single-step -> contract, otherwise integration
+        if (!byType.has(t)) // types outside TYPE_ORDER are bucketed here but never rendered, since sections iterate TYPE_ORDER only
+            byType.set(t, []);
+        byType.get(t).push(item);
+    }
+    const renderItem = (item, rank) => { // renders one ranked recommendation as a markdown block with its tool call(s)
+        const s = item.scenario;
+        const testType = s.testType ?? (s.steps.length === 1 ? "contract" : "integration");
+        const title = toTitle(s.scenarioName);
+        if (testType === "contract") { // contract items use only the first step
+            const step = s.steps[0];
+            const endpointURL = `${baseUrl}${step.path}`;
+            const isBodyMethod = ["POST", "PUT", "PATCH"].includes(step.method);
+            const dataParam = isBodyMethod
+                ? `, requestData: <${step.method} ${step.path} required fields from source code>`
+                : "";
+            return [
+                `**${rank}. ${title}**`,
+                `  ${s.description}`,
+                `  ${step.method} ${step.path} \u2192 ${step.expectedStatusCode}`,
+                `  Tool: \`skyramp_contract_test_generation({ endpointURL: "${endpointURL}", method: "${step.method}"${authRef}${dataParam} })\``,
+                `  From source: fill in requestData field names and the specific production boundary this validates`,
+            ].join("\n");
+        }
+        else {
+            const stepLines = s.steps.map(st => {
+                const isBody = ["POST", "PUT", "PATCH"].includes(st.method);
+                const bodyHint = isBody ? ` \u2014 body: <${st.method} ${st.path} required fields from source>` : "";
+                return `  ${st.order}. ${st.method} ${st.path} \u2192 ${st.expectedStatusCode}: ${st.description}${bodyHint}`;
+            }).join("\n");
+            const isTraceBased = testType === "e2e" || testType === "ui";
+            let toolCallsBlock;
+            if (isTraceBased) {
+                // E2E and UI need a browser recording first, then the generation tool is called on the exported zip
+                const frontendUrl = baseUrl.replace(/\/api.*$/, "") || baseUrl; // drop everything from the first "/api" onward; fall back to baseUrl if that leaves an empty string
+                const zipPath = `<repositoryPath>/.skyramp/${s.scenarioName}_trace.zip`;
+                if (testType === "ui") {
+                    toolCallsBlock = [
+                        `  1. browser_navigate({ url: "${frontendUrl}" })`,
+                        `  2. Interact with the changed components (browser_click, browser_type, browser_fill_form, etc.)`,
+                        `  3. browser_snapshot() after each key interaction`,
+                        `  4. skyramp_export_zip({ outputPath: "${zipPath}" }) — use absolute path`,
+                        `  5. skyramp_ui_test_generation({ playwrightInput: "${zipPath}"${authHeaderOnlyRef} })`,
+                    ].join("\n");
+                }
+                else {
+                    toolCallsBlock = [
+                        `  1. browser_navigate({ url: "${frontendUrl}" }) — record frontend trace`,
+                        `  2. Interact with the user journey described above`,
+                        `  3. skyramp_export_zip({ outputPath: "${zipPath}" }) — use absolute path`,
+                        `  4. Capture backend trace JSON separately (skyramp_start_trace_collection / skyramp_stop_trace_collection)`,
+                        `  5. skyramp_e2e_test_generation({ playwrightInput: "${zipPath}", trace: "<backend trace path>"${authHeaderOnlyRef} })`,
+                    ].join("\n");
+                }
+            }
+            else {
+                // Integration (and any other non-trace type): use the batch scenario tool with all steps in one call, then the integration generator
+                let destinationHost = s.scenarioName; // fallback value when baseUrl cannot be parsed as a URL
+                try {
+                    destinationHost = new URL(baseUrl).hostname;
+                }
+                catch { /* keep fallback */ }
+                const batchSteps = s.steps.map(st => {
+                    const isBody = ["POST", "PUT", "PATCH"].includes(st.method);
+                    let dataParam = "";
+                    if (isBody) {
+                        if (st.requestBody && Object.keys(st.requestBody).length > 0) {
+                            const bodyJson = JSON.stringify(st.requestBody).replace(/"/g, '\\"'); // escapes double quotes only; backslashes in the JSON are left as-is
+                            dataParam = `, requestBody: "${bodyJson}"`;
+                        }
+                        else {
+                            dataParam = `, requestBody: <${st.method} ${st.path} required fields from source code>`;
+                        }
+                    }
+                    return `    { method: "${st.method}", path: "${st.path}", statusCode: ${st.expectedStatusCode}${dataParam} }`;
+                }).join(",\n");
+                toolCallsBlock = [
+                    `  skyramp_batch_scenario_test_generation({ scenarioName: "${s.scenarioName}", destination: "${destinationHost}", baseURL: "${baseUrl}"${scenarioAuthRef}, steps: [\n${batchSteps}\n  ] })`,
+                    `  skyramp_integration_test_generation({ scenarioFile: <filePath returned by skyramp_batch_scenario_test_generation above>${authHeaderOnlyRef} })`,
+                ].join("\n");
+            }
+            return [
+                `**${rank}. ${title}**`,
+                `  ${s.description}`,
+                `  Steps:`,
+                stepLines,
+                `  Tool calls:`,
+                toolCallsBlock,
+                `  From source: fill in requestBody field values and assert all computed response fields`,
+            ].join("\n");
+        }
+    };
+    const backendSections = TYPE_ORDER // one "### <Label> (<count>)" section per non-empty type, in TYPE_ORDER
+        .filter(t => (byType.get(t) ?? []).length > 0)
+        .map(t => {
+        const items = byType.get(t);
+        const label = TYPE_LABEL[t];
+        let globalRank = 0; // ranks continue across sections: start after the items of all earlier types
+        for (const prev of TYPE_ORDER) {
+            if (prev === t)
+                break;
+            globalRank += (byType.get(prev) ?? []).length;
+        }
+        const entries = items.map((item, i) => renderItem(item, globalRank + i + 1)).join("\n\n");
+        return `### ${label} (${items.length})\n\n${entries}`;
+    });
+    // Pre-allocate E2E and UI placeholder sections for full-stack repos. NOTE(review): placeholders navigate to baseUrl as-is, whereas renderItem strips the "/api…" suffix first — confirm which is intended.
+    const e2eSectionParts = [];
+    const uiSectionParts = [];
+    if (isFrontendProject) {
+        for (let i = 0; i < minE2ESlots; i++) {
+            const rank = i + 1;
+            e2eSectionParts.push(`**${rank}. E2E User Journey ${i + 1}**\n` +
+                `  End-to-end test covering a complete user journey through the frontend and backend.\n` +
+                `  To generate: record a browser trace, then call the generation tool.\n` +
+                `    browser_navigate({ url: "${baseUrl}" }) \u2192 exercise key user flow \u2192 skyramp_export_zip({ outputPath: "<repo>/.skyramp/e2e_journey_${i + 1}.zip" })\n` +
+                `  Tool: \`skyramp_e2e_test_generation({ playwrightInput: "<repo>/.skyramp/e2e_journey_${i + 1}.zip"${authHeaderOnlyRef} })\`\n` +
+                `  From source: read frontend components and their API calls to identify the highest-value user journey`);
+        }
+        for (let i = 0; i < minUISlots; i++) {
+            const rank = minE2ESlots + i + 1; // UI placeholder ranks follow the E2E placeholders
+            uiSectionParts.push(`**${rank}. UI Component Test ${i + 1}**\n` +
+                `  Test key UI component interactions and state changes.\n` +
+                `  To generate: record a browser trace, then call the generation tool.\n` +
+                `    browser_navigate({ url: "${baseUrl}" }) \u2192 interact with UI components \u2192 skyramp_export_zip({ outputPath: "<repo>/.skyramp/ui_component_${i + 1}.zip" })\n` +
+                `  Tool: \`skyramp_ui_test_generation({ playwrightInput: "<repo>/.skyramp/ui_component_${i + 1}.zip"${authHeaderOnlyRef} })\`\n` +
+                `  From source: read frontend component files to identify interactions, form submissions, and state transitions`);
+        }
+        // Offset backend section ranks by the number of E2E + UI placeholders; every backend section is re-rendered here, replacing the version built above
+        const offset = minE2ESlots + minUISlots;
+        backendSections.forEach((_, idx) => {
+            const t = TYPE_ORDER.filter(t => (byType.get(t) ?? []).length > 0)[idx]; // recover the type for this section index using the same filter as above
+            if (!t)
+                return;
+            const items = byType.get(t);
+            const label = TYPE_LABEL[t];
+            let globalRank = offset;
+            for (const prev of TYPE_ORDER) {
+                if (prev === t)
+                    break;
+                globalRank += (byType.get(prev) ?? []).length;
+            }
+            backendSections[idx] = `### ${label} (${items.length})\n\n${items.map((item, i) => renderItem(item, globalRank + i + 1)).join("\n\n")}`;
+        });
+    }
+    const allSections = [ // placeholder E2E section, placeholder UI section, then backend sections; empty placeholder lists are omitted
+        ...(e2eSectionParts.length > 0 ? [`### E2E (${e2eSectionParts.length})\n\n${e2eSectionParts.join("\n\n")}`] : []),
+        ...(uiSectionParts.length > 0 ? [`### UI (${uiSectionParts.length})\n\n${uiSectionParts.join("\n\n")}`] : []),
+        ...backendSections,
+    ];
+    const sections = allSections.join("\n\n");
+    const frontendTierNote = isFrontendOnlyProject // interpolated at the end of <supplement_guidance>, so it only appears when supplementCount > 0
+        ? `\n\n**Frontend repo:** supplement MUST include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`). Do NOT add integration or contract tests.`
+        : isFrontendProject
+            ? `\n\n**Full-stack repo:** supplement MUST include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`). Add these before exhausting backend tiers.`
+            : "";
+    const repoSupplementNote = supplementCount > 0 // tiered guidance for drafting the missing items; empty string when nothing needs supplementing
+        ? `
+<supplement_guidance>
+**When to use:** The pre-ranked sections above contain fewer than ${topN} items. Add exactly ${supplementCount} more using the tiers below — exhaust each tier before moving to the next.
+**Tier 1 — Error paths for endpoints already in the list** (highest value, do first):
+  • Auth boundary (no Authorization header → 403/401) → \`testType: contract, category: security_boundary\`
+  • Invalid/non-existent IDs (→ 404) → \`testType: contract, category: error_handling\`
+  • Missing required fields (→ 422) → \`testType: contract, category: data_validation\`
+  • Boundary values for numeric fields → \`testType: integration, category: data_validation\`
+  Note: DISCARD unique-constraint scenarios if the storage backend is Redis, MongoDB, or schema-less.
+**Tier 2 — Auth coverage for any endpoint not yet covered by Tier 1:**
+  → \`testType: contract, category: security_boundary\`
+**Tier 3 — Cross-resource integration** (only when one resource's POST body contains another's \`_id\` field):
+  → \`testType: integration, category: workflow\`
+**Tier 4 — CRUD lifecycle** for any resource not yet covered:
+  → \`testType: integration, category: crud\`
+**How to fill each item:** Use path parameters in \`{param}\` format. Use real field names from the analysis or handler source — no generic placeholders. Describe behavior in API terms (HTTP method, path, status code), not storage internals.${frontendTierNote}
+</supplement_guidance>`
+        : "";
+    const typeMixText = isFrontendOnlyProject // isFrontendOnlyProject is checked before isFrontendProject, so it wins when both are true
+        ? `This is a frontend repo. Focus on E2E and UI tests only. Include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`). Do NOT add integration or contract tests.`
+        : isFrontendProject
+            ? `This is a full-stack repo. Coverage ranking: E2E > UI > Integration > Contract. Include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`), in addition to backend integration and contract tests.`
+            : `Focus on integration and contract tests for all API endpoints.`;
+    return `## Test Recommendations — ${topN} total (grouped by test type)
+> Repo mode — no tests are executed. Ranked by risk within each type.
+> To generate any item: read the handler source, fill \`<…from source>\` placeholders with real values, then call the tool.
+${sections}
+**Test type mix — MANDATORY. No smoke tests. No fuzz tests. Only: integration, contract, E2E, UI.**
+${typeMixText}
+${repoSupplementNote}
+**Present up to ${topN} recommendations.** Prioritize quality — only include a recommendation if it adds genuine new coverage. If fewer than ${topN} high-value tests exist for this codebase, stop at the last useful item rather than padding with trivial ones.
+---
+<enrichment_notes>
+**Path resolution (do this before filling in any tool call):**
+Cross-check every endpoint path against the Router Mounting / Nesting section in the analysis above. Sub-routers may be mounted at nested prefixes — e.g. a reviews router with \`@router.get("/")\` may actually be \`GET /api/v1/products/{product_id}/reviews\` if mounted under that prefix. Always use the fully-qualified nested path in tool calls, not the path as it appears in the route file alone.
+**Existing test files (check before assigning output filenames):**
+See the Existing Tests section above. If a recommendation's primary resource already has a test file listed there, prefer passing an explicit \`output\` filename (e.g. \`output: "orders_integration_test.py"\`) to update the existing file rather than creating a duplicate.
+Before filling in tool call parameters for each item, use the analysis data already provided above (endpoint interactions, source context) first. Only read the route handler source code directly when the analysis data does not contain the specific value you need:
+- Required request body fields (POST/PUT/PATCH) — use field names from the analysis interactions; read source only if they show \`{}\` or are missing
+- Computed/derived response fields and their formulas — assert exact values; read source for formula details not captured in the analysis
+- Auth middleware — set authHeader/authScheme from the repository context above; FastAPI HTTPBearer → 403 not 401
+- Storage backend — if Redis or schema-less, discard unique-constraint and cascade-delete scenarios
+- Delete behavior — hard-delete → 204; soft-delete/cancel → 200
+${buildTestQualityCriteria()}
+**5-dimension rubric — use to assign priority for supplement items:**
+| Dimension | What to assess |
+| Production Safety | Guards a critical boundary (auth, unique constraint, cascade delete, data integrity, breaking migration)? → HIGH |
+| Bug-Finding Potential | Targets a known failure mode (race condition, data consistency, state transition, cascade effect)? → HIGH |
+| User Journey Relevance | Reflects how real users interact (from traces, business flows, critical paths)? → HIGH or MEDIUM |
+| Coverage Gap | Addresses an area with zero existing test coverage? → bump up one tier |
+| Code Insight | Derived from actual implementation (spotted middleware pattern, N+1 risk, unique constraint)? → bump up one tier |
+</enrichment_notes>`;
+}
 function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false) {
     // For mixed PRs (frontend + backend), reserve the last GENERATE slot for a UI test
     // so the agent has explicit room to record a browser trace and generate it.
     const reserveUIGenSlot = hasFrontendChanges && !isUIOnlyPR && maxGen > 1;
     const backendGenCount = reserveUIGenSlot ? maxGen - 1 : maxGen;
     const backendBudget = reserveUIGenSlot ? Math.max(topN - 1, 0) : topN;
-    const generateItems = scored.slice(0, Math.min(backendGenCount, scored.length));
+    let generateItems = scored.slice(0, Math.min(backendGenCount, scored.length));
     const rawAdditionalItems = scored.slice(backendGenCount, backendBudget);
+    // For UI-only PRs with no backend scenarios, ensure at least 1 UI generate slot
+    // by injecting a placeholder UI scenario that tells the LLM to record a browser trace.
+    const needsUIPlaceholder = isUIOnlyPR && generateItems.length === 0 && hasFrontendChanges;
     // Filter additional items whose primary resource + test type already appear in GENERATE
     const generatedCoverage = new Set(generateItems.map(item => scenarioCoverageKey(item.scenario)));
     const additionalItems = rawAdditionalItems.filter(item => !generatedCoverage.has(scenarioCoverageKey(item.scenario)));
@@ -83,6 +350,23 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
         : authHeaderValue
             ? `, authHeader: "${authHeaderValue}"`
             : `, authHeader: <check OpenAPI securitySchemes or auth middleware; "" if confirmed unauthenticated>`;
+    // If we need a UI placeholder for UI-only PRs, inject it at the start
+    let uiPlaceholderBlock = "";
+    if (needsUIPlaceholder) {
+        uiPlaceholderBlock = `**#1 — GENERATE** | UI | workflow | new
+Scenario: ui-test-for-changed-frontend-components
+This is a UI-only PR with no backend endpoint changes. Generate UI tests for the changed frontend files.
+Tool workflow:
+1. Navigate to the app: \`browser_navigate({ url: "<frontend_url>" })\`
+2. Interact with the changed components (read the diff to determine which components changed)
+3. Take snapshots and add assertions: \`browser_snapshot()\`, \`browser_assert()\`
+4. Export the trace: \`skyramp_export_zip({ outputPath: ".skyramp/<component_name>_trace.zip" })\`
+5. Generate the UI test: \`skyramp_ui_test_generation({ playwrightInput: ".skyramp/<component_name>_trace.zip" })\`
+Do NOT skip this step. UI tests are required for UI-only PRs.
+`;
+    }
     const generateBlocks = generateItems.map((item, i) => {
         const rank = i + 1;
         const s = item.scenario;
@@ -92,12 +376,14 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
             const endpointURL = `${baseUrl}${step.path}`;
             const isBodyMethod = ["POST", "PUT", "PATCH"].includes(step.method);
             const dataParam = isBodyMethod
-                ? `, requestData: <${step.method} ${step.path} body from source code schemas>`
+                ? (step.requestBody && Object.keys(step.requestBody).length > 0
+                    ? `, requestData: "${JSON.stringify(step.requestBody).replace(/"/g, '\\"')}"`
+                    : `, requestData: <${step.method} ${step.path} body from source code schemas>`)
                 : "";
-            return (`**#${rank} — GENERATE** | ${testType} | ${s.category} | priority=${item.priority} | ${item.novelty}\n` +
+            return (`**#${rank} — GENERATE** | ${testType} | ${s.category} | ${item.novelty}\n` +
                 `${step.method} ${step.path} → ${step.expectedStatusCode}\n` +
                 `Tool: skyramp_contract_test_generation({ endpointURL: "${endpointURL}", method: "${step.method}"${authRef}${dataParam} })\n` + // contract tests always use full authRef
-                `From source: authScheme (OpenAPI securitySchemes or auth middleware)${isBodyMethod ? "; requestData field shapes" : ""}`);
+                `From source: authScheme (OpenAPI securitySchemes or auth middleware)${isBodyMethod && !(step.requestBody && Object.keys(step.requestBody).length > 0) ? "; requestData field shapes" : ""}`);
         }
         else {
             // integration / e2e / ui — multi-step scenario pipeline
@@ -119,15 +405,25 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
                 const isBodyMethod = ["POST", "PUT", "PATCH"].includes(st.method);
                 let dataParam = "";
                 if (isBodyMethod) {
-                    if (st.bodyMustInclude && st.bodyMustInclude.length > 0) {
+                    if (st.requestBody && Object.keys(st.requestBody).length > 0) {
+                        // Enriched scenario — use actual field values directly
+                        const bodyJson = JSON.stringify(st.requestBody).replace(/"/g, '\\"');
+                        dataParam = `, requestBody: "${bodyJson}"`;
+                    }
+                    else if (st.bodyMustInclude && st.bodyMustInclude.length > 0) {
                         const fields = st.bodyMustInclude.join(", ");
-                        dataParam = `, requestBody: <${st.method} ${st.path} body from source code — MUST include child collection fields: [${fields}]. Chain FK fields (e.g. product_id) from prior POST response IDs. Do NOT omit the collection array or send only metadata/discount fields.>`;
+                        dataParam = `, requestBody: <${st.method} ${st.path} body from source code — MUST include [${fields}]. Read handler source for field names, types, and FK references.>`;
                     }
                     else {
                         dataParam = `, requestBody: <${st.method} ${st.path} body from source code schemas>`;
                     }
                 }
-                return `    { method: "${st.method}", path: "${st.path}", statusCode: ${st.expectedStatusCode}${dataParam} }`;
+                let responseParam = "";
+                if (!isBodyMethod && st.responseBody && Object.keys(st.responseBody).length > 0) {
+                    const resJson = JSON.stringify(st.responseBody).replace(/"/g, '\\"');
+                    responseParam = `, responseBody: "${resJson}"`;
+                }
+                return `    { method: "${st.method}", path: "${st.path}", statusCode: ${st.expectedStatusCode}${dataParam}${responseParam} }`;
             }).join(",\n");
             let destinationHost = s.scenarioName;
             try {
@@ -137,15 +433,21 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
             catch { /* use scenarioName as fallback */ }
             const toolCalls = `  skyramp_batch_scenario_test_generation({ scenarioName: "${s.scenarioName}", destination: "${destinationHost}", baseURL: "${baseUrl}"${scenarioAuthRef}, steps: [\n${batchSteps}\n  ] })`;
             const prereqNote = s.category === "new_endpoint"
-                ? `\nPrerequisite discovery (MANDATORY for new_endpoint): Before executing these tool calls, read the source code for the new endpoint's request body. Look for FK fields (e.g. \`product_id\`, \`user_id\`, \`order_id\`). For each FK field found, prepend a step to the \`steps\` array in \`skyramp_batch_scenario_test_generation\` to create that prerequisite resource first, then chain its \`id\` into the dependent step. If no FK fields exist, proceed with the steps above as-is.`
+                ? `\nPrerequisite discovery (for new_endpoint): Before executing these tool calls, check the endpoint interactions in the analysis above for FK fields (e.g. \`product_id\`, \`user_id\`, \`order_id\`). If not present in the analysis, read the source code for the new endpoint's request body. For each FK field found, prepend a step to the \`steps\` array in \`skyramp_batch_scenario_test_generation\` to create that prerequisite resource first, then chain its \`id\` into the dependent step. If no FK fields exist, proceed with the steps above as-is.`
+                : "";
+            const bugLine = s.bugCatchingTarget
+                ? `Bug to catch: ${s.bugCatchingTarget}\n`
                 : "";
-            return (`**#${rank} — GENERATE** | ${testType} | ${s.category} | priority=${item.priority} | ${item.novelty}\n` +
+            return (`**#${rank} — GENERATE** | ${testType} | ${s.category} | ${item.novelty}\n` +
                 `Scenario: ${s.scenarioName} (${s.steps.length} steps)\n` +
+                bugLine +
                 `${stepLines}\n` +
                 `Tool calls:\n` +
                 `${toolCalls}\n` +
                 `  skyramp_integration_test_generation({ scenarioFile: <use the filePath returned by skyramp_batch_scenario_test_generation above>${authHeaderOnlyRef} })\n` +
-                `From source: requestBody shapes for POST/PUT/PATCH steps; responseBody shapes; authScheme` +
+                (s.source === "agent-enriched"
+                    ? `From source: authScheme (OpenAPI securitySchemes or auth middleware)`
+                    : `From source: requestBody shapes for POST/PUT/PATCH steps; responseBody shapes; authScheme`) +
                 prereqNote);
         }
     }).join("\n\n");
@@ -169,38 +471,45 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
         const target = s.steps.length === 1
             ? `${s.steps[0].method} ${s.steps[0].path} → ${s.steps[0].expectedStatusCode}`
             : `Scenario: ${s.scenarioName} (${s.steps.map(st => `${st.method} ${st.path}`).join(" → ")})`;
-        return `#${rank} [ADDITIONAL] | ${testType} | ${s.category} | priority=${item.priority} | ${item.novelty}\n  ${target}\n  Validates: ${s.description}`;
+        return `#${rank} [ADDITIONAL] | ${testType} | ${s.category} | ${item.novelty}\n  ${target}\n  Validates: ${s.description}`;
     }).join("\n\n");
     const uiSlotLine = needsUISlot ? (() => {
         const rank = maxGen + backendAdditionalItems.length + 1;
         const traceNote = hasTraces
             ? "Use an existing Playwright `.zip` trace from the repo."
             : "Record a trace using `browser_navigate` + `browser_snapshot` + `skyramp_export_zip`, then call `skyramp_ui_test_generation`.";
-        return `\n\n#${rank} [ADDITIONAL] | UI | workflow | priority=HIGH | new\n  Scenario: ui-interaction-for-changed-components (frontend files changed in this diff)\n  Validates: Component-level interaction flow for the changed UI — derive the scenario name and steps from the actual changed frontend files. ${traceNote}`;
+        return `\n\n#${rank} [ADDITIONAL] | UI | workflow | new\n  Scenario: ui-interaction-for-changed-components (frontend files changed in this diff)\n  Validates: Component-level interaction flow for the changed UI — derive the scenario name and steps from the actual changed frontend files. ${traceNote}`;
     })() : "";
     const e2eSlotLine = needsE2ESlot ? (() => {
         const rank = maxGen + backendAdditionalItems.length + (needsUISlot ? 1 : 0) + 1;
         const traceNote = hasTraces
             ? "Call `skyramp_e2e_test_generation` with the discovered trace/recording files."
             : "No traces exist yet — record a backend trace via `skyramp_start_trace_collection` + `skyramp_stop_trace_collection` and a UI trace via Playwright browser tools, then call `skyramp_e2e_test_generation`.";
-        return `\n\n#${rank} [ADDITIONAL] | E2E | workflow | priority=HIGH | new\n  Scenario: e2e-flow-for-changed-feature (frontend + backend files changed in this diff)\n  Validates: Full browser-level flow for the changed UI components end-to-end — derive the scenario name and steps from the actual changed frontend files. ${traceNote}`;
+        return `\n\n#${rank} [ADDITIONAL] | E2E | workflow | new\n  Scenario: e2e-flow-for-changed-feature (frontend + backend files changed in this diff)\n  Validates: Full browser-level flow for the changed UI components end-to-end — derive the scenario name and steps from the actual changed frontend files. ${traceNote}`;
     })() : "";
     const reservedUIGenCount = reserveUIGenSlot ? 1 : 0;
     const supplementCount = topN - generateItems.length - reservedUIGenCount - backendAdditionalItems.length - frontendSlots;
     const supplementNote = supplementCount > 0
-        ? `\n**REQUIRED — You MUST add ${supplementCount} more to reach the total of ${topN}.** Draft them from endpoint interactions and source code patterns not yet covered. Use the same 6-dimension rubric and quality gate to assign priority (HIGH/MEDIUM/LOW), testType, and category.${hasFrontendChanges && !isUIOnlyPR ? " Since this PR has frontend changes, at least 1 of these should be a UI or E2E test targeting the changed components." : ""} Do NOT produce fewer than ${topN} total. Do NOT supplement with tests whose primary endpoint and test type match a GENERATE item — those flows are already covered.`
+        ? `\n**REQUIRED — You MUST add ${supplementCount} more to reach the total of ${topN}.** For each new or changed endpoint in the GENERATE list, first read the diff source code and identify 2–3 boundary or variation scenarios specific to **this PR\'s logic** — e.g. formula parameters (discount math, price calculation), search/filter constraints (matching vs. empty results, missing required param), required field validation, or edge cases visible in the diff. Draft one scenario per variation. Only after exhausting PR-specific variations, add generic patterns (auth boundary → 401, non-existent ID → 404). Use the same 6-dimension rubric and quality gate to assign priority (HIGH/MEDIUM/LOW), testType, and category.${hasFrontendChanges && !isUIOnlyPR ? " Since this PR has frontend changes, at least 1 of these should be a UI or E2E test targeting the changed components." : ""} Do NOT supplement with tests whose primary endpoint and test type match a GENERATE item — those flows are already covered.`
         : "";
+    // ── PR / branch-diff mode: execution plan ────────────────────────────────
     return `## Execution Plan
-Seed: ${seed} | Endpoints: ${endpointCount} | Budget: ${generateItems.length + (reserveUIGenSlot ? 1 : 0)} generate + ${Math.max(topN - generateItems.length - (reserveUIGenSlot ? 1 : 0), 0)} additional = ${topN} total
+Seed: ${seed} | Endpoints: ${endpointCount} | Budget: ${generateItems.length + (reserveUIGenSlot ? 1 : 0) + (needsUIPlaceholder ? 1 : 0)} generate + ${Math.max(topN - generateItems.length - (reserveUIGenSlot ? 1 : 0) - (needsUIPlaceholder ? 1 : 0), 0)} additional = ${topN} total
-**Step 0 — Existing-test cross-check (MANDATORY before executing anything)**
+**Step 0 — Existing-test cross-check (before executing anything)**
 For every GENERATE item below, check its endpoint path and test type against the Existing Tests list (further down in the prompt).
-- **Contract tests**: If an existing contract test already covers that resource path → UPDATE the existing file instead of creating a new one. This does NOT count toward \`newTestsCreated\` — backfill from ADDITIONAL candidates to fill the open ADD slot.
+- **Contract tests**: If an existing contract test already covers that resource path → UPDATE the existing file instead of creating a new one. This does NOT count toward \`newTestsCreated\` — backfill from ADDITIONAL candidates to fill the open ADD slot using this priority order:
+  1. **BUG-CATCHING TESTS FIRST (CRITICAL)**: If source code analysis revealed a bug, logic error, or incorrect formula (e.g. discount math adding instead of subtracting, off-by-one errors, missing validation), CREATE A TEST THAT EXPOSES IT. The test SHOULD FAIL — that's the point. Document the bug. Example: if discount formula is wrong, test with discount=20% and assert correct math. If no bug found, skip to #2.
+  2. **PR-endpoint edge cases**: Look for integration test candidates covering error paths, boundary values, or alternative scenarios for the SAME endpoints changed in the PR diff. If no suitable candidate exists in ADDITIONAL, derive one from your source-code enrichment findings.
+  3. **Same-resource other scenarios**: Other HTTP methods or flows on the same resource group touched by the PR.
+  4. **Cross-resource workflows involving the PR endpoint**: Integration scenarios that include the PR's changed endpoint as one of the steps.
+  5. **Unrelated endpoint coverage (last resort)**: Tests for endpoints with no connection to the PR diff, only when ALL options above have been exhausted or would only produce UPDATEs (not new files).
+  **Avoid backfilling with a test for a completely unrelated resource (e.g. \`POST /reviews\` when the PR only changes \`/orders\`) if any PR-endpoint edge-case integration test is feasible.**
 - **Integration/scenario tests**: Always generate as a new file via the scenario pipeline, even if an existing integration test covers the same resource. A new multi-step scenario is a distinct test. Count it toward \`newTestsCreated\`.
 - **UI tests**: Always generate as a new file. Count toward \`newTestsCreated\`.
-**Step 1 — Source-Code Enrichment (MANDATORY before executing anything)**
-Read the source code for ALL changed files. Look for:
+**Step 1 — Source-Code Enrichment (before executing anything)**
+Read the source code for ALL changed files. Before generating each recommendation, quote the relevant source code in a <source_evidence> block — include the route handler signature, request body schema fields, response shape, and any computed field formulas. Use these quotes to derive tool call parameters. Look for:
 - **Auth middleware** (passport, jwt.verify, authMiddleware, @requires_auth, Depends(get_current_user), @UseGuards, EnsureSessionDep, session middleware) — if found, override \`authHeader\` and \`authScheme\` in scenario and contract tool calls even if workspace.yml says authType: none. Exception: for \`skyramp_integration_test_generation\` with \`scenarioFile\`, omit auth params entirely if workspace has \`api.authType\` set (workspace handles it); if workspace has no \`authType\`, pass \`authHeader\` only.
 - Business rules and formulas (e.g. total_cost = compute * rate + memory * rate)
 - State transitions and domain constraints (e.g. budget cannot drop below current spend)
@@ -225,15 +534,13 @@ Assign category: ${TEST_CATEGORIES.join(" | ")}
 ${buildTestPatternGuidelines()}
-${buildTestExamples()}
 INSERT a source-code-derived candidate into the ranked list **only if ALL three conditions are met**:
 1. Priority is HIGH (it guards a critical boundary or would prevent a production incident)
 2. It is specific to THIS codebase — derived from a concrete business rule, formula, or constraint found in the changed files (not a general pattern that applies to any API)
 3. It is not already covered by a structural candidate in the list below
 If these conditions are not met, add it to ADDITIONAL only — do NOT displace a pre-ranked GENERATE item.
-**CRITICAL-tier items (category: new_endpoint) can NEVER be displaced** — they test the actual endpoints introduced in this PR and must always occupy GENERATE slots.
+**CRITICAL-tier items (category: new_endpoint) should never be displaced** — they test the actual endpoints introduced in this PR and must always occupy GENERATE slots.
 When a qualifying candidate is inserted: place it HIGH before MEDIUM before LOW; within the same priority, source-code-derived candidates go BEFORE structural ones. Re-number ranks after insertion. The top ${backendGenCount} become backend GENERATE items.${reserveUIGenSlot ? " The final GENERATE slot is reserved for a UI test and is not taken from this ranked list." : ""}
@@ -241,7 +548,23 @@ When a qualifying candidate is inserted: place it HIGH before MEDIUM before LOW;
 **Unique constraints:** Unique-constraint scenarios (duplicate POST → expect 409) are pre-drafted for all resources. Before keeping them, check whether the storage backend actually enforces uniqueness — look for SQL \`UNIQUE\` indexes, Mongoose \`unique: true\`, Prisma \`@unique\`, or explicit duplicate-check logic in the source. If the backend is Redis, an in-memory store, or a schema-less DB with no explicit unique constraint in the changed files, move the unique-constraint scenario to ADDITIONAL with a note that enforcement is unconfirmed — do NOT generate it as a GENERATE item.
-**Step 2 — Execute merged plan in rank order**
+**Step 2 — Diversity check (using enriched knowledge from Step 1)**
+Review the GENERATE list and verify that each item exercises a **distinct code path** — not just different input values on the same path.
+**What NOT to do (these are all violations — if you catch yourself doing any of these, STOP and replace one item):**
+- Do NOT generate two integration tests that both send a successful PUT/PATCH to the same endpoint and only differ in the request body values (e.g. 10% discount vs 5% discount vs 100% discount — these are the SAME test with different numbers)
+- Do NOT generate two tests with the same step sequence (e.g. both are POST→PUT→GET or both are POST→PUT) where the only variation is the payload
+- Do NOT count a "boundary value" as a separate test if the code path is identical to the happy path (e.g. discount=100% still returns 200 just like discount=10% — that is the same code path)
+- Do NOT use different scenario names to disguise duplicate tests (e.g. "orders-put-add-items-recalculate" and "orders-put-new-endpoint-happy-path" are duplicates if both POST an order then PUT with items and expect 200)
+**What TO do — each GENERATE item must exercise a different code path. Good diversity means a mix of:**
+- One **happy-path** integration test (the richest scenario: create prerequisites → call the new endpoint → verify computed fields and child collections)
+- One **error-path** test (trigger a distinct HTTP error status: 404 for non-existent resource, 422 for invalid input, 400 for malformed request — pick whichever the source code actually handles)
+- One **state-variation** test (different operation on the same endpoint that hits different logic: empty items array, removing items instead of adding, updating quantity without changing products)
+For each duplicate pair found, keep the richer item and replace the other with a test from a different category above. The replacement still targets the same PR endpoint and counts as a GENERATE item. Move the displaced item to ADDITIONAL.
+**Step 3 — Execute merged plan in rank order**
 Replace any scenario that pairs unrelated resources with one reflecting actual FK relationships in the codebase.
 Use realistic request bodies from source code schemas; verify response data (not just status codes).
@@ -258,34 +581,34 @@ ${buildGenerationRules(isUIOnlyPR)}
 **Never mark a recommendation "blocked":** No OpenAPI spec → use source code for shapes. No traces → provide \`skyramp_start_trace_collection\` instructions. No backend trace → use the scenario pipeline.
-**Critical-category minimum:** At least ${Math.min(MAX_CRITICAL_TESTS, maxGen)} of the ${maxGen} GENERATE items MUST be from HIGH-priority categories (security_boundary, business_rule, data_integrity, breaking_change). The pre-ranked plan below already prioritises this — only override if source-code enrichment reveals a higher-value candidate.
+**Critical-category minimum:** At least ${Math.min(MAX_CRITICAL_TESTS, maxGen)} of the ${maxGen} GENERATE items should be from HIGH-priority categories (security_boundary, business_rule, data_integrity, breaking_change). The pre-ranked plan below already prioritises this — only override if source-code enrichment reveals a higher-value candidate.
-### GENERATE (process these EXACTLY as listed, in order — do NOT reorder or replace any item with a different scenario; if Step 0 converts an item to UPDATE, backfill the ADD slot from ADDITIONAL)
+### GENERATE (process these EXACTLY as listed, in order — after completing Steps 0–2 above; if Step 0 converts an item to UPDATE, backfill the ADD slot from ADDITIONAL following the priority order in Step 0)
-${generateBlocks || "  (no pre-ranked generate items — draft your own based on endpoint analysis)"}${reserveUIGenSlot ? `
+${generateBlocks || (needsUIPlaceholder ? uiPlaceholderBlock : "  (no pre-ranked generate items — draft your own based on endpoint analysis)")}${reserveUIGenSlot ? `
-**#${generateItems.length + 1} — GENERATE** | UI | workflow | priority=HIGH | new
+**#${generateItems.length + 1} — GENERATE** | UI | workflow | new
 Scenario: ui-interaction-for-changed-components (frontend files changed in this diff)
 Record a browser trace for the changed UI components, then generate a UI test.
 Steps: browser_navigate → browser_snapshot → interact with changed components → browser_assert → skyramp_export_zip → skyramp_ui_test_generation
 This slot is RESERVED — you MUST attempt a UI test here. Only skip if browser_navigate fails (app unreachable).` : ""}
-**COMPLIANCE CHECK**: Before proceeding, verify your generate list matches the items above. If you plan to generate a scenario with a DIFFERENT name than what is listed (e.g. you want to generate "order-update-discount-calculation" but the plan says "orders-patch-add-items-recalculate"), STOP — use the plan's scenario name and steps. Add your alternative to ADDITIONAL instead. One retry on failure then skip to next item.
+**COMPLIANCE CHECK**: Before proceeding, verify your generate list matches the items above. If you plan to generate a scenario with a different name than what is listed (e.g. you want to generate "order-update-discount-calculation" but the plan says "orders-patch-add-items-recalculate"), STOP — use the plan's scenario name and steps. Add your alternative to ADDITIONAL instead. One retry on failure then skip to next item.
 ### ADDITIONAL (list in additionalRecommendations in this order after Step 1 insertion)
 ${additionalLines || "  (none pre-ranked)"}${uiSlotLine}${e2eSlotLine}
 ${supplementNote}
-**You MUST produce EXACTLY ${topN} total recommendations: ${generateItems.length + (reserveUIGenSlot ? 1 : 0)} to generate + ${Math.max(topN - generateItems.length - (reserveUIGenSlot ? 1 : 0), 0)} as additionalRecommendations. Do NOT produce fewer. Generate recommendations now.**
+${buildBudgetMandate(topN, generateItems.length + (reserveUIGenSlot ? 1 : 0) + (needsUIPlaceholder ? 1 : 0))}
 ## Recommendation Stability
 - **Carry forward** previous additionalRecommendations that still apply — match by scenarioName (multi-step) or endpoint (single-endpoint). Re-derive category and priority from test content.
 - **Only drop** a previous recommendation if its target endpoint was removed, its business logic changed, or it is now covered by a generated test.
 - **Only add** new recommendations for code paths introduced since the last run.`;
 }
-export function buildRecommendationPrompt(analysis, analysisScope = "full_repo", topN = MAX_RECOMMENDATIONS, prContext, workspaceAuthHeader, workspaceAuthType, maxGenerateOverride) {
-    const isDiffScope = analysisScope === "current_branch_diff";
+export function buildRecommendationPrompt(analysis, analysisScope = AnalysisScope.FullRepo, topN = MAX_RECOMMENDATIONS, prContext, workspaceAuthHeader, workspaceAuthType, maxGenerateOverride, sessionId) {
+    const isDiffScope = isDiff(analysisScope);
     const diffContext = analysis.branchDiffContext;
     const openApiSpec = analysis.artifacts?.openApiSpecs?.[0];
     // ── Filter out bot-generated test files from changedFiles ──
@@ -365,7 +688,7 @@ Modified endpoints:
 ${fmtEps(diffContext.modifiedEndpoints, (m) => `${m.sourceFile}, ${m.changeType}`)}
 Affected services: ${diffContext.affectedServices.join(", ") || "N/A"}
-**CRITICAL**: Focus on tests that validate these changes and how they interact with existing resources.
+Focus on tests that validate these changes and how they interact with existing resources.
 `;
     }
     // ── Interactions ──
@@ -405,7 +728,7 @@ ${detailBlocks}
     // ── Scoring ──
     const endpointCount = allEndpoints.reduce((acc, ep) => acc + (ep.methods ?? []).length, 0);
     const baseMaxGen = Math.min(Math.max(maxGenerateOverride ?? (isDiffScope ? MAX_TESTS_TO_GENERATE : topN), 0), topN);
-    const maxGen = isUIOnlyPR ? (hasTraces ? baseMaxGen : 0) : baseMaxGen;
+    const maxGen = isUIOnlyPR ? Math.max(baseMaxGen, 1) : baseMaxGen;
     const scenarios = analysis.businessContext.draftedScenarios;
     let scored = [];
     let seed = "";
@@ -460,7 +783,7 @@ No backend API changes detected. Generate UI/E2E tests from the available traces
 1. Call \`skyramp_ui_test_generation\` or \`skyramp_e2e_test_generation\` using the trace files
 2. Generate exactly ${maxGen} tests targeting the changed UI flows
-**You MUST produce EXACTLY ${topN} total recommendations: ${maxGen} to generate + ${topN - maxGen} as additionalRecommendations. Do NOT produce fewer. Generate recommendations now.**
+${buildBudgetMandate(topN, maxGen)}
 Do not churn recommendations without cause.
 ` : `
@@ -468,29 +791,31 @@ Do not churn recommendations without cause.
 **Budget: ${maxGen} generate + ${topN - maxGen} additional = ${topN} total**
-No backend API changes detected and no traces available. Do NOT generate or execute any tests.
-All ${topN} recommendations go into additionalRecommendations only.
+No backend API changes detected. Record browser traces now and generate UI tests.
-Draft ${topN} UI and E2E test recommendations covering:
-- Component rendering (correct copy, classes, layout for changed components)
-- User interactions (clicks, form submissions, navigation triggered by changed UI)
-- Empty/error states (if the PR touches empty state or error UI)
-- Cross-browser or responsive behavior (if applicable)
-- Full E2E user journeys that exercise the changed UI flows end-to-end
+**Generate ${maxGen} UI test(s) using browser tools (AI-driven recording):**
+1. Read the changed frontend files to identify which components changed and what interactions they support
+2. \`browser_navigate({ url: "<frontend_url_from_workspace_config>" })\`
+3. Interact with the changed UI components (\`browser_click\`, \`browser_type\`, \`browser_fill_form\`, etc.)
+4. \`browser_snapshot()\` after each interaction that changes the page
+5. \`skyramp_export_zip({ outputPath: "<repositoryPath>/.skyramp/<component>_trace.zip" })\` — use an **absolute** path
+6. \`skyramp_ui_test_generation({ playwrightInput: "<absolute_path_to_zip>" })\`
-**To generate UI/E2E tests** (when ready):
-1. Call \`skyramp_start_trace_collection\` (playwright: true)
-2. Exercise the changed UI flows in the browser
-3. Call \`skyramp_stop_trace_collection\`
-4. Generate with \`skyramp_ui_test_generation\` or \`skyramp_e2e_test_generation\`
-**You MUST produce EXACTLY ${topN} total recommendations in additionalRecommendations. Do NOT produce fewer. Generate recommendations now.**
+${buildBudgetMandate(topN, maxGen)}
 Do not churn recommendations without cause.
 `;
     }
     else if (scored.length > 0) {
-        mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces);
+        const projectType = analysis.projectClassification.projectType;
+        const isFrontendProject = projectType === "full-stack" || projectType === "frontend";
+        const isFrontendOnlyProject = projectType === "frontend";
+        if (!isDiffScope) {
+            mainSection = buildFullRepoRecommendations(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces, isFrontendProject, isFrontendOnlyProject);
+        }
+        else {
+            mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces);
+        }
     }
     else {
         mainSection = `
@@ -502,7 +827,7 @@ No pre-drafted scenarios available. Draft ${maxGen} tests from your analysis of
 For each test: pick the highest-impact endpoint(s), draft a realistic scenario with actual request/response shapes from source code, and execute the same pipeline described in Tool Workflows below.
-**You MUST produce EXACTLY ${topN} total recommendations: ${maxGen} to generate + ${topN - maxGen} as additionalRecommendations. Do NOT produce fewer. Generate recommendations now.**
+${buildBudgetMandate(topN, maxGen)}
 ## Recommendation Stability
 - **Carry forward** previous additionalRecommendations that still apply — match by scenarioName (multi-step) or endpoint (single-endpoint). Re-derive category and priority from test content.
@@ -592,34 +917,64 @@ coverage and to fill gaps:
 ${historyBody}`;
     }
     // ── Compose all sections ──
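+    // Long-context best practice: data sections (repository context, branch diff, endpoint interactions, existing tests, PR history) go at the top; all instructions and the final query go at the bottom.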
+    // See: https://platform.claude.com/docs/en/build-with-claude/prompt-engineering/claude-prompting-best-practices#long-context-prompting
     const scopeNote = isDiffScope
         ? "Scoped to current branch changes."
         : "Covers the full repository.";
     return `
+${buildArchitectPreamble(isDiffScope)}
 ${modePreamble}
 Scope: ${scopeNote}
 ${sourcePriority}
+<repository_context>
 ## Repository Context
 ${repoContext}
 ${specNote}
-${diffSection}
+</repository_context>
+${diffSection ? `<branch_diff>\n${diffSection}\n</branch_diff>` : ""}
+<endpoint_interactions>
 ${interactionSection}
-${mainSection}
-${prHistorySection}
+</endpoint_interactions>
+<existing_tests>
 ## Existing Tests
 - Frameworks: ${analysis.existingTests.frameworks.join(", ") || "none"}
 ${formatTestLocations(analysis.existingTests.testLocations)}
+</existing_tests>
+${prHistorySection ? `<pr_history>\n${prHistorySection}\n</pr_history>` : ""}
+<instructions>
+${buildContextFetchingGuidance(sessionId)}
+${buildReasoningProtocol()}
+${isDiffScope ? buildFewShotExamples() : ""}
-${isUIOnlyPR
-        ? `## How to Generate Tests — Tool Workflows
+${isDiffScope
+        ? (isUIOnlyPR
+            ? `## How to Generate Tests — Tool Workflows
+**For UI tests (AI-driven recording):**
+1. \`browser_navigate({ url: "<frontend_url>" })\`
+2. Interact with changed components (\`browser_click\`, \`browser_type\`, \`browser_fill_form\`, etc.)
+3. \`browser_snapshot()\` after each interaction
+4. \`skyramp_export_zip({ outputPath: "<repositoryPath>/.skyramp/<component>_trace.zip" })\` — absolute path
+5. \`skyramp_ui_test_generation({ playwrightInput: "<absolute_path_to_zip>" })\`
+**For E2E tests:** Same browser recording flow, then call \`skyramp_e2e_test_generation\` with the zip.`
+            : buildToolWorkflows(authHeaderValue, authTypeValue))
+        : ""}
+${mainSection}
-**For UI tests:** \`skyramp_start_trace_collection\` (playwright: true) → perform browser steps → \`skyramp_stop_trace_collection\` → \`skyramp_ui_test_generation\` with the playwright zip.
-**For E2E tests:** Same trace flow, pass both trace file and playwright zip to \`skyramp_e2e_test_generation\`.
-Without traces, list as additionalRecommendations with instructions to record traces first.`
-        : buildToolWorkflows(authHeaderValue, authTypeValue)}
+${isDiffScope ? buildVerificationChecklist(topN, maxGen) : ""}
+</instructions>
 `;
 }