npm - @skyramp/mcp - Versions diffs - 0.1.2 → 0.1.4 - Mend

@skyramp/mcp 0.1.2 → 0.1.4

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (28)

package/build/prompts/test-maintenance/driftAnalysisSections.js +2 -2
package/build/prompts/test-recommendation/analysisOutputPrompt.js +26 -21
package/build/prompts/test-recommendation/recommendationSections.js +42 -10
package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +2 -5
package/build/prompts/test-recommendation/test-recommendation-prompt.js +114 -157
package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +250 -18
package/build/prompts/testbot/testbot-prompts.js +17 -9
package/build/services/ScenarioGenerationService.js +2 -1
package/build/services/TestDiscoveryService.js +22 -7
package/build/services/TestDiscoveryService.test.js +44 -0
package/build/tools/generate-tests/generateBatchScenarioRestTool.js +3 -4
package/build/tools/generate-tests/generateBatchScenarioRestTool.test.js +9 -0
package/build/tools/submitReportTool.js +4 -3
package/build/tools/submitReportTool.test.js +16 -2
package/build/tools/test-management/analyzeChangesTool.js +264 -140
package/build/tools/test-management/analyzeChangesTool.test.js +3 -1
package/build/tools/test-management/analyzeTestHealthTool.js +5 -0
package/build/types/RepositoryAnalysis.js +8 -0
package/build/types/TestRecommendation.js +2 -0
package/build/utils/branchDiff.js +24 -8
package/build/utils/featureFlags.js +25 -0
package/build/utils/httpDefaults.js +12 -0
package/build/utils/repoScanner.js +16 -2
package/build/utils/routeParsers.js +79 -79
package/build/utils/routeParsers.test.js +192 -66
package/build/utils/scenarioDrafting.js +116 -497
package/build/utils/scenarioDrafting.test.js +260 -480
package/package.json +1 -1

package/build/prompts/test-maintenance/driftAnalysisSections.js CHANGED Viewed

@@ -143,8 +143,8 @@ When a diff adds a new HTTP method to a resource, UPDATE covers **all** existing
 ### PATCH/PUT with child collections (MANDATORY)
 When updating a contract or integration test for a PATCH or PUT endpoint whose request/response includes a child collection array (e.g. \`items\`, \`products\`, \`line_items\`):
-1. The request body MUST include the child array with at least one item containing the FK field (e.g. \`product_id\`) and a \`quantity\` field.
-2. Assert each item's FK field and \`quantity\` match the sent values.
+1. The request body MUST include the child array with at least one item containing the Foreign Key field (e.g. \`product_id\`) and a \`quantity\` field.
+2. Assert each item's Foreign Key field and \`quantity\` match the sent values.
 3. Assert the top-level computed total (e.g. \`total_amount\`) equals the expected math from the items.
 A test that only sends/asserts metadata (discount, status, notes) without asserting the items array is INCOMPLETE and will produce false passes even when the items/total logic is broken.

package/build/prompts/test-recommendation/analysisOutputPrompt.js CHANGED Viewed

@@ -52,10 +52,10 @@ The ranked test recommendation catalog is pre-built and shown below (after the s
 **Your only job is to present it.**
 1. Fill in every \`<…from source>\` placeholder using the field names, computed formulas, and auth details you found in Steps 1–2.
-2. Output the completed catalog **exactly as formatted — grouped by test type (### E2E / ### UI / ### Integration / ### Contract)**. Do NOT restructure, reorder, rename sections, or generate a new format.
+2. Output the completed catalog **exactly as formatted**, preserving whatever test-type section headings are already present in the catalog. Do NOT restructure, reorder, rename sections, invent missing sections, or generate a new format.
 3. Do NOT call any Skyramp generation tools. The catalog shows ready-to-use tool calls that can be executed on demand.
-**If** Steps 1–2 revealed additional scenarios the catalog does not cover (e.g. a computed formula or FK relationship that was missed), you may optionally call \`skyramp_recommend_tests\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\` and \`enrichedScenarios\` to regenerate a more complete catalog — but only after presenting the current one.`;
+**If** Steps 1–2 revealed additional scenarios the catalog does not cover (e.g. a computed formula or Foreign Key relationship that was missed), you may optionally call \`skyramp_recommend_tests\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\` and \`enrichedScenarios\` to regenerate a more complete catalog — but only after presenting the current one.`;
         const hasJavaFiles = p.candidateRouteFiles?.some(f => /\.(java|kt)$/.test(f)) ?? false;
         const routeFilesSection = p.candidateRouteFiles && p.candidateRouteFiles.length > 0
             ? `\nRoute/controller files found by static scan (read these to discover endpoints — the regex-based catalog below may be incomplete for your framework):\n${p.candidateRouteFiles.map(f => `- ${f}`).join("\n")}\n`
@@ -79,19 +79,23 @@ For GET list endpoints: identify query params (\`limit\`, \`offset\`, \`order\`,
 ${nextStep}`;
     }
     const changedFiles = p.parsedDiff?.changedFiles.join(", ") ?? "";
-    // Whether the regex pre-detected any API endpoints — used as a hint only.
-    // Step 2 always asks the LLM to extract endpoints from the diff so unknown
-    // frameworks (e.g. Spring class-level @RequestMapping, Django, Rails) are
-    // covered even when the static regex returns nothing.
-    const regexFoundEndpoints = p.parsedDiff && (p.parsedDiff.newEndpoints.length > 0 || p.parsedDiff.modifiedEndpoints.length > 0);
+    // Whether the scanner found API endpoints in any changed file.
+    const preDetectedEndpoints = p.parsedDiff && (p.parsedDiff.newEndpoints.length > 0 || p.parsedDiff.modifiedEndpoints.length > 0 || (p.parsedDiff.removedEndpoints?.length ?? 0) > 0);
     const diffFiles = p.parsedDiff?.changedFiles ?? [];
     const isUIOnly = diffFiles.length > 0 &&
-        !regexFoundEndpoints &&
+        !preDetectedEndpoints &&
         diffFiles.every(f => FRONTEND_EXT.test(f));
     const diffHasJavaFiles = diffFiles.some(f => /\.(java|kt)$/.test(f));
-    const diffSection = p.diffContent
-        ? `\n<diff>\n${p.diffContent}\n</diff>`
-        : "";
+    // Inline small diffs so the LLM sees them without a tool call. Large diffs
+    // stay as a temp file reference to avoid bloating the prompt.
+    const INLINE_DIFF_LIMIT = 12_000; // chars — roughly 300 lines
+    const canInline = p.diffContent && p.diffContent.length <= INLINE_DIFF_LIMIT;
+    const diffFileRef = canInline
+        ? `\n<diff>\n${p.diffContent}\n</diff>\n`
+            + (p.diffFilePath ? `Full diff also available at \`${p.diffFilePath}\`.\n` : "")
+        : p.diffFilePath
+            ? `\n**Full diff file**: \`${p.diffFilePath}\` — **you MUST read this file before proceeding to Step 2.** It contains the complete unified diff for this PR.\n`
+            : "";
     const step2 = isUIOnly
         ? `### Step 2: Identify consumed API endpoints and integration status
 UI-only PR — perform two checks:
@@ -105,26 +109,28 @@ If no production file imports, re-exports, or renders a changed component, mark
 Exception: if the same PR also adds a route/page file (e.g. under Next.js \`pages/\` or \`app/\`) that imports the component, the route IS the integration point — do NOT mark it as unintegrated.
 Do NOT apply the unintegrated heuristic to route/entrypoint files themselves — those are always reachable by convention.
 An unintegrated non-route component has no DOM node in the running app and cannot be browser-tested — it qualifies as a dead-code / unintegrated-component no-surface PR regardless of how complex the component logic is.`
-        : p.diffContent
-            ? `### Step 2: Extract new and modified API endpoints from the diff
-Read the \`<diff>\` above and identify every new or modified API endpoint — route registrations, handler methods, controller annotations. Then use the **Router Mounting / Nesting** section above to reconstruct the full URL path for each endpoint by chaining all parent router prefixes down to the handler (e.g. a handler in a file with \`prefix="/reviews"\` that is mounted at \`/{product_id}\` under a router mounted at \`/api/v1/products\` → full path \`/api/v1/products/{product_id}/reviews\`).
+        : (canInline || p.diffFilePath)
+            ? `### Step 2: Extract new, modified, and removed API endpoints from the diff
+${canInline ? "Read the `<diff>` above" : `Read the diff file at \`${p.diffFilePath}\``} and identify every new or modified API endpoint — route registrations, handler methods, controller annotations. Then use the **Router Mounting / Nesting** section above to reconstruct the full URL path for each endpoint by chaining all parent router prefixes down to the handler (e.g. a handler in a file with \`prefix="/reviews"\` that is mounted at \`/{product_id}\` under a router mounted at \`/api/v1/products\` → full path \`/api/v1/products/{product_id}/reviews\`).
 ${diffHasJavaFiles ? JAVA_SPRING_NOTE : ""}
 For each endpoint found: note the HTTP method, full path, and source file.
-${regexFoundEndpoints ? "The static analysis above pre-detected some endpoints — verify and augment with anything it missed." : "The static analysis did not detect endpoints for this framework — rely on the diff to extract them."}
+${preDetectedEndpoints ? "The endpoint catalog above already lists some changed endpoints — verify and augment with anything it missed." : "No endpoints were pre-detected in the changed files — extract them from the diff."}
+**Also identify removed endpoints**: Look for deleted route annotations (lines starting with \`-\` in the diff) in modified files (files that still exist but had routes deleted). A removed endpoint is a route definition present in the base branch but absent in the current branch. Cross-reference against the scanned endpoint listing below — if a deleted route annotation's endpoint still appears there (e.g. moved to another file), it is NOT removed. Only flag endpoints that are truly gone from the codebase.
 **CRITICAL — Query params vs body:** For GET endpoints (especially search/filter/list),
 identify which parameters are URL query params vs request body. Look at framework-specific
 annotations (FastAPI \`Query()\`, Express \`req.query\`, Spring \`@RequestParam\`, etc.).
 Pass these as \`queryParams\` (not \`requestBody\`) when generating scenarios.`
-            : `### Step 2: Extract new and modified API endpoints from source files
+            : `### Step 2: Extract new, modified, and removed API endpoints from source files
 No diff was available — read the changed source files listed above directly to identify new or modified API endpoints. Use the **Router Mounting / Nesting** section to reconstruct full paths.
 ${diffHasJavaFiles ? JAVA_SPRING_NOTE : ""}
-For each endpoint found: note the HTTP method, full path, and source file.`;
+For each endpoint found: note the HTTP method, full path, and source file.
+Also compare against the endpoint catalog to identify any endpoints that appear in the catalog but are no longer present in the source files — these are removed endpoints.`;
     const criticalPatternStep = `### Step 2.5: Identify critical patterns for test categorization
 Look for these patterns in model/schema/handler files to inform test recommendations:
 - **Unique constraints**: \`@unique\`, \`unique: true\`, unique indexes, \`.refine()\` uniqueness checks, \`UNIQUE\` in SQL migrations
 - **Cascade deletes**: \`ON DELETE CASCADE\`, \`.onDelete("cascade")\`, manual cascade logic in delete handlers
 - **Permission checks**: auth middleware, ownership guards (\`req.user.id === resource.ownerId\`), role-based access control, \`isOwner\` assertions
-- **Breaking changes in diff**: route renames, auth header changes, removed required fields, changed status codes
+- **Breaking changes in diff**: route renames, deleted route definitions (endpoints removed from modified files), auth header changes, removed required fields, changed status codes
 Tag each finding with its category (security_boundary, business_rule, data_integrity, breaking_change) for the recommendation step.`;
     const step3Content = useHealthFlow
         ? `### Step 3: Identify tests at risk of drift
@@ -160,8 +166,7 @@ Call \`skyramp_recommend_tests\` with:
     return `## Your Task — Enrich & Recommend (PR-scoped)
 ### Step 1: Read the changed files and diff
-${changedFiles}${diffSection}
+${changedFiles}${diffFileRef}
 ${buildPathResolutionTableStep(p)}${step2}
 ${criticalPatternStep}
@@ -186,7 +191,7 @@ ${p.routerMountContext.map(f => `- \`${f}\``).join("\n")}`
 **Session ID**: \`${p.sessionId}\`
 **Repository**: \`${p.repositoryPath}\`
 **Analysis Scope**: \`${p.analysisScope}\`
-${isDiffScope ? `**Diff endpoints**: ${(p.parsedDiff?.newEndpoints.length ?? 0) + (p.parsedDiff?.modifiedEndpoints.length ?? 0)}` : `**Pre-scanned endpoints**: ${p.scannedEndpoints.length}`}
+${isDiffScope ? `**Diff endpoints**: ${(p.parsedDiff?.newEndpoints.length ?? 0) + (p.parsedDiff?.modifiedEndpoints.length ?? 0) + (p.parsedDiff?.removedEndpoints?.length ?? 0)}` : `**Pre-scanned endpoints**: ${p.scannedEndpoints.length}`}
 ${routerSection}
 ${enrichment}

package/build/prompts/test-recommendation/recommendationSections.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { isContractConsumerModeEnabled } from "../../utils/featureFlags.js";
+import { isContractConsumerModeEnabled, resolveServiceDetailsRef } from "../../utils/featureFlags.js";
 import { WorkspaceAuthType, getAuthScheme, isAuthorizationHeaderName, AUTH_MIDDLEWARE_PATTERNS_STR } from "../../utils/workspaceAuth.js";
 // Cached at module-load — the flag is process-wide and cannot change per call.
 const CONSUMER_MODE_ENABLED = isContractConsumerModeEnabled();
@@ -42,13 +42,45 @@ Before calling any tool, replace every \`<from source>\` placeholder in the tool
 }
 export function buildReasoningProtocol() {
     return `<reasoning_protocol>
+## Coverage Reasoning Block (MANDATORY — complete BEFORE your Budget Plan)
+Before committing to a Budget Plan and test list, produce a <thinking> block that enumerates ALL testable surfaces introduced or affected by this PR. This prevents narrow focus on a single endpoint/method.
+**For backend-only PRs**, your thinking MUST cover:
+1. **All HTTP methods affected** — if a new validation/service method is added, trace ALL callers (not just createOne — also updateOne, updateMany, deleteOne). List every HTTP method × endpoint pair.
+2. **Error paths per method** — for each endpoint-method, what error codes does the source code return? (400, 401, 403, 404, 409, 422). Each distinct error path is a potential test.
+3. **Cross-service impact** — does the change affect other services that import the modified module? Those endpoints need coverage too.
+4. **Data migrations** — if a migration exists, can its effect be verified via an API call? (e.g. backfill → GET should return the backfilled value)
+**For frontend-only PRs**, your thinking MUST cover:
+1. **Component integration** — which routes render the changed component? Each route is a test target.
+2. **User interactions** — what actions can a user perform on the changed component? (click, type, select, drag). Each distinct action flow is a test.
+3. **State variations** — what different states does the component render? (empty, loading, error, populated, edge values)
+**For mixed (frontend + backend) PRs**, your thinking MUST cover:
+1. All backend surfaces (methods 1–4 above)
+2. All frontend surfaces (methods 1–3 above)
+3. **E2E bridges** — which frontend components call the changed backend endpoints? Those are E2E test candidates that cover both layers in one test.
+**Output format in your thinking block:**
+\`\`\`
+Testable surfaces:
+- POST /permissions → happy path (201), invalid fields (422), missing collection (400)
+- PATCH /permissions/:id → update with valid fields (200), update with invalid fields (422)
+- GET /items/:collection?aggregate → with allowed fields (200), with forbidden fields (403)
+- UI: permissions field selector → add field, remove field, wildcard toggle
+Total distinct surfaces: N
+\`\`\`
+Your Budget Plan total MUST be ≥ the number of GENERATE slots and reflect the breadth of surfaces found. If you found 8 distinct surfaces but only budget 3 tests, you are under-covering the PR.
 ## Parameter Grounding Rule
 Before each GENERATE tool call, confirm WHERE each key value comes from:
 - **requestBody / responseBody fields** → source code schema (Zod, Pydantic, DTO), enriched scenario, or OpenAPI spec. **The generation tool rejects empty \`{}\` request bodies for POST/PUT/PATCH** — read the source schema first if the fields are unknown.
 - **endpointURL** → workspace \`baseUrl\` + endpoint path (both required — never path alone)
 - **authHeader / authScheme** → workspace config or OpenAPI \`securitySchemes\`
-- **FK path params** → chained from a prior step's response \`id\` field — not hardcoded
+- **Foreign Key path params** → chained from a prior step's response (check the actual field name — it may be \`id\`, \`uuid\`, \`_id\`, or a resource-specific \`*_id\` field). The chaining source can be a response body (POST or GET), a response header (e.g. \`Location\`), or a cookie — not hardcoded
 - **Names / string values** → realistic; append timestamp suffix to avoid re-run conflicts
 ## Ranking Rule
@@ -110,11 +142,11 @@ export function buildTestPatternGuidelines() {
 - **Middleware chains**: If auth/rate-limit/logging middleware exists, test the chain (e.g., rate limit hit → auth still checked → correct error returned)
 - **N+1 query risk**: If list endpoints join related data (e.g., orders with products), test with large datasets
 - **State machines**: If resources have status transitions (draft→published→archived), test invalid transitions (e.g., archived→draft should fail)
-- **Cascade deletes**: Only recommend after reading source code to confirm which resource holds the FK. The resource with the FK is the child; the one it points to is the parent. Example: if orders.product_id references products, then products is the parent — deleting a product tests whether orders are protected or cascade-deleted. Getting this backwards (treating the child as the parent) produces a nonsensical test.
+- **Cascade deletes**: Only recommend after reading source code to confirm which resource holds the Foreign Key. The resource with the Foreign Key is the child; the one it points to is the parent. Example: if orders.product_id references products, then products is the parent — deleting a product tests whether orders are protected or cascade-deleted. Getting this backwards (treating the child as the parent) produces a nonsensical test.
 - **Race conditions**: If concurrent writes are possible (inventory deduction, counter increment), test concurrent requests
 - **Computed fields**: If response contains derived values (total, average, count), verify computation with known inputs (e.g., total_cost = compute_seconds * rate + memory_mb * rate + external_cost)
 - **Mutation with collection modification**: If PUT/PATCH endpoints accept arrays of child items (e.g., order line items, cart products, invoice entries), test adding/removing items and verify that derived totals (e.g., total_amount, subtotal, item_count) are recalculated correctly. This is the most common source of user-reported bugs — always prioritize it for GENERATE over simple field-update tests.
-    The PATCH/PUT request body should include the child collection array field(s) defined for that endpoint (e.g., "items" with FK references like "product_id" and a quantity field) chained from prior POST responses. A PATCH that only sends metadata fields (e.g., discount_type, status, notes) without modifying the child collection is NOT a valid mutation-recalc test — it will pass even when the item/total logic is broken. Before writing assertions, inspect the source code or OpenAPI spec to identify (1) the actual child collection field name and its FK/quantity/price sub-fields, and (2) how derived totals are calculated (including any discounts, taxes, or fees). Then assert: the child FK fields match chained IDs, quantities match sent values, and totals match the computation from the source code
+    The PATCH/PUT request body should include the child collection array field(s) defined for that endpoint (e.g., "items" with Foreign Key references like "product_id" and a quantity field) chained from prior POST responses. A PATCH that only sends metadata fields (e.g., discount_type, status, notes) without modifying the child collection is NOT a valid mutation-recalc test — it will pass even when the item/total logic is broken. Before writing assertions, inspect the source code or OpenAPI spec to identify (1) the actual child collection field name and its Foreign Key/quantity/price sub-fields, and (2) how derived totals are calculated (including any discounts, taxes, or fees). Then assert: the child Foreign Key fields match chained IDs, quantities match sent values, and totals match the computation from the source code
 - **Webhook/event side effects**: If endpoints trigger async operations, test that side effects occur (e.g., POST /orders triggers notification)
 - **Cross-user isolation**: If resources are owned by users, test that user B cannot access/modify user A's resources (GET /users/{other_id}/data → 403 Forbidden)
 - **Range/boundary invariants**: If business rules cap values (max retries, min balance, discount ≤ subtotal), test the boundary (e.g., set retries to max+1 → expect rejection)
@@ -128,7 +160,7 @@ that step B depends on (e.g., create product → create order referencing that p
 verify order contains correct product). Single-resource CRUD alone is not an integration test.
 Use actual field names and values from the source code schema or OpenAPI schema (not \`{}\` or invented field names); verify response data, not just status codes.
 When a PUT/PATCH updates a resource with child collections (e.g., order items), the request body
-MUST include the child array with FK references chained from prior steps — and assertions MUST
+MUST include the child array with Foreign Key references chained from prior steps — and assertions MUST
 verify the actual child items in the response (product_id, quantity, unit_price), not just
 top-level metadata like discount or status.
@@ -182,7 +214,7 @@ Before finalizing your output, verify:
 6. **Real request shapes**: requestBody for POST/PUT/PATCH uses actual field names from source (not \`{}\`). GET search/filter uses \`queryParams\`, not \`requestBody\`.
 7. **scenarioFile**: \`skyramp_integration_test_generation\` uses the exact \`filePath\` returned by \`skyramp_batch_scenario_test_generation\` — not a guessed or hardcoded filename.
 8. **bugCatchingTarget**: Every GENERATE integration test that targets a business rule, formula, or constraint has a non-empty \`bugCatchingTarget\`.
-9. **FK chaining**: In multi-step integration tests, path params sourced from a prior step's response (e.g. \`order_id\` from step 1) use \`chainsFrom\` — not hardcoded IDs.
+9. **Foreign Key chaining**: In multi-step integration tests, path params sourced from a prior step's response (e.g. \`order_id\` from step 1) use \`chainsFrom\` — not hardcoded IDs.
 10. **Concrete scenario names**: No GENERATE item uses a placeholder name ending in a numeric suffix (e.g. \`ui-test-for-changed-component-1\`, \`ui-test-from-trace-2\`). Derive the name from the actual changed component or flow: if the diff touches \`LinkCard.tsx\`, the scenario name should be \`link-card-pin-toggle\` or \`link-card-edit-description\`, not \`ui-test-for-changed-component-1\`. The changed file list is available above — use it.
 </verification>`;
 }
@@ -193,7 +225,7 @@ export function buildFewShotExamples() {
 **Parameter grounding**:
 - baseURL: "http://localhost:8000" (workspace api.baseUrl)
 - steps[0].requestBody fields "name", "price": ProductCreate schema fields (src/models/product.py)
-- steps[1].requestBody "product_id": FK to products — chained from step 0 response id
+- steps[1].requestBody "product_id": Foreign Key to products — chained from step 0 response id
 - steps[1].requestBody "quantity": OrderCreate schema field (src/models/order.py)
 - responseBody "total_amount": 89.97 = 29.99 × 3 — from order total formula (src/services/order_service.py: total = sum(item.price * item.quantity))
 - authHeader/authScheme: workspace config (Authorization / Bearer)
@@ -311,7 +343,7 @@ ${authGuidance}
 **For multi-endpoint workflows (integration tests) — Batch Scenario → Integration pipeline:**
 1. Call \`skyramp_batch_scenario_test_generation\` with ALL steps in a single call: \`scenarioName\`, \`destination\`,
    \`baseURL\`, \`${authCallParams}\`, and a \`steps\` array where each element has \`method\`, \`path\`, \`requestBody\` OR \`queryParams\`, \`responseBody\`, \`statusCode\`.
-   \`statusCode\` is optional — defaults: POST→201, DELETE→204, GET/PUT/PATCH→200. Only override for non-standard codes.
+   \`statusCode\` is required — determine the expected status code from the source code for each step.
    **OpenAPI spec is NOT required.** \`apiSchema\` is OPTIONAL — omit it if no spec exists.
    **CRITICAL — Query params vs request body:**
    - For **POST/PUT/PATCH**: use \`requestBody\` with realistic field values from source code schemas.
@@ -351,12 +383,12 @@ ${CONSUMER_MODE_ENABLED ? `**Contract test mode selection — set based on this
 Only provider-side contract tests are supported. Pass \`providerMode: true\` for new or modified endpoints this codebase owns.`}
 **For UI tests:**
-1. \`browser_navigate\` to the target URL (from workspace \`api.baseUrl\`)
+1. \`browser_navigate\` to the target URL (from ${resolveServiceDetailsRef().baseUrlRef})
 2. \`browser_snapshot\` to see the page (ARIA tree)
 3. Interact using \`browser_click\`, \`browser_type\`, \`browser_fill_form\`, etc.
 4. \`browser_snapshot\` after each interaction that changes the page
 5. \`skyramp_export_zip\` with an **absolute** output path: \`<repositoryPath>/.skyramp/<test_name>_trace.zip\`
-6. \`skyramp_ui_test_generation\` with \`playwrightInput\` = the **absolute** path of the exported zip, and \`outputDir\` = the **frontend** service's \`testDirectory\` from workspace.yml (e.g. \`frontend/tests\`). Do NOT use the backend service's testDirectory — UI tests must go in the frontend service's test directory.
+6. \`skyramp_ui_test_generation\` with \`playwrightInput\` = the **absolute** path of the exported zip, and \`outputDir\` = ${resolveServiceDetailsRef().frontendTestDirRef} (e.g. \`frontend/tests\`). Do NOT use the backend service's testDirectory — UI tests must go in the frontend service's test directory.
 Tips: For custom dropdowns (Radix, MUI): click combobox → snapshot → click option (NOT \`browser_select_option\`).

package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { logger } from "../../utils/logger.js";
 import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
 import { ScenarioSource, AnalysisScope } from "../../types/RepositoryAnalysis.js";
 import { SCENARIO_CATEGORIES } from "../../types/TestRecommendation.js";
+import { inferExpectedStatus } from "../../utils/httpDefaults.js";
 export function mergeEnrichedScenarios(serverScenarios, raw) {
     const rejectionNotes = [];
     let parsed;
@@ -54,11 +55,7 @@ export function mergeEnrichedScenarios(serverScenarios, raw) {
                 requestBody: st.requestBody,
                 queryParams: st.queryParams,
                 responseBody: st.responseBody,
-                // Default status code by method if omitted to avoid `statusCode: undefined` in tool calls
-                expectedStatusCode: st.expectedStatusCode ??
-                    (String(st.method ?? "").toUpperCase() === "POST" ? 201
-                        : String(st.method ?? "").toUpperCase() === "DELETE" ? 204
-                            : 200),
+                expectedStatusCode: st.expectedStatusCode ?? inferExpectedStatus(String(st.method ?? "GET")),
                 expectedResponseFields: st.expectedResponseFields,
                 bodyMustInclude: st.bodyMustInclude,
                 chainsFrom: st.chainsFrom,