@skyramp/mcp 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. package/build/prompts/test-maintenance/driftAnalysisSections.js +2 -2
  2. package/build/prompts/test-recommendation/analysisOutputPrompt.js +26 -21
  3. package/build/prompts/test-recommendation/recommendationSections.js +42 -10
  4. package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +2 -5
  5. package/build/prompts/test-recommendation/test-recommendation-prompt.js +114 -157
  6. package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +250 -18
  7. package/build/prompts/testbot/testbot-prompts.js +17 -9
  8. package/build/services/ScenarioGenerationService.js +2 -1
  9. package/build/services/TestDiscoveryService.js +22 -7
  10. package/build/services/TestDiscoveryService.test.js +44 -0
  11. package/build/tools/generate-tests/generateBatchScenarioRestTool.js +3 -4
  12. package/build/tools/generate-tests/generateBatchScenarioRestTool.test.js +9 -0
  13. package/build/tools/submitReportTool.js +4 -3
  14. package/build/tools/submitReportTool.test.js +16 -2
  15. package/build/tools/test-management/analyzeChangesTool.js +264 -140
  16. package/build/tools/test-management/analyzeChangesTool.test.js +3 -1
  17. package/build/tools/test-management/analyzeTestHealthTool.js +5 -0
  18. package/build/types/RepositoryAnalysis.js +8 -0
  19. package/build/types/TestRecommendation.js +2 -0
  20. package/build/utils/branchDiff.js +24 -8
  21. package/build/utils/featureFlags.js +25 -0
  22. package/build/utils/httpDefaults.js +12 -0
  23. package/build/utils/repoScanner.js +16 -2
  24. package/build/utils/routeParsers.js +79 -79
  25. package/build/utils/routeParsers.test.js +192 -66
  26. package/build/utils/scenarioDrafting.js +116 -497
  27. package/build/utils/scenarioDrafting.test.js +260 -480
  28. package/package.json +1 -1
@@ -143,8 +143,8 @@ When a diff adds a new HTTP method to a resource, UPDATE covers **all** existing
143
143
 
144
144
  ### PATCH/PUT with child collections (MANDATORY)
145
145
  When updating a contract or integration test for a PATCH or PUT endpoint whose request/response includes a child collection array (e.g. \`items\`, \`products\`, \`line_items\`):
146
- 1. The request body MUST include the child array with at least one item containing the FK field (e.g. \`product_id\`) and a \`quantity\` field.
147
- 2. Assert each item's FK field and \`quantity\` match the sent values.
146
+ 1. The request body MUST include the child array with at least one item containing the Foreign Key field (e.g. \`product_id\`) and a \`quantity\` field.
147
+ 2. Assert each item's Foreign Key field and \`quantity\` match the sent values.
148
148
  3. Assert the top-level computed total (e.g. \`total_amount\`) equals the expected math from the items.
149
149
  A test that only sends/asserts metadata (discount, status, notes) without asserting the items array is INCOMPLETE and will produce false passes even when the items/total logic is broken.
150
150
 
@@ -52,10 +52,10 @@ The ranked test recommendation catalog is pre-built and shown below (after the s
52
52
  **Your only job is to present it.**
53
53
 
54
54
  1. Fill in every \`<…from source>\` placeholder using the field names, computed formulas, and auth details you found in Steps 1–2.
55
- 2. Output the completed catalog **exactly as formatted grouped by test type (### E2E / ### UI / ### Integration / ### Contract)**. Do NOT restructure, reorder, rename sections, or generate a new format.
55
+ 2. Output the completed catalog **exactly as formatted**, preserving whatever test-type section headings are already present in the catalog. Do NOT restructure, reorder, rename sections, invent missing sections, or generate a new format.
56
56
  3. Do NOT call any Skyramp generation tools. The catalog shows ready-to-use tool calls that can be executed on demand.
57
57
 
58
- **If** Steps 1–2 revealed additional scenarios the catalog does not cover (e.g. a computed formula or FK relationship that was missed), you may optionally call \`skyramp_recommend_tests\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\` and \`enrichedScenarios\` to regenerate a more complete catalog — but only after presenting the current one.`;
58
+ **If** Steps 1–2 revealed additional scenarios the catalog does not cover (e.g. a computed formula or Foreign Key relationship that was missed), you may optionally call \`skyramp_recommend_tests\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\` and \`enrichedScenarios\` to regenerate a more complete catalog — but only after presenting the current one.`;
59
59
  const hasJavaFiles = p.candidateRouteFiles?.some(f => /\.(java|kt)$/.test(f)) ?? false;
60
60
  const routeFilesSection = p.candidateRouteFiles && p.candidateRouteFiles.length > 0
61
61
  ? `\nRoute/controller files found by static scan (read these to discover endpoints — the regex-based catalog below may be incomplete for your framework):\n${p.candidateRouteFiles.map(f => `- ${f}`).join("\n")}\n`
@@ -79,19 +79,23 @@ For GET list endpoints: identify query params (\`limit\`, \`offset\`, \`order\`,
79
79
  ${nextStep}`;
80
80
  }
81
81
  const changedFiles = p.parsedDiff?.changedFiles.join(", ") ?? "";
82
- // Whether the regex pre-detected any API endpoints used as a hint only.
83
- // Step 2 always asks the LLM to extract endpoints from the diff so unknown
84
- // frameworks (e.g. Spring class-level @RequestMapping, Django, Rails) are
85
- // covered even when the static regex returns nothing.
86
- const regexFoundEndpoints = p.parsedDiff && (p.parsedDiff.newEndpoints.length > 0 || p.parsedDiff.modifiedEndpoints.length > 0);
82
+ // Whether the scanner found API endpoints in any changed file.
83
+ const preDetectedEndpoints = p.parsedDiff && (p.parsedDiff.newEndpoints.length > 0 || p.parsedDiff.modifiedEndpoints.length > 0 || (p.parsedDiff.removedEndpoints?.length ?? 0) > 0);
87
84
  const diffFiles = p.parsedDiff?.changedFiles ?? [];
88
85
  const isUIOnly = diffFiles.length > 0 &&
89
- !regexFoundEndpoints &&
86
+ !preDetectedEndpoints &&
90
87
  diffFiles.every(f => FRONTEND_EXT.test(f));
91
88
  const diffHasJavaFiles = diffFiles.some(f => /\.(java|kt)$/.test(f));
92
- const diffSection = p.diffContent
93
- ? `\n<diff>\n${p.diffContent}\n</diff>`
94
- : "";
89
+ // Inline small diffs so the LLM sees them without a tool call. Large diffs
90
+ // stay as a temp file reference to avoid bloating the prompt.
91
+ const INLINE_DIFF_LIMIT = 12_000; // chars — roughly 300 lines
92
+ const canInline = p.diffContent && p.diffContent.length <= INLINE_DIFF_LIMIT;
93
+ const diffFileRef = canInline
94
+ ? `\n<diff>\n${p.diffContent}\n</diff>\n`
95
+ + (p.diffFilePath ? `Full diff also available at \`${p.diffFilePath}\`.\n` : "")
96
+ : p.diffFilePath
97
+ ? `\n**Full diff file**: \`${p.diffFilePath}\` — **you MUST read this file before proceeding to Step 2.** It contains the complete unified diff for this PR.\n`
98
+ : "";
95
99
  const step2 = isUIOnly
96
100
  ? `### Step 2: Identify consumed API endpoints and integration status
97
101
  UI-only PR — perform two checks:
@@ -105,26 +109,28 @@ If no production file imports, re-exports, or renders a changed component, mark
105
109
  Exception: if the same PR also adds a route/page file (e.g. under Next.js \`pages/\` or \`app/\`) that imports the component, the route IS the integration point — do NOT mark it as unintegrated.
106
110
  Do NOT apply the unintegrated heuristic to route/entrypoint files themselves — those are always reachable by convention.
107
111
  An unintegrated non-route component has no DOM node in the running app and cannot be browser-tested — it qualifies as a dead-code / unintegrated-component no-surface PR regardless of how complex the component logic is.`
108
- : p.diffContent
109
- ? `### Step 2: Extract new and modified API endpoints from the diff
110
- Read the \`<diff>\` above and identify every new or modified API endpoint — route registrations, handler methods, controller annotations. Then use the **Router Mounting / Nesting** section above to reconstruct the full URL path for each endpoint by chaining all parent router prefixes down to the handler (e.g. a handler in a file with \`prefix="/reviews"\` that is mounted at \`/{product_id}\` under a router mounted at \`/api/v1/products\` → full path \`/api/v1/products/{product_id}/reviews\`).
112
+ : (canInline || p.diffFilePath)
113
+ ? `### Step 2: Extract new, modified, and removed API endpoints from the diff
114
+ ${canInline ? "Read the `<diff>` above" : `Read the diff file at \`${p.diffFilePath}\``} and identify every new or modified API endpoint — route registrations, handler methods, controller annotations. Then use the **Router Mounting / Nesting** section above to reconstruct the full URL path for each endpoint by chaining all parent router prefixes down to the handler (e.g. a handler in a file with \`prefix="/reviews"\` that is mounted at \`/{product_id}\` under a router mounted at \`/api/v1/products\` → full path \`/api/v1/products/{product_id}/reviews\`).
111
115
  ${diffHasJavaFiles ? JAVA_SPRING_NOTE : ""}
112
116
  For each endpoint found: note the HTTP method, full path, and source file.
113
- ${regexFoundEndpoints ? "The static analysis above pre-detected some endpoints — verify and augment with anything it missed." : "The static analysis did not detect endpoints for this framework — rely on the diff to extract them."}
117
+ ${preDetectedEndpoints ? "The endpoint catalog above already lists some changed endpoints — verify and augment with anything it missed." : "No endpoints were pre-detected in the changed files — extract them from the diff."}
118
+ **Also identify removed endpoints**: Look for deleted route annotations (lines starting with \`-\` in the diff) in modified files (files that still exist but had routes deleted). A removed endpoint is a route definition present in the base branch but absent in the current branch. Cross-reference against the scanned endpoint listing below — if a deleted route annotation's endpoint still appears there (e.g. moved to another file), it is NOT removed. Only flag endpoints that are truly gone from the codebase.
114
119
  **CRITICAL — Query params vs body:** For GET endpoints (especially search/filter/list),
115
120
  identify which parameters are URL query params vs request body. Look at framework-specific
116
121
  annotations (FastAPI \`Query()\`, Express \`req.query\`, Spring \`@RequestParam\`, etc.).
117
122
  Pass these as \`queryParams\` (not \`requestBody\`) when generating scenarios.`
118
- : `### Step 2: Extract new and modified API endpoints from source files
123
+ : `### Step 2: Extract new, modified, and removed API endpoints from source files
119
124
  No diff was available — read the changed source files listed above directly to identify new or modified API endpoints. Use the **Router Mounting / Nesting** section to reconstruct full paths.
120
125
  ${diffHasJavaFiles ? JAVA_SPRING_NOTE : ""}
121
- For each endpoint found: note the HTTP method, full path, and source file.`;
126
+ For each endpoint found: note the HTTP method, full path, and source file.
127
+ Also compare against the endpoint catalog to identify any endpoints that appear in the catalog but are no longer present in the source files — these are removed endpoints.`;
122
128
  const criticalPatternStep = `### Step 2.5: Identify critical patterns for test categorization
123
129
  Look for these patterns in model/schema/handler files to inform test recommendations:
124
130
  - **Unique constraints**: \`@unique\`, \`unique: true\`, unique indexes, \`.refine()\` uniqueness checks, \`UNIQUE\` in SQL migrations
125
131
  - **Cascade deletes**: \`ON DELETE CASCADE\`, \`.onDelete("cascade")\`, manual cascade logic in delete handlers
126
132
  - **Permission checks**: auth middleware, ownership guards (\`req.user.id === resource.ownerId\`), role-based access control, \`isOwner\` assertions
127
- - **Breaking changes in diff**: route renames, auth header changes, removed required fields, changed status codes
133
+ - **Breaking changes in diff**: route renames, deleted route definitions (endpoints removed from modified files), auth header changes, removed required fields, changed status codes
128
134
  Tag each finding with its category (security_boundary, business_rule, data_integrity, breaking_change) for the recommendation step.`;
129
135
  const step3Content = useHealthFlow
130
136
  ? `### Step 3: Identify tests at risk of drift
@@ -160,8 +166,7 @@ Call \`skyramp_recommend_tests\` with:
160
166
  return `## Your Task — Enrich & Recommend (PR-scoped)
161
167
 
162
168
  ### Step 1: Read the changed files and diff
163
- ${changedFiles}${diffSection}
164
-
169
+ ${changedFiles}${diffFileRef}
165
170
  ${buildPathResolutionTableStep(p)}${step2}
166
171
 
167
172
  ${criticalPatternStep}
@@ -186,7 +191,7 @@ ${p.routerMountContext.map(f => `- \`${f}\``).join("\n")}`
186
191
  **Session ID**: \`${p.sessionId}\`
187
192
  **Repository**: \`${p.repositoryPath}\`
188
193
  **Analysis Scope**: \`${p.analysisScope}\`
189
- ${isDiffScope ? `**Diff endpoints**: ${(p.parsedDiff?.newEndpoints.length ?? 0) + (p.parsedDiff?.modifiedEndpoints.length ?? 0)}` : `**Pre-scanned endpoints**: ${p.scannedEndpoints.length}`}
194
+ ${isDiffScope ? `**Diff endpoints**: ${(p.parsedDiff?.newEndpoints.length ?? 0) + (p.parsedDiff?.modifiedEndpoints.length ?? 0) + (p.parsedDiff?.removedEndpoints?.length ?? 0)}` : `**Pre-scanned endpoints**: ${p.scannedEndpoints.length}`}
190
195
  ${routerSection}
191
196
  ${enrichment}
192
197
 
@@ -1,4 +1,4 @@
1
- import { isContractConsumerModeEnabled } from "../../utils/featureFlags.js";
1
+ import { isContractConsumerModeEnabled, resolveServiceDetailsRef } from "../../utils/featureFlags.js";
2
2
  import { WorkspaceAuthType, getAuthScheme, isAuthorizationHeaderName, AUTH_MIDDLEWARE_PATTERNS_STR } from "../../utils/workspaceAuth.js";
3
3
  // Cached at module-load — the flag is process-wide and cannot change per call.
4
4
  const CONSUMER_MODE_ENABLED = isContractConsumerModeEnabled();
@@ -42,13 +42,45 @@ Before calling any tool, replace every \`<from source>\` placeholder in the tool
42
42
  }
43
43
  export function buildReasoningProtocol() {
44
44
  return `<reasoning_protocol>
45
+ ## Coverage Reasoning Block (MANDATORY — complete BEFORE your Budget Plan)
46
+
47
+ Before committing to a Budget Plan and test list, produce a <thinking> block that enumerates ALL testable surfaces introduced or affected by this PR. This prevents narrow focus on a single endpoint/method.
48
+
49
+ **For backend-only PRs**, your thinking MUST cover:
50
+ 1. **All HTTP methods affected** — if a new validation/service method is added, trace ALL callers (not just createOne — also updateOne, updateMany, deleteOne). List every HTTP method × endpoint pair.
51
+ 2. **Error paths per method** — for each endpoint-method, what error codes does the source code return? (400, 401, 403, 404, 409, 422). Each distinct error path is a potential test.
52
+ 3. **Cross-service impact** — does the change affect other services that import the modified module? Those endpoints need coverage too.
53
+ 4. **Data migrations** — if a migration exists, can its effect be verified via an API call? (e.g. backfill → GET should return the backfilled value)
54
+
55
+ **For frontend-only PRs**, your thinking MUST cover:
56
+ 1. **Component integration** — which routes render the changed component? Each route is a test target.
57
+ 2. **User interactions** — what actions can a user perform on the changed component? (click, type, select, drag). Each distinct action flow is a test.
58
+ 3. **State variations** — what different states does the component render? (empty, loading, error, populated, edge values)
59
+
60
+ **For mixed (frontend + backend) PRs**, your thinking MUST cover:
61
+ 1. All backend surfaces (methods 1–4 above)
62
+ 2. All frontend surfaces (methods 1–3 above)
63
+ 3. **E2E bridges** — which frontend components call the changed backend endpoints? Those are E2E test candidates that cover both layers in one test.
64
+
65
+ **Output format in your thinking block:**
66
+ \`\`\`
67
+ Testable surfaces:
68
+ - POST /permissions → happy path (201), invalid fields (422), missing collection (400)
69
+ - PATCH /permissions/:id → update with valid fields (200), update with invalid fields (422)
70
+ - GET /items/:collection?aggregate → with allowed fields (200), with forbidden fields (403)
71
+ - UI: permissions field selector → add field, remove field, wildcard toggle
72
+ Total distinct surfaces: N
73
+ \`\`\`
74
+
75
+ Your Budget Plan total MUST be ≥ the number of GENERATE slots and reflect the breadth of surfaces found. If you found 8 distinct surfaces but only budget 3 tests, you are under-covering the PR.
76
+
45
77
  ## Parameter Grounding Rule
46
78
  Before each GENERATE tool call, confirm WHERE each key value comes from:
47
79
 
48
80
  - **requestBody / responseBody fields** → source code schema (Zod, Pydantic, DTO), enriched scenario, or OpenAPI spec. **The generation tool rejects empty \`{}\` request bodies for POST/PUT/PATCH** — read the source schema first if the fields are unknown.
49
81
  - **endpointURL** → workspace \`baseUrl\` + endpoint path (both required — never path alone)
50
82
  - **authHeader / authScheme** → workspace config or OpenAPI \`securitySchemes\`
51
- - **FK path params** → chained from a prior step's response \`id\` field — not hardcoded
83
+ - **Foreign Key path params** → chained from a prior step's response (check the actual field name — it may be \`id\`, \`uuid\`, \`_id\`, or a resource-specific \`*_id\` field). The chaining source can be a response body (POST or GET), a response header (e.g. \`Location\`), or a cookie — not hardcoded
52
84
  - **Names / string values** → realistic; append timestamp suffix to avoid re-run conflicts
53
85
 
54
86
  ## Ranking Rule
@@ -110,11 +142,11 @@ export function buildTestPatternGuidelines() {
110
142
  - **Middleware chains**: If auth/rate-limit/logging middleware exists, test the chain (e.g., rate limit hit → auth still checked → correct error returned)
111
143
  - **N+1 query risk**: If list endpoints join related data (e.g., orders with products), test with large datasets
112
144
  - **State machines**: If resources have status transitions (draft→published→archived), test invalid transitions (e.g., archived→draft should fail)
113
- - **Cascade deletes**: Only recommend after reading source code to confirm which resource holds the FK. The resource with the FK is the child; the one it points to is the parent. Example: if orders.product_id references products, then products is the parent — deleting a product tests whether orders are protected or cascade-deleted. Getting this backwards (treating the child as the parent) produces a nonsensical test.
145
+ - **Cascade deletes**: Only recommend after reading source code to confirm which resource holds the Foreign Key. The resource with the Foreign Key is the child; the one it points to is the parent. Example: if orders.product_id references products, then products is the parent — deleting a product tests whether orders are protected or cascade-deleted. Getting this backwards (treating the child as the parent) produces a nonsensical test.
114
146
  - **Race conditions**: If concurrent writes are possible (inventory deduction, counter increment), test concurrent requests
115
147
  - **Computed fields**: If response contains derived values (total, average, count), verify computation with known inputs (e.g., total_cost = compute_seconds * rate + memory_mb * rate + external_cost)
116
148
  - **Mutation with collection modification**: If PUT/PATCH endpoints accept arrays of child items (e.g., order line items, cart products, invoice entries), test adding/removing items and verify that derived totals (e.g., total_amount, subtotal, item_count) are recalculated correctly. This is the most common source of user-reported bugs — always prioritize it for GENERATE over simple field-update tests.
117
- The PATCH/PUT request body should include the child collection array field(s) defined for that endpoint (e.g., "items" with FK references like "product_id" and a quantity field) chained from prior POST responses. A PATCH that only sends metadata fields (e.g., discount_type, status, notes) without modifying the child collection is NOT a valid mutation-recalc test — it will pass even when the item/total logic is broken. Before writing assertions, inspect the source code or OpenAPI spec to identify (1) the actual child collection field name and its FK/quantity/price sub-fields, and (2) how derived totals are calculated (including any discounts, taxes, or fees). Then assert: the child FK fields match chained IDs, quantities match sent values, and totals match the computation from the source code
149
+ The PATCH/PUT request body should include the child collection array field(s) defined for that endpoint (e.g., "items" with Foreign Key references like "product_id" and a quantity field) chained from prior POST responses. A PATCH that only sends metadata fields (e.g., discount_type, status, notes) without modifying the child collection is NOT a valid mutation-recalc test — it will pass even when the item/total logic is broken. Before writing assertions, inspect the source code or OpenAPI spec to identify (1) the actual child collection field name and its Foreign Key/quantity/price sub-fields, and (2) how derived totals are calculated (including any discounts, taxes, or fees). Then assert: the child Foreign Key fields match chained IDs, quantities match sent values, and totals match the computation from the source code
118
150
  - **Webhook/event side effects**: If endpoints trigger async operations, test that side effects occur (e.g., POST /orders triggers notification)
119
151
  - **Cross-user isolation**: If resources are owned by users, test that user B cannot access/modify user A's resources (GET /users/{other_id}/data → 403 Forbidden)
120
152
  - **Range/boundary invariants**: If business rules cap values (max retries, min balance, discount ≤ subtotal), test the boundary (e.g., set retries to max+1 → expect rejection)
@@ -128,7 +160,7 @@ that step B depends on (e.g., create product → create order referencing that p
128
160
  verify order contains correct product). Single-resource CRUD alone is not an integration test.
129
161
  Use actual field names and values from the source code schema or OpenAPI schema (not \`{}\` or invented field names); verify response data, not just status codes.
130
162
  When a PUT/PATCH updates a resource with child collections (e.g., order items), the request body
131
- MUST include the child array with FK references chained from prior steps — and assertions MUST
163
+ MUST include the child array with Foreign Key references chained from prior steps — and assertions MUST
132
164
  verify the actual child items in the response (product_id, quantity, unit_price), not just
133
165
  top-level metadata like discount or status.
134
166
 
@@ -182,7 +214,7 @@ Before finalizing your output, verify:
182
214
  6. **Real request shapes**: requestBody for POST/PUT/PATCH uses actual field names from source (not \`{}\`). GET search/filter uses \`queryParams\`, not \`requestBody\`.
183
215
  7. **scenarioFile**: \`skyramp_integration_test_generation\` uses the exact \`filePath\` returned by \`skyramp_batch_scenario_test_generation\` — not a guessed or hardcoded filename.
184
216
  8. **bugCatchingTarget**: Every GENERATE integration test that targets a business rule, formula, or constraint has a non-empty \`bugCatchingTarget\`.
185
- 9. **FK chaining**: In multi-step integration tests, path params sourced from a prior step's response (e.g. \`order_id\` from step 1) use \`chainsFrom\` — not hardcoded IDs.
217
+ 9. **Foreign Key chaining**: In multi-step integration tests, path params sourced from a prior step's response (e.g. \`order_id\` from step 1) use \`chainsFrom\` — not hardcoded IDs.
186
218
  10. **Concrete scenario names**: No GENERATE item uses a placeholder name ending in a numeric suffix (e.g. \`ui-test-for-changed-component-1\`, \`ui-test-from-trace-2\`). Derive the name from the actual changed component or flow: if the diff touches \`LinkCard.tsx\`, the scenario name should be \`link-card-pin-toggle\` or \`link-card-edit-description\`, not \`ui-test-for-changed-component-1\`. The changed file list is available above — use it.
187
219
  </verification>`;
188
220
  }
@@ -193,7 +225,7 @@ export function buildFewShotExamples() {
193
225
  **Parameter grounding**:
194
226
  - baseURL: "http://localhost:8000" (workspace api.baseUrl)
195
227
  - steps[0].requestBody fields "name", "price": ProductCreate schema fields (src/models/product.py)
196
- - steps[1].requestBody "product_id": FK to products — chained from step 0 response id
228
+ - steps[1].requestBody "product_id": Foreign Key to products — chained from step 0 response id
197
229
  - steps[1].requestBody "quantity": OrderCreate schema field (src/models/order.py)
198
230
  - responseBody "total_amount": 89.97 = 29.99 × 3 — from order total formula (src/services/order_service.py: total = sum(item.price * item.quantity))
199
231
  - authHeader/authScheme: workspace config (Authorization / Bearer)
@@ -311,7 +343,7 @@ ${authGuidance}
311
343
  **For multi-endpoint workflows (integration tests) — Batch Scenario → Integration pipeline:**
312
344
  1. Call \`skyramp_batch_scenario_test_generation\` with ALL steps in a single call: \`scenarioName\`, \`destination\`,
313
345
  \`baseURL\`, \`${authCallParams}\`, and a \`steps\` array where each element has \`method\`, \`path\`, \`requestBody\` OR \`queryParams\`, \`responseBody\`, \`statusCode\`.
314
- \`statusCode\` is optional — defaults: POST→201, DELETE→204, GET/PUT/PATCH→200. Only override for non-standard codes.
346
+ \`statusCode\` is required — determine the expected status code from the source code for each step.
315
347
  **OpenAPI spec is NOT required.** \`apiSchema\` is OPTIONAL — omit it if no spec exists.
316
348
  **CRITICAL — Query params vs request body:**
317
349
  - For **POST/PUT/PATCH**: use \`requestBody\` with realistic field values from source code schemas.
@@ -351,12 +383,12 @@ ${CONSUMER_MODE_ENABLED ? `**Contract test mode selection — set based on this
351
383
  Only provider-side contract tests are supported. Pass \`providerMode: true\` for new or modified endpoints this codebase owns.`}
352
384
 
353
385
  **For UI tests:**
354
- 1. \`browser_navigate\` to the target URL (from workspace \`api.baseUrl\`)
386
+ 1. \`browser_navigate\` to the target URL (from ${resolveServiceDetailsRef().baseUrlRef})
355
387
  2. \`browser_snapshot\` to see the page (ARIA tree)
356
388
  3. Interact using \`browser_click\`, \`browser_type\`, \`browser_fill_form\`, etc.
357
389
  4. \`browser_snapshot\` after each interaction that changes the page
358
390
  5. \`skyramp_export_zip\` with an **absolute** output path: \`<repositoryPath>/.skyramp/<test_name>_trace.zip\`
359
- 6. \`skyramp_ui_test_generation\` with \`playwrightInput\` = the **absolute** path of the exported zip, and \`outputDir\` = the **frontend** service's \`testDirectory\` from workspace.yml (e.g. \`frontend/tests\`). Do NOT use the backend service's testDirectory — UI tests must go in the frontend service's test directory.
391
+ 6. \`skyramp_ui_test_generation\` with \`playwrightInput\` = the **absolute** path of the exported zip, and \`outputDir\` = ${resolveServiceDetailsRef().frontendTestDirRef} (e.g. \`frontend/tests\`). Do NOT use the backend service's testDirectory — UI tests must go in the frontend service's test directory.
360
392
 
361
393
  Tips: For custom dropdowns (Radix, MUI): click combobox → snapshot → click option (NOT \`browser_select_option\`).
362
394
 
@@ -4,6 +4,7 @@ import { logger } from "../../utils/logger.js";
4
4
  import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
5
5
  import { ScenarioSource, AnalysisScope } from "../../types/RepositoryAnalysis.js";
6
6
  import { SCENARIO_CATEGORIES } from "../../types/TestRecommendation.js";
7
+ import { inferExpectedStatus } from "../../utils/httpDefaults.js";
7
8
  export function mergeEnrichedScenarios(serverScenarios, raw) {
8
9
  const rejectionNotes = [];
9
10
  let parsed;
@@ -54,11 +55,7 @@ export function mergeEnrichedScenarios(serverScenarios, raw) {
54
55
  requestBody: st.requestBody,
55
56
  queryParams: st.queryParams,
56
57
  responseBody: st.responseBody,
57
- // Default status code by method if omitted to avoid `statusCode: undefined` in tool calls
58
- expectedStatusCode: st.expectedStatusCode ??
59
- (String(st.method ?? "").toUpperCase() === "POST" ? 201
60
- : String(st.method ?? "").toUpperCase() === "DELETE" ? 204
61
- : 200),
58
+ expectedStatusCode: st.expectedStatusCode ?? inferExpectedStatus(String(st.method ?? "GET")),
62
59
  expectedResponseFields: st.expectedResponseFields,
63
60
  bodyMustInclude: st.bodyMustInclude,
64
61
  chainsFrom: st.chainsFrom,