@skyramp/mcp 0.0.65 → 0.1.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50):
  1. package/build/playwright/traceRecordingPrompt.js +30 -36
  2. package/build/prompts/architectPersona.js +19 -0
  3. package/build/prompts/test-maintenance/drift-analysis-prompt.js +11 -6
  4. package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +49 -0
  5. package/build/prompts/test-maintenance/driftAnalysisSections.js +4 -2
  6. package/build/prompts/test-recommendation/analysisOutputPrompt.js +42 -50
  7. package/build/prompts/test-recommendation/mergeEnrichedScenarios.test.js +125 -0
  8. package/build/prompts/test-recommendation/recommendationSections.js +121 -4
  9. package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +151 -9
  10. package/build/prompts/test-recommendation/test-recommendation-prompt.js +416 -61
  11. package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +455 -63
  12. package/build/prompts/testbot/testbot-prompts.js +111 -100
  13. package/build/prompts/testbot/testbot-prompts.test.js +142 -0
  14. package/build/resources/analysisResources.js +13 -5
  15. package/build/services/ScenarioGenerationService.js +2 -2
  16. package/build/services/ScenarioGenerationService.test.js +35 -0
  17. package/build/services/TestExecutionService.js +1 -1
  18. package/build/tools/code-refactor/modularizationTool.js +2 -2
  19. package/build/tools/executeSkyrampTestTool.js +4 -3
  20. package/build/tools/generate-tests/generateBatchScenarioRestTool.js +51 -21
  21. package/build/tools/generate-tests/generateContractRestTool.js +26 -4
  22. package/build/tools/generate-tests/generateIntegrationRestTool.js +44 -13
  23. package/build/tools/generate-tests/generateScenarioRestTool.js +17 -39
  24. package/build/tools/generate-tests/generateUIRestTool.js +69 -4
  25. package/build/tools/submitReportTool.js +27 -13
  26. package/build/tools/test-management/analyzeChangesTool.js +32 -10
  27. package/build/tools/test-management/analyzeChangesTool.test.js +85 -0
  28. package/build/types/RepositoryAnalysis.js +25 -3
  29. package/build/types/TestRecommendation.js +5 -4
  30. package/build/types/TestTypes.js +44 -9
  31. package/build/utils/AnalysisStateManager.js +43 -9
  32. package/build/utils/AnalysisStateManager.test.js +35 -0
  33. package/build/utils/routeParsers.js +35 -0
  34. package/build/utils/routeParsers.test.js +66 -1
  35. package/build/utils/scenarioDrafting.js +207 -360
  36. package/build/utils/scenarioDrafting.test.js +191 -256
  37. package/build/utils/trace-parser.js +24 -6
  38. package/build/utils/trace-parser.test.js +140 -0
  39. package/node_modules/playwright/lib/mcp/browser/browserServerBackend.js +3 -0
  40. package/node_modules/playwright/lib/mcp/browser/tab.js +8 -1
  41. package/node_modules/playwright/lib/mcp/browser/tools/keyboard.js +3 -2
  42. package/node_modules/playwright/lib/mcp/browser/tools/navigate.js +1 -1
  43. package/node_modules/playwright/lib/mcp/browser/tools/snapshot.js +4 -4
  44. package/node_modules/playwright/lib/mcp/browser/tools/tabs.js +5 -4
  45. package/node_modules/playwright/lib/mcp/browser/tools/wait.js +1 -1
  46. package/node_modules/playwright/lib/mcp/skyramp/exportTool.js +10 -9
  47. package/node_modules/playwright/lib/mcp/skyramp/traceRecordingBackend.js +304 -7
  48. package/node_modules/playwright/lib/mcp/test/skyRampExport.js +128 -20
  49. package/package.json +2 -2
  50. package/node_modules/playwright/lib/mcp/terminal/help.json +0 -32
@@ -1,6 +1,53 @@
1
1
  export const MAX_TESTS_TO_GENERATE = 3;
2
2
  export const MAX_RECOMMENDATIONS = 20;
3
3
  export const MAX_CRITICAL_TESTS = 3;
4
/**
 * Builds the opening "Skyramp Integration Architect" persona text of a
 * recommendation prompt.
 *
 * @param {boolean} isDiffScope - Truthy selects the branch-diff wording, which
 *   asks for tests to be recommended and generated immediately. Falsy selects
 *   the full-repository wording, which asks for a catalog of ready-to-use tool
 *   calls without invoking any generation tool.
 * @returns {string} The multi-line preamble text.
 */
export function buildArchitectPreamble(isDiffScope) {
  // Wording used when the analysis covers only the current branch diff.
  const diffScopeLines = [
    `You are acting as a Skyramp Integration Architect. You will receive a branch diff — changed endpoints, source code interactions, and existing tests. Your responsibility is to map test intent to the Skyramp generation spec with precision. No guessing — derive all parameters from the codebase, workspace config, and provided context only.`,
    ``,
    `Your task:`,
    `1. **Recommend AND generate tests for this PR's changes** — prioritize tests most likely to catch real production bugs (state machine violations, cross-resource data integrity, computed field errors, security boundary bypasses) over trivial coverage`,
    `2. **Immediately generate the top-priority tests** by calling Skyramp MCP generation tools — only these types: **integration, contract, E2E, UI**. Never smoke. Never fuzz.`,
    `3. **Avoid duplicate coverage** — if an existing test already covers an endpoint + test type, choose a different angle instead`,
    ``,
    `Every tool parameter must trace to a concrete source: repository analysis, source code, or OpenAPI spec. Do not invent field names, request shapes, base URLs, or auth values. If a required value is unknown, read the relevant source file before calling the tool.`,
  ];

  // Wording used when the analysis covers the whole repository.
  const fullRepoLines = [
    `You are acting as a Skyramp Integration Architect. You will receive a full repository analysis — all endpoints, source code interactions, and existing tests. Your responsibility is to map test intent to the Skyramp generation spec with precision. No guessing — derive all parameters from the codebase, workspace config, and provided context only.`,
    ``,
    `Your task:`,
    `1. **Produce a comprehensive test recommendation catalog** grouped by test type (E2E → UI → Integration → Contract), ranked by production risk within each type`,
    `2. **Present each recommendation with a complete, ready-to-use Skyramp tool call** — fill in all field names, endpoint URLs, request shapes, and auth parameters from source code so the catalog is immediately actionable`,
    `3. **Do not call any generation tools** — your output is the catalog itself; it is executed on demand`,
    ``,
    `Replace every \`<…from source>\` placeholder with actual values before presenting. Do not invent values — read the source file if a value is unclear.`,
  ];

  const selectedLines = isDiffScope ? diffScopeLines : fullRepoLines;
  return selectedLines.join("\n");
}
24
/**
 * Builds the `<context_fetching_protocol>` prompt section.
 *
 * @param {string | undefined} sessionId - Used only as a switch: any falsy
 *   value suppresses the section entirely.
 * @returns {string} The section text, or an empty string when `sessionId` is falsy.
 */
export function buildContextFetchingGuidance(sessionId) {
  if (sessionId) {
    const sectionLines = [
      `<context_fetching_protocol>`,
      `## Execution Plan Context`,
      `Before calling any tool, replace every \`<from source>\` placeholder in the tool call parameters with actual values read from the relevant source file (handler, schema, or model). Do not proceed with placeholders still present — every parameter must trace to a concrete source.`,
      `</context_fetching_protocol>`,
    ];
    return sectionLines.join("\n");
  }
  return "";
}
32
/**
 * Builds the static `<reasoning_protocol>` prompt section: a parameter
 * grounding rule followed by a ranking rule for GENERATE items.
 *
 * @returns {string} The multi-line section text.
 */
export function buildReasoningProtocol() {
  const protocolLines = [
    `<reasoning_protocol>`,
    `## Parameter Grounding Rule`,
    `Before each GENERATE tool call, output a brief \`<thinking>\` block stating WHERE the key values come from:`,
    ``,
    `- **requestBody / responseBody fields** → source code schema (Zod, Pydantic, DTO), enriched scenario, or OpenAPI spec — state the file or schema name`,
    `- **endpointURL** → workspace \`baseUrl\` + endpoint path (both required — never path alone)`,
    `- **authHeader / authScheme** → workspace config or OpenAPI \`securitySchemes\``,
    `- **FK path params** → chained from a prior step's response \`id\` field — not hardcoded`,
    `- **Names / string values** → realistic; append timestamp suffix to avoid re-run conflicts`,
    ``,
    `## Ranking Rule`,
    `For each GENERATE item, include one sentence in your output (before the tool calls) stating the specific bug or failure it targets — derived from \`bugCatchingTarget\` or your source-code reading. Example: "Targets: order total miscalculation — total_amount = sum(item.price × item.quantity) should recompute when items array changes."`,
    ``,
    `If \`bugCatchingTarget\` is empty for a GENERATE item, derive it from source code before including the item. A GENERATE slot without a specific bug target belongs in ADDITIONAL.`,
    ``,
    `If a value cannot be sourced, read the relevant source file before calling the tool. Do not proceed with invented values.`,
    `</reasoning_protocol>`,
  ];
  return protocolLines.join("\n");
}
4
51
  function serializeAuthCallParams(params) {
5
52
  const parts = [`authHeader: "${params.authHeader}"`];
6
53
  if (params.authScheme !== undefined) {
@@ -45,7 +92,7 @@ export function buildTestPatternGuidelines() {
45
92
  - **Race conditions**: If concurrent writes are possible (inventory deduction, counter increment), test concurrent requests
46
93
  - **Computed fields**: If response contains derived values (total, average, count), verify computation with known inputs (e.g., total_cost = compute_seconds * rate + memory_mb * rate + external_cost)
47
94
  - **Mutation with collection modification**: If PUT/PATCH endpoints accept arrays of child items (e.g., order line items, cart products, invoice entries), test adding/removing items and verify that derived totals (e.g., total_amount, subtotal, item_count) are recalculated correctly. This is the most common source of user-reported bugs — always prioritize it for GENERATE over simple field-update tests.
48
- **CRITICAL**: The PATCH/PUT request body MUST include the child collection array field(s) defined for that endpoint (e.g., "items" with FK references like "product_id" and a quantity field) chained from prior POST responses. A PATCH that only sends metadata fields (e.g., discount_type, status, notes) without modifying the child collection is NOT a valid mutation-recalc test — it will pass even when the item/total logic is broken. Before writing assertions, inspect the source code or OpenAPI spec to identify (1) the actual child collection field name and its FK/quantity/price sub-fields, and (2) how derived totals are calculated (including any discounts, taxes, or fees). Then assert: the child FK fields match chained IDs, quantities match sent values, and totals match the computation from the source code
95
+ The PATCH/PUT request body should include the child collection array field(s) defined for that endpoint (e.g., "items" with FK references like "product_id" and a quantity field) chained from prior POST responses. A PATCH that only sends metadata fields (e.g., discount_type, status, notes) without modifying the child collection is NOT a valid mutation-recalc test — it will pass even when the item/total logic is broken. Before writing assertions, inspect the source code or OpenAPI spec to identify (1) the actual child collection field name and its FK/quantity/price sub-fields, and (2) how derived totals are calculated (including any discounts, taxes, or fees). Then assert: the child FK fields match chained IDs, quantities match sent values, and totals match the computation from the source code
49
96
  - **Webhook/event side effects**: If endpoints trigger async operations, test that side effects occur (e.g., POST /orders triggers notification)
50
97
  - **Cross-user isolation**: If resources are owned by users, test that user B cannot access/modify user A's resources (GET /users/{other_id}/data → 403 Forbidden)
51
98
  - **Range/boundary invariants**: If business rules cap values (max retries, min balance, discount ≤ subtotal), test the boundary (e.g., set retries to max+1 → expect rejection)
@@ -118,6 +165,72 @@ export function buildTestExamples() {
118
165
  - Single-resource CRUD with no cross-resource or state verification
119
166
  - POST with missing field → 422 (obvious validation, covered by contract tests)`;
120
167
  }
168
/**
 * Builds the `<verification>` checklist appended to a recommendation prompt.
 *
 * The first item states how the total splits into GENERATE and ADDITIONAL
 * recommendations. When `maxGen` exceeds `topN`, the GENERATE count is capped
 * at `topN` so the text never shows a negative ADDITIONAL count or more
 * GENERATE items than the stated total. For every other input the output is
 * the same as before the cap was introduced.
 *
 * @param {number} topN - Total number of recommendations requested.
 * @param {number} maxGen - Maximum number of recommendations to generate immediately.
 * @returns {string} The multi-line checklist text.
 */
export function buildVerificationChecklist(topN, maxGen) {
  // Only clamp in the one inconsistent case; any other input is passed through untouched.
  const generateCount = maxGen > topN ? topN : maxGen;
  const additionalCount = topN - generateCount;
  return `<verification>
Before finalizing your output, verify:
1. **Count**: Total recommendation count equals exactly ${topN} (${generateCount} GENERATE + ${additionalCount} ADDITIONAL). Not fewer.
2. **Distinct paths**: Each GENERATE item targets a distinct code path — no two share the same HTTP method + endpoint + expected status.
3. **Auth parameters are consistent** across all tool calls (same authHeader and authScheme).
4. Every endpointURL includes both the base URL and the path (not just the base, e.g. \`http://host/api/v1/orders/{id}\`).
5. **All \`<from source>\` placeholders** you received have been replaced with actual values derived from source code — no \`<...>\` remain in your output.
6. **Real request shapes**: requestBody for POST/PUT/PATCH uses actual field names from source (not \`{}\`). GET search/filter uses \`queryParams\`, not \`requestBody\`.
7. **scenarioFile**: \`skyramp_integration_test_generation\` uses the exact \`filePath\` returned by \`skyramp_batch_scenario_test_generation\` — not a guessed or hardcoded filename.
8. **bugCatchingTarget**: Every GENERATE integration test that targets a business rule, formula, or constraint has a non-empty \`bugCatchingTarget\`.
9. **FK chaining**: In multi-step integration tests, path params sourced from a prior step's response (e.g. \`order_id\` from step 1) use \`chainsFrom\` — not hardcoded IDs.
</verification>`;
}
182
/**
 * Builds the static `<examples>` prompt section containing three worked
 * recommendations: an integration GENERATE item, a contract GENERATE item,
 * and an ADDITIONAL item.
 *
 * @returns {string} The multi-line examples text.
 */
export function buildFewShotExamples() {
  // NOTE(review): `\"` inside a template literal evaluates to a bare `"`, so the
  // rendered requestBody/responseBody examples contain unescaped nested quotes.
  // Confirm whether escaped quotes were intended in the emitted prompt.
  const exampleLines = [
    `<examples>`,
    `<example index="1" type="integration_recommendation">`,
    `<thinking>`,
    `**Parameter grounding**:`,
    `- baseURL: "http://localhost:8000" (workspace api.baseUrl)`,
    `- steps[0].requestBody fields "name", "price": ProductCreate schema fields (src/models/product.py)`,
    `- steps[1].requestBody "product_id": FK to products — chained from step 0 response id`,
    `- steps[1].requestBody "quantity": OrderCreate schema field (src/models/order.py)`,
    `- responseBody "total_amount": 89.97 = 29.99 × 3 — from order total formula (src/services/order_service.py: total = sum(item.price * item.quantity))`,
    `- authHeader/authScheme: workspace config (Authorization / Bearer)`,
    `</thinking>`,
    ``,
    `**#1 — GENERATE** | integration | business_rule | new`,
    `Scenario: orders-create-with-product-total-calc (3 steps)`,
    `1. POST /api/v1/products → 201: Create product with known price`,
    `2. POST /api/v1/orders → 201: Create order referencing product_id from step 1, quantity=3`,
    `3. GET /api/v1/orders/{order_id} → 200: Verify total_amount = 29.99 × 3 = 89.97`,
    `bugCatchingTarget: "total_amount = sum(item.price × item.quantity) — wrong if multiplication is skipped or items list is ignored"`,
    `Tool calls:`,
    `skyramp_batch_scenario_test_generation({ scenarioName: "orders-create-with-product-total-calc", destination: "localhost", baseURL: "http://localhost:8000", authHeader: "Authorization", authScheme: "Bearer", steps: [`,
    `{ method: "POST", path: "/api/v1/products", statusCode: 201, requestBody: "{\"name\": \"Widget-1713000000\", \"price\": 29.99}" },`,
    `{ method: "POST", path: "/api/v1/orders", statusCode: 201, requestBody: "{\"product_id\": \"chained\", \"quantity\": 3}" },`,
    `{ method: "GET", path: "/api/v1/orders/{order_id}", statusCode: 200, responseBody: "{\"id\": \"chained\", \"total_amount\": 89.97, \"items\": [{\"product_id\": \"chained\", \"quantity\": 3, \"unit_price\": 29.99}]}" }`,
    `] })`,
    `skyramp_integration_test_generation({ scenarioFile: "<filePath returned by skyramp_batch_scenario_test_generation above>" })`,
    `Reasoning: Catches a broken total calculation before it ships — the most common source of order-related bug reports.`,
    `</example>`,
    ``,
    `<example index="2" type="contract_recommendation">`,
    `<thinking>`,
    `**Parameter grounding**:`,
    `- endpointURL: "http://localhost:8000/api/v1/products/{product_id}" (workspace baseUrl + path from endpoint listing)`,
    `- method: "DELETE" (route definition, uppercase)`,
    `- pathParams "product_id=<random-uuid-v4>": hardcoded non-existent ID — no setup step needed for a 404 test; use a fresh UUID v4, not all-zeros`,
    `- authHeader/authScheme: workspace config (Authorization / Bearer)`,
    `- No requestData — DELETE carries no body`,
    `</thinking>`,
    ``,
    `**#2 — GENERATE** | contract | error_handling | new`,
    `DELETE /api/v1/products/{product_id} → 404`,
    `Tool: skyramp_contract_test_generation({ endpointURL: "http://localhost:8000/api/v1/products/{product_id}", method: "DELETE", authHeader: "Authorization", authScheme: "Bearer", pathParams: "product_id=<random-uuid-v4>" })`,
    `Reasoning: Catches a missing 404 guard on DELETE — verifies the handler returns 404 for non-existent resources rather than a 500 or silent no-op.`,
    `</example>`,
    ``,
    `<example index="3" type="additional_recommendation">`,
    `#5 [ADDITIONAL] | integration | security_boundary | existing`,
    `Scenario: orders-unauthorized-cross-user-access (POST /api/v1/orders → GET /api/v1/orders/{order_id} as different user → 403)`,
    `Validates: Cross-user isolation — user B cannot read user A's orders.`,
    `</example>`,
    `</examples>`,
  ];
  return exampleLines.join("\n");
}
121
234
  export function buildToolWorkflows(authHeaderValue, authTypeValue = "") {
122
235
  const isAuthorizationHeader = /^authorization$/i.test(authHeaderValue);
123
236
  const noAuth = !authHeaderValue;
@@ -177,6 +290,10 @@ To skip auth for unauthenticated endpoints, pass \`authHeader: ""\`.`;
177
290
  : `**Auth params:** \`${authCallParams}\` — pass to EVERY tool call below.`;
178
291
  return `## How to Generate Tests — Tool Workflows
179
292
 
293
+ **Contract**: The following tool signatures are strict technical contracts. Every parameter should match the schema exactly. Omit optional parameters rather than guessing values. If a required field cannot be resolved, fetch context first.
294
+
295
+ **Before every tool call**: Output a <thinking> block justifying the mapping of intent to endpoint to tool parameters. See Mandatory Reasoning Protocol above.
296
+
180
297
  ${authHeaderLine}
181
298
  ${authGuidance}
182
299
 
@@ -189,13 +306,13 @@ ${authGuidance}
189
306
  **CRITICAL — Query params vs request body:**
190
307
  - For **POST/PUT/PATCH**: use \`requestBody\` with realistic field values from source code schemas.
191
308
  - For **GET/DELETE with search/filter/pagination**: use \`queryParams\` (JSON string, e.g., \`{"q": "bear", "limit": 10}\`).
192
- NEVER put query parameters in \`requestBody\` for GET requests — GET request bodies are non-standard and may be ignored or rejected.
309
+ Do not put query parameters in \`requestBody\` for GET requests — GET request bodies are non-standard and may be ignored or rejected.
193
310
  - For **GET by ID**: no \`requestBody\` or \`queryParams\` needed — the ID is in the path.
194
311
  \`responseBody\` should match the actual API response shape from source code (including all fields
195
312
  returned by the controller — e.g., \`id\`, \`ownerId\`, \`createdAt\`, included relations like \`collection\`, \`tags\`).
196
313
  Wrap in \`{"response": ...}\` if the API uses an envelope pattern. If omitted, a synthetic response is generated.
197
314
  Inspect the source code to determine the correct request AND response body shapes — avoid sending \`{}\`.
198
- **CRITICAL for PATCH/PUT mutation-recalc scenarios:** The request body MUST include the child
315
+ **For PATCH/PUT mutation-recalc scenarios:** The request body should include the child
199
316
  collection array (e.g. \`"items": [{"product_id": <chained from prior POST>, "quantity": 2}]\`).
200
317
  Never send a PATCH that only modifies metadata (discount, status) without also including the
201
318
  items/products collection — such a test will not catch collection-level or total-recalculation bugs.
@@ -206,7 +323,7 @@ ${authGuidance}
206
323
  Do NOT pass \`chainingKey\` — defaults to \`response.id\`. After generation, the testbot
207
324
  will verify and fix path param chaining in the generated test.
208
325
 
209
- **For single-endpoint tests (contract/fuzz):**
326
+ **For single-endpoint tests (contract):**
210
327
  \`skyramp_{type}_test_generation\` with \`endpointURL\` (full URL incl. base + path), \`method\`,
211
328
  \`${authCallParams}\`, and \`requestData\` from source code schemas.
212
329
  If an OpenAPI spec exists, ALSO pass \`apiSchema\` — it enables schema-aware validation
@@ -1,11 +1,102 @@
1
1
  import { z } from "zod";
2
- import { StateManager, } from "../../utils/AnalysisStateManager.js";
2
+ import { StateManager, hasSessionData, getSessionData, } from "../../utils/AnalysisStateManager.js";
3
3
  import { logger } from "../../utils/logger.js";
4
4
  import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
5
+ import { getPersonaPrefix } from "../architectPersona.js";
6
+ import { ScenarioSource, AnalysisScope } from "../../types/RepositoryAnalysis.js";
7
+ import { SCENARIO_CATEGORIES } from "../../types/TestRecommendation.js";
8
/**
 * Status code assumed for a step that does not declare `expectedStatusCode`.
 *
 * @param {string} method - Upper-cased HTTP method.
 * @returns {number} 201 for POST, 204 for DELETE, 200 otherwise.
 */
function defaultStatusCodeForMethod(method) {
  switch (method) {
    case "POST":
      return 201;
    case "DELETE":
      return 204;
    default:
      return 200;
  }
}

/**
 * Fills in defaults for one agent-supplied step.
 *
 * @param {object} step - Raw step object taken from the parsed JSON.
 * @param {number} index - Zero-based position of the step, used for the default `order`.
 * @returns {object} Step with `order`, `method`, `path`, `description`,
 *   `interactionType` and `expectedStatusCode` always populated.
 */
function normalizeEnrichedStep(step, index) {
  // Normalise once so the default description and status code agree with the
  // method/path actually stored on the step.
  const method = String(step.method ?? "GET").toUpperCase();
  const path = String(step.path ?? "/");
  return {
    order: step.order ?? index + 1,
    method,
    path,
    description: step.description ?? `${method} ${path}`,
    interactionType: step.interactionType ?? "success",
    requestBody: step.requestBody,
    queryParams: step.queryParams,
    responseBody: step.responseBody,
    // Default status code by method if omitted to avoid `statusCode: undefined` in tool calls
    expectedStatusCode: step.expectedStatusCode ?? defaultStatusCodeForMethod(method),
    expectedResponseFields: step.expectedResponseFields,
    bodyMustInclude: step.bodyMustInclude,
    chainsFrom: step.chainsFrom,
  };
}

/**
 * Merges agent-drafted scenarios (a JSON array string) into the server-side
 * scenario list.
 *
 * Each agent scenario needs a `scenarioName`, a non-empty `steps` array of
 * objects, and a `category` listed in `SCENARIO_CATEGORIES`. Scenarios that
 * fail a check are skipped and described in `rejectionNotes`. Accepted
 * scenarios replace server scenarios with the same `scenarioName`; new names
 * are appended after the server scenarios.
 *
 * @param {Array<object>} serverScenarios - Scenarios drafted server-side.
 * @param {string} raw - JSON text expected to contain an array of scenarios.
 * @returns {{scenarios: Array<object>, rejectionNotes: string[]}} The merged
 *   list plus one note per rejected input. When nothing is accepted,
 *   `scenarios` is the `serverScenarios` array itself.
 */
export function mergeEnrichedScenarios(serverScenarios, raw) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  }
  catch {
    logger.warning("enrichedScenarios: invalid JSON — using server-side scenarios only");
    return { scenarios: serverScenarios, rejectionNotes: ["enrichedScenarios: invalid JSON — all scenarios skipped"] };
  }
  if (!Array.isArray(parsed)) {
    // Log here as well so both unusable-input paths are visible in the logs.
    logger.warning("enrichedScenarios: not a JSON array — using server-side scenarios only");
    return { scenarios: serverScenarios, rejectionNotes: ["enrichedScenarios: expected a JSON array, got " + typeof parsed] };
  }

  const rejectionNotes = [];
  const agentScenarios = [];
  for (const s of parsed) {
    const name = s?.scenarioName ? String(s.scenarioName) : null;
    const label = name ? `"${name}"` : "(unnamed)";
    if (!name) {
      rejectionNotes.push(`rejected ${label}: missing scenarioName`);
      continue;
    }
    if (!Array.isArray(s?.steps) || s.steps.length === 0) {
      rejectionNotes.push(`rejected ${label}: missing or empty steps array`);
      continue;
    }
    if (!s?.category) {
      rejectionNotes.push(`rejected ${label}: missing category`);
      continue;
    }
    if (!SCENARIO_CATEGORIES.includes(s.category)) {
      rejectionNotes.push(`rejected ${label}: unknown category "${s.category}" — valid: ${SCENARIO_CATEGORIES.join(", ")}`);
      continue;
    }
    // A null or primitive step would otherwise throw (null) or silently become
    // a bogus "GET /" step, so reject the scenario instead.
    const badStepIndex = s.steps.findIndex((st) => st === null || typeof st !== "object" || Array.isArray(st));
    if (badStepIndex !== -1) {
      rejectionNotes.push(`rejected ${label}: step ${badStepIndex + 1} is not an object`);
      continue;
    }
    agentScenarios.push({
      scenarioName: name,
      description: s.description ?? "",
      category: s.category,
      priority: s.priority ?? "high",
      bugCatchingTarget: s.bugCatchingTarget,
      testType: s.testType,
      steps: s.steps.map((st, idx) => normalizeEnrichedStep(st, idx)),
      chainingKeys: s.chainingKeys ?? [],
      requiresAuth: s.requiresAuth ?? true,
      estimatedComplexity: s.estimatedComplexity ?? "moderate",
      source: ScenarioSource.AgentEnriched,
    });
  }

  if (agentScenarios.length === 0) {
    return { scenarios: serverScenarios, rejectionNotes };
  }

  // Map#set on an existing key keeps its original position, so an overridden
  // server scenario stays where it was in the list.
  const merged = new Map(serverScenarios.map((s) => [s.scenarioName, s]));
  for (const s of agentScenarios) {
    merged.set(s.scenarioName, s);
  }
  logger.info("Merged agent-enriched scenarios", {
    server: serverScenarios.length,
    agent: agentScenarios.length,
    total: merged.size,
    rejected: rejectionNotes.length,
  });
  return { scenarios: Array.from(merged.values()), rejectionNotes };
}
5
88
  export function registerRecommendTestsPrompt(server) {
6
89
  server.registerPrompt("skyramp_recommend_tests", {
7
- description: "Generate ranked test recommendations from a test-management analysis. " +
8
- "Provide a stateFile path from skyramp_analyze_changes.",
90
+ description: getPersonaPrefix() +
91
+ "Given the repository analysis in stateFile, produce ranked test recommendations split into " +
92
+ "GENERATE (call generation tools immediately) and ADDITIONAL (deferred, describe only).\n\n" +
93
+ "**Output contract:** Every GENERATE integration test targeting a business rule or formula " +
94
+ "MUST include a non-empty bugCatchingTarget. Parameters for generation tools must derive " +
95
+ "from the repository analysis, enrichedScenarios, or source code you read — no invented " +
96
+ "field names, no guessed URLs.\n\n" +
97
+ "**Ranking:** Prioritize business_rule and security_boundary over crud. Within a category, " +
98
+ "prefer tests that catch specific formulas, constraints, or state transitions over generic " +
99
+ "happy-path coverage. Provide a stateFile path from skyramp_analyze_changes.",
9
100
  argsSchema: {
10
101
  stateFile: z
11
102
  .string()
@@ -17,28 +108,79 @@ export function registerRecommendTestsPrompt(server) {
17
108
  .default(10)
18
109
  .optional()
19
110
  .describe("Maximum number of ranked recommendations to return (default: 10)"),
111
+ enrichedScenarios: z
112
+ .string()
113
+ .optional()
114
+ .refine((val) => {
115
+ if (val === undefined || val === "")
116
+ return true;
117
+ try {
118
+ const parsed = JSON.parse(val);
119
+ return Array.isArray(parsed);
120
+ }
121
+ catch {
122
+ return false;
123
+ }
124
+ }, { message: "enrichedScenarios must be a valid JSON array string (e.g. '[{\"scenarioName\":\"...\"}]')" })
125
+ .describe("JSON array of agent-drafted scenarios (DraftedScenario[]). Each must have " +
126
+ "scenarioName, category, and steps (array with method, path, order). " +
127
+ "Agent scenarios override server-side ones by scenarioName and are prioritized in ranking."),
20
128
  },
21
129
  }, async (args) => {
22
130
  const stateFile = args.stateFile;
23
131
  if (!stateFile) {
24
132
  throw new Error("stateFile is required");
25
133
  }
134
+ // Try in-memory session store first (fullAnalysis no longer on disk).
135
+ // Extract sessionId from the disk state, then look up the full analysis
136
+ // from process memory — same pattern as analysisResources.ts.
26
137
  const mgr = StateManager.fromStatePath(stateFile);
27
138
  if (!mgr.exists()) {
28
139
  throw new Error(`State file "${stateFile}" not found. Run skyramp_analyze_changes first.`);
29
140
  }
30
141
  const fullState = await mgr.readFullState();
31
142
  const state = fullState ?? null;
32
- if (!state?.repositoryAnalysis?.fullAnalysis) {
143
+ if (!state?.repositoryAnalysis) {
33
144
  throw new Error(`State file "${stateFile}" has no analysis data. Re-run skyramp_analyze_changes.`);
34
145
  }
35
- const { fullAnalysis, sessionId, wsAuthHeader, wsAuthType } = state.repositoryAnalysis;
146
+ const { sessionId, wsAuthHeader, wsAuthType } = state.repositoryAnalysis;
36
147
  const repositoryPath = fullState?.metadata?.repositoryPath ?? "";
148
+ // Resolve fullAnalysis: memory first, disk fallback for backward compat
149
+ let fullAnalysis;
150
+ if (sessionId && hasSessionData(sessionId)) {
151
+ const memData = getSessionData(sessionId);
152
+ if (memData?.analysis) {
153
+ fullAnalysis = memData.analysis;
154
+ logger.debug("Loaded fullAnalysis from process memory", { sessionId });
155
+ }
156
+ }
157
+ if (!fullAnalysis) {
158
+ fullAnalysis = state.repositoryAnalysis.fullAnalysis;
159
+ }
160
+ if (!fullAnalysis) {
161
+ throw new Error(`Analysis data for session not found in memory or on disk. Re-run skyramp_analyze_changes.`);
162
+ }
37
163
  const analysisScope = state.analysisScope === "branch_diff"
38
- ? "current_branch_diff"
39
- : "full_repo";
164
+ ? AnalysisScope.CurrentBranchDiff
165
+ : AnalysisScope.FullRepo;
40
166
  const effectiveTopN = args.topN;
41
- const prompt = buildRecommendationPrompt(fullAnalysis, analysisScope, effectiveTopN, undefined, wsAuthHeader, wsAuthType);
167
+ const enrichedRaw = args.enrichedScenarios;
168
+ let mergedAnalysis = fullAnalysis;
169
+ let rejectionWarning = "";
170
+ if (enrichedRaw) {
171
+ const { scenarios, rejectionNotes } = mergeEnrichedScenarios(fullAnalysis.businessContext.draftedScenarios ?? [], enrichedRaw);
172
+ mergedAnalysis = {
173
+ ...fullAnalysis,
174
+ businessContext: {
175
+ ...fullAnalysis.businessContext,
176
+ draftedScenarios: scenarios,
177
+ },
178
+ };
179
+ if (rejectionNotes.length > 0) {
180
+ rejectionWarning = `\n⚠️ enrichedScenarios — ${rejectionNotes.length} scenario(s) rejected and not used in recommendations:\n${rejectionNotes.map(n => ` - ${n}`).join("\n")}\nFix and re-call skyramp_recommend_tests to incorporate corrections.\n`;
181
+ }
182
+ }
183
+ const prompt = buildRecommendationPrompt(mergedAnalysis, analysisScope, effectiveTopN, undefined, wsAuthHeader, wsAuthType, undefined, sessionId);
42
184
  logger.info("Serving recommendation prompt via MCP Prompt", {
43
185
  stateFile,
44
186
  analysisScope,
@@ -52,7 +194,7 @@ export function registerRecommendTestsPrompt(server) {
52
194
  role: "user",
53
195
  content: {
54
196
  type: "text",
55
- text: `State file: ${stateFile}\nRepository: ${repositoryPath}\nScope: ${analysisScope}\n${resourceLinks}\n${prompt}`,
197
+ text: `State file: ${stateFile}\nRepository: ${repositoryPath}\nScope: ${analysisScope}\n${resourceLinks}${rejectionWarning}\n${prompt}`,
56
198
  },
57
199
  },
58
200
  ],