@skyramp/mcp 0.1.0-rc.1 → 0.1.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,27 +1,32 @@
1
+ import { AnalysisScope } from "../../types/RepositoryAnalysis.js";
1
2
  function buildEnrichmentInstructions(p) {
2
- const isDiffScope = p.analysisScope === "current_branch_diff";
3
+ const isDiffScope = p.analysisScope === AnalysisScope.CurrentBranchDiff;
3
4
  const useHealthFlow = p.nextTool === "skyramp_analyze_test_health";
4
5
  if (!isDiffScope) {
5
6
  const nextStep = useHealthFlow
6
7
  ? `### Step 3: Identify tests at risk of drift
7
8
  Call \`skyramp_analyze_test_health\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\``
8
- : `### Step 3: Call recommend tests
9
- Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
10
- return `## Your Task — Enrich & Recommend (full repo)
9
+ : `### Step 3: Present the catalog
10
+ The ranked test recommendation catalog is pre-built and shown below (after the separator line).
11
+
12
+ **Your only job is to present it.**
13
+
14
+ 1. Fill in every \`<…from source>\` placeholder using the field names, computed formulas, and auth details you found in Steps 1–2.
15
+ 2. Output the completed catalog **exactly as formatted — grouped by test type (### E2E / ### UI / ### Integration / ### Contract)**. Do NOT restructure, reorder, rename sections, or generate a new format.
16
+ 3. Do NOT call any Skyramp generation tools. The catalog shows ready-to-use tool calls that can be executed on demand.
17
+
18
+ **If** Steps 1–2 revealed additional scenarios the catalog does not cover (e.g. a computed formula or FK relationship that was missed), you may optionally call \`skyramp_recommend_tests\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\` and \`enrichedScenarios\` to regenerate a more complete catalog — but only after presenting the current one.`;
19
+ return `## Your Task — Fill in and Present the Catalog (full repo)
11
20
 
12
21
  ### Step 1: Read key files
13
- Read \`package.json\` / \`requirements.txt\`, \`docker-compose.yml\`, route/controller files,
14
- and model/schema files (Zod schemas, Pydantic models, TypeScript interfaces, DTOs)
15
- to understand the tech stack, endpoint shapes, auth mechanisms, and request/response schemas.
16
-
17
- ### Step 2: Identify resource relationships and parameter locations
18
- Map how endpoints relate to each other which POST creates resources consumed by other endpoints?
19
- **Resolve nested/sub-router paths** from the Router Mounting section above.
20
- **CRITICAL Distinguish query params vs request body:** For each endpoint, determine whether
21
- parameters are sent as URL query params (typical for GET search/filter/list) or request body
22
- (typical for POST/PUT/PATCH). Look at FastAPI \`Query()\` annotations, Express \`req.query\` usage,
23
- Spring \`@RequestParam\`, Flask \`request.args\`, etc. Populate \`queryParams\` in interactions
24
- for GET endpoints that accept search/filter/pagination parameters.
22
+ Read route/controller files and model/schema files (Pydantic models, Zod schemas, DTOs)
23
+ to find: required request body fields, computed response fields and formulas, auth middleware type, storage backend, and how sub-routers are mounted (cross-check against Router Mounting section above).
24
+
25
+ ### Step 2: Map cross-resource relationships and resolve endpoint paths
26
+ (Distinct from Step 1 — Step 1 reads individual schemas; Step 2 maps how endpoints relate to each other.)
27
+ For each endpoint: which POST creates resources consumed by other endpoints?
28
+ **Resolve nested paths** from the Router Mounting section — a router mounted at \`/products/{product_id}/reviews\` means \`GET /\` in that file is actually \`GET /api/v1/products/{product_id}/reviews\`.
29
+ For GET list endpoints: identify query params (\`limit\`, \`offset\`, \`order\`, \`orderBy\`) from framework annotations (FastAPI \`Query()\`, Express \`req.query\`, etc.).
25
30
 
26
31
  ${nextStep}`;
27
32
  }
@@ -67,8 +72,20 @@ Draft multi-step scenarios simulating realistic user workflows:
67
72
  response data verification, actual field names for chaining.
68
73
  **Parameter placement:** GET search/filter endpoints MUST use \`queryParams\`, not \`requestBody\`.
69
74
 
75
+ **No duplicate scenarios.** Each scenario must cover a distinct code path (unique method + path + expected status). Do NOT draft two scenarios that differ only in request body values but hit the same code path (e.g. discount=10% vs discount=25% — both succeed with 200, same logic). A negative-case variant with a different expected status (e.g. discount=-10% → 422) IS a distinct scenario — use a single-step contract test for it (see below).
76
+
77
+ **For each new or modified endpoint, ensure at least one error-path scenario is drafted** — a single-step contract test that triggers a specific error (404 for a missing resource ID, 422 for an invalid field value) that the source code explicitly handles. One auth-boundary scenario (missing auth → 401/403) is enough across all endpoints — do not repeat it per endpoint.
78
+
79
+ **For every scenario you draft, fill \`bugCatchingTarget\`** with the specific formula, constraint, or failure mode the test is designed to expose. Examples:
80
+ - \`"discount formula: total_amount = subtotal * (1 - discount_value / 100) — wrong if addition is used instead of subtraction"\`
81
+ - \`"items not recalculated after PATCH — total_amount stays at old value if collection update is ignored"\`
82
+ - \`"missing 404 guard on resource ID — returns 500 instead of 404 for unknown IDs"\`
83
+ This field is used at test generation time to compute exact assertion values. Leave it empty only if no specific formula or constraint applies.
84
+
70
85
  ### Step 4: Call recommend tests
71
- Call \`skyramp_recommend_tests\` with \`sessionId: "${p.sessionId}"\``;
86
+ Call \`skyramp_recommend_tests\` with:
87
+ - \`stateFile: "${p.stateFile}"\`
88
+ - \`enrichedScenarios\`: (optional) JSON array of your Step 3 scenarios — see the tool's inputSchema for the exact shape. Your enriched scenarios override server-side ones with the same \`scenarioName\` and are prioritized in ranking. Omit if you drafted nothing in Step 3.`;
72
89
  return `## Your Task — Enrich & Recommend (PR-scoped)
73
90
 
74
91
  ### Step 1: Read the changed files
@@ -81,39 +98,19 @@ ${criticalPatternStep}
81
98
  ${step3Content}`;
82
99
  }
83
100
  export function buildAnalysisOutputText(p) {
84
- const isDiffScope = p.analysisScope === "current_branch_diff";
85
- const diffSection = p.parsedDiff
101
+ const isDiffScope = p.analysisScope === AnalysisScope.CurrentBranchDiff;
102
+ // Router mounting context is unique to this prompt (not in recommendationPrompt).
103
+ // Branch diff, endpoint catalog, auth config, and OpenAPI spec are omitted here
104
+ // because they are already present in the recommendation prompt that is
105
+ // concatenated in the same tool response.
106
+ const routerSection = !p.wsSchemaPath && p.routerMountContext
86
107
  ? `
87
- ## Branch Diff Context
88
- **Branch**: \`${p.parsedDiff.currentBranch}\` → base: \`${p.parsedDiff.baseBranch}\`
89
- **Changed Files** (${p.parsedDiff.changedFiles.length}): ${p.parsedDiff.changedFiles.join(", ")}
90
- **New Endpoints** (${p.parsedDiff.newEndpoints.length}): ${p.parsedDiff.newEndpoints.map((e) => `${e.method} ${e.path} (${e.sourceFile})`).join(", ") || "none"}
91
- **Modified Endpoints** (${p.parsedDiff.modifiedEndpoints.length}): ${p.parsedDiff.modifiedEndpoints.map((e) => `${e.method} ${e.path} (${e.sourceFile})`).join(", ") || "none"}
92
- **Affected Services**: ${p.parsedDiff.affectedServices.join(", ") || "none"}
93
- `
94
- : "";
95
- const endpointCatalog = p.scannedEndpoints.length > 0
96
- ? `
97
- ## Pre-Scanned Endpoint Catalog (${p.scannedEndpoints.length} routes)
98
- ${p.scannedEndpoints.map((ep) => ` ${ep.methods.join("|")} ${ep.path} (${ep.sourceFile})`).join("\n")}
99
- `
100
- : "";
101
- const wsLine = p.wsBaseUrl
102
- ? `**Base URL**: \`${p.wsBaseUrl}\`${p.wsAuthHeader ? ` | **Auth header**: \`${p.wsAuthHeader}\`` : ""}${p.wsAuthType ? ` | **Auth type**: \`${p.wsAuthType}\`` : ""}`
103
- : "";
104
- const specSection = p.wsSchemaPath
105
- ? `
106
- ## OpenAPI Spec Available
107
- Spec at \`${p.wsSchemaPath}\`. **Read it** for authoritative paths and schemas.
108
- Pass \`apiSchema: "${p.wsSchemaPath}"\` to ALL test generation tool calls.`
109
- : p.routerMountContext
110
- ? `
111
108
  ## Router Mounting / Nesting
112
109
  \`\`\`
113
110
  ${p.routerMountContext}
114
111
  \`\`\`
115
112
  Use this to resolve full URL paths for nested endpoints.`
116
- : "";
113
+ : "";
117
114
  const enrichment = buildEnrichmentInstructions(p);
118
115
  return `# Repository Analysis
119
116
 
@@ -121,12 +118,7 @@ Use this to resolve full URL paths for nested endpoints.`
121
118
  **Repository**: \`${p.repositoryPath}\`
122
119
  **Analysis Scope**: \`${p.analysisScope}\`
123
120
  ${isDiffScope ? `**Diff endpoints**: ${(p.parsedDiff?.newEndpoints.length ?? 0) + (p.parsedDiff?.modifiedEndpoints.length ?? 0)}` : `**Pre-scanned endpoints**: ${p.scannedEndpoints.length}`}
124
- ${wsLine}
125
- ${p.wsSchemaPath ? `**OpenAPI Spec**: \`${p.wsSchemaPath}\` (spec-based flow)` : "**Flow**: Code-scanning (may miss nesting)"}
126
-
127
- ${diffSection}
128
- ${endpointCatalog}
129
- ${specSection}
121
+ ${routerSection}
130
122
  ${enrichment}
131
123
 
132
124
  **CRITICAL**: No .json/.md file creation. Prioritize cross-resource workflows.`;
@@ -0,0 +1,125 @@
1
+ jest.mock("@skyramp/skyramp", () => ({ Skyramp: class {
2
+ } }));
3
+ import { mergeEnrichedScenarios } from "./registerRecommendTestsPrompt.js";
4
+ import { ScenarioSource } from "../../types/RepositoryAnalysis.js";
5
+ import { TestType } from "../../types/TestTypes.js";
6
+ function makeScenario(overrides = {}) {
7
+ return {
8
+ scenarioName: "base-scenario",
9
+ description: "base",
10
+ category: "crud",
11
+ priority: "medium",
12
+ steps: [{ order: 1, method: "GET", path: "/api/items", description: "list", interactionType: "success", expectedStatusCode: 200 }],
13
+ chainingKeys: [],
14
+ requiresAuth: true,
15
+ estimatedComplexity: "simple",
16
+ source: ScenarioSource.CodeInferred,
17
+ testType: TestType.CONTRACT,
18
+ ...overrides,
19
+ };
20
+ }
21
+ const VALID_STEP = { order: 1, method: "post", path: "/api/orders", expectedStatusCode: 201 };
22
+ describe("mergeEnrichedScenarios — happy path", () => {
23
+ it("merges a valid agent scenario into server scenarios", () => {
24
+ const server = [makeScenario({ scenarioName: "existing" })];
25
+ const raw = JSON.stringify([{
26
+ scenarioName: "new-orders-flow",
27
+ category: "business_rule",
28
+ steps: [VALID_STEP],
29
+ }]);
30
+ const { scenarios, rejectionNotes } = mergeEnrichedScenarios(server, raw);
31
+ expect(rejectionNotes).toHaveLength(0);
32
+ expect(scenarios.find(s => s.scenarioName === "new-orders-flow")).toBeDefined();
33
+ expect(scenarios.find(s => s.scenarioName === "existing")).toBeDefined();
34
+ expect(scenarios).toHaveLength(2);
35
+ });
36
+ it("overrides a server scenario when agent provides same scenarioName", () => {
37
+ const server = [makeScenario({ scenarioName: "orders-flow", description: "server version" })];
38
+ const raw = JSON.stringify([{
39
+ scenarioName: "orders-flow",
40
+ category: "business_rule",
41
+ description: "agent version",
42
+ steps: [VALID_STEP],
43
+ }]);
44
+ const { scenarios } = mergeEnrichedScenarios(server, raw);
45
+ expect(scenarios).toHaveLength(1);
46
+ expect(scenarios[0].description).toBe("agent version");
47
+ expect(scenarios[0].source).toBe("agent-enriched");
48
+ });
49
+ it("normalizes method to uppercase", () => {
50
+ const raw = JSON.stringify([{
51
+ scenarioName: "uppercase-test",
52
+ category: "crud",
53
+ steps: [{ order: 1, method: "post", path: "/api/items", expectedStatusCode: 201 }],
54
+ }]);
55
+ const { scenarios } = mergeEnrichedScenarios([], raw);
56
+ expect(scenarios[0].steps[0].method).toBe("POST");
57
+ });
58
+ it("preserves bugCatchingTarget when provided", () => {
59
+ const raw = JSON.stringify([{
60
+ scenarioName: "formula-test",
61
+ category: "business_rule",
62
+ bugCatchingTarget: "total = price * qty",
63
+ steps: [VALID_STEP],
64
+ }]);
65
+ const { scenarios } = mergeEnrichedScenarios([], raw);
66
+ expect(scenarios[0].bugCatchingTarget).toBe("total = price * qty");
67
+ });
68
+ it("falls back to server scenarios on empty agent array", () => {
69
+ const server = [makeScenario({ scenarioName: "server-only" })];
70
+ const { scenarios, rejectionNotes } = mergeEnrichedScenarios(server, "[]");
71
+ // Empty array → no agent scenarios, return server ones unchanged
72
+ expect(scenarios).toEqual(server);
73
+ expect(rejectionNotes).toHaveLength(0);
74
+ });
75
+ });
76
+ describe("mergeEnrichedScenarios — rejection cases", () => {
77
+ it("rejects scenario with missing scenarioName", () => {
78
+ const raw = JSON.stringify([{ category: "crud", steps: [VALID_STEP] }]);
79
+ const { scenarios, rejectionNotes } = mergeEnrichedScenarios([], raw);
80
+ expect(scenarios).toHaveLength(0);
81
+ expect(rejectionNotes[0]).toMatch(/missing scenarioName/);
82
+ });
83
+ it("rejects scenario with missing steps array", () => {
84
+ const raw = JSON.stringify([{ scenarioName: "no-steps", category: "crud" }]);
85
+ const { rejectionNotes } = mergeEnrichedScenarios([], raw);
86
+ expect(rejectionNotes[0]).toMatch(/missing or empty steps/);
87
+ });
88
+ it("rejects scenario with empty steps array", () => {
89
+ const raw = JSON.stringify([{ scenarioName: "empty-steps", category: "crud", steps: [] }]);
90
+ const { rejectionNotes } = mergeEnrichedScenarios([], raw);
91
+ expect(rejectionNotes[0]).toMatch(/missing or empty steps/);
92
+ });
93
+ it("rejects scenario with missing category", () => {
94
+ const raw = JSON.stringify([{ scenarioName: "no-cat", steps: [VALID_STEP] }]);
95
+ const { rejectionNotes } = mergeEnrichedScenarios([], raw);
96
+ expect(rejectionNotes[0]).toMatch(/missing category/);
97
+ });
98
+ it("rejects scenario with unknown category", () => {
99
+ const raw = JSON.stringify([{ scenarioName: "bad-cat", category: "not_a_real_category", steps: [VALID_STEP] }]);
100
+ const { rejectionNotes } = mergeEnrichedScenarios([], raw);
101
+ expect(rejectionNotes[0]).toMatch(/unknown category/);
102
+ });
103
+ it("falls back to server scenarios on invalid JSON", () => {
104
+ const server = [makeScenario()];
105
+ const { scenarios, rejectionNotes } = mergeEnrichedScenarios(server, "{ bad json");
106
+ expect(scenarios).toEqual(server);
107
+ expect(rejectionNotes[0]).toMatch(/invalid JSON/);
108
+ });
109
+ it("falls back to server scenarios when JSON is not an array", () => {
110
+ const server = [makeScenario()];
111
+ const { scenarios, rejectionNotes } = mergeEnrichedScenarios(server, JSON.stringify({ not: "array" }));
112
+ expect(scenarios).toEqual(server);
113
+ expect(rejectionNotes[0]).toMatch(/expected a JSON array/);
114
+ });
115
+ it("accepts valid scenarios and rejects invalid ones in the same batch", () => {
116
+ const raw = JSON.stringify([
117
+ { scenarioName: "valid-one", category: "crud", steps: [VALID_STEP] },
118
+ { category: "crud", steps: [VALID_STEP] }, // missing scenarioName
119
+ ]);
120
+ const { scenarios, rejectionNotes } = mergeEnrichedScenarios([], raw);
121
+ expect(scenarios).toHaveLength(1);
122
+ expect(scenarios[0].scenarioName).toBe("valid-one");
123
+ expect(rejectionNotes).toHaveLength(1);
124
+ });
125
+ });
@@ -1,6 +1,53 @@
1
1
  export const MAX_TESTS_TO_GENERATE = 3;
2
2
  export const MAX_RECOMMENDATIONS = 20;
3
3
  export const MAX_CRITICAL_TESTS = 3;
4
+ export function buildArchitectPreamble(isDiffScope) {
5
+ if (isDiffScope) {
6
+ return `You are acting as a Skyramp Integration Architect. You will receive a branch diff — changed endpoints, source code interactions, and existing tests. Your responsibility is to map test intent to the Skyramp generation spec with precision. No guessing — derive all parameters from the codebase, workspace config, and provided context only.
7
+
8
+ Your task:
9
+ 1. **Recommend AND generate tests for this PR's changes** — prioritize tests most likely to catch real production bugs (state machine violations, cross-resource data integrity, computed field errors, security boundary bypasses) over trivial coverage
10
+ 2. **Immediately generate the top-priority tests** by calling Skyramp MCP generation tools — only these types: **integration, contract, E2E, UI**. Never smoke. Never fuzz.
11
+ 3. **Avoid duplicate coverage** — if an existing test already covers an endpoint + test type, choose a different angle instead
12
+
13
+ Every tool parameter must trace to a concrete source: repository analysis, source code, or OpenAPI spec. Do not invent field names, request shapes, base URLs, or auth values. If a required value is unknown, read the relevant source file before calling the tool.`;
14
+ }
15
+ return `You are acting as a Skyramp Integration Architect. You will receive a full repository analysis — all endpoints, source code interactions, and existing tests. Your responsibility is to map test intent to the Skyramp generation spec with precision. No guessing — derive all parameters from the codebase, workspace config, and provided context only.
16
+
17
+ Your task:
18
+ 1. **Produce a comprehensive test recommendation catalog** grouped by test type (E2E → UI → Integration → Contract), ranked by production risk within each type
19
+ 2. **Present each recommendation with a complete, ready-to-use Skyramp tool call** — fill in all field names, endpoint URLs, request shapes, and auth parameters from source code so the catalog is immediately actionable
20
+ 3. **Do not call any generation tools** — your output is the catalog itself; it is executed on demand
21
+
22
+ Replace every \`<…from source>\` placeholder with actual values before presenting. Do not invent values — read the source file if a value is unclear.`;
23
+ }
24
+ export function buildContextFetchingGuidance(sessionId) {
25
+ if (!sessionId)
26
+ return "";
27
+ return `<context_fetching_protocol>
28
+ ## Execution Plan Context
29
+ Before calling any tool, replace every \`<from source>\` placeholder in the tool call parameters with actual values read from the relevant source file (handler, schema, or model). Do not proceed with placeholders still present — every parameter must trace to a concrete source.
30
+ </context_fetching_protocol>`;
31
+ }
32
+ export function buildReasoningProtocol() {
33
+ return `<reasoning_protocol>
34
+ ## Parameter Grounding Rule
35
+ Before each GENERATE tool call, output a brief \`<thinking>\` block stating WHERE the key values come from:
36
+
37
+ - **requestBody / responseBody fields** → source code schema (Zod, Pydantic, DTO), enriched scenario, or OpenAPI spec — state the file or schema name
38
+ - **endpointURL** → workspace \`baseUrl\` + endpoint path (both required — never path alone)
39
+ - **authHeader / authScheme** → workspace config or OpenAPI \`securitySchemes\`
40
+ - **FK path params** → chained from a prior step's response \`id\` field — not hardcoded
41
+ - **Names / string values** → realistic; append timestamp suffix to avoid re-run conflicts
42
+
43
+ ## Ranking Rule
44
+ For each GENERATE item, include one sentence in your output (before the tool calls) stating the specific bug or failure it targets — derived from \`bugCatchingTarget\` or your source-code reading. Example: "Targets: order total miscalculation — total_amount = sum(item.price × item.quantity) should recompute when items array changes."
45
+
46
+ If \`bugCatchingTarget\` is empty for a GENERATE item, derive it from source code before including the item. A GENERATE slot without a specific bug target belongs in ADDITIONAL.
47
+
48
+ If a value cannot be sourced, read the relevant source file before calling the tool. Do not proceed with invented values.
49
+ </reasoning_protocol>`;
50
+ }
4
51
  function serializeAuthCallParams(params) {
5
52
  const parts = [`authHeader: "${params.authHeader}"`];
6
53
  if (params.authScheme !== undefined) {
@@ -45,7 +92,7 @@ export function buildTestPatternGuidelines() {
45
92
  - **Race conditions**: If concurrent writes are possible (inventory deduction, counter increment), test concurrent requests
46
93
  - **Computed fields**: If response contains derived values (total, average, count), verify computation with known inputs (e.g., total_cost = compute_seconds * rate + memory_mb * rate + external_cost)
47
94
  - **Mutation with collection modification**: If PUT/PATCH endpoints accept arrays of child items (e.g., order line items, cart products, invoice entries), test adding/removing items and verify that derived totals (e.g., total_amount, subtotal, item_count) are recalculated correctly. This is the most common source of user-reported bugs — always prioritize it for GENERATE over simple field-update tests.
48
- **CRITICAL**: The PATCH/PUT request body MUST include the child collection array field(s) defined for that endpoint (e.g., "items" with FK references like "product_id" and a quantity field) chained from prior POST responses. A PATCH that only sends metadata fields (e.g., discount_type, status, notes) without modifying the child collection is NOT a valid mutation-recalc test — it will pass even when the item/total logic is broken. Before writing assertions, inspect the source code or OpenAPI spec to identify (1) the actual child collection field name and its FK/quantity/price sub-fields, and (2) how derived totals are calculated (including any discounts, taxes, or fees). Then assert: the child FK fields match chained IDs, quantities match sent values, and totals match the computation from the source code
95
+ The PATCH/PUT request body should include the child collection array field(s) defined for that endpoint (e.g., "items" with FK references like "product_id" and a quantity field) chained from prior POST responses. A PATCH that only sends metadata fields (e.g., discount_type, status, notes) without modifying the child collection is NOT a valid mutation-recalc test — it will pass even when the item/total logic is broken. Before writing assertions, inspect the source code or OpenAPI spec to identify (1) the actual child collection field name and its FK/quantity/price sub-fields, and (2) how derived totals are calculated (including any discounts, taxes, or fees). Then assert: the child FK fields match chained IDs, quantities match sent values, and totals match the computation from the source code
49
96
  - **Webhook/event side effects**: If endpoints trigger async operations, test that side effects occur (e.g., POST /orders triggers notification)
50
97
  - **Cross-user isolation**: If resources are owned by users, test that user B cannot access/modify user A's resources (GET /users/{other_id}/data → 403 Forbidden)
51
98
  - **Range/boundary invariants**: If business rules cap values (max retries, min balance, discount ≤ subtotal), test the boundary (e.g., set retries to max+1 → expect rejection)
@@ -118,6 +165,72 @@ export function buildTestExamples() {
118
165
  - Single-resource CRUD with no cross-resource or state verification
119
166
  - POST with missing field → 422 (obvious validation, covered by contract tests)`;
120
167
  }
168
+ export function buildVerificationChecklist(topN, maxGen) {
169
+ return `<verification>
170
+ Before finalizing your output, verify:
171
+ 1. **Count**: Total recommendation count equals exactly ${topN} (${maxGen} GENERATE + ${topN - maxGen} ADDITIONAL). Not fewer.
172
+ 2. **Distinct paths**: Each GENERATE item targets a distinct code path — no two share the same HTTP method + endpoint + expected status.
173
+ 3. **Auth parameters are consistent** across all tool calls (same authHeader and authScheme).
174
+ 4. Every endpointURL includes both the base URL and the path (not just the base, e.g. \`http://host/api/v1/orders/{id}\`).
175
+ 5. **All \`<from source>\` placeholders** you received have been replaced with actual values derived from source code — no \`<...>\` remain in your output.
176
+ 6. **Real request shapes**: requestBody for POST/PUT/PATCH uses actual field names from source (not \`{}\`). GET search/filter uses \`queryParams\`, not \`requestBody\`.
177
+ 7. **scenarioFile**: \`skyramp_integration_test_generation\` uses the exact \`filePath\` returned by \`skyramp_batch_scenario_test_generation\` — not a guessed or hardcoded filename.
178
+ 8. **bugCatchingTarget**: Every GENERATE integration test that targets a business rule, formula, or constraint has a non-empty \`bugCatchingTarget\`.
179
+ 9. **FK chaining**: In multi-step integration tests, path params sourced from a prior step's response (e.g. \`order_id\` from step 1) use \`chainsFrom\` — not hardcoded IDs.
180
+ </verification>`;
181
+ }
182
+ export function buildFewShotExamples() {
183
+ return `<examples>
184
+ <example index="1" type="integration_recommendation">
185
+ <thinking>
186
+ **Parameter grounding**:
187
+ - baseURL: "http://localhost:8000" (workspace api.baseUrl)
188
+ - steps[0].requestBody fields "name", "price": ProductCreate schema fields (src/models/product.py)
189
+ - steps[1].requestBody "product_id": FK to products — chained from step 0 response id
190
+ - steps[1].requestBody "quantity": OrderCreate schema field (src/models/order.py)
191
+ - responseBody "total_amount": 89.97 = 29.99 × 3 — from order total formula (src/services/order_service.py: total = sum(item.price * item.quantity))
192
+ - authHeader/authScheme: workspace config (Authorization / Bearer)
193
+ </thinking>
194
+
195
+ **#1 — GENERATE** | integration | business_rule | new
196
+ Scenario: orders-create-with-product-total-calc (3 steps)
197
+ 1. POST /api/v1/products → 201: Create product with known price
198
+ 2. POST /api/v1/orders → 201: Create order referencing product_id from step 1, quantity=3
199
+ 3. GET /api/v1/orders/{order_id} → 200: Verify total_amount = 29.99 × 3 = 89.97
200
+ bugCatchingTarget: "total_amount = sum(item.price × item.quantity) — wrong if multiplication is skipped or items list is ignored"
201
+ Tool calls:
202
+ skyramp_batch_scenario_test_generation({ scenarioName: "orders-create-with-product-total-calc", destination: "localhost", baseURL: "http://localhost:8000", authHeader: "Authorization", authScheme: "Bearer", steps: [
203
+ { method: "POST", path: "/api/v1/products", statusCode: 201, requestBody: "{\"name\": \"Widget-1713000000\", \"price\": 29.99}" },
204
+ { method: "POST", path: "/api/v1/orders", statusCode: 201, requestBody: "{\"product_id\": \"chained\", \"quantity\": 3}" },
205
+ { method: "GET", path: "/api/v1/orders/{order_id}", statusCode: 200, responseBody: "{\"id\": \"chained\", \"total_amount\": 89.97, \"items\": [{\"product_id\": \"chained\", \"quantity\": 3, \"unit_price\": 29.99}]}" }
206
+ ] })
207
+ skyramp_integration_test_generation({ scenarioFile: "<filePath returned by skyramp_batch_scenario_test_generation above>" })
208
+ Reasoning: Catches a broken total calculation before it ships — the most common source of order-related bug reports.
209
+ </example>
210
+
211
+ <example index="2" type="contract_recommendation">
212
+ <thinking>
213
+ **Parameter grounding**:
214
+ - endpointURL: "http://localhost:8000/api/v1/products/{product_id}" (workspace baseUrl + path from endpoint listing)
215
+ - method: "DELETE" (route definition, uppercase)
216
+ - pathParams "product_id=<random-uuid-v4>": hardcoded non-existent ID — no setup step needed for a 404 test; use a fresh UUID v4, not all-zeros
217
+ - authHeader/authScheme: workspace config (Authorization / Bearer)
218
+ - No requestData — DELETE carries no body
219
+ </thinking>
220
+
221
+ **#2 — GENERATE** | contract | error_handling | new
222
+ DELETE /api/v1/products/{product_id} → 404
223
+ Tool: skyramp_contract_test_generation({ endpointURL: "http://localhost:8000/api/v1/products/{product_id}", method: "DELETE", authHeader: "Authorization", authScheme: "Bearer", pathParams: "product_id=<random-uuid-v4>" })
224
+ Reasoning: Catches a missing 404 guard on DELETE — verifies the handler returns 404 for non-existent resources rather than a 500 or silent no-op.
225
+ </example>
226
+
227
+ <example index="3" type="additional_recommendation">
228
+ #5 [ADDITIONAL] | integration | security_boundary | existing
229
+ Scenario: orders-unauthorized-cross-user-access (POST /api/v1/orders → GET /api/v1/orders/{order_id} as different user → 403)
230
+ Validates: Cross-user isolation — user B cannot read user A's orders.
231
+ </example>
232
+ </examples>`;
233
+ }
121
234
  export function buildToolWorkflows(authHeaderValue, authTypeValue = "") {
122
235
  const isAuthorizationHeader = /^authorization$/i.test(authHeaderValue);
123
236
  const noAuth = !authHeaderValue;
@@ -177,6 +290,10 @@ To skip auth for unauthenticated endpoints, pass \`authHeader: ""\`.`;
177
290
  : `**Auth params:** \`${authCallParams}\` — pass to EVERY tool call below.`;
178
291
  return `## How to Generate Tests — Tool Workflows
179
292
 
293
+ **Contract**: The following tool signatures are strict technical contracts. Every parameter should match the schema exactly. Omit optional parameters rather than guessing values. If a required field cannot be resolved, fetch context first.
294
+
295
+ **Before every tool call**: Output a <thinking> block justifying the mapping of intent to endpoint to tool parameters. See Mandatory Reasoning Protocol above.
296
+
180
297
  ${authHeaderLine}
181
298
  ${authGuidance}
182
299
 
@@ -189,13 +306,13 @@ ${authGuidance}
189
306
  **CRITICAL — Query params vs request body:**
190
307
  - For **POST/PUT/PATCH**: use \`requestBody\` with realistic field values from source code schemas.
191
308
  - For **GET/DELETE with search/filter/pagination**: use \`queryParams\` (JSON string, e.g., \`{"q": "bear", "limit": 10}\`).
192
- NEVER put query parameters in \`requestBody\` for GET requests — GET request bodies are non-standard and may be ignored or rejected.
309
+ Do not put query parameters in \`requestBody\` for GET requests — GET request bodies are non-standard and may be ignored or rejected.
193
310
  - For **GET by ID**: no \`requestBody\` or \`queryParams\` needed — the ID is in the path.
194
311
  \`responseBody\` should match the actual API response shape from source code (including all fields
195
312
  returned by the controller — e.g., \`id\`, \`ownerId\`, \`createdAt\`, included relations like \`collection\`, \`tags\`).
196
313
  Wrap in \`{"response": ...}\` if the API uses an envelope pattern. If omitted, a synthetic response is generated.
197
314
  Inspect the source code to determine the correct request AND response body shapes — avoid sending \`{}\`.
198
- **CRITICAL for PATCH/PUT mutation-recalc scenarios:** The request body MUST include the child
315
+ **For PATCH/PUT mutation-recalc scenarios:** The request body should include the child
199
316
  collection array (e.g. \`"items": [{"product_id": <chained from prior POST>, "quantity": 2}]\`).
200
317
  Never send a PATCH that only modifies metadata (discount, status) without also including the
201
318
  items/products collection — such a test will not catch collection-level or total-recalculation bugs.
@@ -206,7 +323,7 @@ ${authGuidance}
206
323
  Do NOT pass \`chainingKey\` — defaults to \`response.id\`. After generation, the testbot
207
324
  will verify and fix path param chaining in the generated test.
208
325
 
209
- **For single-endpoint tests (contract/fuzz):**
326
+ **For single-endpoint tests (contract):**
210
327
  \`skyramp_{type}_test_generation\` with \`endpointURL\` (full URL incl. base + path), \`method\`,
211
328
  \`${authCallParams}\`, and \`requestData\` from source code schemas.
212
329
  If an OpenAPI spec exists, ALSO pass \`apiSchema\` — it enables schema-aware validation
@@ -1,11 +1,102 @@
1
1
  import { z } from "zod";
2
- import { StateManager, } from "../../utils/AnalysisStateManager.js";
2
+ import { StateManager, hasSessionData, getSessionData, } from "../../utils/AnalysisStateManager.js";
3
3
  import { logger } from "../../utils/logger.js";
4
4
  import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
5
+ import { getPersonaPrefix } from "../architectPersona.js";
6
+ import { ScenarioSource, AnalysisScope } from "../../types/RepositoryAnalysis.js";
7
+ import { SCENARIO_CATEGORIES } from "../../types/TestRecommendation.js";
8
/**
 * Pick the status code a step is expected to return when the agent did not
 * supply one, so downstream tool calls never receive `statusCode: undefined`.
 *
 * @param {string} method - Upper-cased HTTP method.
 * @returns {number} 201 for POST, 204 for DELETE, 200 for everything else.
 */
function defaultStatusCodeFor(method) {
    if (method === "POST") {
        return 201;
    }
    if (method === "DELETE") {
        return 204;
    }
    return 200;
}

/**
 * Normalize one agent-supplied step, filling in defaults for omitted fields.
 *
 * @param {object} st - Raw step object (already verified to be a non-null object).
 * @param {number} idx - Zero-based position of the step in its scenario.
 * @returns {object} Step with `order`, `method`, `path`, `description`,
 *   `interactionType` and `expectedStatusCode` always populated.
 */
function normalizeEnrichedStep(st, idx) {
    // Compute once so the description and status-code default agree with the
    // normalized method/path instead of the raw (possibly missing) input.
    const method = String(st.method ?? "GET").toUpperCase();
    const path = String(st.path ?? "/");
    return {
        order: st.order ?? idx + 1,
        method,
        path,
        description: st.description ?? `${method} ${path}`,
        interactionType: st.interactionType ?? "success",
        requestBody: st.requestBody,
        queryParams: st.queryParams,
        responseBody: st.responseBody,
        expectedStatusCode: st.expectedStatusCode ?? defaultStatusCodeFor(method),
        expectedResponseFields: st.expectedResponseFields,
        bodyMustInclude: st.bodyMustInclude,
        chainsFrom: st.chainsFrom,
    };
}

/**
 * Merge agent-drafted scenarios (a JSON string) into the server-side scenarios.
 *
 * Each agent scenario is validated; invalid ones are skipped and explained in
 * `rejectionNotes`. Valid agent scenarios replace a server scenario with the
 * same `scenarioName` (keeping its position) or are appended otherwise.
 *
 * @param {Array<object>} serverScenarios - Scenarios produced server-side; returned
 *   unchanged (same array reference) when no agent scenario is accepted.
 * @param {string} raw - JSON text expected to decode to an array of scenarios.
 * @returns {{scenarios: Array<object>, rejectionNotes: string[]}} Merged scenarios
 *   plus one human-readable note per rejected input.
 */
export function mergeEnrichedScenarios(serverScenarios, raw) {
    const rejectionNotes = [];
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch {
        logger.warning("enrichedScenarios: invalid JSON — using server-side scenarios only");
        return { scenarios: serverScenarios, rejectionNotes: ["enrichedScenarios: invalid JSON — all scenarios skipped"] };
    }
    if (!Array.isArray(parsed)) {
        return { scenarios: serverScenarios, rejectionNotes: ["enrichedScenarios: expected a JSON array, got " + typeof parsed] };
    }
    const agentScenarios = [];
    for (const s of parsed) {
        const name = s?.scenarioName ? String(s.scenarioName) : null;
        const label = name ? `"${name}"` : "(unnamed)";
        if (!name) {
            rejectionNotes.push(`rejected ${label}: missing scenarioName`);
            continue;
        }
        if (!Array.isArray(s?.steps) || s.steps.length === 0) {
            rejectionNotes.push(`rejected ${label}: missing or empty steps array`);
            continue;
        }
        // A null/primitive step would otherwise throw while reading its fields
        // and abort the whole merge; reject just this scenario instead.
        const badStepIdx = s.steps.findIndex((st) => st === null || typeof st !== "object");
        if (badStepIdx !== -1) {
            rejectionNotes.push(`rejected ${label}: step ${badStepIdx + 1} is not an object`);
            continue;
        }
        if (!s?.category) {
            rejectionNotes.push(`rejected ${label}: missing category`);
            continue;
        }
        if (!SCENARIO_CATEGORIES.includes(s.category)) {
            rejectionNotes.push(`rejected ${label}: unknown category "${s.category}" — valid: ${SCENARIO_CATEGORIES.join(", ")}`);
            continue;
        }
        agentScenarios.push({
            scenarioName: name,
            description: s.description ?? "",
            category: s.category,
            priority: s.priority ?? "high",
            bugCatchingTarget: s.bugCatchingTarget,
            testType: s.testType,
            steps: s.steps.map(normalizeEnrichedStep),
            chainingKeys: s.chainingKeys ?? [],
            requiresAuth: s.requiresAuth ?? true,
            estimatedComplexity: s.estimatedComplexity ?? "moderate",
            source: ScenarioSource.AgentEnriched,
        });
    }
    if (agentScenarios.length === 0) {
        return { scenarios: serverScenarios, rejectionNotes };
    }
    // Keyed by scenarioName: an agent scenario overwrites the server entry of
    // the same name in place; new names are appended in insertion order.
    const merged = new Map(serverScenarios.map((s) => [s.scenarioName, s]));
    for (const s of agentScenarios) {
        merged.set(s.scenarioName, s);
    }
    logger.info("Merged agent-enriched scenarios", {
        server: serverScenarios.length,
        agent: agentScenarios.length,
        total: merged.size,
        rejected: rejectionNotes.length,
    });
    return { scenarios: Array.from(merged.values()), rejectionNotes };
}
5
88
  export function registerRecommendTestsPrompt(server) {
6
89
  server.registerPrompt("skyramp_recommend_tests", {
7
- description: "Generate ranked test recommendations from a test-management analysis. " +
8
- "Provide a stateFile path from skyramp_analyze_changes.",
90
+ description: getPersonaPrefix() +
91
+ "Given the repository analysis in stateFile, produce ranked test recommendations split into " +
92
+ "GENERATE (call generation tools immediately) and ADDITIONAL (deferred, describe only).\n\n" +
93
+ "**Output contract:** Every GENERATE integration test targeting a business rule or formula " +
94
+ "MUST include a non-empty bugCatchingTarget. Parameters for generation tools must derive " +
95
+ "from the repository analysis, enrichedScenarios, or source code you read — no invented " +
96
+ "field names, no guessed URLs.\n\n" +
97
+ "**Ranking:** Prioritize business_rule and security_boundary over crud. Within a category, " +
98
+ "prefer tests that catch specific formulas, constraints, or state transitions over generic " +
99
+ "happy-path coverage. Provide a stateFile path from skyramp_analyze_changes.",
9
100
  argsSchema: {
10
101
  stateFile: z
11
102
  .string()
@@ -17,28 +108,79 @@ export function registerRecommendTestsPrompt(server) {
17
108
  .default(10)
18
109
  .optional()
19
110
  .describe("Maximum number of ranked recommendations to return (default: 10)"),
111
+ enrichedScenarios: z
112
+ .string()
113
+ .optional()
114
+ .refine((val) => {
115
+ if (val === undefined || val === "")
116
+ return true;
117
+ try {
118
+ const parsed = JSON.parse(val);
119
+ return Array.isArray(parsed);
120
+ }
121
+ catch {
122
+ return false;
123
+ }
124
+ }, { message: "enrichedScenarios must be a valid JSON array string (e.g. '[{\"scenarioName\":\"...\"}]')" })
125
+ .describe("JSON array of agent-drafted scenarios (DraftedScenario[]). Each must have " +
126
+ "scenarioName, category, and steps (array with method, path, order). " +
127
+ "Agent scenarios override server-side ones by scenarioName and are prioritized in ranking."),
20
128
  },
21
129
  }, async (args) => {
22
130
  const stateFile = args.stateFile;
23
131
  if (!stateFile) {
24
132
  throw new Error("stateFile is required");
25
133
  }
134
+ // Try in-memory session store first (fullAnalysis no longer on disk).
135
+ // Extract sessionId from the disk state, then look up the full analysis
136
+ // from process memory — same pattern as analysisResources.ts.
26
137
  const mgr = StateManager.fromStatePath(stateFile);
27
138
  if (!mgr.exists()) {
28
139
  throw new Error(`State file "${stateFile}" not found. Run skyramp_analyze_changes first.`);
29
140
  }
30
141
  const fullState = await mgr.readFullState();
31
142
  const state = fullState ?? null;
32
- if (!state?.repositoryAnalysis?.fullAnalysis) {
143
+ if (!state?.repositoryAnalysis) {
33
144
  throw new Error(`State file "${stateFile}" has no analysis data. Re-run skyramp_analyze_changes.`);
34
145
  }
35
- const { fullAnalysis, sessionId, wsAuthHeader, wsAuthType } = state.repositoryAnalysis;
146
+ const { sessionId, wsAuthHeader, wsAuthType } = state.repositoryAnalysis;
36
147
  const repositoryPath = fullState?.metadata?.repositoryPath ?? "";
148
+ // Resolve fullAnalysis: memory first, disk fallback for backward compat
149
+ let fullAnalysis;
150
+ if (sessionId && hasSessionData(sessionId)) {
151
+ const memData = getSessionData(sessionId);
152
+ if (memData?.analysis) {
153
+ fullAnalysis = memData.analysis;
154
+ logger.debug("Loaded fullAnalysis from process memory", { sessionId });
155
+ }
156
+ }
157
+ if (!fullAnalysis) {
158
+ fullAnalysis = state.repositoryAnalysis.fullAnalysis;
159
+ }
160
+ if (!fullAnalysis) {
161
+ throw new Error(`Analysis data for session not found in memory or on disk. Re-run skyramp_analyze_changes.`);
162
+ }
37
163
  const analysisScope = state.analysisScope === "branch_diff"
38
- ? "current_branch_diff"
39
- : "full_repo";
164
+ ? AnalysisScope.CurrentBranchDiff
165
+ : AnalysisScope.FullRepo;
40
166
  const effectiveTopN = args.topN;
41
- const prompt = buildRecommendationPrompt(fullAnalysis, analysisScope, effectiveTopN, undefined, wsAuthHeader, wsAuthType);
167
+ const enrichedRaw = args.enrichedScenarios;
168
+ let mergedAnalysis = fullAnalysis;
169
+ let rejectionWarning = "";
170
+ if (enrichedRaw) {
171
+ const { scenarios, rejectionNotes } = mergeEnrichedScenarios(fullAnalysis.businessContext.draftedScenarios ?? [], enrichedRaw);
172
+ mergedAnalysis = {
173
+ ...fullAnalysis,
174
+ businessContext: {
175
+ ...fullAnalysis.businessContext,
176
+ draftedScenarios: scenarios,
177
+ },
178
+ };
179
+ if (rejectionNotes.length > 0) {
180
+ rejectionWarning = `\n⚠️ enrichedScenarios — ${rejectionNotes.length} scenario(s) rejected and not used in recommendations:\n${rejectionNotes.map(n => ` - ${n}`).join("\n")}\nFix and re-call skyramp_recommend_tests to incorporate corrections.\n`;
181
+ }
182
+ }
183
+ const prompt = buildRecommendationPrompt(mergedAnalysis, analysisScope, effectiveTopN, undefined, wsAuthHeader, wsAuthType, undefined, sessionId);
42
184
  logger.info("Serving recommendation prompt via MCP Prompt", {
43
185
  stateFile,
44
186
  analysisScope,
@@ -52,7 +194,7 @@ export function registerRecommendTestsPrompt(server) {
52
194
  role: "user",
53
195
  content: {
54
196
  type: "text",
55
- text: `State file: ${stateFile}\nRepository: ${repositoryPath}\nScope: ${analysisScope}\n${resourceLinks}\n${prompt}`,
197
+ text: `State file: ${stateFile}\nRepository: ${repositoryPath}\nScope: ${analysisScope}\n${resourceLinks}${rejectionWarning}\n${prompt}`,
56
198
  },
57
199
  },
58
200
  ],