@skyramp/mcp 0.0.64-rc.9 → 0.0.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. package/build/index.js +2 -0
  2. package/build/prompts/test-maintenance/drift-analysis-prompt.js +26 -7
  3. package/build/prompts/test-maintenance/driftAnalysisSections.js +96 -34
  4. package/build/prompts/test-maintenance/enhanceAssertionSection.js +99 -0
  5. package/build/prompts/test-recommendation/recommendationSections.js +24 -9
  6. package/build/prompts/test-recommendation/test-recommendation-prompt.js +96 -27
  7. package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +239 -2
  8. package/build/prompts/testbot/testbot-prompts.js +182 -125
  9. package/build/services/TestDiscoveryService.js +23 -0
  10. package/build/services/TestExecutionService.js +1 -1
  11. package/build/services/TestGenerationService.js +83 -12
  12. package/build/services/TestGenerationService.test.js +111 -2
  13. package/build/tool-phase-coverage.test.js +8 -2
  14. package/build/tool-phases.js +11 -13
  15. package/build/tools/generate-tests/generateBatchScenarioRestTool.js +203 -0
  16. package/build/tools/generate-tests/generateContractRestTool.js +3 -73
  17. package/build/tools/generate-tests/generateIntegrationRestTool.js +11 -61
  18. package/build/tools/submitReportTool.js +11 -3
  19. package/build/tools/submitReportTool.test.js +1 -1
  20. package/build/tools/test-management/analyzeChangesTool.js +14 -4
  21. package/build/types/RepositoryAnalysis.js +1 -0
  22. package/build/utils/scenarioDrafting.js +121 -11
  23. package/build/utils/scenarioDrafting.test.js +266 -3
  24. package/node_modules/playwright/ThirdPartyNotices.txt +679 -3093
  25. package/node_modules/playwright/lib/mcp/skyramp/traceRecordingBackend.js +117 -11
  26. package/package.json +2 -2
  27. package/build/tools/test-recommendation/recommendTestsTool.js +0 -274
@@ -5,7 +5,8 @@ function formatTestLocations(locs) {
5
5
  const entries = Object.entries(locs || {});
6
6
  if (entries.length === 0)
7
7
  return "";
8
- return "\n**Existing test files (do NOT duplicate these):**\n" +
8
+ return "\n**Existing test files cross-check these before creating new files:**\n" +
9
+ " If a GENERATE item's resource path matches a path listed here, UPDATE that file instead of creating a new one.\n" +
9
10
  entries.map(([type, files]) => " - [" + type + "] " + files).join("\n");
10
11
  }
11
12
  const CATEGORY_PRIORITY = {
@@ -43,10 +44,35 @@ function computeTiebreakerSeed(endpoints, diffFiles) {
43
44
  const canonical = [...endpoints].sort().join("|") + "::" + [...diffFiles].sort().join("|");
44
45
  return crypto.createHash("sha256").update(canonical).digest("hex").slice(0, 8);
45
46
  }
47
+ // ── Helpers ──
48
+ const SKIP_SEGMENTS_SET = new Set(["api", "v1", "v2", "v3", "public"]);
49
+ function extractResourceFromPath(path) {
50
+ const segments = path.split("/").filter(Boolean);
51
+ const nonParam = segments.filter(s => !s.startsWith("{") && !SKIP_SEGMENTS_SET.has(s));
52
+ return nonParam[nonParam.length - 1] || "unknown";
53
+ }
54
+ function scenarioCoverageKey(scenario) {
55
+ const testType = scenario.testType ?? (scenario.steps.length === 1 ? "contract" : "integration");
56
+ const mutatingSteps = scenario.steps.filter(st => ["POST", "PUT", "PATCH", "DELETE"].includes(st.method));
57
+ // Use the last mutating step — earlier steps are typically prerequisite setup
58
+ // (e.g. POST /products before PATCH /orders), while the final mutation is the
59
+ // primary action under test.
60
+ const primaryStep = mutatingSteps[mutatingSteps.length - 1] ?? scenario.steps[scenario.steps.length - 1];
61
+ const resource = extractResourceFromPath(primaryStep?.path ?? "");
62
+ return `${resource}::${testType}`;
63
+ }
46
64
  // ── Execution Plan (replaces pre-ranked + scenarios + heuristic sections) ──
47
65
  function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false) {
48
- const generateItems = scored.slice(0, Math.min(maxGen, scored.length));
49
- const additionalItems = scored.slice(maxGen, topN);
66
+ // For mixed PRs (frontend + backend), reserve the last GENERATE slot for a UI test
67
+ // so the agent has explicit room to record a browser trace and generate it.
68
+ const reserveUIGenSlot = hasFrontendChanges && !isUIOnlyPR && maxGen > 1;
69
+ const backendGenCount = reserveUIGenSlot ? maxGen - 1 : maxGen;
70
+ const backendBudget = reserveUIGenSlot ? Math.max(topN - 1, 0) : topN;
71
+ const generateItems = scored.slice(0, Math.min(backendGenCount, scored.length));
72
+ const rawAdditionalItems = scored.slice(backendGenCount, backendBudget);
73
+ // Filter additional items whose primary resource + test type already appear in GENERATE
74
+ const generatedCoverage = new Set(generateItems.map(item => scenarioCoverageKey(item.scenario)));
75
+ const additionalItems = rawAdditionalItems.filter(item => !generatedCoverage.has(scenarioCoverageKey(item.scenario)));
50
76
  const authRef = authHeaderValue
51
77
  ? `, authHeader: "${authHeaderValue}"${authSchemeSnippet}`
52
78
  : `, authHeader: <check OpenAPI securitySchemes or auth middleware; "" if confirmed unauthenticated>`;
@@ -87,32 +113,56 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
87
113
  ? st.chainsFrom.map(c => `${c.sourceField} from step ${c.sourceStep}`).join(", ")
88
114
  : `${st.chainsFrom.sourceField} from step ${st.chainsFrom.sourceStep}`})`
89
115
  : "";
90
- return ` ${st.order}. ${st.method} ${st.path} → ${st.expectedStatusCode}: ${st.description}${chains}`;
91
- }).join("\n");
92
- const toolCalls = s.steps.map((st) => {
93
- const isBodyMethod = ["POST", "PUT", "PATCH"].includes(st.method);
94
- const dataParam = isBodyMethod
95
- ? `, requestBody: <${st.method} ${st.path} body from source code schemas>`
116
+ const bodyHint = st.bodyMustInclude?.length
117
+ ? ` [body MUST include: ${st.bodyMustInclude.join(", ")}]`
118
+ : "";
119
+ const responseHint = st.expectedResponseFields?.length
120
+ ? ` [assert response fields: ${st.expectedResponseFields.join(", ")}]`
96
121
  : "";
97
- return ` skyramp_scenario_test_generation({ scenarioName: "${s.scenarioName}", destination: "${s.scenarioName}", baseURL: "${baseUrl}", method: "${st.method}", path: "${st.path}", statusCode: ${st.expectedStatusCode}${scenarioAuthRef}${dataParam} })`;
122
+ return ` ${st.order}. ${st.method} ${st.path} ${st.expectedStatusCode}: ${st.description}${chains}${bodyHint}${responseHint}`;
98
123
  }).join("\n");
124
+ const batchSteps = s.steps.map((st) => {
125
+ const isBodyMethod = ["POST", "PUT", "PATCH"].includes(st.method);
126
+ let dataParam = "";
127
+ if (isBodyMethod) {
128
+ if (st.bodyMustInclude && st.bodyMustInclude.length > 0) {
129
+ const fields = st.bodyMustInclude.join(", ");
130
+ dataParam = `, requestBody: <${st.method} ${st.path} body from source code — MUST include child collection fields: [${fields}]. Chain FK fields (e.g. product_id) from prior POST response IDs. Do NOT omit the collection array or send only metadata/discount fields.>`;
131
+ }
132
+ else {
133
+ dataParam = `, requestBody: <${st.method} ${st.path} body from source code schemas>`;
134
+ }
135
+ }
136
+ return ` { method: "${st.method}", path: "${st.path}", statusCode: ${st.expectedStatusCode}${dataParam} }`;
137
+ }).join(",\n");
138
+ let destinationHost = s.scenarioName;
139
+ try {
140
+ const parsed = new URL(baseUrl);
141
+ destinationHost = parsed.hostname;
142
+ }
143
+ catch { /* use scenarioName as fallback */ }
144
+ const toolCalls = ` skyramp_batch_scenario_test_generation({ scenarioName: "${s.scenarioName}", destination: "${destinationHost}", baseURL: "${baseUrl}"${scenarioAuthRef}, steps: [\n${batchSteps}\n ] })`;
99
145
  const prereqNote = s.category === "new_endpoint"
100
- ? `\nPrerequisite discovery (MANDATORY for new_endpoint): Before executing these tool calls, read the source code for the new endpoint's request body. Look for FK fields (e.g. \`product_id\`, \`user_id\`, \`order_id\`). For each FK field found, prepend one \`skyramp_scenario_test_generation\` call to create that prerequisite resource first, then chain its \`id\` into the dependent step. If no FK fields exist, proceed with the steps above as-is.`
146
+ ? `\nPrerequisite discovery (MANDATORY for new_endpoint): Before executing these tool calls, read the source code for the new endpoint's request body. Look for FK fields (e.g. \`product_id\`, \`user_id\`, \`order_id\`). For each FK field found, prepend a step to the \`steps\` array in \`skyramp_batch_scenario_test_generation\` to create that prerequisite resource first, then chain its \`id\` into the dependent step. If no FK fields exist, proceed with the steps above as-is.`
101
147
  : "";
102
148
  return (`**#${rank} — GENERATE** | ${testType} | ${s.category} | priority=${item.priority} | ${item.novelty}\n` +
103
149
  `Scenario: ${s.scenarioName} (${s.steps.length} steps)\n` +
104
150
  `${stepLines}\n` +
105
151
  `Tool calls:\n` +
106
152
  `${toolCalls}\n` +
107
- ` skyramp_integration_test_generation({ scenarioFile: "scenario_${s.scenarioName}.json"${authHeaderOnlyRef} })\n` +
153
+ ` skyramp_integration_test_generation({ scenarioFile: <use the filePath returned by skyramp_batch_scenario_test_generation above>${authHeaderOnlyRef} })\n` +
108
154
  `From source: requestBody shapes for POST/PUT/PATCH steps; responseBody shapes; authScheme` +
109
155
  prereqNote);
110
156
  }
111
157
  }).join("\n\n");
112
- // For mixed PRs, always reserve slots for UI and E2E recommendations regardless of whether
113
- // traces already exist the user can record them later or the bot can record during the run.
114
- const needsE2ESlot = hasFrontendChanges && !isUIOnlyPR;
115
- const needsUISlot = hasFrontendChanges && !isUIOnlyPR;
158
+ // For mixed PRs, reserve slots for UI/E2E additional recommendations but skip
159
+ // if the GENERATE list already includes a UI/E2E test for the changed frontend flows.
160
+ const hasGeneratedFrontendTest = generateItems.some(item => {
161
+ const tt = item.scenario.testType ?? (item.scenario.steps.length === 1 ? "contract" : "integration");
162
+ return tt === "ui" || tt === "e2e";
163
+ }) || reserveUIGenSlot;
164
+ const needsE2ESlot = hasFrontendChanges && !isUIOnlyPR && !hasGeneratedFrontendTest;
165
+ const needsUISlot = hasFrontendChanges && !isUIOnlyPR && !reserveUIGenSlot && !hasGeneratedFrontendTest;
116
166
  const frontendSlots = (needsE2ESlot ? 1 : 0) + (needsUISlot ? 1 : 0);
117
167
  const backendAdditionalItems = frontendSlots > 0
118
168
  ? additionalItems.slice(0, Math.max(additionalItems.length - frontendSlots, 0))
@@ -141,12 +191,19 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
141
191
  : "No traces exist yet — record a backend trace via `skyramp_start_trace_collection` + `skyramp_stop_trace_collection` and a UI trace via Playwright browser tools, then call `skyramp_e2e_test_generation`.";
142
192
  return `\n\n#${rank} [ADDITIONAL] | E2E | workflow | priority=HIGH | new\n Scenario: e2e-flow-for-changed-feature (frontend + backend files changed in this diff)\n Validates: Full browser-level flow for the changed UI components end-to-end — derive the scenario name and steps from the actual changed frontend files. ${traceNote}`;
143
193
  })() : "";
144
- const supplementCount = topN - generateItems.length - backendAdditionalItems.length - frontendSlots;
194
+ const reservedUIGenCount = reserveUIGenSlot ? 1 : 0;
195
+ const supplementCount = topN - generateItems.length - reservedUIGenCount - backendAdditionalItems.length - frontendSlots;
145
196
  const supplementNote = supplementCount > 0
146
- ? `\n**REQUIRED — You MUST add ${supplementCount} more to reach the total of ${topN}.** Draft them from endpoint interactions and source code patterns not yet covered. Use the same 5-dimension rubric and quality gate to assign priority (HIGH/MEDIUM/LOW), testType, and category.${hasFrontendChanges && !isUIOnlyPR ? " Since this PR has frontend changes, at least 1 of these should be a UI or E2E test targeting the changed components." : ""} Do NOT produce fewer than ${topN} total.`
197
+ ? `\n**REQUIRED — You MUST add ${supplementCount} more to reach the total of ${topN}.** Draft them from endpoint interactions and source code patterns not yet covered. Use the same 6-dimension rubric and quality gate to assign priority (HIGH/MEDIUM/LOW), testType, and category.${hasFrontendChanges && !isUIOnlyPR ? " Since this PR has frontend changes, at least 1 of these should be a UI or E2E test targeting the changed components." : ""} Do NOT produce fewer than ${topN} total. Do NOT supplement with tests whose primary endpoint and test type match a GENERATE item — those flows are already covered.`
147
198
  : "";
148
199
  return `## Execution Plan
149
- Seed: ${seed} | Endpoints: ${endpointCount} | Budget: ${generateItems.length} generate + ${Math.max(topN - generateItems.length, 0)} additional = ${topN} total
200
+ Seed: ${seed} | Endpoints: ${endpointCount} | Budget: ${generateItems.length + (reserveUIGenSlot ? 1 : 0)} generate + ${Math.max(topN - generateItems.length - (reserveUIGenSlot ? 1 : 0), 0)} additional = ${topN} total
201
+
202
+ **Step 0 — Existing-test cross-check (MANDATORY before executing anything)**
203
+ For every GENERATE item below, check its endpoint path and test type against the Existing Tests list (further down in the prompt).
204
+ - **Contract tests**: If an existing contract test already covers that resource path → UPDATE the existing file instead of creating a new one. This does NOT count toward \`newTestsCreated\` — backfill from ADDITIONAL candidates to fill the open ADD slot.
205
+ - **Integration/scenario tests**: Always generate as a new file via the scenario pipeline, even if an existing integration test covers the same resource. A new multi-step scenario is a distinct test. Count it toward \`newTestsCreated\`.
206
+ - **UI tests**: Always generate as a new file. Count toward \`newTestsCreated\`.
150
207
 
151
208
  **Step 1 — Source-Code Enrichment (MANDATORY before executing anything)**
152
209
  Read the source code for ALL changed files. Look for:
@@ -156,17 +213,19 @@ Read the source code for ALL changed files. Look for:
156
213
  - Validation logic (field constraints, cross-field dependencies)
157
214
  - Security boundaries not covered by the structural candidates below
158
215
 
159
- For each one found, evaluate it against these 5 dimensions and assign priority:
216
+ For each one found, evaluate it against these 6 dimensions and assign priority:
160
217
  | Dimension | What to assess |
161
218
  | Production Safety | Guards a critical boundary (auth, unique constraint, cascade delete, data integrity, breaking migration)? → HIGH |
162
219
  | Bug-Finding Potential | Targets a known failure mode (race condition, data consistency, state transition, cascade effect)? → HIGH |
220
+ | Mutation Side Effects | Does PUT/PATCH modify a collection of child items (line items, cart entries) and trigger recalculation (totals, counts, amounts)? → HIGH — this is the most common source of user-reported bugs |
163
221
  | User Journey Relevance | Reflects how real users interact (from traces, business flows, critical paths)? → HIGH or MEDIUM |
164
222
  | Coverage Gap | Addresses an area with zero existing test coverage? → bump up one tier |
165
223
  | Code Insight | Derived from actual implementation (spotted middleware pattern, N+1 risk, unique constraint)? → bump up one tier |
166
224
 
167
- Quality gate — ask both questions:
225
+ Quality gate — ask all three questions:
168
226
  1. "Would this test prevent a production incident?" → YES = HIGH priority regardless of other dimensions
169
227
  2. "Does this test exercise a real workflow or catch a real bug?" → YES = at least MEDIUM
228
+ 3. "Does this test cover a mutation that modifies child items and triggers total/amount recalculation?" → YES = HIGH priority, and prefer it for GENERATE over simple single-field update tests for the same endpoint
170
229
 
171
230
  Assign category: security_boundary | business_rule | data_integrity | breaking_change | auth | workflow | error-handling | data-validation | crud
172
231
 
@@ -180,8 +239,9 @@ INSERT a source-code-derived candidate into the ranked list **only if ALL three
180
239
  3. It is not already covered by a structural candidate in the list below
181
240
 
182
241
  If these conditions are not met, add it to ADDITIONAL only — do NOT displace a pre-ranked GENERATE item.
242
+ **CRITICAL-tier items (category: new_endpoint) can NEVER be displaced** — they test the actual endpoints introduced in this PR and must always occupy GENERATE slots.
183
243
 
184
- When a qualifying candidate is inserted: place it HIGH before MEDIUM before LOW; within the same priority, source-code-derived candidates go BEFORE structural ones. Re-number ranks after insertion. The top ${generateItems.length} become GENERATE items.
244
+ When a qualifying candidate is inserted: place it HIGH before MEDIUM before LOW; within the same priority, source-code-derived candidates go BEFORE structural ones. Re-number ranks after insertion. The top ${backendGenCount} become backend GENERATE items.${reserveUIGenSlot ? " The final GENERATE slot is reserved for a UI test and is not taken from this ranked list." : ""}
185
245
 
186
246
  **Cascade vs referential integrity:** If both a \`cascade-delete\` and a \`delete-blocked\` scenario appear for the same resource pair, keep only the one that matches the source code's FK delete policy (e.g. \`ON DELETE CASCADE\`, \`cascade=True\`, or \`onDelete: 'CASCADE'\` → keep cascade-delete; \`RESTRICT\`/\`PROTECT\`/no cascade → keep delete-blocked). Remove the inapplicable variant before executing.
187
247
 
@@ -206,23 +266,31 @@ ${buildGenerationRules(isUIOnlyPR)}
206
266
 
207
267
  **Critical-category minimum:** At least ${Math.min(MAX_CRITICAL_TESTS, maxGen)} of the ${maxGen} GENERATE items MUST be from HIGH-priority categories (security_boundary, business_rule, data_integrity, breaking_change). The pre-ranked plan below already prioritises this — only override if source-code enrichment reveals a higher-value candidate.
208
268
 
209
- ### GENERATE (execute these in order after Step 1 insertion, one retry on failure then skip)
269
+ ### GENERATE (process these EXACTLY as listed, in order do NOT reorder or replace any item with a different scenario; if Step 0 converts an item to UPDATE, backfill the ADD slot from ADDITIONAL)
270
+
271
+ ${generateBlocks || " (no pre-ranked generate items — draft your own based on endpoint analysis)"}${reserveUIGenSlot ? `
272
+
273
+ **#${generateItems.length + 1} — GENERATE** | UI | workflow | priority=HIGH | new
274
+ Scenario: ui-interaction-for-changed-components (frontend files changed in this diff)
275
+ Record a browser trace for the changed UI components, then generate a UI test.
276
+ Steps: browser_navigate → browser_snapshot → interact with changed components → browser_assert → skyramp_export_zip → skyramp_ui_test_generation
277
+ This slot is RESERVED — you MUST attempt a UI test here. Only skip if browser_navigate fails (app unreachable).` : ""}
210
278
 
211
- ${generateBlocks || " (no pre-ranked generate items — draft your own based on endpoint analysis)"}
279
+ **COMPLIANCE CHECK**: Before proceeding, verify your generate list matches the items above. If you plan to generate a scenario with a DIFFERENT name than what is listed (e.g. you want to generate "order-update-discount-calculation" but the plan says "orders-patch-add-items-recalculate"), STOP use the plan's scenario name and steps. Add your alternative to ADDITIONAL instead. One retry on failure then skip to next item.
212
280
 
213
281
  ### ADDITIONAL (list in additionalRecommendations in this order after Step 1 insertion)
214
282
 
215
283
  ${additionalLines || " (none pre-ranked)"}${uiSlotLine}${e2eSlotLine}
216
284
  ${supplementNote}
217
285
 
218
- **You MUST produce EXACTLY ${topN} total recommendations: ${generateItems.length} to generate + ${Math.max(topN - generateItems.length, 0)} as additionalRecommendations. Do NOT produce fewer. Generate recommendations now.**
286
+ **You MUST produce EXACTLY ${topN} total recommendations: ${generateItems.length + (reserveUIGenSlot ? 1 : 0)} to generate + ${Math.max(topN - generateItems.length - (reserveUIGenSlot ? 1 : 0), 0)} as additionalRecommendations. Do NOT produce fewer. Generate recommendations now.**
219
287
 
220
288
  ## Recommendation Stability
221
289
  - **Carry forward** previous additionalRecommendations that still apply — match by scenarioName (multi-step) or endpoint (single-endpoint). Re-derive category and priority from test content.
222
290
  - **Only drop** a previous recommendation if its target endpoint was removed, its business logic changed, or it is now covered by a generated test.
223
291
  - **Only add** new recommendations for code paths introduced since the last run.`;
224
292
  }
225
- export function buildRecommendationPrompt(analysis, analysisScope = "full_repo", topN = MAX_RECOMMENDATIONS, prContext, workspaceAuthHeader, workspaceAuthType) {
293
+ export function buildRecommendationPrompt(analysis, analysisScope = "full_repo", topN = MAX_RECOMMENDATIONS, prContext, workspaceAuthHeader, workspaceAuthType, maxGenerateOverride) {
226
294
  const isDiffScope = analysisScope === "current_branch_diff";
227
295
  const diffContext = analysis.branchDiffContext;
228
296
  const openApiSpec = analysis.artifacts?.openApiSpecs?.[0];
@@ -342,7 +410,8 @@ ${detailBlocks}
342
410
  }
343
411
  // ── Scoring ──
344
412
  const endpointCount = allEndpoints.reduce((acc, ep) => acc + (ep.methods ?? []).length, 0);
345
- const maxGen = isUIOnlyPR ? (hasTraces ? MAX_TESTS_TO_GENERATE : 0) : MAX_TESTS_TO_GENERATE;
413
+ const baseMaxGen = Math.min(Math.max(maxGenerateOverride ?? (isDiffScope ? MAX_TESTS_TO_GENERATE : topN), 0), topN);
414
+ const maxGen = isUIOnlyPR ? (hasTraces ? baseMaxGen : 0) : baseMaxGen;
346
415
  const scenarios = analysis.businessContext.draftedScenarios;
347
416
  let scored = [];
348
417
  let seed = "";
@@ -2,7 +2,7 @@ jest.mock("@skyramp/skyramp", () => ({
2
2
  WorkspaceConfigManager: { create: jest.fn() },
3
3
  }));
4
4
  import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
5
- import { PATH_PARAM_UUID_GUIDANCE, MAX_TESTS_TO_GENERATE } from "./recommendationSections.js";
5
+ import { PATH_PARAM_UUID_GUIDANCE, MAX_TESTS_TO_GENERATE, buildTestQualityCriteria } from "./recommendationSections.js";
6
6
  // ---------------------------------------------------------------------------
7
7
  // Minimal fixtures
8
8
  // ---------------------------------------------------------------------------
@@ -315,7 +315,7 @@ describe("buildRecommendationPrompt — Stability and supplement section", () =>
315
315
  });
316
316
  const prompt = buildRecommendationPrompt(analysis, "full_repo", 10);
317
317
  expect(prompt).toContain("REQUIRED — You MUST add");
318
- expect(prompt).toContain("5-dimension rubric");
318
+ expect(prompt).toContain("6-dimension rubric");
319
319
  });
320
320
  // Verify MAX_TESTS_TO_GENERATE is still exported and equals 3
321
321
  it("MAX_TESTS_TO_GENERATE is 3", () => {
@@ -369,3 +369,240 @@ describe("PATH_PARAM_UUID_GUIDANCE — no hardcoded UUID anchor", () => {
369
369
  expect(prompt).not.toMatch(UUID_V4_REGEX);
370
370
  });
371
371
  });
372
+ // ---------------------------------------------------------------------------
373
+ // Tests — maxGenerateOverride parameter in buildRecommendationPrompt
374
+ // ---------------------------------------------------------------------------
375
+ describe("buildRecommendationPrompt — maxGenerateOverride", () => {
376
+ const scenariosForOverride = Array.from({ length: 6 }, (_, i) => minimalScenario({
377
+ scenarioName: `scenario-${i}`,
378
+ description: `Test scenario ${i}`,
379
+ category: i < 2 ? "security_boundary" : "crud",
380
+ priority: i < 2 ? "high" : "low",
381
+ }));
382
+ const analysisWithScenarios = minimalAnalysis({
383
+ businessContext: {
384
+ mainPurpose: "Test API",
385
+ userFlows: [],
386
+ dataFlows: [],
387
+ integrationPatterns: [],
388
+ draftedScenarios: scenariosForOverride,
389
+ },
390
+ });
391
+ it("uses MAX_TESTS_TO_GENERATE as default when maxGenerateOverride is undefined", () => {
392
+ const prompt = buildRecommendationPrompt(analysisWithScenarios, "current_branch_diff", 10);
393
+ expect(prompt).toContain(`Budget: ${MAX_TESTS_TO_GENERATE} generate`);
394
+ });
395
+ it("respects maxGenerateOverride when provided", () => {
396
+ const prompt = buildRecommendationPrompt(analysisWithScenarios, "current_branch_diff", 10, undefined, undefined, undefined, 5);
397
+ expect(prompt).toContain("Budget: 5 generate");
398
+ expect(prompt).toContain("additional = 10 total");
399
+ });
400
+ it("clamps maxGenerateOverride to topN when override exceeds topN", () => {
401
+ const prompt = buildRecommendationPrompt(analysisWithScenarios, "current_branch_diff", 4, undefined, undefined, undefined, 10);
402
+ expect(prompt).toContain("Budget: 4 generate");
403
+ });
404
+ it("clamps maxGenerateOverride to 0 when negative", () => {
405
+ const prompt = buildRecommendationPrompt(analysisWithScenarios, "current_branch_diff", 10, undefined, undefined, undefined, -5);
406
+ expect(prompt).toContain("Budget: 0 generate");
407
+ });
408
+ it("allows maxGenerateOverride of 0 to produce no generate items", () => {
409
+ const prompt = buildRecommendationPrompt(analysisWithScenarios, "current_branch_diff", 10, undefined, undefined, undefined, 0);
410
+ expect(prompt).toContain("Budget: 0 generate");
411
+ expect(prompt).not.toContain("#1 — GENERATE");
412
+ });
413
+ it("uses topN as default maxGen in full_repo scope when maxGenerateOverride is undefined", () => {
414
+ const prompt = buildRecommendationPrompt(analysisWithScenarios, "full_repo", 6);
415
+ expect(prompt).toContain("Budget: 6 generate");
416
+ });
417
+ it("overrides full_repo default when maxGenerateOverride is provided", () => {
418
+ const prompt = buildRecommendationPrompt(analysisWithScenarios, "full_repo", 6, undefined, undefined, undefined, 2);
419
+ expect(prompt).toContain("Budget: 2 generate");
420
+ expect(prompt).toContain("additional = 6 total");
421
+ });
422
+ });
423
+ // ---------------------------------------------------------------------------
424
+ // Tests — Additional recommendation dedup (Fix 1) and E2E slot guard (Fix 2)
425
+ // ---------------------------------------------------------------------------
426
+ describe("buildRecommendationPrompt — additional recommendation dedup", () => {
427
+ function patchOrdersScenario(name, overrides = {}) {
428
+ return {
429
+ scenarioName: name,
430
+ description: `Test ${name}`,
431
+ category: "new_endpoint",
432
+ priority: "high",
433
+ steps: [
434
+ { order: 1, method: "POST", path: "/api/v1/products", description: "Create product", interactionType: "success", expectedStatusCode: 201 },
435
+ { order: 2, method: "POST", path: "/api/v1/orders", description: "Create order", interactionType: "success", expectedStatusCode: 201 },
436
+ { order: 3, method: "PATCH", path: "/api/v1/orders/{order_id}", description: "Patch order", interactionType: "success", expectedStatusCode: 200 },
437
+ ],
438
+ chainingKeys: ["id"],
439
+ requiresAuth: false,
440
+ estimatedComplexity: "complex",
441
+ testType: "integration",
442
+ ...overrides,
443
+ };
444
+ }
445
+ function analysisWithPatchScenarios(scenarios) {
446
+ return minimalAnalysis({
447
+ businessContext: {
448
+ mainPurpose: "Order API",
449
+ userFlows: [],
450
+ dataFlows: [],
451
+ integrationPatterns: [],
452
+ draftedScenarios: scenarios,
453
+ },
454
+ branchDiffContext: {
455
+ baseBranch: "main",
456
+ currentBranch: "feature/patch-orders",
457
+ changedFiles: ["backend/src/api/orders.py", "src/frontend/components/OrderDetail.tsx"],
458
+ newEndpoints: [{ path: "/api/v1/orders/{order_id}", methods: [{ method: "PATCH", sourceFile: "routes.py", interactionCount: 0 }] }],
459
+ modifiedEndpoints: [],
460
+ affectedServices: [],
461
+ },
462
+ apiEndpoints: {
463
+ totalCount: 3,
464
+ baseUrl: "http://localhost:8000",
465
+ endpoints: [
466
+ { path: "/api/v1/products", resourceGroup: "products", pathParams: [], methods: [{ method: "POST", description: "Create product", queryParams: [], authRequired: false, sourceFile: "routes.py", interactions: [] }] },
467
+ { path: "/api/v1/orders", resourceGroup: "orders", pathParams: [], methods: [{ method: "POST", description: "Create order", queryParams: [], authRequired: false, sourceFile: "routes.py", interactions: [] }] },
468
+ { path: "/api/v1/orders/{order_id}", resourceGroup: "orders", pathParams: [{ name: "order_id", type: "string", required: true }], methods: [{ method: "PATCH", description: "Update order", queryParams: [], authRequired: false, sourceFile: "routes.py", interactions: [] }] },
469
+ ],
470
+ },
471
+ });
472
+ }
473
+ it("filters additional items that share resource and test type with GENERATE items", () => {
474
+ const scenarios = [
475
+ patchOrdersScenario("orders-patch-add-items-recalculate"),
476
+ patchOrdersScenario("orders-patch-new-endpoint-happy-path"),
477
+ patchOrdersScenario("orders-patch-items-cleanup-verification"),
478
+ patchOrdersScenario("orders-patch-discount-fixed"),
479
+ patchOrdersScenario("orders-patch-another-variant"),
480
+ ];
481
+ const analysis = analysisWithPatchScenarios(scenarios);
482
+ const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 10, undefined, undefined, undefined, 2);
483
+ // First 2 become GENERATE, the remaining share orders::integration → should be filtered
484
+ const additionalMatches = prompt.match(/#\d+ \[ADDITIONAL\]/g) || [];
485
+ const ordersPatchAdditional = (prompt.match(/\[ADDITIONAL\].*orders-patch/g) || []);
486
+ // Same-resource same-type scenarios should NOT appear in ADDITIONAL
487
+ expect(ordersPatchAdditional.length).toBe(0);
488
+ });
489
+ it("preserves additional items with different test type for same endpoint", () => {
490
+ const scenarios = [
491
+ patchOrdersScenario("orders-patch-add-items-recalculate"),
492
+ patchOrdersScenario("orders-patch-new-endpoint-happy-path"),
493
+ // Contract test for same endpoint — different test type, should survive dedup
494
+ {
495
+ ...patchOrdersScenario("orders-patch-contract"),
496
+ testType: "contract",
497
+ steps: [{ order: 1, method: "PATCH", path: "/api/v1/orders/{order_id}", description: "Contract test", interactionType: "success", expectedStatusCode: 200 }],
498
+ },
499
+ ];
500
+ const analysis = analysisWithPatchScenarios(scenarios);
501
+ const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 10, undefined, undefined, undefined, 2);
502
+ // Contract test targets orders but is a different type → should be in ADDITIONAL
503
+ expect(prompt).toContain("orders-patch-contract");
504
+ });
505
it("preserves additional items targeting a different resource", () => {
    // Both steps hit /api/v1/products — a different resource than the orders
    // path named in the two scenarios below.
    const productSteps = [
        { order: 1, method: "POST", path: "/api/v1/products", description: "Create product", interactionType: "success", expectedStatusCode: 201 },
        { order: 2, method: "POST", path: "/api/v1/products", description: "Create duplicate", interactionType: "error", expectedStatusCode: 409 },
    ];
    const ordersRecalculate = patchOrdersScenario("orders-patch-add-items-recalculate");
    const ordersHappyPath = patchOrdersScenario("orders-patch-new-endpoint-happy-path");
    // Reuses the orders fixture as a base, but swaps in the products steps.
    const productsScenario = {
        ...patchOrdersScenario("products-unique-constraint"),
        steps: productSteps,
    };
    const prompt = buildRecommendationPrompt(
        analysisWithPatchScenarios([ordersRecalculate, ordersHappyPath, productsScenario]),
        "current_branch_diff",
        10,
        undefined,
        undefined,
        undefined,
        2,
    );
    expect(prompt).toContain("products-unique-constraint");
});
522
+ });
523
describe("buildRecommendationPrompt — E2E slot guard (Fix 2)", () => {
    // Marker string whose presence or absence in the prompt is asserted below.
    const E2E_SLOT_NAME = "e2e-flow-for-changed-feature";

    /** Builds a single-step scenario whose testType is "ui". */
    const uiScenario = () => ({
        scenarioName: "ui-edit-order-crash",
        description: "UI test for edit order crash",
        category: "new_endpoint",
        priority: "high",
        steps: [
            { order: 1, method: "GET", path: "/orders/{order_id}", description: "Navigate to order detail", interactionType: "success", expectedStatusCode: 200 },
        ],
        chainingKeys: [],
        requiresAuth: false,
        estimatedComplexity: "simple",
        testType: "ui",
    });

    /**
     * Builds the branch-diff recommendation prompt for the given drafted
     * scenarios and a single new endpoint. The changed-file list contains one
     * backend file and one frontend file in both tests.
     *
     * @param {object[]} scenarios - drafted scenarios placed in businessContext
     * @param {string} endpointPath - path of the single new endpoint
     * @param {string} endpointMethod - HTTP method of the single new endpoint
     * @param {number} maxGen - seventh argument passed to buildRecommendationPrompt
     *   (the tests' own comments refer to it as maxGen)
     * @returns the value returned by buildRecommendationPrompt
     */
    const promptFor = (scenarios, endpointPath, endpointMethod, maxGen) => {
        const analysis = minimalAnalysis({
            businessContext: { mainPurpose: "Test", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: scenarios },
            branchDiffContext: {
                baseBranch: "main",
                currentBranch: "feature/test",
                changedFiles: ["backend/routes.py", "src/frontend/components/App.tsx"],
                newEndpoints: [{ path: endpointPath, methods: [{ method: endpointMethod, sourceFile: "routes.py", interactionCount: 0 }] }],
                modifiedEndpoints: [],
                affectedServices: [],
            },
        });
        return buildRecommendationPrompt(analysis, "current_branch_diff", 10, undefined, undefined, undefined, maxGen);
    };

    it("suppresses E2E additional slot when UI test is in GENERATE list", () => {
        const scenarios = [
            minimalScenario({ scenarioName: "integration-test-1", category: "new_endpoint" }),
            uiScenario(),
        ];
        const prompt = promptFor(scenarios, "/api/items/{id}", "PATCH", 3);
        expect(prompt).not.toContain(E2E_SLOT_NAME);
    });

    it("includes E2E additional slot when no UI test is generated and no UI slot reserved", () => {
        const scenarios = ["integration-test-1", "integration-test-2"].map((scenarioName) => minimalScenario({ scenarioName, category: "new_endpoint" }));
        // maxGen=1 so reserveUIGenSlot is false (requires maxGen > 1),
        // and no UI scenario in GENERATE → E2E slot should appear
        const prompt = promptFor(scenarios, "/api/items", "POST", 1);
        expect(prompt).toContain(E2E_SLOT_NAME);
    });
});
580
+ // ---------------------------------------------------------------------------
581
+ // Tests — buildTestQualityCriteria contract-test guidance (regression guard)
582
+ // ---------------------------------------------------------------------------
583
describe("buildTestQualityCriteria — contract test guidance for error-handling", () => {
    // Guidance sentence checked both in the raw criteria and in the full prompt.
    const NO_SETUP_STEPS_RULE = "Do NOT add setup steps just to avoid hardcoding an ID";

    it("includes guidance to use contract tests for single-endpoint error-handling scenarios", () => {
        const criteria = buildTestQualityCriteria();
        const requiredFragments = [
            "Contract tests",
            "error-handling scenarios on a single",
            NO_SETUP_STEPS_RULE,
        ];
        // Checked in order; the first missing fragment fails the test.
        for (const fragment of requiredFragments) {
            expect(criteria).toContain(fragment);
        }
    });

    it("instructs to use a hardcoded nonexistent ID to keep it a single-step test", () => {
        const criteria = buildTestQualityCriteria();
        for (const fragment of ["99999", "single-step contract test"]) {
            expect(criteria).toContain(fragment);
        }
    });

    it("is included in the recommendation prompt when scored scenarios exist", () => {
        // One default drafted scenario in the business context.
        const businessContext = {
            mainPurpose: "Test API",
            userFlows: [],
            dataFlows: [],
            integrationPatterns: [],
            draftedScenarios: [minimalScenario()],
        };
        const prompt = buildRecommendationPrompt(minimalAnalysis({ businessContext }), "full_repo", 10);
        expect(prompt).toContain(NO_SETUP_STEPS_RULE);
    });
});