@skyramp/mcp 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/build/index.js +6 -5
  2. package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js +150 -149
  3. package/build/prompts/personas.js +2 -1
  4. package/build/prompts/test-maintenance/drift-analysis-prompt.js +2 -1
  5. package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +28 -0
  6. package/build/prompts/test-recommendation/analysisOutputPrompt.js +72 -14
  7. package/build/prompts/test-recommendation/analysisOutputPrompt.test.js +154 -0
  8. package/build/prompts/test-recommendation/diffExecutionPlan.js +290 -0
  9. package/build/prompts/test-recommendation/fullRepoCatalog.js +271 -0
  10. package/build/prompts/test-recommendation/recommendationSections.js +4 -2
  11. package/build/prompts/test-recommendation/recommendationShared.js +68 -0
  12. package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +20 -4
  13. package/build/prompts/test-recommendation/test-recommendation-prompt.js +11 -640
  14. package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +6 -6
  15. package/build/prompts/testbot/testbot-prompts.js +19 -7
  16. package/build/prompts/testbot/testbot-prompts.test.js +22 -5
  17. package/build/resources/analysisResources.js +1 -0
  18. package/build/services/ScenarioGenerationService.js +5 -1
  19. package/build/services/TestGenerationService.js +3 -0
  20. package/build/tools/code-refactor/codeReuseTool.js +3 -0
  21. package/build/tools/code-refactor/enhanceAssertionsTool.js +5 -1
  22. package/build/tools/code-refactor/modularizationTool.js +3 -0
  23. package/build/tools/generate-tests/generateBatchScenarioRestTool.js +123 -1
  24. package/build/tools/generate-tests/generateBatchScenarioRestTool.test.js +205 -9
  25. package/build/tools/generate-tests/generateContractRestTool.js +19 -19
  26. package/build/tools/generate-tests/generateIntegrationRestTool.js +9 -2
  27. package/build/tools/generate-tests/generateUIRestTool.js +23 -8
  28. package/build/tools/test-management/analyzeChangesTool.js +218 -2
  29. package/build/tools/test-management/analyzeChangesTool.test.js +233 -1
  30. package/build/tools/workspace/initializeWorkspaceTool.js +1 -1
  31. package/build/utils/docker.test.js +1 -1
  32. package/build/utils/featureFlags.js +7 -0
  33. package/build/utils/featureFlags.test.js +81 -0
  34. package/build/utils/gitStaging.js +18 -0
  35. package/build/utils/gitStaging.test.js +87 -0
  36. package/build/utils/httpDefaults.js +17 -0
  37. package/build/utils/httpDefaults.test.js +21 -0
  38. package/build/utils/scenarioDrafting.js +37 -15
  39. package/build/utils/scenarioDrafting.test.js +66 -0
  40. package/build/utils/telemetry.js +2 -1
  41. package/build/utils/utils.js +23 -0
  42. package/build/utils/versions.js +1 -1
  43. package/node_modules/playwright/lib/mcp/browser/context.js +2 -0
  44. package/node_modules/playwright/lib/mcp/browser/tools/keyboard.js +2 -2
  45. package/node_modules/playwright/lib/mcp/skyramp/traceRecordingBackend.js +17 -26
  46. package/package.json +2 -2
@@ -1,11 +1,15 @@
1
1
  import * as crypto from "crypto";
2
2
  import { AnalysisScope, isDiff, } from "../../types/RepositoryAnalysis.js";
3
- import { WorkspaceAuthType, getDefaultAuthHeader, AUTH_MIDDLEWARE_PATTERNS_STR } from "../../utils/workspaceAuth.js";
3
+ import { WorkspaceAuthType, getDefaultAuthHeader } from "../../utils/workspaceAuth.js";
4
4
  import { logger } from "../../utils/logger.js";
5
- import { extractResourceFromPath } from "../../utils/routeParsers.js";
6
- import { buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildToolWorkflows, buildTestPatternGuidelines, buildTestQualityCriteria, buildFewShotExamples, buildVerificationChecklist, buildGenerationRules, getAuthSnippets, MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, } from "./recommendationSections.js";
7
- import { CATEGORY_PRIORITY, TEST_CATEGORIES } from "../../types/TestRecommendation.js";
5
+ import { buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildToolWorkflows, buildFewShotExamples, buildVerificationChecklist, getAuthSnippets, MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, } from "./recommendationSections.js";
6
+ import { CATEGORY_PRIORITY } from "../../types/TestRecommendation.js";
8
7
  import { buildScopeAssessmentSection, isFrontendFile } from "./scopeAssessment.js";
8
+ import { buildExecutionPlan } from "./diffExecutionPlan.js";
9
+ import { buildFullRepoRecommendations } from "./fullRepoCatalog.js";
10
+ import { buildExternalCoverageSet, externalDedupKey, } from "./recommendationShared.js";
11
+ // Re-export for backward compatibility (tests and external callers import these from this module)
12
+ export { buildExternalCoverageSet, externalDedupKey };
9
13
  function formatTestLocations(locs) {
10
14
  const entries = Object.entries(locs || {});
11
15
  if (entries.length === 0)
@@ -51,640 +55,7 @@ function computeTiebreakerSeed(endpoints, diffFiles) {
51
55
  const canonical = [...endpoints].sort().join("|") + "::" + [...diffFiles].sort().join("|");
52
56
  return crypto.createHash("sha256").update(canonical).digest("hex").slice(0, 8);
53
57
  }
54
- // ── Helpers ──
55
- /** Resolve the primary step and inferred test type for a scenario. */
56
- function resolvePrimaryStep(scenario) {
57
- const testType = scenario.testType ?? (scenario.steps.length === 1 ? "contract" : "integration");
58
- const mutatingSteps = scenario.steps.filter(st => ["POST", "PUT", "PATCH", "DELETE"].includes(st.method));
59
- // Use the last mutating step — earlier steps are typically prerequisite setup
60
- // (e.g. POST /products before PATCH /orders), while the final mutation is the
61
- // primary action under test.
62
- const primaryStep = mutatingSteps[mutatingSteps.length - 1] ?? scenario.steps[scenario.steps.length - 1];
63
- return { primaryStep, testType };
64
- }
65
- function scenarioCoverageKey(scenario) {
66
- const { primaryStep, testType } = resolvePrimaryStep(scenario);
67
- const resource = extractResourceFromPath(primaryStep?.path ?? "");
68
- return `${resource}::${testType}`;
69
- }
70
- /**
71
- * Method-aware coverage key for external test dedup.
72
- * Unlike scenarioCoverageKey (resource::testType), this includes the HTTP method
73
- * so that e.g. an external test covering "GET /orders" doesn't block generating
74
- * a test for "PUT /orders" — a different operation on the same resource.
75
- */
76
- function externalDedupKey(scenario) {
77
- const { primaryStep, testType } = resolvePrimaryStep(scenario);
78
- const method = primaryStep?.method ?? "GET";
79
- const resource = extractResourceFromPath(primaryStep?.path ?? "");
80
- return `${method}::${resource}::${testType}`;
81
- }
82
- /**
83
- * Build a set of coverage keys from external (non-Skyramp) tests.
84
- * Parses `testLocations` entries tagged with `[external]` to extract the
85
- * method-aware `METHOD::resource::testType` keys they cover. This allows
86
- * programmatic filtering of scenarios that duplicate external test coverage
87
- * while preserving distinct operations on the same resource (for example,
88
- * `GET::orders::integration` vs `PUT::orders::integration`) — complementing
89
- * the prompt-level Step 0 dedup instructions with an algorithmic guarantee.
90
- *
91
- * Format of testLocations: Record<testType, "file1 [external] (covers: GET /api/v1/orders, POST /api/v1/orders), file2 (covers: ...)">
92
- */
93
- function buildExternalCoverageSet(testLocations) {
94
- const coverage = new Set();
95
- let externalWithoutCoverage = 0;
96
- for (const [testType, fileList] of Object.entries(testLocations)) {
97
- // Count external files with no covers clause — these fall back to prompt-level dedup only
98
- const externalCount = (fileList.match(/\[external\]/g) || []).length;
99
- const coveredCount = (fileList.match(/\[external\]\s*\(covers:/g) || []).length;
100
- externalWithoutCoverage += externalCount - coveredCount;
101
- // Match all "[external] (covers: ...)" segments in the file list string.
102
- // Each match captures the covers clause for one external test file.
103
- for (const m of fileList.matchAll(/\[external\]\s*\(covers:\s*([^)]+)\)/g)) {
104
- const endpoints = m[1].split(",").map(e => e.trim());
105
- for (const ep of endpoints) {
106
- // ep is "METHOD /path" e.g. "GET /api/v1/orders/{order_id}"
107
- const spaceIdx = ep.indexOf(" ");
108
- if (spaceIdx < 0)
109
- continue;
110
- const method = ep.slice(0, spaceIdx).toUpperCase();
111
- const epPath = ep.slice(spaceIdx + 1);
112
- const resource = extractResourceFromPath(epPath);
113
- if (resource !== "unknown") {
114
- // Method-aware key: "GET::orders::integration" — matches externalDedupKey() format.
115
- // When testType is "unknown" (heuristic failed), emit keys for both integration and
116
- // contract to avoid silent misses — conservative over-blocking is preferable.
117
- if (testType === "unknown") {
118
- coverage.add(`${method}::${resource}::integration`);
119
- coverage.add(`${method}::${resource}::contract`);
120
- }
121
- else {
122
- coverage.add(`${method}::${resource}::${testType}`);
123
- }
124
- }
125
- }
126
- }
127
- }
128
- if (externalWithoutCoverage > 0) {
129
- logger.info(`${externalWithoutCoverage} external test file(s) have no extractable endpoint coverage — ` +
130
- `programmatic dedup skipped for these; Step 0 semantic check is the fallback.`);
131
- }
132
- return coverage;
133
- }
134
58
  // ── Execution Plan (replaces pre-ranked + scenarios + heuristic sections) ──
135
- function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject = false, isFrontendOnlyProject = false, externalCoverage = new Set()) {
136
- // Full-repo mode only — percentage-based UI/E2E slot targets (15% each, floor 1).
137
- const rawE2E = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
138
- const rawUI = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
139
- const slotsFloor = Math.floor(topN / 2);
140
- const minE2ESlots = Math.min(rawE2E, slotsFloor);
141
- const minUISlots = Math.min(rawUI, Math.max(0, topN - minE2ESlots));
142
- const authRef = authHeaderValue
143
- ? `, authHeader: "${authHeaderValue}"${authSchemeSnippet}`
144
- : `, authHeader: <check OpenAPI securitySchemes or auth middleware; "" if confirmed unauthenticated>`;
145
- const hasWorkspaceAuthType = !!authTypeValue && authTypeValue !== "none";
146
- const scenarioAuthRef = authRef;
147
- const authHeaderOnlyRef = hasWorkspaceAuthType
148
- ? ""
149
- : authHeaderValue
150
- ? `, authHeader: "${authHeaderValue}"`
151
- : `, authHeader: <check OpenAPI securitySchemes or auth middleware; "" if confirmed unauthenticated>`;
152
- // Supplement count for full-repo mode
153
- const supplementCount = topN - Math.min(scored.length, topN);
154
- const toTitle = (name) => name.replace(/-/g, " ").replace(/\b\w/g, c => c.toUpperCase());
155
- const TYPE_ORDER = ["e2e", "ui", "integration", "contract"];
156
- const TYPE_LABEL = {
157
- e2e: "E2E", ui: "UI", integration: "Integration", contract: "Contract",
158
- };
159
- // Filter out scenarios already covered by external tests before slicing.
160
- const scoredFiltered = externalCoverage.size > 0
161
- ? scored.filter(item => {
162
- const key = externalDedupKey(item.scenario);
163
- if (externalCoverage.has(key)) {
164
- logger.info(`External dedup (full-repo): skipping "${item.scenario.scenarioName}" (${key})`);
165
- return false;
166
- }
167
- return true;
168
- })
169
- : scored;
170
- // For full-stack repos, carve out E2E and UI slots before filling with backend tests.
171
- const backendSlotCount = isFrontendProject
172
- ? Math.max(0, topN - minE2ESlots - minUISlots)
173
- : topN;
174
- const allItems = scoredFiltered.slice(0, backendSlotCount);
175
- const byType = new Map();
176
- for (const t of TYPE_ORDER)
177
- byType.set(t, []);
178
- for (const item of allItems) {
179
- const t = item.scenario.testType ?? (item.scenario.steps.length === 1 ? "contract" : "integration");
180
- if (!byType.has(t))
181
- byType.set(t, []);
182
- byType.get(t).push(item);
183
- }
184
- const renderItem = (item, rank) => {
185
- const s = item.scenario;
186
- const testType = s.testType ?? (s.steps.length === 1 ? "contract" : "integration");
187
- const title = toTitle(s.scenarioName);
188
- if (testType === "contract") {
189
- const step = s.steps[0];
190
- const endpointURL = `${baseUrl}${step.path}`;
191
- const isBodyMethod = ["POST", "PUT", "PATCH"].includes(step.method);
192
- const dataParam = isBodyMethod
193
- ? `, requestData: <${step.method} ${step.path} required fields from source code>`
194
- : "";
195
- return [
196
- `**${rank}. ${title}**`,
197
- ` ${s.description}`,
198
- ` ${step.method} ${step.path} \u2192 ${step.expectedStatusCode}`,
199
- ` Tool: \`skyramp_contract_test_generation({ endpointURL: "${endpointURL}", method: "${step.method}"${authRef}${dataParam} })\``,
200
- ` From source: fill in requestData field names and the specific production boundary this validates`,
201
- ].join("\n");
202
- }
203
- else {
204
- const stepLines = s.steps.map(st => {
205
- const isBody = ["POST", "PUT", "PATCH"].includes(st.method);
206
- const bodyHint = isBody ? ` \u2014 body: <${st.method} ${st.path} required fields from source>` : "";
207
- return ` ${st.order}. ${st.method} ${st.path} \u2192 ${st.expectedStatusCode}: ${st.description}${bodyHint}`;
208
- }).join("\n");
209
- const isTraceBased = testType === "e2e" || testType === "ui";
210
- let toolCallsBlock;
211
- if (isTraceBased) {
212
- // E2E and UI need browser recording first, then generation
213
- const frontendUrl = "<frontend_url>";
214
- const zipPath = `<repositoryPath>/.skyramp/${s.scenarioName}_trace.zip`;
215
- if (testType === "ui") {
216
- toolCallsBlock = [
217
- ` 1. browser_navigate({ url: "${frontendUrl}" })`,
218
- ` 2. Interact with the changed components (browser_click, browser_type, browser_fill_form, etc.)`,
219
- ` 3. browser_snapshot() after each key interaction`,
220
- ` 4. skyramp_export_zip({ outputPath: "${zipPath}" }) — use absolute path`,
221
- ` 5. skyramp_ui_test_generation({ playwrightInput: "${zipPath}"${authHeaderOnlyRef} })`,
222
- ].join("\n");
223
- }
224
- else {
225
- toolCallsBlock = [
226
- ` 1. browser_navigate({ url: "${frontendUrl}" }) — record frontend trace`,
227
- ` 2. Interact with the user journey described above`,
228
- ` 3. skyramp_export_zip({ outputPath: "${zipPath}" }) — use absolute path`,
229
- ` 4. Capture backend trace JSON separately (skyramp_start_trace_collection / skyramp_stop_trace_collection)`,
230
- ` 5. skyramp_e2e_test_generation({ playwrightInput: "${zipPath}", trace: "<backend trace path>"${authHeaderOnlyRef} })`,
231
- ].join("\n");
232
- }
233
- }
234
- else {
235
- // Integration: use batch scenario tool (all steps in one call)
236
- let destinationHost = s.scenarioName;
237
- try {
238
- destinationHost = new URL(baseUrl).hostname;
239
- }
240
- catch { /* keep fallback */ }
241
- const batchSteps = s.steps.map(st => {
242
- const isBody = ["POST", "PUT", "PATCH"].includes(st.method);
243
- let dataParam = "";
244
- if (isBody) {
245
- if (st.requestBody && Object.keys(st.requestBody).length > 0) {
246
- const bodyJson = JSON.stringify(st.requestBody).replace(/"/g, '\\"');
247
- dataParam = `, requestBody: "${bodyJson}"`;
248
- }
249
- else {
250
- dataParam = `, requestBody: <${st.method} ${st.path} required fields from source code>`;
251
- }
252
- }
253
- return ` { method: "${st.method}", path: "${st.path}", statusCode: ${st.expectedStatusCode}${dataParam} }`;
254
- }).join(",\n");
255
- toolCallsBlock = [
256
- ` skyramp_batch_scenario_test_generation({ scenarioName: "${s.scenarioName}", destination: "${destinationHost}", baseURL: "${baseUrl}"${scenarioAuthRef}, steps: [\n${batchSteps}\n ] })`,
257
- ` skyramp_integration_test_generation({ scenarioFile: <filePath returned by skyramp_batch_scenario_test_generation above>${authHeaderOnlyRef} })`,
258
- ].join("\n");
259
- }
260
- return [
261
- `**${rank}. ${title}**`,
262
- ` ${s.description}`,
263
- ` Steps:`,
264
- stepLines,
265
- ` Tool calls:`,
266
- toolCallsBlock,
267
- ` From source: fill in requestBody field values and assert all computed response fields`,
268
- ].join("\n");
269
- }
270
- };
271
- const backendSections = TYPE_ORDER
272
- .filter(t => (byType.get(t) ?? []).length > 0)
273
- .map(t => {
274
- const items = byType.get(t);
275
- const label = TYPE_LABEL[t];
276
- let globalRank = 0;
277
- for (const prev of TYPE_ORDER) {
278
- if (prev === t)
279
- break;
280
- globalRank += (byType.get(prev) ?? []).length;
281
- }
282
- const entries = items.map((item, i) => renderItem(item, globalRank + i + 1)).join("\n\n");
283
- return `### ${label} (${items.length})\n\n${entries}`;
284
- });
285
- // Pre-allocate E2E and UI placeholder sections for full-stack repos.
286
- const e2eSectionParts = [];
287
- const uiSectionParts = [];
288
- if (isFrontendProject) {
289
- for (let i = 0; i < minE2ESlots; i++) {
290
- const rank = i + 1;
291
- e2eSectionParts.push(`**${rank}. E2E User Journey ${i + 1}**\n` +
292
- ` End-to-end test covering a complete user journey through the frontend and backend.\n` +
293
- ` To generate: record a browser trace, then call the generation tool.\n` +
294
- ` browser_navigate({ url: "${baseUrl}" }) \u2192 exercise key user flow \u2192 skyramp_export_zip({ outputPath: "<repo>/.skyramp/e2e_journey_${i + 1}.zip" })\n` +
295
- ` Tool: \`skyramp_e2e_test_generation({ playwrightInput: "<repo>/.skyramp/e2e_journey_${i + 1}.zip"${authHeaderOnlyRef} })\`\n` +
296
- ` From source: read frontend components and their API calls to identify the highest-value user journey`);
297
- }
298
- for (let i = 0; i < minUISlots; i++) {
299
- const rank = minE2ESlots + i + 1;
300
- uiSectionParts.push(`**${rank}. UI Component Test ${i + 1}**\n` +
301
- ` Test key UI component interactions and state changes.\n` +
302
- ` To generate: record a browser trace, then call the generation tool.\n` +
303
- ` browser_navigate({ url: "${baseUrl}" }) \u2192 interact with UI components \u2192 skyramp_export_zip({ outputPath: "<repo>/.skyramp/ui_component_${i + 1}.zip" })\n` +
304
- ` Tool: \`skyramp_ui_test_generation({ playwrightInput: "<repo>/.skyramp/ui_component_${i + 1}.zip"${authHeaderOnlyRef} })\`\n` +
305
- ` From source: read frontend component files to identify interactions, form submissions, and state transitions`);
306
- }
307
- // Offset backend section ranks by the number of E2E + UI placeholders
308
- const offset = minE2ESlots + minUISlots;
309
- backendSections.forEach((_, idx) => {
310
- const t = TYPE_ORDER.filter(t => (byType.get(t) ?? []).length > 0)[idx];
311
- if (!t)
312
- return;
313
- const items = byType.get(t);
314
- const label = TYPE_LABEL[t];
315
- let globalRank = offset;
316
- for (const prev of TYPE_ORDER) {
317
- if (prev === t)
318
- break;
319
- globalRank += (byType.get(prev) ?? []).length;
320
- }
321
- backendSections[idx] = `### ${label} (${items.length})\n\n${items.map((item, i) => renderItem(item, globalRank + i + 1)).join("\n\n")}`;
322
- });
323
- }
324
- const allSections = [
325
- ...(e2eSectionParts.length > 0 ? [`### E2E (${e2eSectionParts.length})\n\n${e2eSectionParts.join("\n\n")}`] : []),
326
- ...(uiSectionParts.length > 0 ? [`### UI (${uiSectionParts.length})\n\n${uiSectionParts.join("\n\n")}`] : []),
327
- ...backendSections,
328
- ];
329
- const sections = allSections.join("\n\n");
330
- const frontendTierNote = isFrontendOnlyProject
331
- ? `\n\n**Frontend repo:** supplement MUST include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`). Do NOT add integration or contract tests.`
332
- : isFrontendProject
333
- ? `\n\n**Full-stack repo:** supplement MUST include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`). Add these before exhausting backend tiers.`
334
- : "";
335
- const repoSupplementNote = supplementCount > 0
336
- ? `
337
- <supplement_guidance>
338
- **When to use:** The pre-ranked sections above contain fewer than ${topN} items. Add exactly ${supplementCount} more using the tiers below — exhaust each tier before moving to the next.
339
-
340
- **Tier 1 — Error paths for endpoints already in the list** (highest value, do first):
341
- • Auth boundary (no Authorization header → 403/401) → \`testType: contract, category: security_boundary\`
342
- • Invalid/non-existent IDs (→ 404) → \`testType: contract, category: error_handling\`
343
- • Missing required fields (→ 422) → \`testType: contract, category: data_validation\`
344
- • Boundary values for numeric fields → \`testType: integration, category: data_validation\`
345
- Note: DISCARD unique-constraint scenarios if the storage backend is Redis, MongoDB, or schema-less.
346
-
347
- **Tier 2 — Auth coverage for any endpoint not yet covered by Tier 1:**
348
- → \`testType: contract, category: security_boundary\`
349
-
350
- **Tier 3 — Cross-resource integration** (only when one resource's POST body contains another's \`_id\` field):
351
- → \`testType: integration, category: workflow\`
352
-
353
- **Tier 4 — CRUD lifecycle** for any resource not yet covered:
354
- → \`testType: integration, category: crud\`
355
-
356
- **How to fill each item:** Use path parameters in \`{param}\` format. Use real field names from the analysis or handler source — no generic placeholders. Describe behavior in API terms (HTTP method, path, status code), not storage internals.${frontendTierNote}
357
- </supplement_guidance>`
358
- : "";
359
- const typeMixText = isFrontendOnlyProject
360
- ? `This is a frontend repo. Focus on E2E and UI tests only. Include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`). Do NOT add integration or contract tests.`
361
- : isFrontendProject
362
- ? `This is a full-stack repo. Coverage ranking: E2E > UI > Integration > Contract. Include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`), in addition to backend integration and contract tests.`
363
- : `Focus on integration and contract tests for all API endpoints.`;
364
- return `## Test Recommendations — ${topN} total (grouped by test type)
365
-
366
- > Repo mode — no tests are executed. Ranked by risk within each type.
367
- > To generate any item: read the handler source, fill \`<…from source>\` placeholders with real values, then call the tool.
368
-
369
- ${sections}
370
-
371
- **Test type mix — MANDATORY. No smoke tests. No fuzz tests. Only: integration, contract, E2E, UI.**
372
- ${typeMixText}
373
-
374
- ${repoSupplementNote}
375
-
376
- **Present up to ${topN} recommendations.** Prioritize quality — only include a recommendation if it adds genuine new coverage. If fewer than ${topN} high-value tests exist for this codebase, stop at the last useful item rather than padding with trivial ones.
377
-
378
- ---
379
- <enrichment_notes>
380
- **Path resolution (do this before filling in any tool call):**
381
- Cross-check every endpoint path against the Router Mounting / Nesting section in the analysis above. Sub-routers may be mounted at nested prefixes — e.g. a reviews router with \`@router.get("/")\` may actually be \`GET /api/v1/products/{product_id}/reviews\` if mounted under that prefix. Always use the fully-qualified nested path in tool calls, not the path as it appears in the route file alone.
382
-
383
- **Existing test files (check before assigning output filenames):**
384
- See the Existing Tests section above. If a recommendation's primary resource already has a \`[skyramp]\` test file listed there, prefer passing an explicit \`output\` filename (e.g. \`output: "orders_integration_test.py"\`) to update the existing file rather than creating a duplicate. Do NOT update \`[external]\` test files — they are user-maintained.
385
-
386
- Before filling in tool call parameters for each item, use the analysis data already provided above (endpoint interactions, source context) first. Only read the route handler source code directly when the analysis data does not contain the specific value you need:
387
- - Required request body fields (POST/PUT/PATCH) — use field names from the analysis interactions; read source only if they show \`{}\` or are missing
388
- - Computed/derived response fields and their formulas — assert exact values; read source for formula details not captured in the analysis
389
- - Auth middleware — set authHeader/authScheme from the repository context above; FastAPI HTTPBearer → 403 not 401
390
- - Storage backend — if Redis or schema-less, discard unique-constraint and cascade-delete scenarios
391
- - Delete behavior — hard-delete → 204; soft-delete/cancel → 200
392
-
393
- ${buildTestQualityCriteria()}
394
-
395
- **5-dimension rubric — use to assign priority for supplement items:**
396
- | Dimension | What to assess |
397
- | Production Safety | Guards a critical boundary (auth, unique constraint, cascade delete, data integrity, breaking migration)? → HIGH |
398
- | Bug-Finding Potential | Targets a known failure mode (race condition, data consistency, state transition, cascade effect)? → HIGH |
399
- | User Journey Relevance | Reflects how real users interact (from traces, business flows, critical paths)? → HIGH or MEDIUM |
400
- | Coverage Gap | Addresses an area with zero existing test coverage? → bump up one tier |
401
- | Code Insight | Derived from actual implementation (spotted middleware pattern, N+1 risk, unique constraint)? → bump up one tier |
402
- </enrichment_notes>`;
403
- }
404
- function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false, externalCoverage = new Set(), relevantExternalTestPaths = []) {
405
- const frontendUrl = "<frontend_url>";
406
- // Slot allocation:
407
- // - UI-only PR: all GENERATE slots are UI placeholders (no pre-ranked backend scenarios)
408
- // - Mixed PR: last GENERATE slot is a UI placeholder; remaining slots are backend
409
- // - Backend-only PR: all GENERATE slots are backend scenarios
410
- const backendGenerateCount = isUIOnlyPR
411
- ? 0
412
- : hasFrontendChanges
413
- ? Math.max(0, maxGen - 1)
414
- : maxGen;
415
- // Filter out scenarios whose primary method + resource + test type is already covered by external tests.
416
- // Method-aware: an external test covering GET /orders won't block PUT /orders scenarios.
417
- // This is the programmatic complement to the prompt-level Step 0 dedup instructions.
418
- const scoredAfterExternalDedup = externalCoverage.size > 0
419
- ? scored.filter(item => {
420
- const key = externalDedupKey(item.scenario);
421
- if (externalCoverage.has(key)) {
422
- logger.info(`External dedup: skipping "${item.scenario.scenarioName}" (${key}) — covered by external test`);
423
- return false;
424
- }
425
- return true;
426
- })
427
- : scored;
428
- const generateItems = scoredAfterExternalDedup.slice(0, Math.min(backendGenerateCount, scoredAfterExternalDedup.length));
429
- const rawAdditionalItems = scoredAfterExternalDedup.slice(backendGenerateCount, topN);
430
- // Filter additional items whose primary resource + test type already appear in GENERATE
431
- const generatedCoverage = new Set(generateItems.map(item => scenarioCoverageKey(item.scenario)));
432
- const additionalItems = rawAdditionalItems.filter(item => !generatedCoverage.has(scenarioCoverageKey(item.scenario)));
433
- const hasWorkspaceAuthType = !!authTypeValue && authTypeValue !== "none";
434
- // For skyramp_integration_test_generation with scenarioFile:
435
- // - If workspace has authType set: omit auth entirely — workspace handles Bearer prefix.
436
- // - If no authType: pass authHeader only (no authScheme).
437
- const authHeaderOnlyRef = hasWorkspaceAuthType
438
- ? ""
439
- : authHeaderValue
440
- ? `, authHeader: "${authHeaderValue}"`
441
- : `, authHeader: <check OpenAPI securitySchemes or auth middleware; "" if confirmed unauthenticated>`;
442
- // UI-only: all GENERATE slots are UI test placeholders (one per changed component/flow)
443
- const uiGenerateBlocks = isUIOnlyPR
444
- ? Array.from({ length: maxGen }, (_, i) => {
445
- const rank = i + 1;
446
- const zipPath = `<repositoryPath>/.skyramp/ui_test_${rank}_trace.zip`;
447
- return hasTraces
448
- ? (`**#${rank} — GENERATE** | ui | workflow | new\n` +
449
- `Scenario: ui-test-from-trace-${rank} (rename from the actual changed component/flow)\n` +
450
- `Validates: UI interactions for a changed frontend component or flow.\n\n` +
451
- `**Tool**: \`skyramp_ui_test_generation({ playwrightInput: "<discovered_trace_file_path>", outputDir: "<frontend service testDirectory from workspace.yml e.g. frontend/tests>" })\``)
452
- : (`**#${rank} — GENERATE** | ui | workflow | new\n` +
453
- `Scenario: ui-test-for-changed-component-${rank} (rename from the actual changed component/flow)\n` +
454
- `Validates: UI interactions for changed frontend component/flow ${rank}.\n\n` +
455
- `**Tool workflow:**\n` +
456
- ` 1. \`browser_navigate({ url: "${frontendUrl}" })\`\n` +
457
- ` 2. Interact with the changed component (read the diff to identify which component changed and what interactions it supports)\n` +
458
- ` 3. \`browser_snapshot()\` after each key interaction\n` +
459
- ` 4. \`skyramp_export_zip({ outputPath: "${zipPath}" })\` — absolute path\n` +
460
- ` 5. \`skyramp_ui_test_generation({ playwrightInput: "${zipPath}", outputDir: "<frontend service testDirectory from workspace.yml e.g. frontend/tests>" })\`\n\n` +
461
- `Each item must target a distinct changed component or user flow.`);
462
- }).join("\n\n")
463
- : "";
464
- // Mixed PR: reserve the last GENERATE slot for a UI test for the changed frontend components.
465
- // Guard: skip when maxGen=0 (caller explicitly requested no generation)
466
- const uiRank = generateItems.length + 1;
467
- const uiPlaceholderBlock = (hasFrontendChanges && !isUIOnlyPR && maxGen > 0)
468
- ? hasTraces
469
- ? (`**#${uiRank} — GENERATE** | ui | workflow | new\n` +
470
- `Scenario: ui-test-for-changed-components (rename from the actual changed component/flow)\n` +
471
- `Validates: UI interactions for the changed frontend components in this PR.\n\n` +
472
- `**Tool**: \`skyramp_ui_test_generation({ playwrightInput: "<discovered_trace_file_path>", outputDir: "<frontend service testDirectory from workspace.yml e.g. frontend/tests>" })\``)
473
- : (`**#${uiRank} — GENERATE** | ui | workflow | new\n` +
474
- `Scenario: ui-test-for-changed-components (rename from the actual changed component/flow)\n` +
475
- `Validates: UI interactions for the changed frontend components in this PR.\n\n` +
476
- `**Tool workflow:**\n` +
477
- ` 1. \`browser_navigate({ url: "${frontendUrl}" })\`\n` +
478
- ` 2. Interact with the changed component (read the diff to identify which component changed and what interactions it supports)\n` +
479
- ` 3. \`browser_snapshot()\` after each key interaction\n` +
480
- ` 4. \`skyramp_export_zip({ outputPath: "<repositoryPath>/.skyramp/ui_mixed_pr_trace.zip" })\` — absolute path\n` +
481
- ` 5. \`skyramp_ui_test_generation({ playwrightInput: "<repositoryPath>/.skyramp/ui_mixed_pr_trace.zip", outputDir: "<frontend service testDirectory from workspace.yml e.g. frontend/tests>" })\`\n\n` +
482
- `Derive scenario name and steps from the actual changed frontend files.`)
483
- : "";
484
- const generateBlocks = generateItems.map((item, i) => {
485
- const rank = i + 1;
486
- const s = item.scenario;
487
- const testType = s.testType ?? (s.steps.length === 1 ? "contract" : "integration");
488
- if (testType === "contract") {
489
- const step = s.steps[0];
490
- const endpointURL = `${baseUrl}${step.path}`;
491
- const isBodyMethod = ["POST", "PUT", "PATCH"].includes(step.method);
492
- const requestBodyData = step.requestBody && Object.keys(step.requestBody).length > 0
493
- ? `\n Request body: ${JSON.stringify(step.requestBody)} (pass as JSON string in tool call, NOT as object)`
494
- : (isBodyMethod ? `\n Request body: <derive from source code schemas>` : "");
495
- const authContext = authHeaderValue
496
- ? `\n authHeader: "${authHeaderValue}"${authSchemeSnippet}`
497
- : `\n authHeader: <resolve from workspace or OpenAPI securitySchemes>; authScheme: <if Authorization>`;
498
- return (`**#${rank} — GENERATE** | ${testType} | ${s.category} | ${item.novelty}\n` +
499
- `${step.method} ${step.path} → ${step.expectedStatusCode}\n` +
500
- `Validates: ${s.description}\n\n` +
501
- `**Context for generation**:\n` +
502
- ` Endpoint URL: ${endpointURL}${requestBodyData}${authContext}\n\n` +
503
- `**Tool**: skyramp_contract_test_generation (see tool description for parameter structure)`);
504
- }
505
- else {
506
- // integration / e2e / ui — multi-step scenario pipeline
507
- const stepLines = s.steps.map((st) => {
508
- const chains = st.chainsFrom
509
- ? ` (chains: ${Array.isArray(st.chainsFrom)
510
- ? st.chainsFrom.map(c => `${c.sourceField} from step ${c.sourceStep}`).join(", ")
511
- : `${st.chainsFrom.sourceField} from step ${st.chainsFrom.sourceStep}`})`
512
- : "";
513
- const bodyHint = st.bodyMustInclude?.length
514
- ? ` [required fields: ${st.bodyMustInclude.join(", ")}]`
515
- : "";
516
- const responseHint = st.expectedResponseFields?.length
517
- ? ` [assert: ${st.expectedResponseFields.join(", ")}]`
518
- : "";
519
- const bodyData = st.requestBody && Object.keys(st.requestBody).length > 0
520
- ? ` [use requestBody: ${JSON.stringify(st.requestBody)} — pass as JSON string in tool call]`
521
- : "";
522
- return ` ${st.order}. ${st.method} ${st.path} → ${st.expectedStatusCode}: ${st.description}${chains}${bodyHint}${bodyData}${responseHint}`;
523
- }).join("\n");
524
- let destinationHost = "localhost";
525
- try {
526
- const parsed = new URL(baseUrl);
527
- destinationHost = parsed.hostname;
528
- }
529
- catch { /* use localhost as fallback */ }
530
- const authContext = authHeaderValue
531
- ? `authHeader: "${authHeaderValue}"${authSchemeSnippet}`
532
- : "authHeader: <resolve from workspace or OpenAPI securitySchemes>; authScheme: <if Authorization>";
533
- const prereqNote = s.category === "new_endpoint"
534
- ? `\n**Prerequisite discovery**: Check for FK fields (product_id, user_id, order_id) in the endpoint's request body. If found, prepend a step to create that prerequisite resource first, then chain its primary key field into the dependent step using template variable syntax. Check the actual field name from the response body (\`id\`, \`uuid\`, \`_id\`, etc.), response header (\`Location\`), or cookie — do not assume \`id\`.`
535
- : "";
536
- const bugLine = s.bugCatchingTarget
537
- ? `**Bug to catch**: ${s.bugCatchingTarget}\n`
538
- : "";
539
- const fromSource = s.source === "agent-enriched"
540
- ? "Auth: OpenAPI securitySchemes or auth middleware"
541
- : "Request/response shapes: source code schemas; Auth: OpenAPI securitySchemes or auth middleware";
542
- return (`**#${rank} — GENERATE** | ${testType} | ${s.category} | ${item.novelty}\n` +
543
- `Scenario: ${s.scenarioName} (${s.steps.length} steps)\n` +
544
- bugLine +
545
- `${stepLines}\n\n` +
546
- `**Context for generation**:\n` +
547
- ` - Destination: ${destinationHost}\n` +
548
- ` - Base URL: ${baseUrl}\n` +
549
- ` - ${authContext}\n` +
550
- ` - From source: ${fromSource}\n\n` +
551
- `**Tool pipeline**:\n` +
552
- ` 1. skyramp_batch_scenario_test_generation (see tool description for parameter structure)\n` +
553
- ` 2. skyramp_integration_test_generation with returned scenarioFile${authHeaderOnlyRef ? ` and ${authHeaderOnlyRef.replace(/^,\s*/, '')}` : ""}\n` +
554
- ` **Note**: requestBody/responseBody must be JSON strings (e.g. "{\\"field\\":\\"value\\"}"), not objects.` +
555
- prereqNote);
556
- }
557
- }).join("\n\n");
558
- // Pre-ranked backend additional candidates — the LLM picks from these per its Budget Plan.
559
- const additionalLines = additionalItems.map((item, i) => {
560
- const rank = maxGen + i + 1;
561
- const s = item.scenario;
562
- const testType = s.testType ?? (s.steps.length === 1 ? "contract" : "integration");
563
- const target = s.steps.length === 1
564
- ? `${s.steps[0].method} ${s.steps[0].path} → ${s.steps[0].expectedStatusCode}`
565
- : `Scenario: ${s.scenarioName} (${s.steps.map(st => `${st.method} ${st.path}`).join(" → ")})`;
566
- return `#${rank} [ADDITIONAL] | ${testType} | ${s.category} | ${item.novelty}\n ${target}\n Validates: ${s.description}`;
567
- }).join("\n\n");
568
- // UI/E2E guidance — the LLM adds as many as its Budget Plan calls for.
569
- // Note: if a UI test already occupies a GENERATE slot (uiPlaceholderBlock), that slot
570
- // satisfies the UI generate count — do not add it again in ADDITIONAL.
571
- const uiGuidance = !isUIOnlyPR ? `
572
- **UI/E2E tests (add per your Budget Plan):** If your Budget Plan requires UI/E2E items beyond what is already in your GENERATE list, append an [ADDITIONAL] entry for each. If a UI test already occupies a GENERATE slot above, that slot satisfies your UI/E2E generate count — do NOT add it again to ADDITIONAL. Tool workflow for each new item:
573
- - **E2E**: ${hasTraces ? "Use discovered trace/recording files with `skyramp_e2e_test_generation`." : "Add to additionalRecommendations with a note that both a backend API trace (`skyramp_start_trace_collection` / `skyramp_stop_trace_collection`) and a browser Playwright recording must be collected in a live environment first. Do NOT attempt `skyramp_e2e_test_generation` without both traces present."}
574
- - **UI**: ${hasTraces ? "Use an existing Playwright `.zip` trace with `skyramp_ui_test_generation`." : "Record a trace using `browser_navigate` + `browser_snapshot` + `skyramp_export_zip`, then call `skyramp_ui_test_generation({ playwrightInput: \"<zip_path>\", outputDir: \"<frontend testDirectory from workspace.yml>\" })`."}
575
- Derive scenario names and steps from the actual changed frontend files. If your Budget Plan calls for 0% UI/E2E, omit this entirely.` : "";
576
- const supplementNote = `\n**If your Budget Plan total exceeds the pre-ranked items listed above:** draft additional tests from source-code enrichment (Step 1). For each new or changed endpoint, identify boundary or variation scenarios — formula parameters, search/filter constraints, required field validation. Only after exhausting PR-specific scenarios, add generic patterns (auth boundary → 401, non-existent ID → 404). Do NOT supplement with tests whose endpoint + test type match a GENERATE item.`;
577
- // ── PR / branch-diff mode: execution plan ────────────────────────────────
578
- const externalTestFilesList = relevantExternalTestPaths.length > 0
579
- ? `**Read these external test files first** (paths are relative to the \`repositoryPath\` you passed to \`skyramp_analyze_changes\` — prepend it to get the absolute path). Determine exactly which HTTP methods + paths each one covers. This is the definitive source of truth for external coverage:\n${relevantExternalTestPaths.map(p => `- \`${p}\``).join("\n")}\n\n`
580
- : "";
581
- return `## Execution Plan
582
- Seed: ${seed} | Endpoints: ${endpointCount} | Max: ${maxGen} generate + up to ${Math.max(topN - maxGen, 0)} additional (your Budget Plan determines the exact count)
583
-
584
- ${buildScopeAssessmentSection(topN, maxGen, isUIOnlyPR)}
585
-
586
- **Step 0 — External test coverage verification (before executing anything)**
587
- ${externalTestFilesList}For every GENERATE item below, check its endpoint path and test type against the Existing Tests list (further down in the prompt).
588
- - **\`[external]\` tests**: If the endpoint is already covered by an \`[external]\` test of the same type → skip the resource entirely (do NOT create or update). Backfill from ADDITIONAL using the priority order below:
589
- 1. **BUG-CATCHING TESTS FIRST (CRITICAL)**: If source code analysis revealed a bug, logic error, or incorrect formula (e.g. discount math adding instead of subtracting, off-by-one errors, missing validation), CREATE A TEST THAT EXPOSES IT. The test SHOULD FAIL — that's the point. Document the bug. Example: if discount formula is wrong, test with discount=20% and assert correct math. If no bug found, skip to #2.
590
- 2. **PR-endpoint edge cases**: Look for integration test candidates covering error paths, boundary values, or alternative scenarios for the SAME endpoints changed in the PR diff. If no suitable candidate exists in ADDITIONAL, derive one from your source-code enrichment findings.
591
- 3. **Same-resource other scenarios**: Other HTTP methods or flows on the same resource group touched by the PR.
592
- 4. **Cross-resource workflows involving the PR endpoint**: Integration scenarios that include the PR's changed endpoint as one of the steps.
593
- 5. **Unrelated endpoint coverage (last resort)**: Tests for endpoints with no connection to the PR diff, only when ALL options above have been exhausted.
594
- **Avoid backfilling with a test for a completely unrelated resource (e.g. \`POST /reviews\` when the PR only changes \`/orders\`) if any PR-endpoint edge-case integration test is feasible.**
595
- - **Contract tests (\`[skyramp]\`)**: If an existing \`[skyramp]\` contract test already covers that resource path → UPDATE the existing test file instead of creating a new one. A new test case is a new test even if the file already exists — count it toward \`newTestsCreated\`.
596
- - **Integration/scenario tests**: Always generate as a new file via the scenario pipeline, even if an existing integration test covers the same resource. A new multi-step scenario is a distinct test. Count it toward \`newTestsCreated\`.
597
- - **UI tests**: Always generate as a new file. Count toward \`newTestsCreated\`.
598
-
599
- **Step 1 — Source-Code Enrichment (before executing anything)**
600
- Read the source code for ALL changed files. Before generating each recommendation, quote the relevant source code in a <source_evidence> block — include the route handler signature, request body schema fields, response shape, and any computed field formulas. Use these quotes to derive tool call parameters. Look for:
601
- - **Auth middleware** — check for known signals (${AUTH_MIDDLEWARE_PATTERNS_STR}). If any match, override \`authHeader\` and \`authScheme\` even if workspace.yml says authType: none. **If no known signal matches but the diff shows security-adjacent code** (decorators like \`@requiresRole\`/\`@Protected\`, function names like \`validateToken\`/\`checkPermission\`/\`verifyHMAC\`, or imports from auth/security packages), read the relevant source file to determine the actual auth scheme before proceeding. Auth handling for \`skyramp_integration_test_generation\` with \`scenarioFile\` is covered in the Tool Workflows section below.
602
- - Business rules and formulas (e.g. total_cost = compute * rate + memory * rate)
603
- - State transitions and domain constraints (e.g. budget cannot drop below current spend)
604
- - Validation logic (field constraints, cross-field dependencies)
605
- - Security boundaries not covered by the structural candidates below
606
-
607
- For each one found, evaluate it against these 6 dimensions and assign priority:
608
- | Dimension | What to assess |
609
- | Production Safety | Guards a critical boundary (auth, unique constraint, cascade delete, data integrity, breaking migration)? → HIGH |
610
- | Bug-Finding Potential | Targets a known failure mode (race condition, data consistency, state transition, cascade effect)? → HIGH |
611
- | Mutation Side Effects | Does PUT/PATCH modify a collection of child items (line items, cart entries) and trigger recalculation (totals, counts, amounts)? → HIGH — this is the most common source of user-reported bugs |
612
- | User Journey Relevance | Reflects how real users interact (from traces, business flows, critical paths)? → HIGH or MEDIUM |
613
- | Coverage Gap | Addresses an area with zero existing test coverage? → bump up one tier |
614
- | Code Insight | Derived from actual implementation (spotted middleware pattern, N+1 risk, unique constraint)? → bump up one tier |
615
-
616
- Quality gate — ask all three questions:
617
- 1. "Would this test prevent a production incident?" → YES = HIGH priority regardless of other dimensions
618
- 2. "Does this test exercise a real workflow or catch a real bug?" → YES = at least MEDIUM
619
- 3. "Does this test cover a mutation that modifies child items and triggers total/amount recalculation?" → YES = HIGH priority, and prefer it for GENERATE over simple single-field update tests for the same endpoint
620
-
621
- Assign category: ${TEST_CATEGORIES.join(" | ")}
622
-
623
- ${buildTestPatternGuidelines()}
624
-
625
- INSERT a source-code-derived candidate into the ranked list **only if ALL three conditions are met**:
626
- 1. Priority is HIGH (it guards a critical boundary or would prevent a production incident)
627
- 2. It is specific to THIS codebase — derived from a concrete business rule, formula, or constraint found in the changed files (not a general pattern that applies to any API)
628
- 3. It is not already covered by a structural candidate in the list below
629
-
630
- If these conditions are not met, add it to ADDITIONAL only — do NOT displace a pre-ranked GENERATE item.
631
- **CRITICAL-tier items (category: new_endpoint) should never be displaced** — they test the actual endpoints introduced in this PR and must always occupy GENERATE slots.
632
-
633
- When a qualifying candidate is inserted: place it HIGH before MEDIUM before LOW; within the same priority, source-code-derived candidates go BEFORE structural ones. Re-number ranks after insertion. The top ${maxGen} ranked items become GENERATE candidates.
634
-
635
- **Source-code validation gates (apply during Step 1):**
636
- - **Cascade vs referential integrity**: If both a cascade-delete and a delete-blocked scenario appear for the same resource pair, keep only the one matching the source FK delete policy (ON DELETE CASCADE / cascade=True / onDelete: 'CASCADE' → keep cascade-delete; RESTRICT/PROTECT/no annotation → keep delete-blocked). Remove the inapplicable variant.
637
- - **Unique constraints**: Unique-constraint scenarios (duplicate POST → 409) are pre-drafted for all resources. Confirm enforcement before keeping: SQL UNIQUE index, Mongoose unique: true, Prisma @unique, or explicit duplicate-check code. If the backend is Redis, schema-less, or has no explicit constraint in the changed files, move to ADDITIONAL with a note — do NOT generate.
638
-
639
- **Step 2 — Diversity check (using enriched knowledge from Step 1)**
640
- Each GENERATE item must exercise a **distinct code path** — not just different input values on the same path.
641
-
642
- For each pair of GENERATE items, ask: same HTTP method + path + step sequence + expected status? → DUPLICATE. Keep the richer item; replace the other with a test from a different path below. Move the displaced item to ADDITIONAL.
643
-
644
- **Good diversity — aim for this mix across GENERATE slots:**
645
- - **Happy-path**: create prerequisites → call the new endpoint → verify computed fields and child collections
646
- - **Error-path**: trigger a distinct error status (404 for non-existent resource, 422 for invalid input, 400 for malformed request — whichever the source code handles)
647
- - **State-variation**: same endpoint, different logic branch (empty array, remove instead of add, boundary value that triggers a guard)
648
-
649
- Same step sequence with only payload differences (e.g. 10% vs 5% discount both returning 200) = same code path = duplicate. Different scenario names do not make duplicate tests distinct.
650
-
651
- **Step 3 — Execute merged plan in rank order**
652
- Replace any scenario that pairs unrelated resources with one reflecting actual FK relationships in the codebase.
653
- Use the field names and values from the \`<source_evidence>\` blocks you quoted in Step 1 to fill all tool call parameters. Prefer reusing Step 1 evidence when it already resolves a placeholder, but if a placeholder cannot be replaced with concrete values from files already read, you may read the specific schema, model, or handler file needed to resolve it. Assert response field values, not just status codes.
654
-
655
- ${buildTestQualityCriteria()}
656
-
657
- ${buildGenerationRules(isUIOnlyPR)}
658
-
659
- **ADDITIONAL recommendations** are submitted via \`skyramp_submit_report\`. Refer to its schema for required fields. Only include recommendations that add distinct coverage beyond what was generated.
660
-
661
- **Never mark a recommendation "blocked":** No OpenAPI spec → use source code for shapes. No traces → provide \`skyramp_start_trace_collection\` instructions. No backend trace → use the scenario pipeline.
662
-
663
- **Critical-category minimum:** At least ${Math.min(MAX_CRITICAL_TESTS, maxGen)} of the ${maxGen} GENERATE items should be from HIGH-priority categories (security_boundary, business_rule, data_integrity, breaking_change). The pre-ranked plan below already prioritises this — only override if source-code enrichment reveals a higher-value candidate.
664
-
665
- ### GENERATE (process these EXACTLY as listed, in order — after completing Steps 0–2 above; if Step 0 converts an item to UPDATE, backfill the ADD slot from ADDITIONAL following the priority order in Step 0)
666
-
667
- ${isUIOnlyPR
668
- ? (uiGenerateBlocks || " (no UI generate items — derive scenarios from changed frontend files)")
669
- : ([generateBlocks, uiPlaceholderBlock].filter(Boolean).join("\n\n") || " (no pre-ranked generate items — draft your own based on endpoint analysis)")}
670
-
671
- **COMPLIANCE CHECK**: Before proceeding, verify your generate list matches the items above. If you plan to generate a scenario with a different name than what is listed (e.g. you want to generate "order-update-discount-calculation" but the plan says "orders-patch-add-items-recalculate"), STOP — use the plan's scenario name and steps. Add your alternative to ADDITIONAL instead. One retry on failure then skip to next item.
672
-
673
- ### ADDITIONAL (list in additionalRecommendations in this order after Step 1 insertion)
674
-
675
- ${additionalLines || " (none pre-ranked)"}
676
- ${uiGuidance}
677
- ${supplementNote}
678
-
679
- **Honor your Budget Plan: produce exactly the total you committed to (GENERATE + ADDITIONAL). No fewer, no padding with low-value tests.**
680
-
681
- ## Recommendation Stability
682
- - **Carry forward** previous additionalRecommendations that still apply — match by scenarioName (multi-step) or endpoint (single-endpoint). Re-derive category and priority from test content.
683
- - **Only drop** a previous recommendation if its target endpoint was removed, its business logic changed, or it is now covered by a generated test.
684
- - **Only add** new recommendations for code paths introduced since the last run.`;
685
- }
686
- // Exported for testing — these are the core dedup primitives.
687
- export { buildExternalCoverageSet, externalDedupKey };
688
59
  export function buildRecommendationPrompt(analysis, analysisScope = AnalysisScope.FullRepo, topN = MAX_RECOMMENDATIONS, prContext, workspaceAuthHeader, workspaceAuthType, workspaceAuthScheme, maxGenerateOverride, sessionId) {
689
60
  const isDiffScope = isDiff(analysisScope);
690
61
  const diffContext = analysis.branchDiffContext;
@@ -753,7 +124,7 @@ Output should be concise and immediately actionable.`
753
124
  changedLines.push(` ${m.method} ${ep.path} [removed]`);
754
125
  }
755
126
  }
756
- endpointLines = `**Changed in this PR:**\n${changedLines.join("\n") || " none"}\n\n**Other endpoints (reference only — do not prioritize for testing):**\n${otherLines.join("\n") || " none"}`;
127
+ endpointLines = `**Likely changed in this PR (from static file→endpoint mapping — verify against diff in Step 2):**\n${changedLines.join("\n") || " none"}\n\n**Other endpoints (reference only):**\n${otherLines.join("\n") || " none"}`;
757
128
  }
758
129
  else {
759
130
  endpointLines = allEndpoints
@@ -826,7 +197,7 @@ Framework: ${analysis.projectClassification.primaryFramework} (${analysis.projec
826
197
  Project type: ${analysis.projectClassification.projectType}
827
198
  Auth: ${authMethod} (header: ${authHeaderValue}${authTypeValue ? `, type: ${authTypeValue}` : ""})
828
199
  Base URL: ${analysis.apiEndpoints.baseUrl}
829
- Endpoints (${analysis.apiEndpoints.totalCount}):
200
+ Candidate endpoints from static scan — unverified, confirm paths against spec or source before use (${analysis.apiEndpoints.totalCount}):
830
201
  ${endpointLines}${testFingerprint}
831
202
  `.trim();
832
203
  // ── Branch diff ──
@@ -847,7 +218,7 @@ Affected services: ${diffContext.affectedServices.join(", ") || "N/A"}
847
218
 
848
219
  Focus on tests that validate these changes and how they interact with existing resources.
849
220
  For removed endpoints: verify they now return 404 or the appropriate deprecation status code.
850
- Allocate your test budget to endpoints listed under "Changed in this PR". Use other endpoints only as setup steps (e.g. creating a resource before testing its deletion).
221
+ Allocate your test budget to endpoints listed under "Likely changed in this PR". Use other endpoints only as setup steps (e.g. creating a resource before testing its deletion).
851
222
  `;
852
223
  }
853
224
  // ── Interactions ──