@skyramp/mcp 0.0.64-rc.9 → 0.0.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +2 -0
- package/build/prompts/test-maintenance/drift-analysis-prompt.js +26 -7
- package/build/prompts/test-maintenance/driftAnalysisSections.js +96 -34
- package/build/prompts/test-maintenance/enhanceAssertionSection.js +99 -0
- package/build/prompts/test-recommendation/recommendationSections.js +24 -9
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +103 -40
- package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +239 -2
- package/build/prompts/testbot/testbot-prompts.js +182 -125
- package/build/services/TestDiscoveryService.js +23 -0
- package/build/services/TestExecutionService.js +1 -1
- package/build/services/TestGenerationService.js +84 -12
- package/build/services/TestGenerationService.test.js +111 -2
- package/build/tool-phase-coverage.test.js +8 -2
- package/build/tool-phases.js +11 -13
- package/build/tools/generate-tests/generateBatchScenarioRestTool.js +203 -0
- package/build/tools/generate-tests/generateContractRestTool.js +3 -73
- package/build/tools/generate-tests/generateIntegrationRestTool.js +11 -61
- package/build/tools/generate-tests/generateMockRestTool.js +1 -0
- package/build/tools/submitReportTool.js +14 -5
- package/build/tools/submitReportTool.test.js +1 -1
- package/build/tools/test-management/analyzeChangesTool.js +14 -4
- package/build/types/RepositoryAnalysis.js +3 -12
- package/build/types/TestRecommendation.js +43 -1
- package/build/types/TestTypes.js +4 -0
- package/build/utils/scenarioDrafting.js +121 -11
- package/build/utils/scenarioDrafting.test.js +266 -3
- package/node_modules/playwright/ThirdPartyNotices.txt +679 -3093
- package/node_modules/playwright/lib/mcp/skyramp/traceRecordingBackend.js +117 -11
- package/package.json +2 -2
- package/build/tools/test-recommendation/recommendTestsTool.js +0 -274
|
@@ -1,25 +1,20 @@
|
|
|
1
1
|
import * as crypto from "crypto";
|
|
2
2
|
import { WorkspaceAuthType } from "../../utils/workspaceAuth.js";
|
|
3
3
|
import { buildToolWorkflows, buildTestPatternGuidelines, buildTestQualityCriteria, buildTestExamples, buildGenerationRules, getAuthSnippets, MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, } from "./recommendationSections.js";
|
|
4
|
+
import { CATEGORY_PRIORITY, TEST_CATEGORIES } from "../../types/TestRecommendation.js";
|
|
4
5
|
/**
 * Renders the "existing test files" section of the recommendation prompt.
 *
 * @param {Object|null|undefined} locs - Map of test type to the file(s) of
 *   that type; a nullish value is treated as an empty map.
 * @returns {string} An empty string when there are no entries; otherwise a
 *   heading, one instruction line, and one bullet per test type.
 */
function formatTestLocations(locs) {
    const pairs = Object.entries(locs || {});
    if (pairs.length === 0) {
        return "";
    }
    const heading = "\n**Existing test files — cross-check these before creating new files:**\n";
    const instruction = " If a GENERATE item's resource path matches a path listed here, UPDATE that file instead of creating a new one.\n";
    // Plain `+` concatenation is kept on purpose so the value's string coercion is unchanged.
    const bullets = pairs.map(([kind, paths]) => " - [" + kind + "] " + paths);
    return heading + instruction + bullets.join("\n");
}
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
breaking_change: "HIGH",
|
|
17
|
-
auth: "HIGH",
|
|
18
|
-
workflow: "MEDIUM",
|
|
19
|
-
"error-handling": "MEDIUM",
|
|
20
|
-
"data-validation": "MEDIUM",
|
|
21
|
-
crud: "LOW",
|
|
22
|
-
};
|
|
13
|
+
// ── Priority-tier ordering (replaces numeric CATEGORY_WEIGHTS) ──
|
|
14
|
+
// Categories map to HIGH / MEDIUM / LOW tiers.
|
|
15
|
+
// Within a tier, novelty (new > modified > existing) breaks ties,
|
|
16
|
+
// then cross-resource, step count, and finally the deterministic SHA-256 seed.
|
|
17
|
+
// CATEGORY_PRIORITY and TEST_CATEGORIES are imported from ../../types/TestRecommendation.js
|
|
23
18
|
const PRIORITY_ORDER = { CRITICAL: 4, HIGH: 3, MEDIUM: 2, LOW: 1 };
|
|
24
19
|
const NOVELTY_ORDER = { new: 3, modified: 2, existing: 1 };
|
|
25
20
|
function classifyNovelty(scenario, diffContext) {
|
|
@@ -43,10 +38,35 @@ function computeTiebreakerSeed(endpoints, diffFiles) {
|
|
|
43
38
|
const canonical = [...endpoints].sort().join("|") + "::" + [...diffFiles].sort().join("|");
|
|
44
39
|
return crypto.createHash("sha256").update(canonical).digest("hex").slice(0, 8);
|
|
45
40
|
}
|
|
41
|
+
// ── Helpers ──
|
|
42
|
+
// Path segments that are routing prefixes rather than resource names.
const SKIP_SEGMENTS_SET = new Set(["api", "v1", "v2", "v3", "public"]);

/**
 * Derives the resource name from an endpoint path: the last segment that is
 * neither a path parameter nor a routing prefix.
 *
 * Compared with a plain `{param}` / exact-match filter this also:
 *  - ignores a trailing query string or fragment,
 *  - recognises `:param` placeholders in addition to `{param}`,
 *  - matches prefix segments case-insensitively and skips any `v<digits>`
 *    version segment, not only v1–v3,
 *  - returns "unknown" for a non-string path instead of throwing.
 *
 * @param {string} path - Endpoint path, e.g. "/api/v1/orders/{order_id}".
 * @returns {string} The resource segment (e.g. "orders"), or "unknown" when
 *   no resource segment can be found.
 */
function extractResourceFromPath(path) {
    if (typeof path !== "string") {
        return "unknown";
    }
    const isPathParam = (segment) => segment.startsWith("{") || segment.startsWith(":");
    const isPrefix = (segment) => SKIP_SEGMENTS_SET.has(segment.toLowerCase()) || /^v\d+$/i.test(segment);
    // Keep only the pathname so "/orders?limit=5" resolves to "orders".
    const pathname = path.split(/[?#]/, 1)[0];
    const resourceSegments = pathname
        .split("/")
        .filter(Boolean)
        .filter((segment) => !isPathParam(segment) && !isPrefix(segment));
    return resourceSegments[resourceSegments.length - 1] || "unknown";
}
|
|
48
|
+
/**
 * Builds the "<resource>::<testType>" key used to detect that two scenarios
 * cover the same primary resource with the same kind of test.
 *
 * @param {object} scenario - Scenario with a `steps` array and an optional `testType`.
 * @returns {string} Coverage key, e.g. "orders::integration".
 */
function scenarioCoverageKey(scenario) {
    // Without an explicit type, a single-step scenario counts as a contract test.
    let kind = scenario.testType;
    if (kind === undefined || kind === null) {
        kind = scenario.steps.length === 1 ? "contract" : "integration";
    }
    // The last mutating step is the primary action under test; earlier
    // mutations are typically prerequisite setup (e.g. POST /products before
    // PATCH /orders).
    const writeMethods = ["POST", "PUT", "PATCH", "DELETE"];
    const writes = scenario.steps.filter((step) => writeMethods.includes(step.method));
    const lastWrite = writes[writes.length - 1];
    const lastStep = scenario.steps[scenario.steps.length - 1];
    const target = lastWrite ?? lastStep;
    const targetPath = target?.path ?? "";
    return `${extractResourceFromPath(targetPath)}::${kind}`;
}
|
|
46
58
|
// ── Execution Plan (replaces pre-ranked + scenarios + heuristic sections) ──
|
|
47
59
|
function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false) {
|
|
48
|
-
|
|
49
|
-
|
|
60
|
+
// For mixed PRs (frontend + backend), reserve the last GENERATE slot for a UI test
|
|
61
|
+
// so the agent has explicit room to record a browser trace and generate it.
|
|
62
|
+
const reserveUIGenSlot = hasFrontendChanges && !isUIOnlyPR && maxGen > 1;
|
|
63
|
+
const backendGenCount = reserveUIGenSlot ? maxGen - 1 : maxGen;
|
|
64
|
+
const backendBudget = reserveUIGenSlot ? Math.max(topN - 1, 0) : topN;
|
|
65
|
+
const generateItems = scored.slice(0, Math.min(backendGenCount, scored.length));
|
|
66
|
+
const rawAdditionalItems = scored.slice(backendGenCount, backendBudget);
|
|
67
|
+
// Filter additional items whose primary resource + test type already appear in GENERATE
|
|
68
|
+
const generatedCoverage = new Set(generateItems.map(item => scenarioCoverageKey(item.scenario)));
|
|
69
|
+
const additionalItems = rawAdditionalItems.filter(item => !generatedCoverage.has(scenarioCoverageKey(item.scenario)));
|
|
50
70
|
const authRef = authHeaderValue
|
|
51
71
|
? `, authHeader: "${authHeaderValue}"${authSchemeSnippet}`
|
|
52
72
|
: `, authHeader: <check OpenAPI securitySchemes or auth middleware; "" if confirmed unauthenticated>`;
|
|
@@ -87,32 +107,56 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
87
107
|
? st.chainsFrom.map(c => `${c.sourceField} from step ${c.sourceStep}`).join(", ")
|
|
88
108
|
: `${st.chainsFrom.sourceField} from step ${st.chainsFrom.sourceStep}`})`
|
|
89
109
|
: "";
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
const
|
|
94
|
-
|
|
95
|
-
? `, requestBody: <${st.method} ${st.path} body from source code schemas>`
|
|
110
|
+
const bodyHint = st.bodyMustInclude?.length
|
|
111
|
+
? ` [body MUST include: ${st.bodyMustInclude.join(", ")}]`
|
|
112
|
+
: "";
|
|
113
|
+
const responseHint = st.expectedResponseFields?.length
|
|
114
|
+
? ` [assert response fields: ${st.expectedResponseFields.join(", ")}]`
|
|
96
115
|
: "";
|
|
97
|
-
return `
|
|
116
|
+
return ` ${st.order}. ${st.method} ${st.path} → ${st.expectedStatusCode}: ${st.description}${chains}${bodyHint}${responseHint}`;
|
|
98
117
|
}).join("\n");
|
|
118
|
+
const batchSteps = s.steps.map((st) => {
|
|
119
|
+
const isBodyMethod = ["POST", "PUT", "PATCH"].includes(st.method);
|
|
120
|
+
let dataParam = "";
|
|
121
|
+
if (isBodyMethod) {
|
|
122
|
+
if (st.bodyMustInclude && st.bodyMustInclude.length > 0) {
|
|
123
|
+
const fields = st.bodyMustInclude.join(", ");
|
|
124
|
+
dataParam = `, requestBody: <${st.method} ${st.path} body from source code — MUST include child collection fields: [${fields}]. Chain FK fields (e.g. product_id) from prior POST response IDs. Do NOT omit the collection array or send only metadata/discount fields.>`;
|
|
125
|
+
}
|
|
126
|
+
else {
|
|
127
|
+
dataParam = `, requestBody: <${st.method} ${st.path} body from source code schemas>`;
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
return ` { method: "${st.method}", path: "${st.path}", statusCode: ${st.expectedStatusCode}${dataParam} }`;
|
|
131
|
+
}).join(",\n");
|
|
132
|
+
let destinationHost = s.scenarioName;
|
|
133
|
+
try {
|
|
134
|
+
const parsed = new URL(baseUrl);
|
|
135
|
+
destinationHost = parsed.hostname;
|
|
136
|
+
}
|
|
137
|
+
catch { /* use scenarioName as fallback */ }
|
|
138
|
+
const toolCalls = ` skyramp_batch_scenario_test_generation({ scenarioName: "${s.scenarioName}", destination: "${destinationHost}", baseURL: "${baseUrl}"${scenarioAuthRef}, steps: [\n${batchSteps}\n ] })`;
|
|
99
139
|
const prereqNote = s.category === "new_endpoint"
|
|
100
|
-
? `\nPrerequisite discovery (MANDATORY for new_endpoint): Before executing these tool calls, read the source code for the new endpoint's request body. Look for FK fields (e.g. \`product_id\`, \`user_id\`, \`order_id\`). For each FK field found, prepend
|
|
140
|
+
? `\nPrerequisite discovery (MANDATORY for new_endpoint): Before executing these tool calls, read the source code for the new endpoint's request body. Look for FK fields (e.g. \`product_id\`, \`user_id\`, \`order_id\`). For each FK field found, prepend a step to the \`steps\` array in \`skyramp_batch_scenario_test_generation\` to create that prerequisite resource first, then chain its \`id\` into the dependent step. If no FK fields exist, proceed with the steps above as-is.`
|
|
101
141
|
: "";
|
|
102
142
|
return (`**#${rank} — GENERATE** | ${testType} | ${s.category} | priority=${item.priority} | ${item.novelty}\n` +
|
|
103
143
|
`Scenario: ${s.scenarioName} (${s.steps.length} steps)\n` +
|
|
104
144
|
`${stepLines}\n` +
|
|
105
145
|
`Tool calls:\n` +
|
|
106
146
|
`${toolCalls}\n` +
|
|
107
|
-
` skyramp_integration_test_generation({ scenarioFile:
|
|
147
|
+
` skyramp_integration_test_generation({ scenarioFile: <use the filePath returned by skyramp_batch_scenario_test_generation above>${authHeaderOnlyRef} })\n` +
|
|
108
148
|
`From source: requestBody shapes for POST/PUT/PATCH steps; responseBody shapes; authScheme` +
|
|
109
149
|
prereqNote);
|
|
110
150
|
}
|
|
111
151
|
}).join("\n\n");
|
|
112
|
-
// For mixed PRs,
|
|
113
|
-
//
|
|
114
|
-
const
|
|
115
|
-
|
|
152
|
+
// For mixed PRs, reserve slots for UI/E2E additional recommendations — but skip
|
|
153
|
+
// if the GENERATE list already includes a UI/E2E test for the changed frontend flows.
|
|
154
|
+
const hasGeneratedFrontendTest = generateItems.some(item => {
|
|
155
|
+
const tt = item.scenario.testType ?? (item.scenario.steps.length === 1 ? "contract" : "integration");
|
|
156
|
+
return tt === "ui" || tt === "e2e";
|
|
157
|
+
}) || reserveUIGenSlot;
|
|
158
|
+
const needsE2ESlot = hasFrontendChanges && !isUIOnlyPR && !hasGeneratedFrontendTest;
|
|
159
|
+
const needsUISlot = hasFrontendChanges && !isUIOnlyPR && !reserveUIGenSlot && !hasGeneratedFrontendTest;
|
|
116
160
|
const frontendSlots = (needsE2ESlot ? 1 : 0) + (needsUISlot ? 1 : 0);
|
|
117
161
|
const backendAdditionalItems = frontendSlots > 0
|
|
118
162
|
? additionalItems.slice(0, Math.max(additionalItems.length - frontendSlots, 0))
|
|
@@ -141,12 +185,19 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
141
185
|
: "No traces exist yet — record a backend trace via `skyramp_start_trace_collection` + `skyramp_stop_trace_collection` and a UI trace via Playwright browser tools, then call `skyramp_e2e_test_generation`.";
|
|
142
186
|
return `\n\n#${rank} [ADDITIONAL] | E2E | workflow | priority=HIGH | new\n Scenario: e2e-flow-for-changed-feature (frontend + backend files changed in this diff)\n Validates: Full browser-level flow for the changed UI components end-to-end — derive the scenario name and steps from the actual changed frontend files. ${traceNote}`;
|
|
143
187
|
})() : "";
|
|
144
|
-
const
|
|
188
|
+
const reservedUIGenCount = reserveUIGenSlot ? 1 : 0;
|
|
189
|
+
const supplementCount = topN - generateItems.length - reservedUIGenCount - backendAdditionalItems.length - frontendSlots;
|
|
145
190
|
const supplementNote = supplementCount > 0
|
|
146
|
-
? `\n**REQUIRED — You MUST add ${supplementCount} more to reach the total of ${topN}.** Draft them from endpoint interactions and source code patterns not yet covered. Use the same
|
|
191
|
+
? `\n**REQUIRED — You MUST add ${supplementCount} more to reach the total of ${topN}.** Draft them from endpoint interactions and source code patterns not yet covered. Use the same 6-dimension rubric and quality gate to assign priority (HIGH/MEDIUM/LOW), testType, and category.${hasFrontendChanges && !isUIOnlyPR ? " Since this PR has frontend changes, at least 1 of these should be a UI or E2E test targeting the changed components." : ""} Do NOT produce fewer than ${topN} total. Do NOT supplement with tests whose primary endpoint and test type match a GENERATE item — those flows are already covered.`
|
|
147
192
|
: "";
|
|
148
193
|
return `## Execution Plan
|
|
149
|
-
Seed: ${seed} | Endpoints: ${endpointCount} | Budget: ${generateItems.length} generate + ${Math.max(topN - generateItems.length, 0)} additional = ${topN} total
|
|
194
|
+
Seed: ${seed} | Endpoints: ${endpointCount} | Budget: ${generateItems.length + (reserveUIGenSlot ? 1 : 0)} generate + ${Math.max(topN - generateItems.length - (reserveUIGenSlot ? 1 : 0), 0)} additional = ${topN} total
|
|
195
|
+
|
|
196
|
+
**Step 0 — Existing-test cross-check (MANDATORY before executing anything)**
|
|
197
|
+
For every GENERATE item below, check its endpoint path and test type against the Existing Tests list (further down in the prompt).
|
|
198
|
+
- **Contract tests**: If an existing contract test already covers that resource path → UPDATE the existing file instead of creating a new one. This does NOT count toward \`newTestsCreated\` — backfill from ADDITIONAL candidates to fill the open ADD slot.
|
|
199
|
+
- **Integration/scenario tests**: Always generate as a new file via the scenario pipeline, even if an existing integration test covers the same resource. A new multi-step scenario is a distinct test. Count it toward \`newTestsCreated\`.
|
|
200
|
+
- **UI tests**: Always generate as a new file. Count toward \`newTestsCreated\`.
|
|
150
201
|
|
|
151
202
|
**Step 1 — Source-Code Enrichment (MANDATORY before executing anything)**
|
|
152
203
|
Read the source code for ALL changed files. Look for:
|
|
@@ -156,19 +207,21 @@ Read the source code for ALL changed files. Look for:
|
|
|
156
207
|
- Validation logic (field constraints, cross-field dependencies)
|
|
157
208
|
- Security boundaries not covered by the structural candidates below
|
|
158
209
|
|
|
159
|
-
For each one found, evaluate it against these
|
|
210
|
+
For each one found, evaluate it against these 6 dimensions and assign priority:
|
|
160
211
|
| Dimension | What to assess |
|
|
161
212
|
| Production Safety | Guards a critical boundary (auth, unique constraint, cascade delete, data integrity, breaking migration)? → HIGH |
|
|
162
213
|
| Bug-Finding Potential | Targets a known failure mode (race condition, data consistency, state transition, cascade effect)? → HIGH |
|
|
214
|
+
| Mutation Side Effects | Does PUT/PATCH modify a collection of child items (line items, cart entries) and trigger recalculation (totals, counts, amounts)? → HIGH — this is the most common source of user-reported bugs |
|
|
163
215
|
| User Journey Relevance | Reflects how real users interact (from traces, business flows, critical paths)? → HIGH or MEDIUM |
|
|
164
216
|
| Coverage Gap | Addresses an area with zero existing test coverage? → bump up one tier |
|
|
165
217
|
| Code Insight | Derived from actual implementation (spotted middleware pattern, N+1 risk, unique constraint)? → bump up one tier |
|
|
166
218
|
|
|
167
|
-
Quality gate — ask
|
|
219
|
+
Quality gate — ask all three questions:
|
|
168
220
|
1. "Would this test prevent a production incident?" → YES = HIGH priority regardless of other dimensions
|
|
169
221
|
2. "Does this test exercise a real workflow or catch a real bug?" → YES = at least MEDIUM
|
|
222
|
+
3. "Does this test cover a mutation that modifies child items and triggers total/amount recalculation?" → YES = HIGH priority, and prefer it for GENERATE over simple single-field update tests for the same endpoint
|
|
170
223
|
|
|
171
|
-
Assign category:
|
|
224
|
+
Assign category: ${TEST_CATEGORIES.join(" | ")}
|
|
172
225
|
|
|
173
226
|
${buildTestPatternGuidelines()}
|
|
174
227
|
|
|
@@ -180,8 +233,9 @@ INSERT a source-code-derived candidate into the ranked list **only if ALL three
|
|
|
180
233
|
3. It is not already covered by a structural candidate in the list below
|
|
181
234
|
|
|
182
235
|
If these conditions are not met, add it to ADDITIONAL only — do NOT displace a pre-ranked GENERATE item.
|
|
236
|
+
**CRITICAL-tier items (category: new_endpoint) can NEVER be displaced** — they test the actual endpoints introduced in this PR and must always occupy GENERATE slots.
|
|
183
237
|
|
|
184
|
-
When a qualifying candidate is inserted: place it HIGH before MEDIUM before LOW; within the same priority, source-code-derived candidates go BEFORE structural ones. Re-number ranks after insertion. The top ${
|
|
238
|
+
When a qualifying candidate is inserted: place it HIGH before MEDIUM before LOW; within the same priority, source-code-derived candidates go BEFORE structural ones. Re-number ranks after insertion. The top ${backendGenCount} become backend GENERATE items.${reserveUIGenSlot ? " The final GENERATE slot is reserved for a UI test and is not taken from this ranked list." : ""}
|
|
185
239
|
|
|
186
240
|
**Cascade vs referential integrity:** If both a \`cascade-delete\` and a \`delete-blocked\` scenario appear for the same resource pair, keep only the one that matches the source code's FK delete policy (e.g. \`ON DELETE CASCADE\`, \`cascade=True\`, or \`onDelete: 'CASCADE'\` → keep cascade-delete; \`RESTRICT\`/\`PROTECT\`/no cascade → keep delete-blocked). Remove the inapplicable variant before executing.
|
|
187
241
|
|
|
@@ -206,23 +260,31 @@ ${buildGenerationRules(isUIOnlyPR)}
|
|
|
206
260
|
|
|
207
261
|
**Critical-category minimum:** At least ${Math.min(MAX_CRITICAL_TESTS, maxGen)} of the ${maxGen} GENERATE items MUST be from HIGH-priority categories (security_boundary, business_rule, data_integrity, breaking_change). The pre-ranked plan below already prioritises this — only override if source-code enrichment reveals a higher-value candidate.
|
|
208
262
|
|
|
209
|
-
### GENERATE (
|
|
263
|
+
### GENERATE (process these EXACTLY as listed, in order — do NOT reorder or replace any item with a different scenario; if Step 0 converts an item to UPDATE, backfill the ADD slot from ADDITIONAL)
|
|
264
|
+
|
|
265
|
+
${generateBlocks || " (no pre-ranked generate items — draft your own based on endpoint analysis)"}${reserveUIGenSlot ? `
|
|
266
|
+
|
|
267
|
+
**#${generateItems.length + 1} — GENERATE** | UI | workflow | priority=HIGH | new
|
|
268
|
+
Scenario: ui-interaction-for-changed-components (frontend files changed in this diff)
|
|
269
|
+
Record a browser trace for the changed UI components, then generate a UI test.
|
|
270
|
+
Steps: browser_navigate → browser_snapshot → interact with changed components → browser_assert → skyramp_export_zip → skyramp_ui_test_generation
|
|
271
|
+
This slot is RESERVED — you MUST attempt a UI test here. Only skip if browser_navigate fails (app unreachable).` : ""}
|
|
210
272
|
|
|
211
|
-
|
|
273
|
+
**COMPLIANCE CHECK**: Before proceeding, verify your generate list matches the items above. If you plan to generate a scenario with a DIFFERENT name than what is listed (e.g. you want to generate "order-update-discount-calculation" but the plan says "orders-patch-add-items-recalculate"), STOP — use the plan's scenario name and steps. Add your alternative to ADDITIONAL instead. One retry on failure then skip to next item.
|
|
212
274
|
|
|
213
275
|
### ADDITIONAL (list in additionalRecommendations in this order after Step 1 insertion)
|
|
214
276
|
|
|
215
277
|
${additionalLines || " (none pre-ranked)"}${uiSlotLine}${e2eSlotLine}
|
|
216
278
|
${supplementNote}
|
|
217
279
|
|
|
218
|
-
**You MUST produce EXACTLY ${topN} total recommendations: ${generateItems.length} to generate + ${Math.max(topN - generateItems.length, 0)} as additionalRecommendations. Do NOT produce fewer. Generate recommendations now.**
|
|
280
|
+
**You MUST produce EXACTLY ${topN} total recommendations: ${generateItems.length + (reserveUIGenSlot ? 1 : 0)} to generate + ${Math.max(topN - generateItems.length - (reserveUIGenSlot ? 1 : 0), 0)} as additionalRecommendations. Do NOT produce fewer. Generate recommendations now.**
|
|
219
281
|
|
|
220
282
|
## Recommendation Stability
|
|
221
283
|
- **Carry forward** previous additionalRecommendations that still apply — match by scenarioName (multi-step) or endpoint (single-endpoint). Re-derive category and priority from test content.
|
|
222
284
|
- **Only drop** a previous recommendation if its target endpoint was removed, its business logic changed, or it is now covered by a generated test.
|
|
223
285
|
- **Only add** new recommendations for code paths introduced since the last run.`;
|
|
224
286
|
}
|
|
225
|
-
export function buildRecommendationPrompt(analysis, analysisScope = "full_repo", topN = MAX_RECOMMENDATIONS, prContext, workspaceAuthHeader, workspaceAuthType) {
|
|
287
|
+
export function buildRecommendationPrompt(analysis, analysisScope = "full_repo", topN = MAX_RECOMMENDATIONS, prContext, workspaceAuthHeader, workspaceAuthType, maxGenerateOverride) {
|
|
226
288
|
const isDiffScope = analysisScope === "current_branch_diff";
|
|
227
289
|
const diffContext = analysis.branchDiffContext;
|
|
228
290
|
const openApiSpec = analysis.artifacts?.openApiSpecs?.[0];
|
|
@@ -342,7 +404,8 @@ ${detailBlocks}
|
|
|
342
404
|
}
|
|
343
405
|
// ── Scoring ──
|
|
344
406
|
const endpointCount = allEndpoints.reduce((acc, ep) => acc + (ep.methods ?? []).length, 0);
|
|
345
|
-
const
|
|
407
|
+
const baseMaxGen = Math.min(Math.max(maxGenerateOverride ?? (isDiffScope ? MAX_TESTS_TO_GENERATE : topN), 0), topN);
|
|
408
|
+
const maxGen = isUIOnlyPR ? (hasTraces ? baseMaxGen : 0) : baseMaxGen;
|
|
346
409
|
const scenarios = analysis.businessContext.draftedScenarios;
|
|
347
410
|
let scored = [];
|
|
348
411
|
let seed = "";
|
|
@@ -2,7 +2,7 @@ jest.mock("@skyramp/skyramp", () => ({
|
|
|
2
2
|
WorkspaceConfigManager: { create: jest.fn() },
|
|
3
3
|
}));
|
|
4
4
|
import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
|
|
5
|
-
import { PATH_PARAM_UUID_GUIDANCE, MAX_TESTS_TO_GENERATE } from "./recommendationSections.js";
|
|
5
|
+
import { PATH_PARAM_UUID_GUIDANCE, MAX_TESTS_TO_GENERATE, buildTestQualityCriteria } from "./recommendationSections.js";
|
|
6
6
|
// ---------------------------------------------------------------------------
|
|
7
7
|
// Minimal fixtures
|
|
8
8
|
// ---------------------------------------------------------------------------
|
|
@@ -315,7 +315,7 @@ describe("buildRecommendationPrompt — Stability and supplement section", () =>
|
|
|
315
315
|
});
|
|
316
316
|
const prompt = buildRecommendationPrompt(analysis, "full_repo", 10);
|
|
317
317
|
expect(prompt).toContain("REQUIRED — You MUST add");
|
|
318
|
-
expect(prompt).toContain("
|
|
318
|
+
expect(prompt).toContain("6-dimension rubric");
|
|
319
319
|
});
|
|
320
320
|
// Verify MAX_TESTS_TO_GENERATE is still exported and equals 3
|
|
321
321
|
it("MAX_TESTS_TO_GENERATE is 3", () => {
|
|
@@ -369,3 +369,240 @@ describe("PATH_PARAM_UUID_GUIDANCE — no hardcoded UUID anchor", () => {
|
|
|
369
369
|
expect(prompt).not.toMatch(UUID_V4_REGEX);
|
|
370
370
|
});
|
|
371
371
|
});
|
|
372
|
+
// ---------------------------------------------------------------------------
|
|
373
|
+
// Tests — maxGenerateOverride parameter in buildRecommendationPrompt
|
|
374
|
+
// ---------------------------------------------------------------------------
|
|
375
|
+
describe("buildRecommendationPrompt — maxGenerateOverride", () => {
|
|
376
|
+
const scenariosForOverride = Array.from({ length: 6 }, (_, i) => minimalScenario({
|
|
377
|
+
scenarioName: `scenario-${i}`,
|
|
378
|
+
description: `Test scenario ${i}`,
|
|
379
|
+
category: i < 2 ? "security_boundary" : "crud",
|
|
380
|
+
priority: i < 2 ? "high" : "low",
|
|
381
|
+
}));
|
|
382
|
+
const analysisWithScenarios = minimalAnalysis({
|
|
383
|
+
businessContext: {
|
|
384
|
+
mainPurpose: "Test API",
|
|
385
|
+
userFlows: [],
|
|
386
|
+
dataFlows: [],
|
|
387
|
+
integrationPatterns: [],
|
|
388
|
+
draftedScenarios: scenariosForOverride,
|
|
389
|
+
},
|
|
390
|
+
});
|
|
391
|
+
it("uses MAX_TESTS_TO_GENERATE as default when maxGenerateOverride is undefined", () => {
|
|
392
|
+
const prompt = buildRecommendationPrompt(analysisWithScenarios, "current_branch_diff", 10);
|
|
393
|
+
expect(prompt).toContain(`Budget: ${MAX_TESTS_TO_GENERATE} generate`);
|
|
394
|
+
});
|
|
395
|
+
it("respects maxGenerateOverride when provided", () => {
|
|
396
|
+
const prompt = buildRecommendationPrompt(analysisWithScenarios, "current_branch_diff", 10, undefined, undefined, undefined, 5);
|
|
397
|
+
expect(prompt).toContain("Budget: 5 generate");
|
|
398
|
+
expect(prompt).toContain("additional = 10 total");
|
|
399
|
+
});
|
|
400
|
+
it("clamps maxGenerateOverride to topN when override exceeds topN", () => {
|
|
401
|
+
const prompt = buildRecommendationPrompt(analysisWithScenarios, "current_branch_diff", 4, undefined, undefined, undefined, 10);
|
|
402
|
+
expect(prompt).toContain("Budget: 4 generate");
|
|
403
|
+
});
|
|
404
|
+
it("clamps maxGenerateOverride to 0 when negative", () => {
|
|
405
|
+
const prompt = buildRecommendationPrompt(analysisWithScenarios, "current_branch_diff", 10, undefined, undefined, undefined, -5);
|
|
406
|
+
expect(prompt).toContain("Budget: 0 generate");
|
|
407
|
+
});
|
|
408
|
+
it("allows maxGenerateOverride of 0 to produce no generate items", () => {
|
|
409
|
+
const prompt = buildRecommendationPrompt(analysisWithScenarios, "current_branch_diff", 10, undefined, undefined, undefined, 0);
|
|
410
|
+
expect(prompt).toContain("Budget: 0 generate");
|
|
411
|
+
expect(prompt).not.toContain("#1 — GENERATE");
|
|
412
|
+
});
|
|
413
|
+
it("uses topN as default maxGen in full_repo scope when maxGenerateOverride is undefined", () => {
|
|
414
|
+
const prompt = buildRecommendationPrompt(analysisWithScenarios, "full_repo", 6);
|
|
415
|
+
expect(prompt).toContain("Budget: 6 generate");
|
|
416
|
+
});
|
|
417
|
+
it("overrides full_repo default when maxGenerateOverride is provided", () => {
|
|
418
|
+
const prompt = buildRecommendationPrompt(analysisWithScenarios, "full_repo", 6, undefined, undefined, undefined, 2);
|
|
419
|
+
expect(prompt).toContain("Budget: 2 generate");
|
|
420
|
+
expect(prompt).toContain("additional = 6 total");
|
|
421
|
+
});
|
|
422
|
+
});
|
|
423
|
+
// ---------------------------------------------------------------------------
|
|
424
|
+
// Tests — Additional recommendation dedup (Fix 1) and E2E slot guard (Fix 2)
|
|
425
|
+
// ---------------------------------------------------------------------------
|
|
426
|
+
describe("buildRecommendationPrompt — additional recommendation dedup", () => {
|
|
427
|
+
/**
 * Test fixture: a three-step integration scenario that creates a product,
 * creates an order, then PATCHes the order.
 *
 * @param {string} name - Scenario name; also used in the description.
 * @param {object} [overrides={}] - Properties that replace the defaults.
 * @returns {object} The scenario object.
 */
function patchOrdersScenario(name, overrides = {}) {
    const successStep = (order, method, path, description, expectedStatusCode) => ({
        order,
        method,
        path,
        description,
        interactionType: "success",
        expectedStatusCode,
    });
    const defaults = {
        scenarioName: name,
        description: `Test ${name}`,
        category: "new_endpoint",
        priority: "high",
        steps: [
            successStep(1, "POST", "/api/v1/products", "Create product", 201),
            successStep(2, "POST", "/api/v1/orders", "Create order", 201),
            successStep(3, "PATCH", "/api/v1/orders/{order_id}", "Patch order", 200),
        ],
        chainingKeys: ["id"],
        requiresAuth: false,
        estimatedComplexity: "complex",
        testType: "integration",
    };
    return { ...defaults, ...overrides };
}
|
|
445
|
+
/**
 * Test fixture: wraps the given drafted scenarios in an analysis for a branch
 * diff that changes one backend file and one frontend file and introduces
 * PATCH /api/v1/orders/{order_id}.
 *
 * @param {object[]} scenarios - Drafted scenarios to place in the business context.
 * @returns {object} Whatever `minimalAnalysis` returns for these overrides.
 */
function analysisWithPatchScenarios(scenarios) {
    // One unauthenticated method per endpoint, all declared in routes.py.
    const endpoint = (path, resourceGroup, pathParams, method, description) => ({
        path,
        resourceGroup,
        pathParams,
        methods: [{ method, description, queryParams: [], authRequired: false, sourceFile: "routes.py", interactions: [] }],
    });
    const businessContext = {
        mainPurpose: "Order API",
        userFlows: [],
        dataFlows: [],
        integrationPatterns: [],
        draftedScenarios: scenarios,
    };
    const branchDiffContext = {
        baseBranch: "main",
        currentBranch: "feature/patch-orders",
        changedFiles: ["backend/src/api/orders.py", "src/frontend/components/OrderDetail.tsx"],
        newEndpoints: [{ path: "/api/v1/orders/{order_id}", methods: [{ method: "PATCH", sourceFile: "routes.py", interactionCount: 0 }] }],
        modifiedEndpoints: [],
        affectedServices: [],
    };
    const apiEndpoints = {
        totalCount: 3,
        baseUrl: "http://localhost:8000",
        endpoints: [
            endpoint("/api/v1/products", "products", [], "POST", "Create product"),
            endpoint("/api/v1/orders", "orders", [], "POST", "Create order"),
            endpoint("/api/v1/orders/{order_id}", "orders", [{ name: "order_id", type: "string", required: true }], "PATCH", "Update order"),
        ],
    };
    return minimalAnalysis({ businessContext, branchDiffContext, apiEndpoints });
}
|
|
473
|
+
// Every scenario here ends in PATCH /api/v1/orders/{order_id} with testType
// "integration", so they all share the coverage key "orders::integration".
it("filters additional items that share resource and test type with GENERATE items", () => {
    const scenarios = [
        patchOrdersScenario("orders-patch-add-items-recalculate"),
        patchOrdersScenario("orders-patch-new-endpoint-happy-path"),
        patchOrdersScenario("orders-patch-items-cleanup-verification"),
        patchOrdersScenario("orders-patch-discount-fixed"),
        patchOrdersScenario("orders-patch-another-variant"),
    ];
    const analysis = analysisWithPatchScenarios(scenarios);
    const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 10, undefined, undefined, undefined, 2);
    // The leading scenarios fill the GENERATE slots; the rest duplicate their
    // coverage key and must NOT appear in ADDITIONAL.
    const ordersPatchAdditional = prompt.match(/\[ADDITIONAL\].*orders-patch/g) || [];
    expect(ordersPatchAdditional.length).toBe(0);
});
|
|
489
|
+
// Verifies that a scenario for the same endpoint but with a different test
// type is still present in the generated prompt.
it("preserves additional items with different test type for same endpoint", () => {
  // Two scenarios from the shared helper come first, in this order.
  const drafted = [
    "orders-patch-add-items-recalculate",
    "orders-patch-new-endpoint-happy-path",
  ].map((scenarioName) => patchOrdersScenario(scenarioName));

  // Contract test for same endpoint — different test type, should survive dedup
  const contractVariant = {
    ...patchOrdersScenario("orders-patch-contract"),
    testType: "contract",
    steps: [
      { order: 1, method: "PATCH", path: "/api/v1/orders/{order_id}", description: "Contract test", interactionType: "success", expectedStatusCode: 200 },
    ],
  };
  drafted.push(contractVariant);

  const generatedPrompt = buildRecommendationPrompt(
    analysisWithPatchScenarios(drafted),
    "current_branch_diff",
    10,
    undefined,
    undefined,
    undefined,
    2,
  );

  // Contract test targets orders but is a different type → should be in ADDITIONAL
  expect(generatedPrompt).toContain("orders-patch-contract");
});
|
|
505
|
+
// Verifies that a scenario whose steps hit a different path (/api/v1/products)
// is still present in the generated prompt.
it("preserves additional items targeting a different resource", () => {
  // Two scenarios from the shared helper come first, in this order.
  const drafted = [
    "orders-patch-add-items-recalculate",
    "orders-patch-new-endpoint-happy-path",
  ].map((scenarioName) => patchOrdersScenario(scenarioName));

  // Different resource entirely: the helper's output is reused, but its steps
  // are overridden with two POSTs against the products path.
  const productSteps = [
    { order: 1, method: "POST", path: "/api/v1/products", description: "Create product", interactionType: "success", expectedStatusCode: 201 },
    { order: 2, method: "POST", path: "/api/v1/products", description: "Create duplicate", interactionType: "error", expectedStatusCode: 409 },
  ];
  drafted.push({
    ...patchOrdersScenario("products-unique-constraint"),
    steps: productSteps,
  });

  const generatedPrompt = buildRecommendationPrompt(
    analysisWithPatchScenarios(drafted),
    "current_branch_diff",
    10,
    undefined,
    undefined,
    undefined,
    2,
  );

  expect(generatedPrompt).toContain("products-unique-constraint");
});
|
|
522
|
+
});
|
|
523
|
+
describe("buildRecommendationPrompt — E2E slot guard (Fix 2)", () => {
  // Marker string the tests look for in the prompt to detect the E2E additional slot.
  const E2E_SLOT_MARKER = "e2e-flow-for-changed-feature";

  // Fixture: a single-step scenario whose testType is "ui".
  const uiScenario = () => ({
    scenarioName: "ui-edit-order-crash",
    description: "UI test for edit order crash",
    category: "new_endpoint",
    priority: "high",
    steps: [
      { order: 1, method: "GET", path: "/orders/{order_id}", description: "Navigate to order detail", interactionType: "success", expectedStatusCode: 200 },
    ],
    chainingKeys: [],
    requiresAuth: false,
    estimatedComplexity: "simple",
    testType: "ui",
  });

  // Fixture: an analysis whose branch diff lists one backend and one frontend
  // file as changed and exposes exactly one new endpoint.
  const analysisForDiff = (draftedScenarios, newEndpoint) =>
    minimalAnalysis({
      businessContext: { mainPurpose: "Test", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios },
      branchDiffContext: {
        baseBranch: "main",
        currentBranch: "feature/test",
        changedFiles: ["backend/routes.py", "src/frontend/components/App.tsx"],
        newEndpoints: [newEndpoint],
        modifiedEndpoints: [],
        affectedServices: [],
      },
    });

  it("suppresses E2E additional slot when UI test is in GENERATE list", () => {
    const drafted = [
      minimalScenario({ scenarioName: "integration-test-1", category: "new_endpoint" }),
      uiScenario(),
    ];
    const repoAnalysis = analysisForDiff(drafted, {
      path: "/api/items/{id}",
      methods: [{ method: "PATCH", sourceFile: "routes.py", interactionCount: 0 }],
    });

    const generatedPrompt = buildRecommendationPrompt(repoAnalysis, "current_branch_diff", 10, undefined, undefined, undefined, 3);

    expect(generatedPrompt).not.toContain(E2E_SLOT_MARKER);
  });

  it("includes E2E additional slot when no UI test is generated and no UI slot reserved", () => {
    const drafted = [
      minimalScenario({ scenarioName: "integration-test-1", category: "new_endpoint" }),
      minimalScenario({ scenarioName: "integration-test-2", category: "new_endpoint" }),
    ];
    const repoAnalysis = analysisForDiff(drafted, {
      path: "/api/items",
      methods: [{ method: "POST", sourceFile: "routes.py", interactionCount: 0 }],
    });

    // maxGen=1 so reserveUIGenSlot is false (requires maxGen > 1),
    // and no UI scenario in GENERATE → E2E slot should appear
    const generatedPrompt = buildRecommendationPrompt(repoAnalysis, "current_branch_diff", 10, undefined, undefined, undefined, 1);

    expect(generatedPrompt).toContain(E2E_SLOT_MARKER);
  });
});
|
|
580
|
+
// ---------------------------------------------------------------------------
|
|
581
|
+
// Tests — buildTestQualityCriteria contract-test guidance (regression guard)
|
|
582
|
+
// ---------------------------------------------------------------------------
|
|
583
|
+
describe("buildTestQualityCriteria — contract test guidance for error-handling", () => {
  it("includes guidance to use contract tests for single-endpoint error-handling scenarios", () => {
    const guidance = buildTestQualityCriteria();
    // Each fragment must appear verbatim in the criteria text; checked in this order.
    const requiredFragments = [
      "Contract tests",
      "error-handling scenarios on a single",
      "Do NOT add setup steps just to avoid hardcoding an ID",
    ];
    for (const fragment of requiredFragments) {
      expect(guidance).toContain(fragment);
    }
  });

  it("instructs to use a hardcoded nonexistent ID to keep it a single-step test", () => {
    const guidance = buildTestQualityCriteria();
    for (const fragment of ["99999", "single-step contract test"]) {
      expect(guidance).toContain(fragment);
    }
  });

  it("is included in the recommendation prompt when scored scenarios exist", () => {
    // One default drafted scenario is supplied through the business context.
    const businessContext = {
      mainPurpose: "Test API",
      userFlows: [],
      dataFlows: [],
      integrationPatterns: [],
      draftedScenarios: [minimalScenario()],
    };
    const generatedPrompt = buildRecommendationPrompt(minimalAnalysis({ businessContext }), "full_repo", 10);

    expect(generatedPrompt).toContain("Do NOT add setup steps just to avoid hardcoding an ID");
  });
});
|