@skyramp/mcp 0.0.65 → 0.1.0-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/playwright/traceRecordingPrompt.js +30 -36
- package/build/prompts/architectPersona.js +19 -0
- package/build/prompts/test-maintenance/drift-analysis-prompt.js +11 -6
- package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +49 -0
- package/build/prompts/test-maintenance/driftAnalysisSections.js +4 -2
- package/build/prompts/test-recommendation/analysisOutputPrompt.js +42 -50
- package/build/prompts/test-recommendation/mergeEnrichedScenarios.test.js +125 -0
- package/build/prompts/test-recommendation/recommendationSections.js +121 -4
- package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +151 -9
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +416 -61
- package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +455 -63
- package/build/prompts/testbot/testbot-prompts.js +111 -100
- package/build/prompts/testbot/testbot-prompts.test.js +142 -0
- package/build/resources/analysisResources.js +13 -5
- package/build/services/ScenarioGenerationService.js +2 -2
- package/build/services/ScenarioGenerationService.test.js +35 -0
- package/build/services/TestExecutionService.js +1 -1
- package/build/tools/code-refactor/modularizationTool.js +2 -2
- package/build/tools/executeSkyrampTestTool.js +4 -3
- package/build/tools/generate-tests/generateBatchScenarioRestTool.js +51 -21
- package/build/tools/generate-tests/generateContractRestTool.js +26 -4
- package/build/tools/generate-tests/generateIntegrationRestTool.js +44 -13
- package/build/tools/generate-tests/generateScenarioRestTool.js +17 -39
- package/build/tools/generate-tests/generateUIRestTool.js +69 -4
- package/build/tools/submitReportTool.js +27 -13
- package/build/tools/test-management/analyzeChangesTool.js +32 -10
- package/build/tools/test-management/analyzeChangesTool.test.js +85 -0
- package/build/types/RepositoryAnalysis.js +25 -3
- package/build/types/TestRecommendation.js +5 -4
- package/build/types/TestTypes.js +44 -9
- package/build/utils/AnalysisStateManager.js +43 -9
- package/build/utils/AnalysisStateManager.test.js +35 -0
- package/build/utils/routeParsers.js +35 -0
- package/build/utils/routeParsers.test.js +66 -1
- package/build/utils/scenarioDrafting.js +207 -360
- package/build/utils/scenarioDrafting.test.js +191 -256
- package/build/utils/trace-parser.js +24 -6
- package/build/utils/trace-parser.test.js +140 -0
- package/node_modules/playwright/lib/mcp/browser/browserServerBackend.js +3 -0
- package/node_modules/playwright/lib/mcp/browser/tab.js +8 -1
- package/node_modules/playwright/lib/mcp/browser/tools/keyboard.js +3 -2
- package/node_modules/playwright/lib/mcp/browser/tools/navigate.js +1 -1
- package/node_modules/playwright/lib/mcp/browser/tools/snapshot.js +4 -4
- package/node_modules/playwright/lib/mcp/browser/tools/tabs.js +5 -4
- package/node_modules/playwright/lib/mcp/browser/tools/wait.js +1 -1
- package/node_modules/playwright/lib/mcp/skyramp/exportTool.js +10 -9
- package/node_modules/playwright/lib/mcp/skyramp/traceRecordingBackend.js +304 -7
- package/node_modules/playwright/lib/mcp/test/skyRampExport.js +128 -20
- package/package.json +2 -2
- package/node_modules/playwright/lib/mcp/terminal/help.json +0 -32
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import * as crypto from "crypto";
|
|
2
|
+
import { AnalysisScope, isDiff, } from "../../types/RepositoryAnalysis.js";
|
|
2
3
|
import { WorkspaceAuthType } from "../../utils/workspaceAuth.js";
|
|
3
|
-
import { buildToolWorkflows, buildTestPatternGuidelines, buildTestQualityCriteria,
|
|
4
|
+
import { buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildToolWorkflows, buildTestPatternGuidelines, buildTestQualityCriteria, buildFewShotExamples, buildVerificationChecklist, buildGenerationRules, getAuthSnippets, MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, } from "./recommendationSections.js";
|
|
4
5
|
import { CATEGORY_PRIORITY, TEST_CATEGORIES } from "../../types/TestRecommendation.js";
|
|
5
6
|
function formatTestLocations(locs) {
|
|
6
7
|
const entries = Object.entries(locs || {});
|
|
@@ -55,15 +56,281 @@ function scenarioCoverageKey(scenario) {
|
|
|
55
56
|
const resource = extractResourceFromPath(primaryStep?.path ?? "");
|
|
56
57
|
return `${resource}::${testType}`;
|
|
57
58
|
}
|
|
59
|
+
// ── Shared budget mandate ──
|
|
60
|
+
/**
 * Builds the one-line budget mandate shared by the recommendation prompts.
 *
 * @param {number} topN - Total number of recommendations to produce.
 * @param {number} generateCount - How many of those are to be generated.
 * @returns {string} Bold markdown sentence stating the total, the generate
 *   count, and the remainder reported as additionalRecommendations (never
 *   negative).
 */
function buildBudgetMandate(topN, generateCount) {
    // The remainder is clamped at zero so an oversized generateCount never yields a negative count.
    const remainder = Math.max(0, topN - generateCount);
    const mandate = [
        `Produce ${topN} total recommendations:`,
        `${generateCount} to generate + ${remainder} as additionalRecommendations.`,
        `Generate recommendations now.`,
    ].join(" ");
    return `**${mandate}**`;
}
|
|
58
64
|
// ── Execution Plan (replaces pre-ranked + scenarios + heuristic sections) ──
|
|
65
|
+
/**
 * Builds the "repo mode" recommendation section of the prompt: a markdown
 * document listing up to `topN` test recommendations grouped by test type,
 * followed by type-mix rules, optional supplement guidance and enrichment notes.
 *
 * For front-end projects, E2E and UI placeholder entries are pre-allocated
 * (15% of `topN` each, at least 1, E2E capped at half of `topN`) and rendered
 * before the backend sections; backend ranks continue after the placeholders.
 *
 * Scored items whose resolved test type is not one of e2e / ui / integration /
 * contract are not rendered.
 *
 * @param {Array<{scenario: object}>} scored - Pre-ranked scenarios; each `scenario`
 *   is read for `scenarioName`, `description`, optional `testType`, and `steps`
 *   (`order`, `method`, `path`, `expectedStatusCode`, `description`, optional `requestBody`).
 * @param {number} maxGen - Not read by this function.
 * @param {number} topN - Total number of recommendations to present.
 * @param {string} baseUrl - Base URL prepended to step paths in tool calls.
 * @param {string} authHeaderValue - Auth header name; when falsy a "check source" placeholder is emitted.
 * @param {string} authSchemeSnippet - Text appended after the auth header in contract/scenario tool calls.
 * @param {string} authTypeValue - Workspace auth type; any value other than "none"/empty suppresses
 *   the authHeader argument on the generation calls that use the header-only reference.
 * @param {*} seed - Not read by this function.
 * @param {*} endpointCount - Not read by this function.
 * @param {*} isUIOnlyPR - Not read by this function.
 * @param {boolean} [hasFrontendChanges=false] - Not read by this function.
 * @param {boolean} [hasTraces=false] - Not read by this function.
 * @param {boolean} [isFrontendProject=false] - Reserve E2E/UI placeholder slots.
 * @param {boolean} [isFrontendOnlyProject=false] - Use the frontend-only wording for the type mix.
 * @returns {string} The rendered markdown section.
 */
function buildFullRepoRecommendations(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false, isFrontendProject = false, isFrontendOnlyProject = false) {
    const BODY_METHODS = ["POST", "PUT", "PATCH"];
    const TYPE_ORDER = ["e2e", "ui", "integration", "contract"];
    const TYPE_LABEL = {
        e2e: "E2E", ui: "UI", integration: "Integration", contract: "Contract",
    };
    const UNKNOWN_AUTH_REF = `, authHeader: <check OpenAPI securitySchemes or auth middleware; "" if confirmed unauthenticated>`;
    // Full-repo mode only — percentage-based UI/E2E slot targets (15% each, floor 1).
    const frontendSlotTarget = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
    const minE2ESlots = Math.min(frontendSlotTarget, Math.floor(topN / 2));
    const minUISlots = Math.min(frontendSlotTarget, Math.max(0, topN - minE2ESlots));
    const authRef = authHeaderValue
        ? `, authHeader: "${authHeaderValue}"${authSchemeSnippet}`
        : UNKNOWN_AUTH_REF;
    const hasWorkspaceAuthType = !!authTypeValue && authTypeValue !== "none";
    const scenarioAuthRef = authRef;
    // Header-only variant: omitted entirely when a workspace auth type is configured.
    const authHeaderOnlyRef = hasWorkspaceAuthType
        ? ""
        : authHeaderValue
            ? `, authHeader: "${authHeaderValue}"`
            : UNKNOWN_AUTH_REF;
    // Supplement count for full-repo mode
    const supplementCount = topN - Math.min(scored.length, topN);
    const toTitle = (name) => name.replace(/-/g, " ").replace(/\b\w/g, c => c.toUpperCase());
    // A scenario without an explicit type is a contract test when it has exactly one step.
    const resolveTestType = (scenario) => scenario.testType ?? (scenario.steps.length === 1 ? "contract" : "integration");
    // Embed a value as a quoted JSON string literal. Double-stringifying escapes
    // backslashes as well as quotes, so bodies that contain either still produce
    // a well-formed literal (a bare quote replacement does not).
    const toQuotedJson = (value) => JSON.stringify(JSON.stringify(value));
    // baseUrl is the same for every item, so parse it once. Best-effort: when it is
    // not a valid absolute URL each scenario falls back to its own name.
    let baseUrlHost = null;
    try {
        baseUrlHost = new URL(baseUrl).hostname;
    }
    catch { /* keep per-scenario fallback */ }
    // For full-stack repos, carve out E2E and UI slots before filling with backend tests.
    const backendSlotCount = isFrontendProject
        ? Math.max(0, topN - minE2ESlots - minUISlots)
        : topN;
    const byType = new Map(TYPE_ORDER.map(t => [t, []]));
    for (const item of scored.slice(0, backendSlotCount)) {
        // Items with a type outside TYPE_ORDER are intentionally left out of the output.
        byType.get(resolveTestType(item.scenario))?.push(item);
    }
    const renderContractItem = (s, rank) => {
        const step = s.steps[0];
        const endpointURL = `${baseUrl}${step.path}`;
        const dataParam = BODY_METHODS.includes(step.method)
            ? `, requestData: <${step.method} ${step.path} required fields from source code>`
            : "";
        return [
            `**${rank}. ${toTitle(s.scenarioName)}**`,
            ` ${s.description}`,
            ` ${step.method} ${step.path} \u2192 ${step.expectedStatusCode}`,
            ` Tool: \`skyramp_contract_test_generation({ endpointURL: "${endpointURL}", method: "${step.method}"${authRef}${dataParam} })\``,
            ` From source: fill in requestData field names and the specific production boundary this validates`,
        ].join("\n");
    };
    const renderTraceToolCalls = (s, testType) => {
        // E2E and UI need browser recording first, then generation
        const frontendUrl = baseUrl.replace(/\/api.*$/, "") || baseUrl;
        const zipPath = `<repositoryPath>/.skyramp/${s.scenarioName}_trace.zip`;
        if (testType === "ui") {
            return [
                ` 1. browser_navigate({ url: "${frontendUrl}" })`,
                ` 2. Interact with the changed components (browser_click, browser_type, browser_fill_form, etc.)`,
                ` 3. browser_snapshot() after each key interaction`,
                ` 4. skyramp_export_zip({ outputPath: "${zipPath}" }) — use absolute path`,
                ` 5. skyramp_ui_test_generation({ playwrightInput: "${zipPath}"${authHeaderOnlyRef} })`,
            ].join("\n");
        }
        return [
            ` 1. browser_navigate({ url: "${frontendUrl}" }) — record frontend trace`,
            ` 2. Interact with the user journey described above`,
            ` 3. skyramp_export_zip({ outputPath: "${zipPath}" }) — use absolute path`,
            ` 4. Capture backend trace JSON separately (skyramp_start_trace_collection / skyramp_stop_trace_collection)`,
            ` 5. skyramp_e2e_test_generation({ playwrightInput: "${zipPath}", trace: "<backend trace path>"${authHeaderOnlyRef} })`,
        ].join("\n");
    };
    const renderIntegrationToolCalls = (s) => {
        // Integration: use batch scenario tool (all steps in one call)
        const destinationHost = baseUrlHost ?? s.scenarioName;
        const batchSteps = s.steps.map(st => {
            let dataParam = "";
            if (BODY_METHODS.includes(st.method)) {
                dataParam = st.requestBody && Object.keys(st.requestBody).length > 0
                    ? `, requestBody: ${toQuotedJson(st.requestBody)}`
                    : `, requestBody: <${st.method} ${st.path} required fields from source code>`;
            }
            return ` { method: "${st.method}", path: "${st.path}", statusCode: ${st.expectedStatusCode}${dataParam} }`;
        }).join(",\n");
        return [
            ` skyramp_batch_scenario_test_generation({ scenarioName: "${s.scenarioName}", destination: "${destinationHost}", baseURL: "${baseUrl}"${scenarioAuthRef}, steps: [\n${batchSteps}\n ] })`,
            ` skyramp_integration_test_generation({ scenarioFile: <filePath returned by skyramp_batch_scenario_test_generation above>${authHeaderOnlyRef} })`,
        ].join("\n");
    };
    const renderItem = (item, rank) => {
        const s = item.scenario;
        const testType = resolveTestType(s);
        if (testType === "contract") {
            return renderContractItem(s, rank);
        }
        const stepLines = s.steps.map(st => {
            const bodyHint = BODY_METHODS.includes(st.method)
                ? ` \u2014 body: <${st.method} ${st.path} required fields from source>`
                : "";
            return ` ${st.order}. ${st.method} ${st.path} \u2192 ${st.expectedStatusCode}: ${st.description}${bodyHint}`;
        }).join("\n");
        const toolCallsBlock = testType === "e2e" || testType === "ui"
            ? renderTraceToolCalls(s, testType)
            : renderIntegrationToolCalls(s);
        return [
            `**${rank}. ${toTitle(s.scenarioName)}**`,
            ` ${s.description}`,
            ` Steps:`,
            stepLines,
            ` Tool calls:`,
            toolCallsBlock,
            ` From source: fill in requestBody field values and assert all computed response fields`,
        ].join("\n");
    };
    // Pre-allocate E2E and UI placeholder sections for full-stack repos.
    const e2eSectionParts = [];
    const uiSectionParts = [];
    if (isFrontendProject) {
        for (let i = 0; i < minE2ESlots; i++) {
            const rank = i + 1;
            e2eSectionParts.push(`**${rank}. E2E User Journey ${i + 1}**\n` +
                ` End-to-end test covering a complete user journey through the frontend and backend.\n` +
                ` To generate: record a browser trace, then call the generation tool.\n` +
                ` browser_navigate({ url: "${baseUrl}" }) \u2192 exercise key user flow \u2192 skyramp_export_zip({ outputPath: "<repo>/.skyramp/e2e_journey_${i + 1}.zip" })\n` +
                ` Tool: \`skyramp_e2e_test_generation({ playwrightInput: "<repo>/.skyramp/e2e_journey_${i + 1}.zip"${authHeaderOnlyRef} })\`\n` +
                ` From source: read frontend components and their API calls to identify the highest-value user journey`);
        }
        for (let i = 0; i < minUISlots; i++) {
            const rank = minE2ESlots + i + 1;
            uiSectionParts.push(`**${rank}. UI Component Test ${i + 1}**\n` +
                ` Test key UI component interactions and state changes.\n` +
                ` To generate: record a browser trace, then call the generation tool.\n` +
                ` browser_navigate({ url: "${baseUrl}" }) \u2192 interact with UI components \u2192 skyramp_export_zip({ outputPath: "<repo>/.skyramp/ui_component_${i + 1}.zip" })\n` +
                ` Tool: \`skyramp_ui_test_generation({ playwrightInput: "<repo>/.skyramp/ui_component_${i + 1}.zip"${authHeaderOnlyRef} })\`\n` +
                ` From source: read frontend component files to identify interactions, form submissions, and state transitions`);
        }
    }
    // Render each backend section exactly once. Ranks are global: they continue after
    // the E2E + UI placeholders (both slot counts are 0 for non-frontend projects).
    const backendSections = [];
    let nextRank = minE2ESlots + minUISlots;
    for (const t of TYPE_ORDER) {
        const items = byType.get(t);
        if (items.length === 0) {
            continue;
        }
        const firstRank = nextRank;
        const entries = items.map((item, i) => renderItem(item, firstRank + i + 1)).join("\n\n");
        backendSections.push(`### ${TYPE_LABEL[t]} (${items.length})\n\n${entries}`);
        nextRank += items.length;
    }
    const allSections = [
        ...(e2eSectionParts.length > 0 ? [`### E2E (${e2eSectionParts.length})\n\n${e2eSectionParts.join("\n\n")}`] : []),
        ...(uiSectionParts.length > 0 ? [`### UI (${uiSectionParts.length})\n\n${uiSectionParts.join("\n\n")}`] : []),
        ...backendSections,
    ];
    const sections = allSections.join("\n\n");
    // Shared wording for the minimum E2E / UI counts used by the notes below.
    const frontendMinimums = `at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`)`;
    const frontendTierNote = isFrontendOnlyProject
        ? `\n\n**Frontend repo:** supplement MUST include ${frontendMinimums}. Do NOT add integration or contract tests.`
        : isFrontendProject
            ? `\n\n**Full-stack repo:** supplement MUST include ${frontendMinimums}. Add these before exhausting backend tiers.`
            : "";
    const repoSupplementNote = supplementCount > 0
        ? `
<supplement_guidance>
**When to use:** The pre-ranked sections above contain fewer than ${topN} items. Add exactly ${supplementCount} more using the tiers below — exhaust each tier before moving to the next.

**Tier 1 — Error paths for endpoints already in the list** (highest value, do first):
• Auth boundary (no Authorization header → 403/401) → \`testType: contract, category: security_boundary\`
• Invalid/non-existent IDs (→ 404) → \`testType: contract, category: error_handling\`
• Missing required fields (→ 422) → \`testType: contract, category: data_validation\`
• Boundary values for numeric fields → \`testType: integration, category: data_validation\`
Note: DISCARD unique-constraint scenarios if the storage backend is Redis, MongoDB, or schema-less.

**Tier 2 — Auth coverage for any endpoint not yet covered by Tier 1:**
→ \`testType: contract, category: security_boundary\`

**Tier 3 — Cross-resource integration** (only when one resource's POST body contains another's \`_id\` field):
→ \`testType: integration, category: workflow\`

**Tier 4 — CRUD lifecycle** for any resource not yet covered:
→ \`testType: integration, category: crud\`

**How to fill each item:** Use path parameters in \`{param}\` format. Use real field names from the analysis or handler source — no generic placeholders. Describe behavior in API terms (HTTP method, path, status code), not storage internals.${frontendTierNote}
</supplement_guidance>`
        : "";
    const typeMixText = isFrontendOnlyProject
        ? `This is a frontend repo. Focus on E2E and UI tests only. Include ${frontendMinimums}. Do NOT add integration or contract tests.`
        : isFrontendProject
            ? `This is a full-stack repo. Coverage ranking: E2E > UI > Integration > Contract. Include ${frontendMinimums}, in addition to backend integration and contract tests.`
            : `Focus on integration and contract tests for all API endpoints.`;
    return `## Test Recommendations — ${topN} total (grouped by test type)

> Repo mode — no tests are executed. Ranked by risk within each type.
> To generate any item: read the handler source, fill \`<…from source>\` placeholders with real values, then call the tool.

${sections}

**Test type mix — MANDATORY. No smoke tests. No fuzz tests. Only: integration, contract, E2E, UI.**
${typeMixText}

${repoSupplementNote}

**Present up to ${topN} recommendations.** Prioritize quality — only include a recommendation if it adds genuine new coverage. If fewer than ${topN} high-value tests exist for this codebase, stop at the last useful item rather than padding with trivial ones.

---
<enrichment_notes>
**Path resolution (do this before filling in any tool call):**
Cross-check every endpoint path against the Router Mounting / Nesting section in the analysis above. Sub-routers may be mounted at nested prefixes — e.g. a reviews router with \`@router.get("/")\` may actually be \`GET /api/v1/products/{product_id}/reviews\` if mounted under that prefix. Always use the fully-qualified nested path in tool calls, not the path as it appears in the route file alone.

**Existing test files (check before assigning output filenames):**
See the Existing Tests section above. If a recommendation's primary resource already has a test file listed there, prefer passing an explicit \`output\` filename (e.g. \`output: "orders_integration_test.py"\`) to update the existing file rather than creating a duplicate.

Before filling in tool call parameters for each item, use the analysis data already provided above (endpoint interactions, source context) first. Only read the route handler source code directly when the analysis data does not contain the specific value you need:
- Required request body fields (POST/PUT/PATCH) — use field names from the analysis interactions; read source only if they show \`{}\` or are missing
- Computed/derived response fields and their formulas — assert exact values; read source for formula details not captured in the analysis
- Auth middleware — set authHeader/authScheme from the repository context above; FastAPI HTTPBearer → 403 not 401
- Storage backend — if Redis or schema-less, discard unique-constraint and cascade-delete scenarios
- Delete behavior — hard-delete → 204; soft-delete/cancel → 200

${buildTestQualityCriteria()}

**5-dimension rubric — use to assign priority for supplement items:**
| Dimension | What to assess |
| Production Safety | Guards a critical boundary (auth, unique constraint, cascade delete, data integrity, breaking migration)? → HIGH |
| Bug-Finding Potential | Targets a known failure mode (race condition, data consistency, state transition, cascade effect)? → HIGH |
| User Journey Relevance | Reflects how real users interact (from traces, business flows, critical paths)? → HIGH or MEDIUM |
| Coverage Gap | Addresses an area with zero existing test coverage? → bump up one tier |
| Code Insight | Derived from actual implementation (spotted middleware pattern, N+1 risk, unique constraint)? → bump up one tier |
</enrichment_notes>`;
}
|
|
59
323
|
function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false) {
|
|
60
324
|
// For mixed PRs (frontend + backend), reserve the last GENERATE slot for a UI test
|
|
61
325
|
// so the agent has explicit room to record a browser trace and generate it.
|
|
62
326
|
const reserveUIGenSlot = hasFrontendChanges && !isUIOnlyPR && maxGen > 1;
|
|
63
327
|
const backendGenCount = reserveUIGenSlot ? maxGen - 1 : maxGen;
|
|
64
328
|
const backendBudget = reserveUIGenSlot ? Math.max(topN - 1, 0) : topN;
|
|
65
|
-
|
|
329
|
+
let generateItems = scored.slice(0, Math.min(backendGenCount, scored.length));
|
|
66
330
|
const rawAdditionalItems = scored.slice(backendGenCount, backendBudget);
|
|
331
|
+
// For UI-only PRs with no backend scenarios, ensure at least 1 UI generate slot
|
|
332
|
+
// by injecting a placeholder UI scenario that tells the LLM to record a browser trace.
|
|
333
|
+
const needsUIPlaceholder = isUIOnlyPR && generateItems.length === 0 && hasFrontendChanges;
|
|
67
334
|
// Filter additional items whose primary resource + test type already appear in GENERATE
|
|
68
335
|
const generatedCoverage = new Set(generateItems.map(item => scenarioCoverageKey(item.scenario)));
|
|
69
336
|
const additionalItems = rawAdditionalItems.filter(item => !generatedCoverage.has(scenarioCoverageKey(item.scenario)));
|
|
@@ -83,6 +350,23 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
83
350
|
: authHeaderValue
|
|
84
351
|
? `, authHeader: "${authHeaderValue}"`
|
|
85
352
|
: `, authHeader: <check OpenAPI securitySchemes or auth middleware; "" if confirmed unauthenticated>`;
|
|
353
|
+
// If we need a UI placeholder for UI-only PRs, inject it at the start
|
|
354
|
+
let uiPlaceholderBlock = "";
|
|
355
|
+
if (needsUIPlaceholder) {
|
|
356
|
+
uiPlaceholderBlock = `**#1 — GENERATE** | UI | workflow | new
|
|
357
|
+
Scenario: ui-test-for-changed-frontend-components
|
|
358
|
+
This is a UI-only PR with no backend endpoint changes. Generate UI tests for the changed frontend files.
|
|
359
|
+
|
|
360
|
+
Tool workflow:
|
|
361
|
+
1. Navigate to the app: \`browser_navigate({ url: "<frontend_url>" })\`
|
|
362
|
+
2. Interact with the changed components (read the diff to determine which components changed)
|
|
363
|
+
3. Take snapshots and add assertions: \`browser_snapshot()\`, \`browser_assert()\`
|
|
364
|
+
4. Export the trace: \`skyramp_export_zip({ outputPath: ".skyramp/<component_name>_trace.zip" })\`
|
|
365
|
+
5. Generate the UI test: \`skyramp_ui_test_generation({ playwrightInput: ".skyramp/<component_name>_trace.zip" })\`
|
|
366
|
+
|
|
367
|
+
Do NOT skip this step. UI tests are required for UI-only PRs.
|
|
368
|
+
`;
|
|
369
|
+
}
|
|
86
370
|
const generateBlocks = generateItems.map((item, i) => {
|
|
87
371
|
const rank = i + 1;
|
|
88
372
|
const s = item.scenario;
|
|
@@ -92,12 +376,14 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
92
376
|
const endpointURL = `${baseUrl}${step.path}`;
|
|
93
377
|
const isBodyMethod = ["POST", "PUT", "PATCH"].includes(step.method);
|
|
94
378
|
const dataParam = isBodyMethod
|
|
95
|
-
?
|
|
379
|
+
? (step.requestBody && Object.keys(step.requestBody).length > 0
|
|
380
|
+
? `, requestData: "${JSON.stringify(step.requestBody).replace(/"/g, '\\"')}"`
|
|
381
|
+
: `, requestData: <${step.method} ${step.path} body from source code schemas>`)
|
|
96
382
|
: "";
|
|
97
|
-
return (`**#${rank} — GENERATE** | ${testType} | ${s.category} |
|
|
383
|
+
return (`**#${rank} — GENERATE** | ${testType} | ${s.category} | ${item.novelty}\n` +
|
|
98
384
|
`${step.method} ${step.path} → ${step.expectedStatusCode}\n` +
|
|
99
385
|
`Tool: skyramp_contract_test_generation({ endpointURL: "${endpointURL}", method: "${step.method}"${authRef}${dataParam} })\n` + // contract tests always use full authRef
|
|
100
|
-
`From source: authScheme (OpenAPI securitySchemes or auth middleware)${isBodyMethod ? "; requestData field shapes" : ""}`);
|
|
386
|
+
`From source: authScheme (OpenAPI securitySchemes or auth middleware)${isBodyMethod && !(step.requestBody && Object.keys(step.requestBody).length > 0) ? "; requestData field shapes" : ""}`);
|
|
101
387
|
}
|
|
102
388
|
else {
|
|
103
389
|
// integration / e2e / ui — multi-step scenario pipeline
|
|
@@ -119,15 +405,25 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
119
405
|
const isBodyMethod = ["POST", "PUT", "PATCH"].includes(st.method);
|
|
120
406
|
let dataParam = "";
|
|
121
407
|
if (isBodyMethod) {
|
|
122
|
-
if (st.
|
|
408
|
+
if (st.requestBody && Object.keys(st.requestBody).length > 0) {
|
|
409
|
+
// Enriched scenario — use actual field values directly
|
|
410
|
+
const bodyJson = JSON.stringify(st.requestBody).replace(/"/g, '\\"');
|
|
411
|
+
dataParam = `, requestBody: "${bodyJson}"`;
|
|
412
|
+
}
|
|
413
|
+
else if (st.bodyMustInclude && st.bodyMustInclude.length > 0) {
|
|
123
414
|
const fields = st.bodyMustInclude.join(", ");
|
|
124
|
-
dataParam = `, requestBody: <${st.method} ${st.path} body from source code — MUST include
|
|
415
|
+
dataParam = `, requestBody: <${st.method} ${st.path} body from source code — MUST include [${fields}]. Read handler source for field names, types, and FK references.>`;
|
|
125
416
|
}
|
|
126
417
|
else {
|
|
127
418
|
dataParam = `, requestBody: <${st.method} ${st.path} body from source code schemas>`;
|
|
128
419
|
}
|
|
129
420
|
}
|
|
130
|
-
|
|
421
|
+
let responseParam = "";
|
|
422
|
+
if (!isBodyMethod && st.responseBody && Object.keys(st.responseBody).length > 0) {
|
|
423
|
+
const resJson = JSON.stringify(st.responseBody).replace(/"/g, '\\"');
|
|
424
|
+
responseParam = `, responseBody: "${resJson}"`;
|
|
425
|
+
}
|
|
426
|
+
return ` { method: "${st.method}", path: "${st.path}", statusCode: ${st.expectedStatusCode}${dataParam}${responseParam} }`;
|
|
131
427
|
}).join(",\n");
|
|
132
428
|
let destinationHost = s.scenarioName;
|
|
133
429
|
try {
|
|
@@ -137,15 +433,21 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
137
433
|
catch { /* use scenarioName as fallback */ }
|
|
138
434
|
const toolCalls = ` skyramp_batch_scenario_test_generation({ scenarioName: "${s.scenarioName}", destination: "${destinationHost}", baseURL: "${baseUrl}"${scenarioAuthRef}, steps: [\n${batchSteps}\n ] })`;
|
|
139
435
|
const prereqNote = s.category === "new_endpoint"
|
|
140
|
-
? `\nPrerequisite discovery (
|
|
436
|
+
? `\nPrerequisite discovery (for new_endpoint): Before executing these tool calls, check the endpoint interactions in the analysis above for FK fields (e.g. \`product_id\`, \`user_id\`, \`order_id\`). If not present in the analysis, read the source code for the new endpoint's request body. For each FK field found, prepend a step to the \`steps\` array in \`skyramp_batch_scenario_test_generation\` to create that prerequisite resource first, then chain its \`id\` into the dependent step. If no FK fields exist, proceed with the steps above as-is.`
|
|
437
|
+
: "";
|
|
438
|
+
const bugLine = s.bugCatchingTarget
|
|
439
|
+
? `Bug to catch: ${s.bugCatchingTarget}\n`
|
|
141
440
|
: "";
|
|
142
|
-
return (`**#${rank} — GENERATE** | ${testType} | ${s.category} |
|
|
441
|
+
return (`**#${rank} — GENERATE** | ${testType} | ${s.category} | ${item.novelty}\n` +
|
|
143
442
|
`Scenario: ${s.scenarioName} (${s.steps.length} steps)\n` +
|
|
443
|
+
bugLine +
|
|
144
444
|
`${stepLines}\n` +
|
|
145
445
|
`Tool calls:\n` +
|
|
146
446
|
`${toolCalls}\n` +
|
|
147
447
|
` skyramp_integration_test_generation({ scenarioFile: <use the filePath returned by skyramp_batch_scenario_test_generation above>${authHeaderOnlyRef} })\n` +
|
|
148
|
-
|
|
448
|
+
(s.source === "agent-enriched"
|
|
449
|
+
? `From source: authScheme (OpenAPI securitySchemes or auth middleware)`
|
|
450
|
+
: `From source: requestBody shapes for POST/PUT/PATCH steps; responseBody shapes; authScheme`) +
|
|
149
451
|
prereqNote);
|
|
150
452
|
}
|
|
151
453
|
}).join("\n\n");
|
|
@@ -169,38 +471,45 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
169
471
|
const target = s.steps.length === 1
|
|
170
472
|
? `${s.steps[0].method} ${s.steps[0].path} → ${s.steps[0].expectedStatusCode}`
|
|
171
473
|
: `Scenario: ${s.scenarioName} (${s.steps.map(st => `${st.method} ${st.path}`).join(" → ")})`;
|
|
172
|
-
return `#${rank} [ADDITIONAL] | ${testType} | ${s.category} |
|
|
474
|
+
return `#${rank} [ADDITIONAL] | ${testType} | ${s.category} | ${item.novelty}\n ${target}\n Validates: ${s.description}`;
|
|
173
475
|
}).join("\n\n");
|
|
174
476
|
const uiSlotLine = needsUISlot ? (() => {
|
|
175
477
|
const rank = maxGen + backendAdditionalItems.length + 1;
|
|
176
478
|
const traceNote = hasTraces
|
|
177
479
|
? "Use an existing Playwright `.zip` trace from the repo."
|
|
178
480
|
: "Record a trace using `browser_navigate` + `browser_snapshot` + `skyramp_export_zip`, then call `skyramp_ui_test_generation`.";
|
|
179
|
-
return `\n\n#${rank} [ADDITIONAL] | UI | workflow |
|
|
481
|
+
return `\n\n#${rank} [ADDITIONAL] | UI | workflow | new\n Scenario: ui-interaction-for-changed-components (frontend files changed in this diff)\n Validates: Component-level interaction flow for the changed UI — derive the scenario name and steps from the actual changed frontend files. ${traceNote}`;
|
|
180
482
|
})() : "";
|
|
181
483
|
const e2eSlotLine = needsE2ESlot ? (() => {
|
|
182
484
|
const rank = maxGen + backendAdditionalItems.length + (needsUISlot ? 1 : 0) + 1;
|
|
183
485
|
const traceNote = hasTraces
|
|
184
486
|
? "Call `skyramp_e2e_test_generation` with the discovered trace/recording files."
|
|
185
487
|
: "No traces exist yet — record a backend trace via `skyramp_start_trace_collection` + `skyramp_stop_trace_collection` and a UI trace via Playwright browser tools, then call `skyramp_e2e_test_generation`.";
|
|
186
|
-
return `\n\n#${rank} [ADDITIONAL] | E2E | workflow |
|
|
488
|
+
return `\n\n#${rank} [ADDITIONAL] | E2E | workflow | new\n Scenario: e2e-flow-for-changed-feature (frontend + backend files changed in this diff)\n Validates: Full browser-level flow for the changed UI components end-to-end — derive the scenario name and steps from the actual changed frontend files. ${traceNote}`;
|
|
187
489
|
})() : "";
|
|
188
490
|
const reservedUIGenCount = reserveUIGenSlot ? 1 : 0;
|
|
189
491
|
const supplementCount = topN - generateItems.length - reservedUIGenCount - backendAdditionalItems.length - frontendSlots;
|
|
190
492
|
const supplementNote = supplementCount > 0
|
|
191
|
-
? `\n**REQUIRED — You MUST add ${supplementCount} more to reach the total of ${topN}.**
|
|
493
|
+
? `\n**REQUIRED — You MUST add ${supplementCount} more to reach the total of ${topN}.** For each new or changed endpoint in the GENERATE list, first read the diff source code and identify 2–3 boundary or variation scenarios specific to **this PR\'s logic** — e.g. formula parameters (discount math, price calculation), search/filter constraints (matching vs. empty results, missing required param), required field validation, or edge cases visible in the diff. Draft one scenario per variation. Only after exhausting PR-specific variations, add generic patterns (auth boundary → 401, non-existent ID → 404). Use the same 6-dimension rubric and quality gate to assign priority (HIGH/MEDIUM/LOW), testType, and category.${hasFrontendChanges && !isUIOnlyPR ? " Since this PR has frontend changes, at least 1 of these should be a UI or E2E test targeting the changed components." : ""} Do NOT supplement with tests whose primary endpoint and test type match a GENERATE item — those flows are already covered.`
|
|
192
494
|
: "";
|
|
495
|
+
// ── PR / branch-diff mode: execution plan ────────────────────────────────
|
|
193
496
|
return `## Execution Plan
|
|
194
|
-
Seed: ${seed} | Endpoints: ${endpointCount} | Budget: ${generateItems.length + (reserveUIGenSlot ? 1 : 0)} generate + ${Math.max(topN - generateItems.length - (reserveUIGenSlot ? 1 : 0), 0)} additional = ${topN} total
|
|
497
|
+
Seed: ${seed} | Endpoints: ${endpointCount} | Budget: ${generateItems.length + (reserveUIGenSlot ? 1 : 0) + (needsUIPlaceholder ? 1 : 0)} generate + ${Math.max(topN - generateItems.length - (reserveUIGenSlot ? 1 : 0) - (needsUIPlaceholder ? 1 : 0), 0)} additional = ${topN} total
|
|
195
498
|
|
|
196
|
-
**Step 0 — Existing-test cross-check (
|
|
499
|
+
**Step 0 — Existing-test cross-check (before executing anything)**
|
|
197
500
|
For every GENERATE item below, check its endpoint path and test type against the Existing Tests list (the <existing_tests> section of this prompt).
|
|
198
|
-
- **Contract tests**: If an existing contract test already covers that resource path → UPDATE the existing file instead of creating a new one. This does NOT count toward \`newTestsCreated\` — backfill from ADDITIONAL candidates to fill the open ADD slot
|
|
501
|
+
- **Contract tests**: If an existing contract test already covers that resource path → UPDATE the existing file instead of creating a new one. This does NOT count toward \`newTestsCreated\` — backfill from ADDITIONAL candidates to fill the open ADD slot using this priority order:
|
|
502
|
+
1. **BUG-CATCHING TESTS FIRST (CRITICAL)**: If source code analysis revealed a bug, logic error, or incorrect formula (e.g. discount math adding instead of subtracting, off-by-one errors, missing validation), CREATE A TEST THAT EXPOSES IT. The test SHOULD FAIL — that's the point. Document the bug. Example: if discount formula is wrong, test with discount=20% and assert correct math. If no bug found, skip to #2.
|
|
503
|
+
2. **PR-endpoint edge cases**: Look for integration test candidates covering error paths, boundary values, or alternative scenarios for the SAME endpoints changed in the PR diff. If no suitable candidate exists in ADDITIONAL, derive one from your source-code enrichment findings.
|
|
504
|
+
3. **Same-resource other scenarios**: Other HTTP methods or flows on the same resource group touched by the PR.
|
|
505
|
+
4. **Cross-resource workflows involving the PR endpoint**: Integration scenarios that include the PR's changed endpoint as one of the steps.
|
|
506
|
+
5. **Unrelated endpoint coverage (last resort)**: Tests for endpoints with no connection to the PR diff, only when ALL options above have been exhausted or would only produce UPDATEs (not new files).
|
|
507
|
+
**Avoid backfilling with a test for a completely unrelated resource (e.g. \`POST /reviews\` when the PR only changes \`/orders\`) if any PR-endpoint edge-case integration test is feasible.**
|
|
199
508
|
- **Integration/scenario tests**: Always generate as a new file via the scenario pipeline, even if an existing integration test covers the same resource. A new multi-step scenario is a distinct test. Count it toward \`newTestsCreated\`.
|
|
200
509
|
- **UI tests**: Always generate as a new file. Count toward \`newTestsCreated\`.
|
|
201
510
|
|
|
202
|
-
**Step 1 — Source-Code Enrichment (
|
|
203
|
-
Read the source code for ALL changed files. Look for:
|
|
511
|
+
**Step 1 — Source-Code Enrichment (before executing anything)**
|
|
512
|
+
Read the source code for ALL changed files. Before generating each recommendation, quote the relevant source code in a <source_evidence> block — include the route handler signature, request body schema fields, response shape, and any computed field formulas. Use these quotes to derive tool call parameters. Look for:
|
|
204
513
|
- **Auth middleware** (passport, jwt.verify, authMiddleware, @requires_auth, Depends(get_current_user), @UseGuards, EnsureSessionDep, session middleware) — if found, override \`authHeader\` and \`authScheme\` in scenario and contract tool calls even if workspace.yml says authType: none. Exception: for \`skyramp_integration_test_generation\` with \`scenarioFile\`, omit auth params entirely if workspace has \`api.authType\` set (workspace handles it); if workspace has no \`authType\`, pass \`authHeader\` only.
|
|
205
514
|
- Business rules and formulas (e.g. total_cost = compute * rate + memory * rate)
|
|
206
515
|
- State transitions and domain constraints (e.g. budget cannot drop below current spend)
|
|
@@ -225,15 +534,13 @@ Assign category: ${TEST_CATEGORIES.join(" | ")}
|
|
|
225
534
|
|
|
226
535
|
${buildTestPatternGuidelines()}
|
|
227
536
|
|
|
228
|
-
${buildTestExamples()}
|
|
229
|
-
|
|
230
537
|
INSERT a source-code-derived candidate into the ranked list **only if ALL three conditions are met**:
|
|
231
538
|
1. Priority is HIGH (it guards a critical boundary or would prevent a production incident)
|
|
232
539
|
2. It is specific to THIS codebase — derived from a concrete business rule, formula, or constraint found in the changed files (not a general pattern that applies to any API)
|
|
233
540
|
3. It is not already covered by a structural candidate in the list below
|
|
234
541
|
|
|
235
542
|
If these conditions are not met, add it to ADDITIONAL only — do NOT displace a pre-ranked GENERATE item.
|
|
236
|
-
**CRITICAL-tier items (category: new_endpoint)
|
|
543
|
+
**CRITICAL-tier items (category: new_endpoint) should never be displaced** — they test the actual endpoints introduced in this PR and must always occupy GENERATE slots.
|
|
237
544
|
|
|
238
545
|
When a qualifying candidate is inserted: place it HIGH before MEDIUM before LOW; within the same priority, source-code-derived candidates go BEFORE structural ones. Re-number ranks after insertion. The top ${backendGenCount} become backend GENERATE items.${reserveUIGenSlot ? " The final GENERATE slot is reserved for a UI test and is not taken from this ranked list." : ""}
|
|
239
546
|
|
|
@@ -241,7 +548,23 @@ When a qualifying candidate is inserted: place it HIGH before MEDIUM before LOW;
|
|
|
241
548
|
|
|
242
549
|
**Unique constraints:** Unique-constraint scenarios (duplicate POST → expect 409) are pre-drafted for all resources. Before keeping them, check whether the storage backend actually enforces uniqueness — look for SQL \`UNIQUE\` indexes, Mongoose \`unique: true\`, Prisma \`@unique\`, or explicit duplicate-check logic in the source. If the backend is Redis, an in-memory store, or a schema-less DB with no explicit unique constraint in the changed files, move the unique-constraint scenario to ADDITIONAL with a note that enforcement is unconfirmed — do NOT generate it as a GENERATE item.
|
|
243
550
|
|
|
244
|
-
**Step 2 —
|
|
551
|
+
**Step 2 — Diversity check (using enriched knowledge from Step 1)**
|
|
552
|
+
Review the GENERATE list and verify that each item exercises a **distinct code path** — not just different input values on the same path.
|
|
553
|
+
|
|
554
|
+
**What NOT to do (these are all violations — if you catch yourself doing any of these, STOP and replace one item):**
|
|
555
|
+
- Do NOT generate two integration tests that both send a successful PUT/PATCH to the same endpoint and only differ in the request body values (e.g. 10% discount vs 5% discount vs 100% discount — these are the SAME test with different numbers)
|
|
556
|
+
- Do NOT generate two tests with the same step sequence (e.g. both are POST→PUT→GET or both are POST→PUT) where the only variation is the payload
|
|
557
|
+
- Do NOT count a "boundary value" as a separate test if the code path is identical to the happy path (e.g. discount=100% still returns 200 just like discount=10% — that is the same code path)
|
|
558
|
+
- Do NOT use different scenario names to disguise duplicate tests (e.g. "orders-put-add-items-recalculate" and "orders-put-new-endpoint-happy-path" are duplicates if both POST an order then PUT with items and expect 200)
|
|
559
|
+
|
|
560
|
+
**What TO do — each GENERATE item must exercise a different code path. Good diversity means a mix of:**
|
|
561
|
+
- One **happy-path** integration test (the richest scenario: create prerequisites → call the new endpoint → verify computed fields and child collections)
|
|
562
|
+
- One **error-path** test (trigger a distinct HTTP error status: 404 for non-existent resource, 422 for invalid input, 400 for malformed request — pick whichever the source code actually handles)
|
|
563
|
+
- One **state-variation** test (different operation on the same endpoint that hits different logic: empty items array, removing items instead of adding, updating quantity without changing products)
|
|
564
|
+
|
|
565
|
+
For each duplicate pair found, keep the richer item and replace the other with a test from a different category above. The replacement still targets the same PR endpoint and counts as a GENERATE item. Move the displaced item to ADDITIONAL.
|
|
566
|
+
|
|
567
|
+
**Step 3 — Execute merged plan in rank order**
|
|
245
568
|
Replace any scenario that pairs unrelated resources with one reflecting actual FK relationships in the codebase.
|
|
246
569
|
Use realistic request bodies from source code schemas; verify response data (not just status codes).
|
|
247
570
|
|
|
@@ -258,34 +581,34 @@ ${buildGenerationRules(isUIOnlyPR)}
|
|
|
258
581
|
|
|
259
582
|
**Never mark a recommendation "blocked":** No OpenAPI spec → use source code for shapes. No traces → provide \`skyramp_start_trace_collection\` instructions. No backend trace → use the scenario pipeline.
|
|
260
583
|
|
|
261
|
-
**Critical-category minimum:** At least ${Math.min(MAX_CRITICAL_TESTS, maxGen)} of the ${maxGen} GENERATE items
|
|
584
|
+
**Critical-category minimum:** At least ${Math.min(MAX_CRITICAL_TESTS, maxGen)} of the ${maxGen} GENERATE items should be from HIGH-priority categories (security_boundary, business_rule, data_integrity, breaking_change). The pre-ranked plan below already prioritises this — only override if source-code enrichment reveals a higher-value candidate.
|
|
262
585
|
|
|
263
|
-
### GENERATE (process these EXACTLY as listed, in order —
|
|
586
|
+
### GENERATE (process these EXACTLY as listed, in order — after completing Steps 0–2 above; if Step 0 converts an item to UPDATE, backfill the ADD slot from ADDITIONAL following the priority order in Step 0)
|
|
264
587
|
|
|
265
|
-
${generateBlocks || " (no pre-ranked generate items — draft your own based on endpoint analysis)"}${reserveUIGenSlot ? `
|
|
588
|
+
${generateBlocks || (needsUIPlaceholder ? uiPlaceholderBlock : " (no pre-ranked generate items — draft your own based on endpoint analysis)")}${reserveUIGenSlot ? `
|
|
266
589
|
|
|
267
|
-
**#${generateItems.length + 1} — GENERATE** | UI | workflow |
|
|
590
|
+
**#${generateItems.length + 1} — GENERATE** | UI | workflow | new
|
|
268
591
|
Scenario: ui-interaction-for-changed-components (frontend files changed in this diff)
|
|
269
592
|
Record a browser trace for the changed UI components, then generate a UI test.
|
|
270
593
|
Steps: browser_navigate → browser_snapshot → interact with changed components → browser_assert → skyramp_export_zip → skyramp_ui_test_generation
|
|
271
594
|
This slot is RESERVED — you MUST attempt a UI test here. Only skip if browser_navigate fails (app unreachable).` : ""}
|
|
272
595
|
|
|
273
|
-
**COMPLIANCE CHECK**: Before proceeding, verify your generate list matches the items above. If you plan to generate a scenario with a
|
|
596
|
+
**COMPLIANCE CHECK**: Before proceeding, verify your generate list matches the items above. If you plan to generate a scenario with a different name than what is listed (e.g. you want to generate "order-update-discount-calculation" but the plan says "orders-patch-add-items-recalculate"), STOP — use the plan's scenario name and steps. Add your alternative to ADDITIONAL instead. One retry on failure then skip to next item.
|
|
274
597
|
|
|
275
598
|
### ADDITIONAL (list in additionalRecommendations in this order after Step 1 insertion)
|
|
276
599
|
|
|
277
600
|
${additionalLines || " (none pre-ranked)"}${uiSlotLine}${e2eSlotLine}
|
|
278
601
|
${supplementNote}
|
|
279
602
|
|
|
280
|
-
|
|
603
|
+
${buildBudgetMandate(topN, generateItems.length + (reserveUIGenSlot ? 1 : 0) + (needsUIPlaceholder ? 1 : 0))}
|
|
281
604
|
|
|
282
605
|
## Recommendation Stability
|
|
283
606
|
- **Carry forward** previous additionalRecommendations that still apply — match by scenarioName (multi-step) or endpoint (single-endpoint). Re-derive category and priority from test content.
|
|
284
607
|
- **Only drop** a previous recommendation if its target endpoint was removed, its business logic changed, or it is now covered by a generated test.
|
|
285
608
|
- **Only add** new recommendations for code paths introduced since the last run.`;
|
|
286
609
|
}
|
|
287
|
-
export function buildRecommendationPrompt(analysis, analysisScope =
|
|
288
|
-
const isDiffScope = analysisScope
|
|
610
|
+
export function buildRecommendationPrompt(analysis, analysisScope = AnalysisScope.FullRepo, topN = MAX_RECOMMENDATIONS, prContext, workspaceAuthHeader, workspaceAuthType, maxGenerateOverride, sessionId) {
|
|
611
|
+
const isDiffScope = isDiff(analysisScope);
|
|
289
612
|
const diffContext = analysis.branchDiffContext;
|
|
290
613
|
const openApiSpec = analysis.artifacts?.openApiSpecs?.[0];
|
|
291
614
|
// ── Filter out bot-generated test files from changedFiles ──
|
|
@@ -365,7 +688,7 @@ Modified endpoints:
|
|
|
365
688
|
${fmtEps(diffContext.modifiedEndpoints, (m) => `${m.sourceFile}, ${m.changeType}`)}
|
|
366
689
|
Affected services: ${diffContext.affectedServices.join(", ") || "N/A"}
|
|
367
690
|
|
|
368
|
-
|
|
691
|
+
Focus on tests that validate these changes and how they interact with existing resources.
|
|
369
692
|
`;
|
|
370
693
|
}
|
|
371
694
|
// ── Interactions ──
|
|
@@ -405,7 +728,7 @@ ${detailBlocks}
|
|
|
405
728
|
// ── Scoring ──
|
|
406
729
|
const endpointCount = allEndpoints.reduce((acc, ep) => acc + (ep.methods ?? []).length, 0);
|
|
407
730
|
const baseMaxGen = Math.min(Math.max(maxGenerateOverride ?? (isDiffScope ? MAX_TESTS_TO_GENERATE : topN), 0), topN);
|
|
408
|
-
const maxGen = isUIOnlyPR ? (
|
|
731
|
+
const maxGen = isUIOnlyPR ? Math.max(baseMaxGen, 1) : baseMaxGen;
|
|
409
732
|
const scenarios = analysis.businessContext.draftedScenarios;
|
|
410
733
|
let scored = [];
|
|
411
734
|
let seed = "";
|
|
@@ -460,7 +783,7 @@ No backend API changes detected. Generate UI/E2E tests from the available traces
|
|
|
460
783
|
1. Call \`skyramp_ui_test_generation\` or \`skyramp_e2e_test_generation\` using the trace files
|
|
461
784
|
2. Generate exactly ${maxGen} tests targeting the changed UI flows
|
|
462
785
|
|
|
463
|
-
|
|
786
|
+
${buildBudgetMandate(topN, maxGen)}
|
|
464
787
|
|
|
465
788
|
Do not churn recommendations without cause.
|
|
466
789
|
` : `
|
|
@@ -468,29 +791,31 @@ Do not churn recommendations without cause.
|
|
|
468
791
|
|
|
469
792
|
**Budget: ${maxGen} generate + ${topN - maxGen} additional = ${topN} total**
|
|
470
793
|
|
|
471
|
-
No backend API changes detected
|
|
472
|
-
All ${topN} recommendations go into additionalRecommendations only.
|
|
794
|
+
No backend API changes detected. Record browser traces now and generate UI tests.
|
|
473
795
|
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
796
|
+
**Generate ${maxGen} UI test(s) using browser tools (AI-driven recording):**
|
|
797
|
+
1. Read the changed frontend files to identify which components changed and what interactions they support
|
|
798
|
+
2. \`browser_navigate({ url: "<frontend_url_from_workspace_config>" })\`
|
|
799
|
+
3. Interact with the changed UI components (\`browser_click\`, \`browser_type\`, \`browser_fill_form\`, etc.)
|
|
800
|
+
4. \`browser_snapshot()\` after each interaction that changes the page
|
|
801
|
+
5. \`skyramp_export_zip({ outputPath: "<repositoryPath>/.skyramp/<component>_trace.zip" })\` — use an **absolute** path
|
|
802
|
+
6. \`skyramp_ui_test_generation({ playwrightInput: "<absolute_path_to_zip>" })\`
|
|
480
803
|
|
|
481
|
-
|
|
482
|
-
1. Call \`skyramp_start_trace_collection\` (playwright: true)
|
|
483
|
-
2. Exercise the changed UI flows in the browser
|
|
484
|
-
3. Call \`skyramp_stop_trace_collection\`
|
|
485
|
-
4. Generate with \`skyramp_ui_test_generation\` or \`skyramp_e2e_test_generation\`
|
|
486
|
-
|
|
487
|
-
**You MUST produce EXACTLY ${topN} total recommendations in additionalRecommendations. Do NOT produce fewer. Generate recommendations now.**
|
|
804
|
+
${buildBudgetMandate(topN, maxGen)}
|
|
488
805
|
|
|
489
806
|
Do not churn recommendations without cause.
|
|
490
807
|
`;
|
|
491
808
|
}
|
|
492
809
|
else if (scored.length > 0) {
|
|
493
|
-
|
|
810
|
+
const projectType = analysis.projectClassification.projectType;
|
|
811
|
+
const isFrontendProject = projectType === "full-stack" || projectType === "frontend";
|
|
812
|
+
const isFrontendOnlyProject = projectType === "frontend";
|
|
813
|
+
if (!isDiffScope) {
|
|
814
|
+
mainSection = buildFullRepoRecommendations(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces, isFrontendProject, isFrontendOnlyProject);
|
|
815
|
+
}
|
|
816
|
+
else {
|
|
817
|
+
mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces);
|
|
818
|
+
}
|
|
494
819
|
}
|
|
495
820
|
else {
|
|
496
821
|
mainSection = `
|
|
@@ -502,7 +827,7 @@ No pre-drafted scenarios available. Draft ${maxGen} tests from your analysis of
|
|
|
502
827
|
|
|
503
828
|
For each test: pick the highest-impact endpoint(s), draft a realistic scenario with actual request/response shapes from source code, and execute the same pipeline described in Tool Workflows below.
|
|
504
829
|
|
|
505
|
-
|
|
830
|
+
${buildBudgetMandate(topN, maxGen)}
|
|
506
831
|
|
|
507
832
|
## Recommendation Stability
|
|
508
833
|
- **Carry forward** previous additionalRecommendations that still apply — match by scenarioName (multi-step) or endpoint (single-endpoint). Re-derive category and priority from test content.
|
|
@@ -592,34 +917,64 @@ coverage and to fill gaps:
|
|
|
592
917
|
${historyBody}`;
|
|
593
918
|
}
|
|
594
919
|
// ── Compose all sections ──
|
|
920
|
+
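// Long-context best practice: put all data sections (repository context, branch diff, endpoint interactions, existing tests, PR history) at the top, and all instructions plus the query at the bottom.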
|
|
921
|
+
// See: https://platform.claude.com/docs/en/build-with-claude/prompt-engineering/claude-prompting-best-practices#long-context-prompting
|
|
595
922
|
const scopeNote = isDiffScope
|
|
596
923
|
? "Scoped to current branch changes."
|
|
597
924
|
: "Covers the full repository.";
|
|
598
925
|
return `
|
|
926
|
+
${buildArchitectPreamble(isDiffScope)}
|
|
927
|
+
|
|
599
928
|
${modePreamble}
|
|
600
929
|
|
|
601
930
|
Scope: ${scopeNote}
|
|
602
931
|
|
|
603
932
|
${sourcePriority}
|
|
604
933
|
|
|
934
|
+
<repository_context>
|
|
605
935
|
## Repository Context
|
|
606
936
|
|
|
607
937
|
${repoContext}
|
|
608
938
|
${specNote}
|
|
609
|
-
|
|
939
|
+
</repository_context>
|
|
940
|
+
${diffSection ? `<branch_diff>\n${diffSection}\n</branch_diff>` : ""}
|
|
941
|
+
<endpoint_interactions>
|
|
610
942
|
${interactionSection}
|
|
611
|
-
|
|
612
|
-
|
|
943
|
+
</endpoint_interactions>
|
|
944
|
+
|
|
945
|
+
<existing_tests>
|
|
613
946
|
## Existing Tests
|
|
614
947
|
- Frameworks: ${analysis.existingTests.frameworks.join(", ") || "none"}
|
|
615
948
|
${formatTestLocations(analysis.existingTests.testLocations)}
|
|
949
|
+
</existing_tests>
|
|
950
|
+
${prHistorySection ? `<pr_history>\n${prHistorySection}\n</pr_history>` : ""}
|
|
951
|
+
|
|
952
|
+
<instructions>
|
|
953
|
+
|
|
954
|
+
${buildContextFetchingGuidance(sessionId)}
|
|
955
|
+
|
|
956
|
+
${buildReasoningProtocol()}
|
|
957
|
+
|
|
958
|
+
${isDiffScope ? buildFewShotExamples() : ""}
|
|
616
959
|
|
|
617
|
-
${
|
|
618
|
-
?
|
|
960
|
+
${isDiffScope
|
|
961
|
+
? (isUIOnlyPR
|
|
962
|
+
? `## How to Generate Tests — Tool Workflows
|
|
963
|
+
|
|
964
|
+
**For UI tests (AI-driven recording):**
|
|
965
|
+
1. \`browser_navigate({ url: "<frontend_url>" })\`
|
|
966
|
+
2. Interact with changed components (\`browser_click\`, \`browser_type\`, \`browser_fill_form\`, etc.)
|
|
967
|
+
3. \`browser_snapshot()\` after each interaction
|
|
968
|
+
4. \`skyramp_export_zip({ outputPath: "<repositoryPath>/.skyramp/<component>_trace.zip" })\` — absolute path
|
|
969
|
+
5. \`skyramp_ui_test_generation({ playwrightInput: "<absolute_path_to_zip>" })\`
|
|
970
|
+
|
|
971
|
+
**For E2E tests:** Same browser recording flow, then call \`skyramp_e2e_test_generation\` with the zip.`
|
|
972
|
+
: buildToolWorkflows(authHeaderValue, authTypeValue))
|
|
973
|
+
: ""}
|
|
974
|
+
|
|
975
|
+
${mainSection}
|
|
619
976
|
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
Without traces, list as additionalRecommendations with instructions to record traces first.`
|
|
623
|
-
: buildToolWorkflows(authHeaderValue, authTypeValue)}
|
|
977
|
+
${isDiffScope ? buildVerificationChecklist(topN, maxGen) : ""}
|
|
978
|
+
</instructions>
|
|
624
979
|
`;
|
|
625
980
|
}
|